[R] silhouette: clustering labels have to be consecutive integers starting from 1?

Tao Shi shitao at hotmail.com
Wed Oct 10 02:35:41 CEST 2007


Hi list,

When I was using 'silhouette' from the 'cluster' package to calculate 
clustering performances, R crashed.  I traced the problem to the fact that 
my clustering labels only have 2's and 3's.  When I replaced them with 1's 
and 2's, the problem was solved.  Is the function purposely written in this 
way, so that when I have clustering labels "2" and "3", for example, the 
function somehow takes the 'missing' cluster "1" into account when it 
calculates silhouette widths?

Thanks,

....Tao

##============================================
## sorry about the long attachment

>R.Version()
$platform
[1] "i386-pc-mingw32"

$arch
[1] "i386"

$os
[1] "mingw32"

$system
[1] "i386, mingw32"

$status
[1] ""

$major
[1] "2"

$minor
[1] "5.1"

$year
[1] "2007"

$month
[1] "06"

$day
[1] "27"

$`svn rev`
[1] "42083"

$language
[1] "R"

$version.string
[1] "R version 2.5.1 (2007-06-27)"

>library(cluster)
>cl1   ## clustering labels
  [1] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 2 2 2 2 2 2 2 2 2
[30] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
[59] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
[88] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
[117] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
[146] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
[175] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
[204] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
>x1  ## 1-d input vector
  [1] 1.5707963 1.5707963 1.5707963 1.5707963 1.5707963
  [6] 1.5707963 1.5707963 1.5707963 1.5707963 1.5707963
[11] 1.5707963 1.5707963 1.5707963 1.5707963 1.5707963
[16] 1.5707963 1.5707963 1.5707963 1.5707963 1.5707963
[21] 1.0163758 0.7657763 0.7370084 0.6999689 0.7366476
[26] 0.7883921 0.6925395 0.7729240 0.7202391 0.7910149
[31] 0.7397698 0.7958092 0.6978596 0.7350255 0.7294362
[36] 0.6125713 0.7174000 0.7413046 0.7044205 0.7568104
[41] 0.7048469 0.7334515 0.7143170 0.7002311 0.7540981
[46] 0.7627527 0.7712762 0.8193611 0.7801148 0.9061762
[51] 0.8248195 0.7932630 0.7248037 0.7423547 0.6419314
[56] 0.6001092 0.7572272 0.7631742 0.7085384 0.8710853
[61] 0.6589563 0.7464943 0.7487340 0.7751280 0.7946542
[66] 0.7666081 0.8508109 0.8314308 0.7442471 0.8006093
[71] 0.7949156 0.7852447 0.7630048 0.7104764 0.6768218
[76] 0.6806351 0.7255355 0.7431389 0.7523627 0.7670515
[81] 0.8118214 0.7215615 0.8186164 0.6941610 0.8285453
[86] 0.8395170 0.8088044 0.8182706 0.7550723 0.7948639
[91] 0.7204830 0.7109068 0.7756949 0.6837856 0.7055604
[96] 0.6126666 0.7201964 0.6849890 0.7779753 0.7845284
[101] 0.9370788 0.8242935 0.6908860 0.6446151 0.7660386
[106] 0.8141526 0.8111984 0.8624186 0.7865335 0.8213035
[111] 0.8059171 0.6735751 0.7815353 0.6972508 0.6699396
[116] 0.6293971 0.7475913 0.7700821 0.8258339 0.8096144
[121] 0.7058171 0.7516635 0.7323909 0.7229136 0.8344846
[126] 0.7205433 0.8287774 0.8322097 0.7767547 0.7402277
[131] 0.7939879 0.7797308 0.7112453 0.7091554 0.6417382
[136] 0.6369171 0.7059020 0.7496380 0.7298359 0.8202566
[141] 0.7331830 0.7344492 0.8316894 0.7323979 0.7977615
[146] 0.7841205 0.7587060 0.8056685 0.7895643 0.8140731
[151] 0.7890221 0.8016008 0.7381577 0.6936453 0.7133525
[156] 0.7121459 0.6851448 0.7946275 0.8077618 0.7899059
[161] 0.7128826 0.7546289 0.7042451 0.6606403 0.7525233
[166] 0.7527548 0.8098887 0.8254190 0.7873064 0.8139340
[171] 0.7903462 0.8377651 0.6709983 0.7423632 0.6632082
[176] 0.5676717 0.6925125 0.7077083 0.7488877 0.7630604
[181] 0.7843001 0.7524471 0.6871823 0.7144443 0.7692206
[186] 0.8690710 0.9282786 0.7844991 0.7094671 0.7578409
[191] 0.8026643 0.7759241 0.6997376 0.6167209 0.6682289
[196] 0.6572018 0.7615807 0.7415752 0.7659161 0.7040360
[201] 0.6874460 0.7052109 0.8290970 0.6915149 0.7173107
[206] 0.7848961 0.7943846 0.8437946 0.7817344 0.8867006
[211] 0.7575857 0.8390473 0.7382348 0.6789859 0.7129010
[216] 0.6938173 0.7384170 0.6747648 0.7203337 0.7278963
>  silhouette(cl1, dist(x1)^2)  #####  CRASHED! ######
>silhouette(ifelse(cl1==3,2,1), dist(x1)^2)
       cluster neighbor sil_width
  [1,]       2        1 1.0000000
  [2,]       2        1 1.0000000
  [3,]       2        1 1.0000000
  [4,]       2        1 1.0000000
  [5,]       2        1 1.0000000
  [6,]       2        1 1.0000000
  [7,]       2        1 1.0000000
  [8,]       2        1 1.0000000
  [9,]       2        1 1.0000000
[10,]       2        1 1.0000000
[11,]       2        1 1.0000000
[12,]       2        1 1.0000000
[13,]       2        1 1.0000000
[14,]       2        1 1.0000000
[15,]       2        1 1.0000000
[16,]       2        1 1.0000000
[17,]       2        1 1.0000000
[18,]       2        1 1.0000000
[19,]       2        1 1.0000000
[20,]       2        1 1.0000000
[21,]       1        2 0.7592857
[22,]       1        2 0.9934455
[23,]       1        2 0.9937880
[24,]       1        2 0.9909544
[25,]       1        2 0.9937769
[26,]       1        2 0.9912442
[27,]       1        2 0.9900156
[28,]       1        2 0.9929499
[29,]       1        2 0.9929125
[30,]       1        2 0.9908637
[31,]       1        2 0.9938610
[32,]       1        2 0.9900958
[33,]       1        2 0.9906993
[34,]       1        2 0.9937227
[35,]       1        2 0.9934823
[36,]       1        2 0.9740954
[37,]       1        2 0.9926948
[38,]       1        2 0.9938924
[39,]       1        2 0.9914623
[40,]       1        2 0.9938250
[41,]       1        2 0.9915088
[42,]       1        2 0.9936633
[43,]       1        2 0.9924367
[44,]       1        2 0.9909855
[45,]       1        2 0.9938891
[46,]       1        2 0.9936028
[47,]       1        2 0.9930799
[48,]       1        2 0.9848568
[49,]       1        2 0.9922685
[50,]       1        2 0.9371272
[51,]       1        2 0.9832647
[52,]       1        2 0.9905154
[53,]       1        2 0.9932217
[54,]       1        2 0.9939101
[55,]       1        2 0.9810071
[56,]       1        2 0.9708675
[57,]       1        2 0.9938131
[58,]       1        2 0.9935827
[59,]       1        2 0.9918943
[60,]       1        2 0.9628701
[61,]       1        2 0.9844965
[62,]       1        2 0.9939491
[63,]       1        2 0.9939495
[64,]       1        2 0.9927610
[65,]       1        2 0.9902895
[66,]       1        2 0.9933968
[67,]       1        2 0.9734481
[68,]       1        2 0.9811285
[69,]       1        2 0.9939341
[70,]       1        2 0.9892304
[71,]       1        2 0.9902461
[72,]       1        2 0.9916649
[73,]       1        2 0.9935909
[74,]       1        2 0.9920846
[75,]       1        2 0.9876779
[76,]       1        2 0.9882868
[77,]       1        2 0.9932665
[78,]       1        2 0.9939213
[79,]       1        2 0.9939182
[80,]       1        2 0.9933699
[81,]       1        2 0.9868129
[82,]       1        2 0.9930074
[83,]       1        2 0.9850624
[84,]       1        2 0.9902300
[85,]       1        2 0.9820895
[86,]       1        2 0.9781906
[87,]       1        2 0.9875197
[88,]       1        2 0.9851569
[89,]       1        2 0.9938688
[90,]       1        2 0.9902547
[91,]       1        2 0.9929304
[92,]       1        2 0.9921257
[93,]       1        2 0.9927096
[94,]       1        2 0.9887702
[95,]       1        2 0.9915856
[96,]       1        2 0.9741195
[97,]       1        2 0.9929094
[98,]       1        2 0.9889500
[99,]       1        2 0.9924910
[100,]       1        2 0.9917552
[101,]       1        2 0.9047049
[102,]       1        2 0.9834247
[103,]       1        2 0.9897916
[104,]       1        2 0.9815845
[105,]       1        2 0.9934304
[106,]       1        2 0.9862375
[107,]       1        2 0.9869624
[108,]       1        2 0.9677353
[109,]       1        2 0.9914973
[110,]       1        2 0.9843076
[111,]       1        2 0.9881568
[112,]       1        2 0.9871393
[113,]       1        2 0.9921114
[114,]       1        2 0.9906240
[115,]       1        2 0.9865148
[116,]       1        2 0.9781846
[117,]       1        2 0.9939511
[118,]       1        2 0.9931681
[119,]       1        2 0.9829519
[120,]       1        2 0.9873341
[121,]       1        2 0.9916130
[122,]       1        2 0.9939273
[123,]       1        2 0.9936196
[124,]       1        2 0.9930999
[125,]       1        2 0.9800620
[126,]       1        2 0.9929347
[127,]       1        2 0.9820138
[128,]       1        2 0.9808614
[129,]       1        2 0.9926103
[130,]       1        2 0.9938711
[131,]       1        2 0.9903987
[132,]       1        2 0.9923097
[133,]       1        2 0.9921578
[134,]       1        2 0.9919558
[135,]       1        2 0.9809652
[136,]       1        2 0.9799023
[137,]       1        2 0.9916220
[138,]       1        2 0.9939454
[139,]       1        2 0.9935022
[140,]       1        2 0.9846059
[141,]       1        2 0.9936526
[142,]       1        2 0.9937017
[143,]       1        2 0.9810402
[144,]       1        2 0.9936199
[145,]       1        2 0.9897557
[146,]       1        2 0.9918058
[147,]       1        2 0.9937665
[148,]       1        2 0.9882099
[149,]       1        2 0.9910776
[150,]       1        2 0.9862575
[151,]       1        2 0.9911553
[152,]       1        2 0.9890393
[153,]       1        2 0.9938209
[154,]       1        2 0.9901624
[155,]       1        2 0.9923515
[156,]       1        2 0.9922418
[157,]       1        2 0.9889731
[158,]       1        2 0.9902939
[159,]       1        2 0.9877542
[160,]       1        2 0.9910280
[161,]       1        2 0.9923092
[162,]       1        2 0.9938784
[163,]       1        2 0.9914431
[164,]       1        2 0.9848184
[165,]       1        2 0.9939159
[166,]       1        2 0.9939125
[167,]       1        2 0.9872706
[168,]       1        2 0.9830805
[169,]       1        2 0.9913937
[170,]       1        2 0.9862925
[171,]       1        2 0.9909633
[172,]       1        2 0.9788584
[173,]       1        2 0.9866989
[174,]       1        2 0.9939102
[175,]       1        2 0.9853007
[176,]       1        2 0.9617883
[177,]       1        2 0.9900120
[178,]       1        2 0.9918102
[179,]       1        2 0.9939489
[180,]       1        2 0.9935882
[181,]       1        2 0.9917836
[182,]       1        2 0.9939170
[183,]       1        2 0.9892708
[184,]       1        2 0.9924478
[185,]       1        2 0.9932287
[186,]       1        2 0.9640487
[187,]       1        2 0.9150126
[188,]       1        2 0.9917589
[189,]       1        2 0.9919865
[190,]       1        2 0.9937946
[191,]       1        2 0.9888295
[192,]       1        2 0.9926884
[193,]       1        2 0.9909269
[194,]       1        2 0.9751339
[195,]       1        2 0.9862132
[196,]       1        2 0.9841566
[197,]       1        2 0.9936557
[198,]       1        2 0.9938973
[199,]       1        2 0.9934375
[200,]       1        2 0.9914201
[201,]       1        2 0.9893087
[202,]       1        2 0.9915481
[203,]       1        2 0.9819092
[204,]       1        2 0.9898774
[205,]       1        2 0.9926876
[206,]       1        2 0.9917091
[207,]       1        2 0.9903339
[208,]       1        2 0.9764847
[209,]       1        2 0.9920887
[210,]       1        2 0.9526866
[211,]       1        2 0.9938025
[212,]       1        2 0.9783714
[213,]       1        2 0.9938230
[214,]       1        2 0.9880267
[215,]       1        2 0.9923108
[216,]       1        2 0.9901850
[217,]       1        2 0.9938279
[218,]       1        2 0.9873388
[219,]       1        2 0.9929195
[220,]       1        2 0.9934017
attr(,"Ordered")
[1] FALSE
attr(,"call")
silhouette.default(x = ifelse(cl1 == 3, 2, 1), dist = dist(x1)^2)
attr(,"class")
[1] "silhouette"

## other examples
>set.seed(1234)
>cl.tmp <- rep(2:3, each=5)
>x.tmp <- c(rep(-1,5), abs(rnorm(5)+3))
>silhouette(cl.tmp, dist(x.tmp))
      cluster neighbor  sil_width
[1,]       2        1        NaN
[2,]       2        1        NaN
[3,]       2        1        NaN
[4,]       2        1        NaN
[5,]       2        1        NaN
[6,]       3        2 -0.5736515
[7,]       3        2 -0.1557143
[8,]       3        2 -0.2922523
[9,]       3        2 -0.8340174
[10,]       3        2 -0.1511875
attr(,"Ordered")
[1] FALSE
attr(,"call")
silhouette.default(x = cl.tmp, dist = dist(x.tmp))
attr(,"class")
[1] "silhouette"
>silhouette(ifelse(cl.tmp==2,1,2), dist(x.tmp))
      cluster neighbor  sil_width
[1,]       1        2  1.0000000
[2,]       1        2  1.0000000
[3,]       1        2  1.0000000
[4,]       1        2  1.0000000
[5,]       1        2  1.0000000
[6,]       2        1  0.4136253
[7,]       2        1  0.7038917
[8,]       2        1  0.6467668
[9,]       2        1 -0.3360695
[10,]       2        1  0.7054709
attr(,"Ordered")
[1] FALSE
attr(,"call")
silhouette.default(x = ifelse(cl.tmp == 2, 1, 2), dist = dist(x.tmp))
attr(,"class")
[1] "silhouette"
>silhouette(ifelse(cl.tmp==2,1,3), dist(x.tmp))
      cluster neighbor  sil_width
[1,]       1        2        NaN
[2,]       1        2        NaN
[3,]       1        2        NaN
[4,]       1        2        NaN
[5,]       1        2        NaN
[6,]       3        1 -0.7694686
[7,]       3        1 -0.8167313
[8,]       3        1 -0.6054665
[9,]       3        1 -0.9037412
[10,]       3        1  0.1875360
attr(,"Ordered")
[1] FALSE
attr(,"call")
silhouette.default(x = ifelse(cl.tmp == 2, 1, 3), dist = dist(x.tmp))
attr(,"class")
[1] "silhouette"

_________________________________________________________________

It's free. http://im.live.com/messenger/im/home/?source=TAGHM



More information about the R-help mailing list