basic usage

Aviezer Lifshitz

2024-05-15

Basic usage of the package.

Basic usage

First, let’s simulate 5 clusters of 100 points each in 2 dimensions, normally distributed around centers 1 to 5 with a standard deviation of 0.3:

data <- simulate_data(n = 100, sd = 0.3, nclust = 5, dims = 2)
data
##      id        V1        V2 true_clust
## 1     1 1.0667265 1.2974904          1
## 2     2 1.5669030 1.0507419          1
## 3     3 0.9216493 0.9818881          1
## 4     4 1.1005807 1.0826089          1
## 5     5 1.3043060 0.7386772          1
## 6     6 0.5905979 1.2199023          1
## 7     7 0.5640716 1.2009019          1
## 8     8 0.8026433 1.0143766          1
## 9     9 1.0162398 1.7195164          1
## 10   10 0.5520354 1.0998368          1
## 11   11 1.1953140 1.3663256          1
## 12   12 1.5992284 1.3281665          1
## 13   13 1.2955390 1.1698961          1
## 14   14 1.1234638 0.9912329          1
## 15   15 1.0126584 0.9744042          1
## 16   16 1.3119844 0.8811811          1
## 17   17 1.0552372 1.6257848          1
## 18   18 0.7999655 0.9811239          1
## 19   19 0.9281041 1.1814423          1
## 20   20 1.4051395 1.0918538          1
## 21   21 1.2337457 0.8609941          1
## 22   22 1.6904613 1.4090263          1
## 23   23 0.6058851 0.8359700          1
## 24   24 1.5577314 1.3954873          1
## 25   25 1.0574108 0.8103830          1
## 26   26 1.0821083 1.3491419          1
## 27   27 1.2773750 1.0908505          1
## 28   28 0.9083436 0.9377400          1
## 29   29 1.4753087 1.1393456          1
## 30   30 0.6261183 1.0678832          1
## 31   31 0.9971557 0.7110518          1
## 32   32 1.1286987 0.8751926          1
## 33   33 0.8602882 1.2884457          1
## 34   34 0.8720141 1.3431411          1
## 35   35 0.8564513 1.3415309          1
## 36   36 1.2779707 1.2513594          1
## 37   37 1.3410942 1.1915235          1
## 38   38 1.3170715 0.8314424          1
## 39   39 1.1620800 0.9975508          1
## 40   40 0.9644723 0.4914671          1
## 41   41 0.9637582 1.1634533          1
## 42   42 1.0621891 1.2628968          1
## 43   43 0.4116396 1.3365347          1
## 44   44 0.6080422 1.0226581          1
## 45   45 1.0981710 1.1814792          1
## 46   46 1.4061822 1.1482983          1
## 47   47 1.5635449 1.3122412          1
## 48   48 0.6009701 1.2520516          1
## 49   49 1.3759020 1.0235787          1
## 50   50 1.2488680 1.0132348          1
## 51   51 0.7966284 0.8174224          1
## 52   52 1.2437039 1.1594962          1
## 53   53 0.5141690 1.0295818          1
## 54   54 0.6242043 0.3423182          1
## 55   55 0.8449093 1.0426204          1
## 56   56 1.1172540 0.9446881          1
## 57   57 0.8123217 0.9360699          1
## 58   58 1.0578361 0.6289752          1
## 59   59 1.3776524 0.6354165          1
## 60   60 1.1025650 1.0735222          1
## 61   61 0.8449643 0.9240000          1
## 62   62 0.8131473 1.0000371          1
## 63   63 0.9731536 1.0191583          1
## 64   64 1.1998901 1.0817627          1
## 65   65 0.8762270 1.4395907          1
## 66   66 1.0997815 0.7537613          1
## 67   67 0.9370728 1.0533252          1
## 68   68 0.8842632 0.8134801          1
## 69   69 1.0250015 1.0604346          1
## 70   70 0.9508633 1.4304047          1
## 71   71 1.3018849 0.7042134          1
## 72   72 1.0309243 0.9791680          1
## 73   73 1.4766626 0.8399630          1
## 74   74 0.7077205 1.1032085          1
## 75   75 0.9465240 0.7801880          1
## 76   76 0.2887217 0.7269880          1
## 77   77 1.1768653 0.9117824          1
## 78   78 0.5618219 1.3393789          1
## 79   79 1.2671184 1.0621367          1
## 80   80 1.8189428 1.3351707          1
## 81   81 0.8799897 0.7404425          1
## 82   82 0.7193279 0.5906543          1
## 83   83 1.2715284 0.9753877          1
## 84   84 0.9896182 0.9457459          1
## 85   85 1.1023038 0.9017862          1
## 86   86 0.8354561 1.2557587          1
## 87   87 1.3565034 0.6625389          1
## 88   88 1.4917184 1.7051537          1
## 89   89 0.7806541 0.8768405          1
## 90   90 0.5749884 0.6220421          1
## 91   91 0.5285356 0.9881802          1
## 92   92 0.8048000 0.7652237          1
## 93   93 0.9155977 0.7346152          1
## 94   94 0.5170804 1.3764329          1
## 95   95 0.9132864 1.1759861          1
## 96   96 0.5753730 2.0344629          1
## 97   97 0.6270602 1.0425871          1
## 98   98 1.4039415 0.7822955          1
## 99   99 1.4699544 0.6457397          1
## 100 100 0.1966918 0.8655827          1
## 101 101 1.8650106 1.8228915          2
## 102 102 2.5035777 2.0348018          2
## 103 103 2.0709071 1.5802315          2
## 104 104 1.7802285 2.1852078          2
## 105 105 2.1404162 1.4695159          2
## 106 106 2.4045219 1.8141060          2
## 107 107 2.2754578 1.6387158          2
## 108 108 1.8346851 1.7169400          2
## 109 109 2.2179101 2.1954281          2
## 110 110 2.1495384 1.6535421          2
## 111 111 1.3661693 2.2240482          2
## 112 112 1.7481608 2.3856441          2
## 113 113 2.1163383 2.3305007          2
## 114 114 2.5519002 1.8605425          2
## 115 115 2.1632306 2.1733376          2
## 116 116 2.1993959 1.7466690          2
## 117 117 1.6317558 1.3180457          2
## 118 118 1.8718640 2.0420516          2
## 119 119 1.7576361 1.8369556          2
## 120 120 2.1777485 1.9536790          2
## 121 121 1.5452981 1.6430946          2
## 122 122 1.7374044 1.9629580          2
## 123 123 2.4174326 1.7617914          2
## 124 124 2.3580288 2.1205054          2
## 125 125 2.5894833 2.3024864          2
## 126 126 2.4024874 2.0252050          2
## 127 127 2.3393553 1.9646018          2
## 128 128 1.8570918 1.7799213          2
## 129 129 1.9375662 1.8307695          2
## 130 130 1.4850182 1.8154804          2
## 131 131 0.8317172 1.9615005          2
## 132 132 2.1698507 2.4288118          2
## 133 133 2.1180596 1.5451124          2
## 134 134 1.8866094 2.0823006          2
## 135 135 2.4306652 1.9360392          2
## 136 136 1.3980575 2.4825147          2
## 137 137 1.9360886 2.4161774          2
## 138 138 1.8972829 2.2865548          2
## 139 139 1.9729965 2.2800574          2
## 140 140 2.0121408 2.5490452          2
## 141 141 2.1194526 2.3343004          2
## 142 142 1.7188031 1.5755906          2
## 143 143 2.5065627 1.9425107          2
## 144 144 2.4178548 2.5581941          2
## 145 145 2.0303526 2.0380918          2
## 146 146 1.8127356 1.8608428          2
## 147 147 2.0976676 1.8445404          2
## 148 148 2.3682093 1.9945662          2
## 149 149 1.7638344 2.3282711          2
## 150 150 1.6501673 2.3606737          2
## 151 151 2.0826567 2.5551857          2
## 152 152 2.1724808 1.4973861          2
## 153 153 2.4660017 2.2656568          2
## 154 154 2.5845215 1.7484647          2
## 155 155 1.7014977 1.9251035          2
## 156 156 2.3088995 2.1917565          2
## 157 157 2.4198413 2.1507954          2
## 158 158 2.0380028 1.8934676          2
## 159 159 2.0486454 1.8723494          2
## 160 160 2.1961128 1.8303259          2
## 161 161 1.7449725 1.7456562          2
## 162 162 1.8855708 1.7480916          2
## 163 163 1.7633544 2.0768556          2
## 164 164 2.0724397 2.5699561          2
## 165 165 1.5544595 2.0827752          2
## 166 166 1.6571505 1.5651161          2
## 167 167 2.1461370 1.7485831          2
## 168 168 2.1997976 1.9045547          2
## 169 169 1.8012821 2.2486367          2
## 170 170 1.9516914 1.7959914          2
## 171 171 1.5812671 1.3933264          2
## 172 172 2.2452198 1.7497464          2
## 173 173 2.3633530 1.6304779          2
## 174 174 1.8123147 1.4706795          2
## 175 175 1.8616742 2.0855991          2
## 176 176 1.7058378 1.8463885          2
## 177 177 1.3846781 2.1272488          2
## 178 178 2.2309258 2.5877900          2
## 179 179 1.7186580 1.7562195          2
## 180 180 1.8789516 2.5313413          2
## 181 181 1.4183225 2.2602353          2
## 182 182 0.9804335 1.8343316          2
## 183 183 1.8078194 1.9269470          2
## 184 184 2.2857103 1.6987145          2
## 185 185 2.1261888 1.7775692          2
## 186 186 2.2834598 2.2363101          2
## 187 187 1.8863905 2.3363181          2
## 188 188 1.8296542 1.8167206          2
## 189 189 1.7950462 1.8876026          2
## 190 190 2.1653800 1.9528807          2
## 191 191 2.3011832 1.7582755          2
## 192 192 2.1881185 2.0034041          2
## 193 193 1.6343435 2.1465149          2
## 194 194 2.0790663 1.4291535          2
## 195 195 1.6770096 2.0365001          2
## 196 196 1.5854592 2.3145920          2
## 197 197 2.3392047 2.5347724          2
## 198 198 2.1520123 1.6718351          2
## 199 199 2.3546154 2.2068461          2
## 200 200 2.0965418 1.6386522          2
## 201 201 3.1582948 3.2817822          3
## 202 202 3.0324669 2.9516739          3
## 203 203 2.8183399 3.5035701          3
## 204 204 2.6817798 2.9180582          3
## 205 205 2.7808847 3.0844466          3
## 206 206 2.8808048 2.8296571          3
## 207 207 3.2834376 3.0076797          3
## 208 208 3.2670560 2.3872379          3
## 209 209 2.9112772 3.2753139          3
## 210 210 3.5347984 2.9132633          3
## 211 211 2.9866980 3.5704456          3
## 212 212 2.9475630 3.1103896          3
## 213 213 3.6914637 3.5650306          3
## 214 214 2.8297386 3.0184972          3
## 215 215 3.0064852 2.3909167          3
## 216 216 3.2958027 3.0497020          3
## 217 217 3.2940603 3.0182022          3
## 218 218 2.9120480 2.8546845          3
## 219 219 2.9842337 3.3483988          3
## 220 220 3.3505244 3.5677951          3
## 221 221 2.6410030 2.8457986          3
## 222 222 2.9100960 3.4960909          3
## 223 223 3.1094851 2.8974510          3
## 224 224 3.2596545 3.0012766          3
## 225 225 2.9191755 3.2961545          3
## 226 226 3.0616052 2.9854865          3
## 227 227 3.1084309 3.4781697          3
## 228 228 3.1889907 3.4533466          3
## 229 229 2.8498632 3.1324070          3
## 230 230 3.0476210 3.3198091          3
## 231 231 3.1947123 2.7340885          3
## 232 232 3.2642629 2.7866811          3
## 233 233 3.2633253 2.2783362          3
## 234 234 2.8702660 2.7371004          3
## 235 235 3.0800114 2.6517472          3
## 236 236 2.8861520 3.4423209          3
## 237 237 3.0677828 3.3158418          3
## 238 238 2.7887346 2.2638015          3
## 239 239 3.1258504 2.5792482          3
## 240 240 3.1628658 2.7525779          3
## 241 241 3.2192984 2.5263849          3
## 242 242 3.1183246 2.7606926          3
## 243 243 3.2507699 3.4337573          3
## 244 244 3.0452794 3.2743320          3
## 245 245 2.8066416 2.8469756          3
## 246 246 3.3232539 2.6161428          3
## 247 247 3.1623044 2.8852779          3
## 248 248 3.1590963 2.9107673          3
## 249 249 2.5175207 3.1755530          3
## 250 250 2.9070490 3.3371424          3
## 251 251 3.2223458 2.9656005          3
## 252 252 3.1985484 2.7809807          3
## 253 253 3.0316803 2.9058037          3
## 254 254 2.4238337 3.2605995          3
## 255 255 2.6941865 3.0278952          3
## 256 256 2.5748253 2.7013106          3
## 257 257 3.2986795 3.1174259          3
## 258 258 2.6425800 2.1964654          3
## 259 259 2.5709755 3.0196524          3
## 260 260 3.2014407 3.1667301          3
## 261 261 2.9971207 3.3002295          3
## 262 262 3.4621043 2.9682704          3
## 263 263 2.9321147 2.5971883          3
## 264 264 3.0409185 3.4695090          3
## 265 265 3.0373145 2.4586874          3
## 266 266 3.1736631 2.6361699          3
## 267 267 2.6727348 3.1018170          3
## 268 268 3.0759845 2.9310834          3
## 269 269 2.3170393 3.2172804          3
## 270 270 2.9326255 3.6379068          3
## 271 271 3.5626431 3.4774382          3
## 272 272 3.0507725 3.0392838          3
## 273 273 3.3281075 2.6704701          3
## 274 274 2.9400647 3.5905523          3
## 275 275 2.5671612 2.9392090          3
## 276 276 3.0305182 2.9148003          3
## 277 277 3.0971165 3.2032150          3
## 278 278 3.0239877 2.9992575          3
## 279 279 3.1436436 3.3177178          3
## 280 280 2.7237814 2.7846960          3
## 281 281 2.6488493 2.9403778          3
## 282 282 3.5028389 2.7374304          3
## 283 283 2.9100123 2.9941576          3
## 284 284 2.6551342 3.1751063          3
## 285 285 2.4368569 3.3106858          3
## 286 286 3.2537246 3.0023797          3
## 287 287 2.5251198 3.1648635          3
## 288 288 3.4102319 2.6574130          3
## 289 289 3.1919072 2.8157314          3
## 290 290 3.4064629 3.4305026          3
## 291 291 3.0079670 3.6826106          3
## 292 292 3.7393616 3.1616663          3
## 293 293 3.1898541 3.3348883          3
## 294 294 3.1957236 2.8465667          3
## 295 295 3.0556766 3.2829843          3
## 296 296 3.6260623 2.6645142          3
## 297 297 2.6417746 3.0416318          3
## 298 298 3.2317155 3.1088961          3
## 299 299 3.1938757 2.7928641          3
## 300 300 2.2231145 3.1224288          3
## 301 301 4.5359765 3.7180034          4
## 302 302 3.9555011 4.3461914          4
## 303 303 3.9635956 3.8936239          4
## 304 304 4.0641252 4.0590676          4
## 305 305 3.8826908 3.7890478          4
## 306 306 3.5947894 4.2408776          4
## 307 307 3.8539515 4.1816583          4
## 308 308 3.9953827 4.0160001          4
## 309 309 4.3505713 4.0675379          4
## 310 310 4.1699464 3.9732064          4
## 311 311 3.4242432 3.6522741          4
## 312 312 4.1250367 4.3059641          4
## 313 313 4.2805916 4.5032701          4
## 314 314 3.7358389 4.4479817          4
## 315 315 4.2973374 4.2602548          4
## 316 316 4.0496482 4.6119820          4
## 317 317 4.2761301 4.7507636          4
## 318 318 3.8912117 4.1998976          4
## 319 319 4.1961193 4.1457364          4
## 320 320 3.7188606 3.7647725          4
## 321 321 4.2077306 4.5999424          4
## 322 322 3.7416941 4.1484901          4
## 323 323 3.3222681 4.4997617          4
## 324 324 3.7543036 4.2716073          4
## 325 325 3.9527188 3.6590467          4
## 326 326 4.0381956 3.7186242          4
## 327 327 3.8806544 3.9070130          4
## 328 328 3.9827942 3.9598205          4
## 329 329 4.0226557 3.6212487          4
## 330 330 4.1956919 4.5781945          4
## 331 331 3.8540425 3.9128319          4
## 332 332 4.0616431 4.0036028          4
## 333 333 3.9617479 4.0205777          4
## 334 334 4.1301273 3.8929814          4
## 335 335 3.9303327 3.6822656          4
## 336 336 3.8639665 4.6689760          4
## 337 337 3.8498979 4.3259294          4
## 338 338 3.7456614 4.2190044          4
## 339 339 3.5996916 4.0785205          4
## 340 340 4.2211286 3.6889119          4
## 341 341 3.6359034 3.5863260          4
## 342 342 3.9374437 4.3484828          4
## 343 343 4.4620644 3.7832389          4
## 344 344 3.8583607 3.3736348          4
## 345 345 4.2358756 4.2060441          4
## 346 346 4.4409623 3.7583155          4
## 347 347 3.9408002 3.7366986          4
## 348 348 4.4116127 3.5505830          4
## 349 349 4.2196721 4.0230395          4
## 350 350 4.2743564 4.3818073          4
## 351 351 4.0834435 4.1319625          4
## 352 352 3.8766559 4.5851336          4
## 353 353 4.1009444 3.7675707          4
## 354 354 4.0688708 4.5938085          4
## 355 355 3.5415940 3.6509124          4
## 356 356 3.4175004 4.1507475          4
## 357 357 4.8080342 3.5950536          4
## 358 358 4.2245045 4.0363771          4
## 359 359 4.3707505 3.9898450          4
## 360 360 3.5380312 4.4715776          4
## 361 361 3.9736378 4.0223226          4
## 362 362 4.3070604 4.0731841          4
## 363 363 4.4301646 4.2642174          4
## 364 364 3.5943743 4.3678552          4
## 365 365 4.3518757 4.3297973          4
## 366 366 4.0038028 4.6589989          4
## 367 367 3.4916286 3.9650951          4
## 368 368 3.7401950 3.7304495          4
## 369 369 4.3564057 4.4083939          4
## 370 370 4.1282396 3.7711440          4
## 371 371 4.2755948 3.5237243          4
## 372 372 4.2628353 4.5689097          4
## 373 373 4.1496029 3.4827091          4
## 374 374 4.3043997 4.0827318          4
## 375 375 4.0068700 3.9588135          4
## 376 376 4.0579176 3.3274770          4
## 377 377 3.7807398 3.6628580          4
## 378 378 4.6942437 4.0639392          4
## 379 379 3.8853819 3.9131507          4
## 380 380 4.4660029 3.7494534          4
## 381 381 4.2689441 3.9755479          4
## 382 382 3.9050029 3.5893448          4
## 383 383 3.4540026 3.1477135          4
## 384 384 3.6639532 3.7478090          4
## 385 385 3.9682711 4.1421898          4
## 386 386 4.2559583 4.5019210          4
## 387 387 4.0861906 3.7950035          4
## 388 388 3.9331711 3.9469074          4
## 389 389 3.7591473 4.1226238          4
## 390 390 3.7572780 3.8618581          4
## 391 391 4.0981217 3.8540294          4
## 392 392 3.9805710 4.1916993          4
## 393 393 4.2266629 4.1275205          4
## 394 394 3.7137744 4.2314257          4
## 395 395 3.5422981 3.7211093          4
## 396 396 3.2119115 3.6943840          4
## 397 397 3.9658752 4.3221165          4
## 398 398 3.4408164 3.9808492          4
## 399 399 3.6198537 4.1570393          4
## 400 400 4.0005277 4.2579742          4
## 401 401 4.9629049 4.2422309          5
## 402 402 5.1840698 5.0070322          5
## 403 403 5.0771319 4.9495746          5
## 404 404 5.1922847 4.3193401          5
## 405 405 4.7447964 5.0256347          5
## 406 406 5.2756551 4.9227218          5
## 407 407 4.9220268 4.7658744          5
## 408 408 4.9456442 4.6766726          5
## 409 409 5.0101160 4.7870993          5
## 410 410 4.3928139 5.0376128          5
## 411 411 4.8620394 5.2395791          5
## 412 412 5.1552361 4.3784459          5
## 413 413 4.8395224 5.1605822          5
## 414 414 5.0123625 4.8462883          5
## 415 415 4.9399809 4.6627123          5
## 416 416 4.7543933 5.0816301          5
## 417 417 5.2475480 4.9361446          5
## 418 418 4.9609575 4.9913760          5
## 419 419 4.7977331 4.9085418          5
## 420 420 4.9352900 5.1609235          5
## 421 421 5.0124362 5.5309027          5
## 422 422 4.7023587 5.0047688          5
## 423 423 5.6711915 5.0633338          5
## 424 424 5.2414829 4.4815822          5
## 425 425 5.1636573 4.6373326          5
## 426 426 5.3863081 5.1365235          5
## 427 427 5.2947810 4.7475621          5
## 428 428 5.3187440 5.4816437          5
## 429 429 4.8578147 4.9280052          5
## 430 430 5.2357165 5.5846631          5
## 431 431 4.9859486 4.9714519          5
## 432 432 4.8568890 4.9956828          5
## 433 433 4.6360512 4.7197370          5
## 434 434 5.1394574 5.1915715          5
## 435 435 5.3449901 4.8060268          5
## 436 436 4.6981069 5.4857868          5
## 437 437 5.1869949 4.9236752          5
## 438 438 4.7888578 4.7330719          5
## 439 439 4.6198838 4.3638818          5
## 440 440 5.0116934 4.6114746          5
## 441 441 4.9839307 5.0914839          5
## 442 442 4.3115221 4.6426046          5
## 443 443 5.0603603 5.3649108          5
## 444 444 4.3935056 5.3382878          5
## 445 445 4.7919287 4.9022007          5
## 446 446 5.0069875 4.8662688          5
## 447 447 5.2848705 4.9361746          5
## 448 448 5.1318305 4.2595863          5
## 449 449 4.8061009 5.0554884          5
## 450 450 4.8059121 4.7641212          5
## 451 451 4.9462178 4.8988535          5
## 452 452 4.9971115 5.2482229          5
## 453 453 4.8560232 5.3053471          5
## 454 454 5.0850055 5.2744325          5
## 455 455 4.5219593 5.3878710          5
## 456 456 5.2696316 4.9126692          5
## 457 457 4.8642463 4.9429658          5
## 458 458 5.1449609 5.2164283          5
## 459 459 5.0937129 4.9856145          5
## 460 460 5.1040760 5.4830499          5
## 461 461 4.8166429 4.8559136          5
## 462 462 4.9763918 4.7703782          5
## 463 463 4.5589634 5.7778211          5
## 464 464 5.2792591 4.9234981          5
## 465 465 4.9917173 5.0636682          5
## 466 466 4.5066792 4.9043998          5
## 467 467 5.2588917 4.5656649          5
## 468 468 4.6186006 5.3081865          5
## 469 469 5.2702501 5.4359850          5
## 470 470 4.7221813 5.5162830          5
## 471 471 4.5440553 4.8790472          5
## 472 472 4.6623390 5.1527867          5
## 473 473 4.5438913 5.3159415          5
## 474 474 5.1648708 5.1808000          5
## 475 475 4.8522698 5.0159296          5
## 476 476 5.0374700 4.8647180          5
## 477 477 4.7017291 5.3097452          5
## 478 478 4.7472060 5.1970918          5
## 479 479 4.8637476 4.9439548          5
## 480 480 4.7405159 4.8950215          5
## 481 481 5.6405392 4.8614680          5
## 482 482 4.9037743 5.1033143          5
## 483 483 4.7239543 4.8959548          5
## 484 484 5.5427481 4.8978228          5
## 485 485 5.3157870 4.9245687          5
## 486 486 4.5964368 5.4851325          5
## 487 487 4.6640333 5.4468762          5
## 488 488 4.9004773 5.0444003          5
## 489 489 4.9120045 5.1658056          5
## 490 490 5.3743472 5.1546086          5
## 491 491 4.9536544 4.9044288          5
## 492 492 5.0237452 4.6587183          5
## 493 493 5.5639374 5.4494763          5
## 494 494 5.2886115 4.4582237          5
## 495 495 4.7506781 5.0871638          5
## 496 496 5.2716727 4.9269265          5
## 497 497 4.8795458 5.0449881          5
## 498 498 5.3628868 5.4361093          5
## 499 499 5.2961830 5.5908125          5
## 500 500 5.2156221 5.0953297          5

This is what our data looks like:

data %>% ggplot(aes(x = V1, y = V2, color = factor(true_clust))) +
    geom_point() +
    scale_color_discrete(name = "true cluster")

Now we can cluster it using kmeans++:

rownames(data) <- data$id
data_for_clust <- data %>% select(starts_with("V"))
km <- TGL_kmeans_tidy(data_for_clust,
    k = 5,
    metric = "euclid",
    verbose = TRUE
)
## will generate seeds
## generating seeds
## at seed 0
## add new core from 43 to 0
## at seed 1
## done update min distance
## seed range 350 450
## picked up 448 dist was 2.84921
## add new core from 448 to 1
## at seed 2
## done update min distance
## seed range 300 400
## picked up 222 dist was 1.3774
## add new core from 222 to 2
## at seed 3
## done update min distance
## seed range 250 350
## picked up 333 dist was 0.672893
## add new core from 333 to 3
## at seed 4
## done update min distance
## seed range 200 300
## picked up 108 dist was 0.571462
## add new core from 108 to 4
## reassign after init
## iter 0
## iter 1 changed 2
## iter 1
## iter 2 changed 0

The returned list contains 3 fields:

names(km)
## [1] "centers" "cluster" "size"

km$centers contains a tibble with a clust column and the cluster centers:

km$centers
## # A tibble: 5 × 3
##   clust    V1    V2
##   <int> <dbl> <dbl>
## 1     1  2.03  1.98
## 2     2  4.01  4.04
## 3     3  3.04  3.04
## 4     4  1.00  1.06
## 5     5  4.98  5.02

Clusters are numbered according to reorder_func (see the ‘Custom cluster ordering’ section).

km$cluster contains a tibble with an id column holding the observation id (1:n if no id column was supplied), and a clust column holding the cluster assigned to each observation:

km$cluster
## # A tibble: 500 × 2
##    id    clust
##    <chr> <int>
##  1 1         4
##  2 2         4
##  3 3         4
##  4 4         4
##  5 5         4
##  6 6         4
##  7 7         4
##  8 8         4
##  9 9         4
## 10 10        4
## # ℹ 490 more rows

km$size contains a tibble with a clust column and an n column holding the number of points in each cluster:

km$size
## # A tibble: 5 × 2
##   clust     n
##   <int> <int>
## 1     1   101
## 2     2   100
## 3     3    99
## 4     4   101
## 5     5    99

We can now check our clustering performance, i.e. the fraction of observations that were classified correctly (note that the match_clusters function is internal to the package and is used only in this vignette):

d <- tglkmeans:::match_clusters(data, km, 5)
sum(d$true_clust == d$new_clust, na.rm = TRUE) / sum(!is.na(d$new_clust))
## [1] 0.97

And plot the results:

d %>% ggplot(aes(x = V1, y = V2, color = factor(new_clust), shape = factor(true_clust))) +
    geom_point() +
    scale_color_discrete(name = "cluster") +
    scale_shape_discrete(name = "true cluster") +
    geom_point(data = km$centers, size = 7, color = "black", shape = "X")

Custom cluster ordering

By default, the clusters are ordered using the following function: hclust(dist(cor(t(centers)))) - hclust of the Euclidean distance of the correlation matrix of the centers.

We can supply our own function to order the clusters using the reorder_func argument. The function is applied to each center and the clusters are ordered by the result.

km <- TGL_kmeans_tidy(data %>% select(id, starts_with("V")),
    k = 5,
    metric = "euclid",
    verbose = FALSE,
    reorder_func = median
)
km$centers
## # A tibble: 5 × 4
##   clust    id    V1    V2
##   <int> <dbl> <dbl> <dbl>
## 1     1  51.5  1.04  1.06
## 2     2 153    2.02  2.02
## 3     3 254.   3.06  3.05
## 4     4 353    4.01  4.05
## 5     5 452.   4.98  5.02

Missing data

tglkmeans can deal with missing data, as long as at least one dimension is not missing. For example:

data$V1[sample(1:nrow(data), round(nrow(data) * 0.2))] <- NA
data
##      id        V1        V2 true_clust
## 1     1 1.0667265 1.2974904          1
## 2     2 1.5669030 1.0507419          1
## 3     3 0.9216493 0.9818881          1
## 4     4 1.1005807 1.0826089          1
## 5     5 1.3043060 0.7386772          1
## 6     6        NA 1.2199023          1
## 7     7 0.5640716 1.2009019          1
## 8     8 0.8026433 1.0143766          1
## 9     9        NA 1.7195164          1
## 10   10 0.5520354 1.0998368          1
## 11   11 1.1953140 1.3663256          1
## 12   12 1.5992284 1.3281665          1
## 13   13        NA 1.1698961          1
## 14   14 1.1234638 0.9912329          1
## 15   15 1.0126584 0.9744042          1
## 16   16 1.3119844 0.8811811          1
## 17   17 1.0552372 1.6257848          1
## 18   18        NA 0.9811239          1
## 19   19 0.9281041 1.1814423          1
## 20   20        NA 1.0918538          1
## 21   21        NA 0.8609941          1
## 22   22 1.6904613 1.4090263          1
## 23   23        NA 0.8359700          1
## 24   24        NA 1.3954873          1
## 25   25 1.0574108 0.8103830          1
## 26   26 1.0821083 1.3491419          1
## 27   27 1.2773750 1.0908505          1
## 28   28 0.9083436 0.9377400          1
## 29   29 1.4753087 1.1393456          1
## 30   30        NA 1.0678832          1
## 31   31 0.9971557 0.7110518          1
## 32   32 1.1286987 0.8751926          1
## 33   33 0.8602882 1.2884457          1
## 34   34 0.8720141 1.3431411          1
## 35   35 0.8564513 1.3415309          1
## 36   36        NA 1.2513594          1
## 37   37        NA 1.1915235          1
## 38   38 1.3170715 0.8314424          1
## 39   39 1.1620800 0.9975508          1
## 40   40 0.9644723 0.4914671          1
## 41   41 0.9637582 1.1634533          1
## 42   42 1.0621891 1.2628968          1
## 43   43 0.4116396 1.3365347          1
## 44   44 0.6080422 1.0226581          1
## 45   45        NA 1.1814792          1
## 46   46 1.4061822 1.1482983          1
## 47   47        NA 1.3122412          1
## 48   48 0.6009701 1.2520516          1
## 49   49 1.3759020 1.0235787          1
## 50   50 1.2488680 1.0132348          1
## 51   51        NA 0.8174224          1
## 52   52 1.2437039 1.1594962          1
## 53   53 0.5141690 1.0295818          1
## 54   54 0.6242043 0.3423182          1
## 55   55 0.8449093 1.0426204          1
## 56   56 1.1172540 0.9446881          1
## 57   57 0.8123217 0.9360699          1
## 58   58 1.0578361 0.6289752          1
## 59   59        NA 0.6354165          1
## 60   60 1.1025650 1.0735222          1
## 61   61 0.8449643 0.9240000          1
## 62   62 0.8131473 1.0000371          1
## 63   63 0.9731536 1.0191583          1
## 64   64 1.1998901 1.0817627          1
## 65   65 0.8762270 1.4395907          1
## 66   66 1.0997815 0.7537613          1
## 67   67 0.9370728 1.0533252          1
## 68   68 0.8842632 0.8134801          1
## 69   69 1.0250015 1.0604346          1
## 70   70 0.9508633 1.4304047          1
## 71   71 1.3018849 0.7042134          1
## 72   72        NA 0.9791680          1
## 73   73 1.4766626 0.8399630          1
## 74   74 0.7077205 1.1032085          1
## 75   75 0.9465240 0.7801880          1
## 76   76        NA 0.7269880          1
## 77   77 1.1768653 0.9117824          1
## 78   78 0.5618219 1.3393789          1
## 79   79 1.2671184 1.0621367          1
## 80   80 1.8189428 1.3351707          1
## 81   81 0.8799897 0.7404425          1
## 82   82 0.7193279 0.5906543          1
## 83   83        NA 0.9753877          1
## 84   84 0.9896182 0.9457459          1
## 85   85 1.1023038 0.9017862          1
## 86   86 0.8354561 1.2557587          1
## 87   87 1.3565034 0.6625389          1
## 88   88        NA 1.7051537          1
## 89   89 0.7806541 0.8768405          1
## 90   90 0.5749884 0.6220421          1
## 91   91 0.5285356 0.9881802          1
## 92   92 0.8048000 0.7652237          1
## 93   93 0.9155977 0.7346152          1
## 94   94 0.5170804 1.3764329          1
## 95   95 0.9132864 1.1759861          1
## 96   96 0.5753730 2.0344629          1
## 97   97 0.6270602 1.0425871          1
## 98   98 1.4039415 0.7822955          1
## 99   99 1.4699544 0.6457397          1
## 100 100 0.1966918 0.8655827          1
## 101 101 1.8650106 1.8228915          2
## 102 102        NA 2.0348018          2
## 103 103 2.0709071 1.5802315          2
## 104 104 1.7802285 2.1852078          2
## 105 105 2.1404162 1.4695159          2
## 106 106 2.4045219 1.8141060          2
## 107 107 2.2754578 1.6387158          2
## 108 108 1.8346851 1.7169400          2
## 109 109 2.2179101 2.1954281          2
## 110 110        NA 1.6535421          2
## 111 111 1.3661693 2.2240482          2
## 112 112        NA 2.3856441          2
## 113 113        NA 2.3305007          2
## 114 114 2.5519002 1.8605425          2
## 115 115        NA 2.1733376          2
## 116 116 2.1993959 1.7466690          2
## 117 117 1.6317558 1.3180457          2
## 118 118 1.8718640 2.0420516          2
## 119 119 1.7576361 1.8369556          2
## 120 120        NA 1.9536790          2
## 121 121 1.5452981 1.6430946          2
## 122 122 1.7374044 1.9629580          2
## 123 123 2.4174326 1.7617914          2
## 124 124 2.3580288 2.1205054          2
## 125 125 2.5894833 2.3024864          2
## 126 126        NA 2.0252050          2
## 127 127 2.3393553 1.9646018          2
## 128 128        NA 1.7799213          2
## 129 129 1.9375662 1.8307695          2
## 130 130 1.4850182 1.8154804          2
## 131 131 0.8317172 1.9615005          2
## 132 132 2.1698507 2.4288118          2
## 133 133 2.1180596 1.5451124          2
## 134 134 1.8866094 2.0823006          2
## 135 135 2.4306652 1.9360392          2
## 136 136 1.3980575 2.4825147          2
## 137 137 1.9360886 2.4161774          2
## 138 138 1.8972829 2.2865548          2
## 139 139 1.9729965 2.2800574          2
## 140 140 2.0121408 2.5490452          2
## 141 141        NA 2.3343004          2
## 142 142        NA 1.5755906          2
## 143 143 2.5065627 1.9425107          2
## 144 144 2.4178548 2.5581941          2
## 145 145 2.0303526 2.0380918          2
## 146 146 1.8127356 1.8608428          2
## 147 147 2.0976676 1.8445404          2
## 148 148 2.3682093 1.9945662          2
## 149 149 1.7638344 2.3282711          2
## 150 150        NA 2.3606737          2
## 151 151 2.0826567 2.5551857          2
## 152 152        NA 1.4973861          2
## 153 153 2.4660017 2.2656568          2
## 154 154 2.5845215 1.7484647          2
## 155 155 1.7014977 1.9251035          2
## 156 156 2.3088995 2.1917565          2
## 157 157 2.4198413 2.1507954          2
## 158 158 2.0380028 1.8934676          2
## 159 159 2.0486454 1.8723494          2
## 160 160 2.1961128 1.8303259          2
## 161 161 1.7449725 1.7456562          2
## 162 162        NA 1.7480916          2
## 163 163        NA 2.0768556          2
## 164 164 2.0724397 2.5699561          2
## 165 165 1.5544595 2.0827752          2
## 166 166 1.6571505 1.5651161          2
## 167 167        NA 1.7485831          2
## 168 168 2.1997976 1.9045547          2
## 169 169 1.8012821 2.2486367          2
## 170 170 1.9516914 1.7959914          2
## 171 171 1.5812671 1.3933264          2
## 172 172 2.2452198 1.7497464          2
## 173 173 2.3633530 1.6304779          2
## 174 174 1.8123147 1.4706795          2
## 175 175 1.8616742 2.0855991          2
## 176 176 1.7058378 1.8463885          2
## 177 177 1.3846781 2.1272488          2
## 178 178 2.2309258 2.5877900          2
## 179 179 1.7186580 1.7562195          2
## 180 180 1.8789516 2.5313413          2
## 181 181 1.4183225 2.2602353          2
## 182 182 0.9804335 1.8343316          2
## 183 183        NA 1.9269470          2
## 184 184 2.2857103 1.6987145          2
## 185 185 2.1261888 1.7775692          2
## 186 186 2.2834598 2.2363101          2
## 187 187        NA 2.3363181          2
## 188 188 1.8296542 1.8167206          2
## 189 189        NA 1.8876026          2
## 190 190        NA 1.9528807          2
## 191 191 2.3011832 1.7582755          2
## 192 192 2.1881185 2.0034041          2
## 193 193 1.6343435 2.1465149          2
## 194 194 2.0790663 1.4291535          2
## 195 195 1.6770096 2.0365001          2
## 196 196 1.5854592 2.3145920          2
## 197 197 2.3392047 2.5347724          2
## 198 198 2.1520123 1.6718351          2
## 199 199 2.3546154 2.2068461          2
## 200 200 2.0965418 1.6386522          2
## 201 201 3.1582948 3.2817822          3
## 202 202 3.0324669 2.9516739          3
## 203 203 2.8183399 3.5035701          3
## 204 204 2.6817798 2.9180582          3
## 205 205        NA 3.0844466          3
## 206 206 2.8808048 2.8296571          3
## 207 207 3.2834376 3.0076797          3
## 208 208 3.2670560 2.3872379          3
## 209 209 2.9112772 3.2753139          3
## 210 210 3.5347984 2.9132633          3
## 211 211 2.9866980 3.5704456          3
## 212 212        NA 3.1103896          3
## 213 213        NA 3.5650306          3
## 214 214 2.8297386 3.0184972          3
## 215 215 3.0064852 2.3909167          3
## 216 216 3.2958027 3.0497020          3
## 217 217 3.2940603 3.0182022          3
## 218 218 2.9120480 2.8546845          3
## 219 219 2.9842337 3.3483988          3
## 220 220        NA 3.5677951          3
## 221 221 2.6410030 2.8457986          3
## 222 222 2.9100960 3.4960909          3
## 223 223 3.1094851 2.8974510          3
## 224 224 3.2596545 3.0012766          3
## 225 225        NA 3.2961545          3
## 226 226 3.0616052 2.9854865          3
## 227 227 3.1084309 3.4781697          3
## 228 228 3.1889907 3.4533466          3
## 229 229 2.8498632 3.1324070          3
## 230 230 3.0476210 3.3198091          3
## 231 231 3.1947123 2.7340885          3
## 232 232 3.2642629 2.7866811          3
## 233 233 3.2633253 2.2783362          3
## 234 234 2.8702660 2.7371004          3
## 235 235 3.0800114 2.6517472          3
## 236 236 2.8861520 3.4423209          3
## 237 237 3.0677828 3.3158418          3
## 238 238        NA 2.2638015          3
## 239 239        NA 2.5792482          3
## 240 240 3.1628658 2.7525779          3
## 241 241 3.2192984 2.5263849          3
## 242 242 3.1183246 2.7606926          3
## 243 243        NA 3.4337573          3
## 244 244 3.0452794 3.2743320          3
## 245 245        NA 2.8469756          3
## 246 246 3.3232539 2.6161428          3
## 247 247        NA 2.8852779          3
## 248 248 3.1590963 2.9107673          3
## 249 249 2.5175207 3.1755530          3
## 250 250        NA 3.3371424          3
## 251 251 3.2223458 2.9656005          3
## 252 252        NA 2.7809807          3
## 253 253 3.0316803 2.9058037          3
## 254 254 2.4238337 3.2605995          3
## 255 255 2.6941865 3.0278952          3
## 256 256 2.5748253 2.7013106          3
## 257 257 3.2986795 3.1174259          3
## 258 258 2.6425800 2.1964654          3
## 259 259 2.5709755 3.0196524          3
## 260 260        NA 3.1667301          3
## 261 261 2.9971207 3.3002295          3
## 262 262 3.4621043 2.9682704          3
## 263 263 2.9321147 2.5971883          3
## 264 264 3.0409185 3.4695090          3
## 265 265 3.0373145 2.4586874          3
## 266 266 3.1736631 2.6361699          3
## 267 267        NA 3.1018170          3
## 268 268 3.0759845 2.9310834          3
## 269 269        NA 3.2172804          3
## 270 270 2.9326255 3.6379068          3
## 271 271 3.5626431 3.4774382          3
## 272 272        NA 3.0392838          3
## 273 273 3.3281075 2.6704701          3
## 274 274 2.9400647 3.5905523          3
## 275 275 2.5671612 2.9392090          3
## 276 276 3.0305182 2.9148003          3
## 277 277 3.0971165 3.2032150          3
## 278 278 3.0239877 2.9992575          3
## 279 279 3.1436436 3.3177178          3
## 280 280 2.7237814 2.7846960          3
## 281 281 2.6488493 2.9403778          3
## 282 282 3.5028389 2.7374304          3
## 283 283 2.9100123 2.9941576          3
## 284 284 2.6551342 3.1751063          3
## 285 285 2.4368569 3.3106858          3
## 286 286        NA 3.0023797          3
## 287 287 2.5251198 3.1648635          3
## 288 288 3.4102319 2.6574130          3
## 289 289 3.1919072 2.8157314          3
## 290 290 3.4064629 3.4305026          3
## 291 291 3.0079670 3.6826106          3
## 292 292 3.7393616 3.1616663          3
## 293 293        NA 3.3348883          3
## 294 294 3.1957236 2.8465667          3
## 295 295 3.0556766 3.2829843          3
## 296 296        NA 2.6645142          3
## 297 297 2.6417746 3.0416318          3
## 298 298 3.2317155 3.1088961          3
## 299 299 3.1938757 2.7928641          3
## 300 300 2.2231145 3.1224288          3
## 301 301 4.5359765 3.7180034          4
## 302 302 3.9555011 4.3461914          4
## 303 303 3.9635956 3.8936239          4
## 304 304 4.0641252 4.0590676          4
## 305 305 3.8826908 3.7890478          4
## 306 306 3.5947894 4.2408776          4
## 307 307 3.8539515 4.1816583          4
## 308 308 3.9953827 4.0160001          4
## 309 309 4.3505713 4.0675379          4
## 310 310 4.1699464 3.9732064          4
## 311 311 3.4242432 3.6522741          4
## 312 312        NA 4.3059641          4
## 313 313 4.2805916 4.5032701          4
## 314 314 3.7358389 4.4479817          4
## 315 315 4.2973374 4.2602548          4
## 316 316 4.0496482 4.6119820          4
## 317 317 4.2761301 4.7507636          4
## 318 318 3.8912117 4.1998976          4
## 319 319 4.1961193 4.1457364          4
## 320 320 3.7188606 3.7647725          4
## 321 321 4.2077306 4.5999424          4
## 322 322 3.7416941 4.1484901          4
## 323 323 3.3222681 4.4997617          4
## 324 324 3.7543036 4.2716073          4
## 325 325 3.9527188 3.6590467          4
## 326 326 4.0381956 3.7186242          4
## 327 327 3.8806544 3.9070130          4
## 328 328        NA 3.9598205          4
## 329 329 4.0226557 3.6212487          4
## 330 330 4.1956919 4.5781945          4
## 331 331 3.8540425 3.9128319          4
## 332 332 4.0616431 4.0036028          4
## 333 333        NA 4.0205777          4
## 334 334 4.1301273 3.8929814          4
## 335 335 3.9303327 3.6822656          4
## 336 336        NA 4.6689760          4
## 337 337 3.8498979 4.3259294          4
## 338 338        NA 4.2190044          4
## 339 339 3.5996916 4.0785205          4
## 340 340 4.2211286 3.6889119          4
## 341 341        NA 3.5863260          4
## 342 342 3.9374437 4.3484828          4
## 343 343        NA 3.7832389          4
## 344 344 3.8583607 3.3736348          4
## 345 345 4.2358756 4.2060441          4
## 346 346 4.4409623 3.7583155          4
## 347 347        NA 3.7366986          4
## 348 348 4.4116127 3.5505830          4
## 349 349 4.2196721 4.0230395          4
## 350 350 4.2743564 4.3818073          4
## 351 351 4.0834435 4.1319625          4
## 352 352 3.8766559 4.5851336          4
## 353 353        NA 3.7675707          4
## 354 354        NA 4.5938085          4
## 355 355 3.5415940 3.6509124          4
## 356 356 3.4175004 4.1507475          4
## 357 357 4.8080342 3.5950536          4
## 358 358 4.2245045 4.0363771          4
## 359 359        NA 3.9898450          4
## 360 360 3.5380312 4.4715776          4
## 361 361        NA 4.0223226          4
## 362 362 4.3070604 4.0731841          4
## 363 363        NA 4.2642174          4
## 364 364 3.5943743 4.3678552          4
## 365 365 4.3518757 4.3297973          4
## 366 366 4.0038028 4.6589989          4
## 367 367 3.4916286 3.9650951          4
## 368 368 3.7401950 3.7304495          4
## 369 369        NA 4.4083939          4
## 370 370 4.1282396 3.7711440          4
## 371 371 4.2755948 3.5237243          4
## 372 372 4.2628353 4.5689097          4
## 373 373        NA 3.4827091          4
## 374 374 4.3043997 4.0827318          4
## 375 375 4.0068700 3.9588135          4
## 376 376 4.0579176 3.3274770          4
## 377 377 3.7807398 3.6628580          4
## 378 378        NA 4.0639392          4
## 379 379 3.8853819 3.9131507          4
## 380 380 4.4660029 3.7494534          4
## 381 381        NA 3.9755479          4
## 382 382 3.9050029 3.5893448          4
## 383 383 3.4540026 3.1477135          4
## 384 384        NA 3.7478090          4
## 385 385 3.9682711 4.1421898          4
## 386 386 4.2559583 4.5019210          4
## 387 387 4.0861906 3.7950035          4
## 388 388 3.9331711 3.9469074          4
## 389 389 3.7591473 4.1226238          4
## 390 390 3.7572780 3.8618581          4
## 391 391 4.0981217 3.8540294          4
## 392 392        NA 4.1916993          4
## 393 393        NA 4.1275205          4
## 394 394 3.7137744 4.2314257          4
## 395 395 3.5422981 3.7211093          4
## 396 396 3.2119115 3.6943840          4
## 397 397        NA 4.3221165          4
## 398 398 3.4408164 3.9808492          4
## 399 399 3.6198537 4.1570393          4
## 400 400 4.0005277 4.2579742          4
## 401 401 4.9629049 4.2422309          5
## 402 402        NA 5.0070322          5
## 403 403        NA 4.9495746          5
## 404 404        NA 4.3193401          5
## 405 405 4.7447964 5.0256347          5
## 406 406 5.2756551 4.9227218          5
## 407 407        NA 4.7658744          5
## 408 408 4.9456442 4.6766726          5
## 409 409 5.0101160 4.7870993          5
## 410 410        NA 5.0376128          5
## 411 411 4.8620394 5.2395791          5
## 412 412 5.1552361 4.3784459          5
## 413 413        NA 5.1605822          5
## 414 414 5.0123625 4.8462883          5
## 415 415 4.9399809 4.6627123          5
## 416 416        NA 5.0816301          5
## 417 417        NA 4.9361446          5
## 418 418 4.9609575 4.9913760          5
## 419 419 4.7977331 4.9085418          5
## 420 420 4.9352900 5.1609235          5
## 421 421        NA 5.5309027          5
## 422 422 4.7023587 5.0047688          5
## 423 423 5.6711915 5.0633338          5
## 424 424 5.2414829 4.4815822          5
## 425 425        NA 4.6373326          5
## 426 426 5.3863081 5.1365235          5
## 427 427 5.2947810 4.7475621          5
## 428 428 5.3187440 5.4816437          5
## 429 429 4.8578147 4.9280052          5
## 430 430 5.2357165 5.5846631          5
## 431 431 4.9859486 4.9714519          5
## 432 432 4.8568890 4.9956828          5
## 433 433 4.6360512 4.7197370          5
## 434 434 5.1394574 5.1915715          5
## 435 435        NA 4.8060268          5
## 436 436 4.6981069 5.4857868          5
## 437 437 5.1869949 4.9236752          5
## 438 438 4.7888578 4.7330719          5
## 439 439 4.6198838 4.3638818          5
## 440 440 5.0116934 4.6114746          5
## 441 441        NA 5.0914839          5
## 442 442        NA 4.6426046          5
## 443 443 5.0603603 5.3649108          5
## 444 444 4.3935056 5.3382878          5
## 445 445 4.7919287 4.9022007          5
## 446 446 5.0069875 4.8662688          5
## 447 447 5.2848705 4.9361746          5
## 448 448 5.1318305 4.2595863          5
## 449 449 4.8061009 5.0554884          5
## 450 450 4.8059121 4.7641212          5
## 451 451 4.9462178 4.8988535          5
## 452 452 4.9971115 5.2482229          5
## 453 453 4.8560232 5.3053471          5
## 454 454 5.0850055 5.2744325          5
## 455 455 4.5219593 5.3878710          5
## 456 456 5.2696316 4.9126692          5
## 457 457        NA 4.9429658          5
## 458 458 5.1449609 5.2164283          5
## 459 459 5.0937129 4.9856145          5
## 460 460 5.1040760 5.4830499          5
## 461 461 4.8166429 4.8559136          5
## 462 462        NA 4.7703782          5
## 463 463        NA 5.7778211          5
## 464 464 5.2792591 4.9234981          5
## 465 465        NA 5.0636682          5
## 466 466 4.5066792 4.9043998          5
## 467 467 5.2588917 4.5656649          5
## 468 468 4.6186006 5.3081865          5
## 469 469 5.2702501 5.4359850          5
## 470 470 4.7221813 5.5162830          5
## 471 471 4.5440553 4.8790472          5
## 472 472 4.6623390 5.1527867          5
## 473 473 4.5438913 5.3159415          5
## 474 474        NA 5.1808000          5
## 475 475 4.8522698 5.0159296          5
## 476 476 5.0374700 4.8647180          5
## 477 477 4.7017291 5.3097452          5
## 478 478 4.7472060 5.1970918          5
## 479 479 4.8637476 4.9439548          5
## 480 480 4.7405159 4.8950215          5
## 481 481 5.6405392 4.8614680          5
## 482 482        NA 5.1033143          5
## 483 483 4.7239543 4.8959548          5
## 484 484        NA 4.8978228          5
## 485 485 5.3157870 4.9245687          5
## 486 486 4.5964368 5.4851325          5
## 487 487 4.6640333 5.4468762          5
## 488 488 4.9004773 5.0444003          5
## 489 489 4.9120045 5.1658056          5
## 490 490        NA 5.1546086          5
## 491 491 4.9536544 4.9044288          5
## 492 492 5.0237452 4.6587183          5
## 493 493 5.5639374 5.4494763          5
## 494 494        NA 4.4582237          5
## 495 495 4.7506781 5.0871638          5
## 496 496 5.2716727 4.9269265          5
## 497 497 4.8795458 5.0449881          5
## 498 498 5.3628868 5.4361093          5
## 499 499 5.2961830 5.5908125          5
## 500 500 5.2156221 5.0953297          5
# Cluster the data (V1 now contains NAs) into 5 clusters.
# The data frame passed in starts with the `id` column, so `id_column = TRUE`
# is given explicitly: without it the id may be treated as one more numeric
# dimension to cluster on rather than as the observation identifier.
km <- TGL_kmeans_tidy(data %>% select(id, starts_with("V")),
    k = 5,
    metric = "euclid",
    verbose = FALSE,
    id_column = TRUE
)
# match_clusters() is an internal tglkmeans helper; its result is used below
# through the `true_clust` and `new_clust` columns (presumably the inferred
# cluster mapped onto the true labels -- verify against the package source).
d <- tglkmeans:::match_clusters(data, km, 5)
# Fraction of observations with a non-NA `new_clust` whose label equals the
# true cluster.
sum(d$true_clust == d$new_clust, na.rm = TRUE) / sum(!is.na(d$new_clust))
## [1] 0.98

Plotting the results (the rows with NAs are dropped from the plot), we get:

# Scatter plot of the matched data: color encodes the assigned cluster and
# shape encodes the true cluster. The cluster centers are drawn on top as
# large black "X" marks. Rows with a missing coordinate are removed by
# ggplot2 with a warning.
ggplot(
    data = d,
    mapping = aes(x = V1, y = V2, color = factor(new_clust), shape = factor(true_clust))
) +
    geom_point() +
    geom_point(data = km$centers, size = 7, color = "black", shape = "X") +
    scale_color_discrete(name = "cluster") +
    scale_shape_discrete(name = "true cluster")
## Warning: Removed 100 rows containing missing values or values outside the scale range
## (`geom_point()`).

High dimensions

Let’s move to higher dimensions (and more clusters), keeping the same sd of 0.3:

# Simulate 30 clusters in 300 dimensions, with the same sd (0.3) as before.
data <- simulate_data(nclust = 30, dims = 300, n = 100, sd = 0.3)
# Cluster on the id column plus all V* columns; `id_column = TRUE` marks the
# first column as the observation id.
km <- TGL_kmeans_tidy(
    select(data, id, starts_with("V")),
    k = 30,
    id_column = TRUE,
    metric = "euclid",
    verbose = FALSE
)

Note that here we supplied id_column = TRUE to indicate that the first column is the id column.

# match_clusters() is an internal tglkmeans helper; its result carries the
# `true_clust` and `new_clust` columns compared below.
d <- tglkmeans:::match_clusters(data, km, 30)
# Share of observations with a non-NA `new_clust` that agree with the true
# cluster label.
with(d, sum(true_clust == new_clust, na.rm = TRUE) / sum(!is.na(new_clust)))
## [1] 1

Comparison with R vanilla kmeans

Let’s compare it to R vanilla kmeans:

# Baseline: base R kmeans() on the same V* columns with 30 centers.
# kmeans() starts from randomly chosen centers and no seed is set here, so the
# accuracy printed below can differ from run to run.
km_standard <- kmeans(data %>% select(starts_with("V")), 30)
# Attach an id -> cluster table in the `clust` slot that match_clusters() is
# given below. seq_len() is used instead of 1:nrow(data) so an empty data frame
# yields an empty id vector rather than c(1, 0).
# NOTE(review): assumes data$id equals the row number, as in the simulated
# data printed earlier -- confirm.
km_standard$clust <- tibble(id = seq_len(nrow(data)), clust = km_standard$cluster)

d <- tglkmeans:::match_clusters(data, km_standard, 30)
# Fraction of observations with a non-NA `new_clust` whose label equals the
# true cluster.
sum(d$true_clust == d$new_clust, na.rm = TRUE) / sum(!is.na(d$new_clust))
## [1] 0.7142857

We can see that kmeans++ clusters significantly better than R vanilla kmeans.

Random seed

We can set the seed for reproducible results:

# Run the same clustering twice with an identical seed; only the V* columns
# are passed, so no id column is involved.
km1 <- TGL_kmeans_tidy(
    select(data, starts_with("V")),
    k = 30,
    seed = 60427,
    metric = "euclid",
    verbose = FALSE
)
km2 <- TGL_kmeans_tidy(
    select(data, starts_with("V")),
    k = 30,
    seed = 60427,
    metric = "euclid",
    verbose = FALSE
)
# Exact equality is intended: with the same seed the centers should be
# bit-for-bit identical. The first column of `centers` is excluded from the
# comparison (presumably the cluster label -- confirm).
all(km1$centers[, -1] == km2$centers[, -1])
## [1] TRUE