KMeans 算法

KMeans最主要是思想是

1.播撒K个标识点,K是你要分成几类。

2.网络中所有点对这K个点计算距离,并归入最近的一个标识点中。这样就得到K个类

3.重新计算这K个类的中心距离。

4.是否要更新K个类的中心距离,是否迭代次数为0,如果要更新标识点的位置,并且迭代次数不为0,更新标识点,并跳转回2.

下面是为FPT
写的KMeans算法。这个FPT分裂时只要分成两类,所以就是K=2.单纯的KMeans没有像我写的这样复杂。我这里的距离是计算的两个位数组中的差异位数。

// only divide two parts void KMeans2(FPN * current,FPN * &left, FPN * &right,
const Record& record) { // prepare int k = 2; int count = current->count+1;
KMeansNode<N,order,KHash> nodes = new KMeansNode<N,order,KHash>[count]; int
classLabel = new int[count]; int* totalDistance = new int[count];
memset(totalDistance,0,countsizeof(int));
memset(classLabel,-1,count
sizeof(int)); for(int i = 0; i < current->count;
i++) { nodes[i].Set(current->bfilters[i],current->paths[i],current->branches[i
],current->support[i]); } nodes[count - 1].Set(record,NULL); // train
BloomFilter* centers = new BloomFilter[k]; int* cdistance = new int[k];
// the distance between the center and each node
memset(cdistance,0,k*sizeof(int)); for(int i =0 ; i < k; i++) {
centers[i].Set(rand()%(int)pow(2.0,(double)N-1)); } for(int x = 0; x < 500;
x++) { // iterator for(int i = 0; i < count; i++) { // for each node to
compute the distance with centers int min = N+1; for(int j = 0; j < k; j++) {
int distance = BloomFilter::Distance(nodes[i].record.bfilter,centers[j]);
if(distance < min) { min = distance; classLabel[i] = j; } } } // recompute the
centers // compute the distance between each node in the same class label
for(int i = 0; i < count; i++) { totalDistance[i] = 0; for(int j=0; j < count;
j++) { if(i != j && classLabel[i] == classLabel[j]) { // the same class but
not itself totalDistance[i] +=
BloomFilter::Distance(nodes[i].record.bfilter,nodes[j].record.bfilter); } }
} // compute the distances between the center and each node for(int i = 0; i <
count; i++) { cdistance[classLabel[i]]+=BloomFilter::Distance(centers[class
Label[i]],nodes[i].record.bfilter); } int flag = false; for(int i = 0; i <
count; i++) { if(cdistance[classLabel[i]] > totalDistance[i]) {
centers[i].Set(nodes[i].record.bfilter); flag = true; cdistance[classLabel[i]]
= totalDistance[i]; } } // no update the center then break the loop if(!flag)
{ break; } } right = new FPN(); left = new FPN(); for(int i = 0; i < count;
i++) { if(classLabel[i] == 0) { // insert left
left->Insert(nodes[i].record,nodes[i].branch); } else { // insert right
right->Insert(nodes[i].record,nodes[i].branch); } } delete current; delete []
nodes; delete [] centers; delete [] totalDistance; delete [] classLabel;
delete [] cdistance; }