/**
 * Initializes the model directly from explicit cluster centers and cluster weights.
 *
 * @param centers initial cluster centers
 * @param weights initial cluster weights, one per entry of `centers`
 * @return this instance, so that setter calls can be chained
 * @throws IllegalArgumentException if `centers` and `weights` differ in length
 */
def setInitialCenters(centers: Array[Vector], weights: Array[Double]): this.type = {
  // Fail fast on mismatched input instead of building an inconsistent model.
  require(
    centers.length == weights.length,
    s"Number of initial centers (${centers.length}) must equal " +
      s"number of weights (${weights.length})")
  model = new StreamingKMeansModel(centers, weights)
  this
}

/**
 * Initializes the model with `k` randomly generated centers that all share one weight.
 *
 * Every coordinate of every center is drawn with `nextGaussian()` from an
 * `XORShiftRandom` created from `seed`.
 *
 * @param dim    number of coordinates generated for each center
 * @param weight weight assigned to every cluster
 * @param seed   seed for the random generator; defaults to `Utils.random.nextLong`
 * @return this instance, so that setter calls can be chained
 */
def setRandomCenters(dim: Int, weight: Double, seed: Long = Utils.random.nextLong): this.type = {
  val random = new XORShiftRandom(seed)
  // `k` comes from the enclosing scope (not visible in this excerpt).
  val centers = Array.fill(k)(Vectors.dense(Array.fill(dim)(random.nextGaussian())))
  val weights = Array.fill(k)(weight)
  model = new StreamingKMeansModel(centers, weights)
  this
}
// Check whether the lightest cluster is dying; if so, split the heaviest cluster in two.
// Pair every cluster weight with its index (through a view).
val weightsWithIndex = clusterWeights.view.zipWithIndex
// Cluster with the largest weight, and its index.
val (maxWeight, largest) = weightsWithIndex.maxBy(_._1)
// Cluster with the smallest weight, and its index.
val (minWeight, smallest) = weightsWithIndex.minBy(_._1)
// The smallest cluster counts as dying when its weight is below 1e-8 times the largest weight.
if (minWeight < 1e-8 * maxWeight) {
  logInfo(s"Cluster $smallest is dying. Split the largest cluster $largest into two.")
  // Both clusters receive the mean of the two weights.
  val weight = (maxWeight + minWeight) / 2.0
  clusterWeights(largest) = weight
  clusterWeights(smallest) = weight
  val largestClusterCenter = clusterCenters(largest)
  val smallestClusterCenter = clusterCenters(smallest)
  // Place both centers around the old largest center, offset in opposite directions by a
  // tiny per-coordinate perturbation so the two centers do not coincide.
  var j = 0
  while (j < dim) {
    val x = largestClusterCenter(j)
    // Perturbation is relative to |x|, but never smaller than 1e-14.
    val p = 1e-14 * math.max(math.abs(x), 1.0)
    // NOTE(review): relies on `toBreeze` exposing the vector's backing storage so that
    // these writes update the stored centers in place -- confirm for the vector types in use.
    largestClusterCenter.toBreeze(j) = x + p
    smallestClusterCenter.toBreeze(j) = x - p
    j += 1
  }
}