网站建设的源代码,网站建设公司的业务范围,公司网页设计文案,动漫做3d游戏下载网站聚类 聚类1 解决什么问题KMean聚类Kmedoids聚类2 java实现计算二维点的聚类案例KMean实现输出 K-medoids实现输出 聚类
1 解决什么问题 假设二维坐标轴上有一些点，现在让你把这些点分个类。于是对我们来说，这个分类似乎就是把距离相近的点划到一类中去。… 聚类 聚类1 解决什么问题KMean聚类Kmedoids聚类2 java实现计算二维点的聚类案例KMean实现输出 K-medoids实现输出 聚类
1 解决什么问题
假设二维坐标轴上有一些点，现在让你把这些点分个类。于是对我们来说，这个分类似乎就是把距离相近的点划到一类中去。
KMean聚类
1. 假设要划分 N 类，坐标点共 M 个。
2. 从 M 个坐标点中随机选取 N 个点，作为每个分类的中心点，这 N 个点的列表记录为 centerPointList。
3. 遍历 M 个坐标点中的每个点：计算当前点和 N 个中心点的距离 dis1、dis2 ... disN；从 dis1、dis2 ... disN 中找到最小距离的下标，记录为 cluster，那么这个 cluster 就是这次遍历时当前点归属的分类。
4. 步骤 3 结束后，每个点都会归属到某个分类。计算每个分类中点集合的均值，把这个均值作为新的中心点，替换掉 centerPointList。
5. 重复 3、4，直到重复次数大于约定次数，或者中心点变化较小。此时就可以知道每个点归属的分类。
Kmedoids聚类
1. 假设要划分 N 类，坐标点共 M 个。
2. 从 M 个坐标点中随机选取 N 个点，作为每个分类的中心点，这 N 个点的列表记录为 centerPointList。
3. 遍历 M 个坐标点中的每个点：计算当前点和 N 个中心点的距离 dis1、dis2 ... disN；从 dis1、dis2 ... disN 中找到最小距离的下标，记录为 cluster，那么这个 cluster 就是这次遍历时当前点归属的分类。
4. 步骤 3 结束后，每个点都会归属到某个分类。对每个分类，依次把分类中的每个点当作中心点，计算该分类中其他点到它的距离和；选择距离和最小时对应的点作为当前分类的新中心点，替换掉 centerPointList。
5. 重复 3、4，直到重复次数大于约定次数，或者中心点变化较小。此时就可以知道每个点归属的分类。
2 java实现计算二维点的聚类案例
KMean实现
package com.forezp.kmean;import com.google.common.collect.Lists;
import com.google.common.collect.Maps;import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Random;/*** author yuegang*/
/**
 * Minimal k-means clustering for integer 2-D points.
 *
 * <p>The initial centers are distinct, randomly chosen input points. Each call to
 * {@link #calc()} performs one iteration: every point is assigned to its nearest
 * center, then every center is replaced by the (integer-truncated) mean of the points
 * assigned to it. The result of each iteration is printed to standard output.
 */
public class KMeanCluster {

    /** A mutable point in two-dimensional space with integer coordinates. */
    public static class Point {
        Integer x = 0;
        Integer y = 0;

        public Point() {
        }

        public Point(Integer x, Integer y) {
            this.x = x;
            this.y = y;
        }

        /** Adds {@code x} to this point's x coordinate. */
        public void incX(Integer x) {
            this.x += x;
        }

        /** Adds {@code y} to this point's y coordinate. */
        public void incY(int y) {
            this.y += y;
        }

        public Integer getX() {
            return x;
        }

        public void setX(Integer x) {
            this.x = x;
        }

        public Integer getY() {
            return y;
        }

        public void setY(Integer y) {
            this.y = y;
        }

        @Override
        public String toString() {
            return "(" + x + ", " + y + ")";
        }
    }

    /** The input points; a point's list position is its identity. */
    private final List<Point> pointIndexDataMap;
    /** Center points per iteration; element 0 holds the random initial centers. */
    private final List<List<Point>> centerPointList = new ArrayList<>();
    /** Cluster label of each point, parallel to {@link #pointIndexDataMap}. */
    private final List<Integer> pointClusterMap = new ArrayList<>();
    /** Number of iterations performed so far. */
    private int index = 0;
    /** Number of clusters. */
    private final int clusterCount;

    /**
     * Creates a clusterer and picks the random initial centers.
     *
     * @param pointIndexDataMap points to cluster
     * @param clusterCount number of clusters, between 1 and the number of points
     * @throws IllegalArgumentException if {@code clusterCount} is outside that range;
     *     a larger value could never be satisfied with distinct initial centers
     */
    public KMeanCluster(List<Point> pointIndexDataMap, int clusterCount) {
        if (clusterCount < 1 || clusterCount > pointIndexDataMap.size()) {
            throw new IllegalArgumentException(
                    "clusterCount must be between 1 and " + pointIndexDataMap.size()
                            + " but was " + clusterCount);
        }
        this.pointIndexDataMap = pointIndexDataMap;
        this.clusterCount = clusterCount;
        initCenterPoint();
        initCluster(pointIndexDataMap);
    }

    /** Gives every point the placeholder label -1 until the first iteration runs. */
    private void initCluster(List<Point> pointIndexDataMap) {
        for (int j = 0; j < pointIndexDataMap.size(); j++) {
            pointClusterMap.add(-1);
        }
    }

    /** Chooses {@code clusterCount} distinct input points as the initial centers. */
    private void initCenterPoint() {
        List<Integer> chosen = new ArrayList<>(clusterCount);
        Random random = new Random();
        // Terminates because the constructor guarantees clusterCount <= number of points.
        while (chosen.size() < clusterCount) {
            int candidate = random.nextInt(pointIndexDataMap.size());
            if (!chosen.contains(candidate)) {
                chosen.add(candidate);
            }
        }
        List<Point> centers = new ArrayList<>(clusterCount);
        for (int pointIndex : chosen) {
            centers.add(pointIndexDataMap.get(pointIndex));
        }
        centerPointList.add(centers);
    }

    /**
     * Runs one iteration (assignment step followed by center update) and prints the
     * iteration number, the current grouping and the new centers.
     */
    public void calc() {
        List<Point> centers = centerPointList.get(index);

        // Assignment step: label each point with the position of its nearest center.
        for (int i = 0; i < pointIndexDataMap.size(); i++) {
            pointClusterMap.set(i, nearestCenter(pointIndexDataMap.get(i), centers));
        }

        // Update step: accumulate per-cluster coordinate sums in long to avoid overflow.
        int k = centers.size();
        long[] sumX = new long[k];
        long[] sumY = new long[k];
        int[] count = new int[k];
        for (int i = 0; i < pointClusterMap.size(); i++) {
            int cluster = pointClusterMap.get(i);
            Point member = pointIndexDataMap.get(i);
            sumX[cluster] += member.getX();
            sumY[cluster] += member.getY();
            count[cluster]++;
        }
        List<Point> curCenterPointList = new ArrayList<>(k);
        for (int cluster = 0; cluster < k; cluster++) {
            if (count[cluster] == 0) {
                // An empty cluster keeps its previous center so that cluster labels
                // stay aligned with positions in the center list.
                curCenterPointList.add(centers.get(cluster));
            } else {
                curCenterPointList.add(new Point(
                        (int) (sumX[cluster] / count[cluster]),
                        (int) (sumY[cluster] / count[cluster])));
            }
        }
        index++;

        // Group the points by label (sorted by label) for display.
        Map<Integer, List<Point>> curClassfiyMap = new java.util.TreeMap<>();
        for (int i = 0; i < pointClusterMap.size(); i++) {
            curClassfiyMap
                    .computeIfAbsent(pointClusterMap.get(i), key -> new ArrayList<>())
                    .add(pointIndexDataMap.get(i));
        }
        centerPointList.add(curCenterPointList);
        show(curClassfiyMap, curCenterPointList);
    }

    /**
     * Returns the position in {@code centers} of the center closest to {@code point}.
     * Squared Euclidean distance is compared (same ordering as the true distance);
     * ties go to the earliest center.
     */
    private static int nearestCenter(Point point, List<Point> centers) {
        int best = 0;
        double bestDistance = Double.MAX_VALUE;
        for (int c = 0; c < centers.size(); c++) {
            Point center = centers.get(c);
            double dx = point.getX().doubleValue() - center.getX().doubleValue();
            double dy = point.getY().doubleValue() - center.getY().doubleValue();
            double distance = dx * dx + dy * dy;
            if (distance < bestDistance) {
                bestDistance = distance;
                best = c;
            }
        }
        return best;
    }

    /** Returns a copy of the current cluster label of every point (-1 before the first iteration). */
    public List<Integer> getAssignments() {
        return new ArrayList<>(pointClusterMap);
    }

    /** Returns a copy of the list of centers produced by the latest iteration. */
    public List<Point> getCurrentCenters() {
        return new ArrayList<>(centerPointList.get(index));
    }

    /** Prints the iteration number, the grouping and the centers. */
    private void show(Map<Integer, List<Point>> curClassfiyMap, List<Point> curCenterPointList) {
        System.out.println("计算次数" + index);
        System.out.println("当前分类" + curClassfiyMap);
        System.out.println("当前中心点" + curCenterPointList);
    }

    public static void main(String[] args) {
        List<Point> pointIndexDataMap = new ArrayList<>();
        pointIndexDataMap.add(new Point(100, 100));
        pointIndexDataMap.add(new Point(1, 1));
        pointIndexDataMap.add(new Point(110, 120));
        pointIndexDataMap.add(new Point(10, 20));
        pointIndexDataMap.add(new Point(130, 160));
        KMeanCluster oneCalc = new KMeanCluster(pointIndexDataMap, 2);
        for (int i = 0; i < 2; i++) {
            oneCalc.calc();
        }
    }
}
输出
计算次数1
当前分类{0=[(110, 120), (130, 160)], 1=[(100, 100), (1, 1), (10, 20)]}
当前中心点[(120, 140), (37, 40)]
计算次数2
当前分类{0=[(100, 100), (110, 120), (130, 160)], 1=[(1, 1), (10, 20)]}
当前中心点[(113, 126), (5, 10)]
K-medoids实现
package com.forezp.kmean;/*** 表示二维空间中的点*/
/**
 * A mutable point in two-dimensional space with integer coordinates.
 */
public class Point {
    Integer x = 0;
    Integer y = 0;

    /**
     * Returns the squared Euclidean distance between this point and {@code p}.
     *
     * <p>The coordinate differences are computed in {@code long}, so they cannot wrap
     * around the way an {@code int} subtraction would for far-apart coordinates.
     *
     * @param p the other point
     * @return {@code dx * dx + dy * dy}
     */
    public long dis(Point p) {
        long dx = getX().longValue() - p.getX().longValue();
        long dy = getY().longValue() - p.getY().longValue();
        return dx * dx + dy * dy;
    }

    public Point() {
    }

    public Point(Integer x, Integer y) {
        this.x = x;
        this.y = y;
    }

    /** Adds {@code x} to this point's x coordinate. */
    public void incX(Integer x) {
        this.x += x;
    }

    /** Adds {@code y} to this point's y coordinate. */
    public void incY(int y) {
        this.y += y;
    }

    public Integer getX() {
        return x;
    }

    public void setX(Integer x) {
        this.x = x;
    }

    public Integer getY() {
        return y;
    }

    public void setY(Integer y) {
        this.y = y;
    }

    @Override
    public String toString() {
        return "(" + x + ", " + y + ")";
    }
}
package com.forezp.kmean;import com.google.common.collect.Lists;
import com.google.common.collect.Maps;import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.stream.Collectors;
import java.util.stream.IntStream;/*** author yuegang*/
public class KModiedCluster {/*** 表示二维空间中的点* 下标是点的顺序*/private final ListPoint pointIndexDataMap;private final ListListInteger centerPointList Lists.newArrayList(); // 记录每一个分类的中心点下标private final ListInteger pointClusterMap Lists.newArrayList(); // 点所属的分类private final ListListLong distanceMap Lists.newArrayList();private int index 0; // 计算次数private int clusterCount 0; // 分类个数public KModiedCluster(ListPoint pointIndexDataMap, int clusterCount) {this.pointIndexDataMap pointIndexDataMap;this.clusterCount clusterCount;index 0;initCenterPoint();initCluster(pointIndexDataMap);initDistanceMap();System.out.println(点集合: pointIndexDataMap);System.out.println(初始点中心: centerPointList.get(index).stream().map(pointIndexDataMap::get).collect(Collectors.toList()));}private void initDistanceMap() {int size pointIndexDataMap.size();for (int i 0; i size; i) {ListLong collect IntStream.range(0, size).boxed().map(e - 0L).collect(Collectors.toList());distanceMap.add(collect);}for (int i 0; i size; i) {for (int j i; j size; j) {long dis pointIndexDataMap.get(i).dis(pointIndexDataMap.get(j));distanceMap.get(i).set(j, dis);distanceMap.get(j).set(i, dis);}}}private void initCluster(ListPoint pointIndexDataMap) {// 初始化每个点的分类设置一个没有意义的值for (int j 0; j pointIndexDataMap.size(); j) {pointClusterMap.add(-1);}}private void initCenterPoint() {ListInteger yList Lists.newArrayListWithExpectedSize(clusterCount);Random random new Random();for (int i 0; i clusterCount; i) { // 注意这个不能相同int i1 random.nextInt(pointIndexDataMap.size());while (yList.contains(i1)) {i1 random.nextInt(pointIndexDataMap.size());}yList.add(i1);}centerPointList.add(yList);}public void calc() {ListInteger pointIndices centerPointList.get(index);for (int i 0; i pointIndexDataMap.size(); i) {// 计算该点和那个簇最近把把归属到这个簇中。int cluster 0;double min Double.MAX_VALUE;for (int inc 0; inc pointIndices.size(); inc) {Long dis distanceMap.get(i).get(inc);double sqrt Math.sqrt(dis);if (sqrt min) {min sqrt;cluster inc;}}pointClusterMap.set(i, cluster);}// 
计算每个族的中心点MapInteger, ListInteger indexMap Maps.newTreeMap(); // 每个分类中的下标集合for (int i 0; i pointClusterMap.size(); i) {Integer cluster pointClusterMap.get(i);ListInteger integers indexMap.computeIfAbsent(cluster, k - Lists.newArrayList());integers.add(i);}MapInteger, Integer map Maps.newTreeMap();for (Map.EntryInteger, ListInteger integerListEntry : indexMap.entrySet()) {Integer cluster integerListEntry.getKey();ListInteger indexList integerListEntry.getValue();// 计算每个点是否可以作为中心点int newCluster indexList.get(0);long sumDisHistory Long.MAX_VALUE;for (int i 0; i indexList.size(); i) {long sumDis 0;for (int j 0; j indexList.size(); j) {if (i j) {continue;}sumDis distanceMap.get(j).get(i);}if (sumDis sumDisHistory) {newCluster indexList.get(i);sumDisHistory sumDis;}}map.put(cluster, newCluster); // 当前族的新的中心点}MapInteger, ListPoint curClassfiyMap getIntegerListMap();ListInteger curCenterPointList new ArrayList(map.values());centerPointList.add(curCenterPointList);index;show(curClassfiyMap, curCenterPointList);}private MapInteger, ListPoint getIntegerListMap() {MapInteger, ListPoint curClassfiyMap Maps.newTreeMap();for (int i 0; i pointClusterMap.size(); i) {Point point pointIndexDataMap.get(i);Integer classfly pointClusterMap.get(i);ListPoint points curClassfiyMap.computeIfAbsent(classfly, k - Lists.newArrayList());points.add(point);}return curClassfiyMap;}private void show(MapInteger, ListPoint curClassfiyMap, ListInteger curCenterPointList) {System.out.println(计算次数 index);System.out.println(当前分类 curClassfiyMap);System.out.println(当前中心点 curCenterPointList.stream().map(pointIndexDataMap::get).collect(Collectors.toList()));}public static void main(String[] args) {Point point new Point(100, 100);Point point1 new Point(1, 1);Point point2 new Point(110, 120);Point point3 new Point(10, 20);Point point4 new Point(130, 160);ListPoint pointIndexDataMap Lists.newArrayList(point, point1, point2, point3, point4,new Point(100, 160),new Point(9, 160),new Point(50, 20));KModiedCluster 
oneCalc new KModiedCluster(pointIndexDataMap, 4);for (int i 0; i 2; i) {oneCalc.calc();}}
}
输出
点集合: [(100, 100), (1, 1), (110, 120), (10, 20), (130, 160), (100, 160), (9, 160), (50, 20)]
初始点中心: [(100, 100), (130, 160), (100, 160), (50, 20)]
计算次数1
当前分类{0=[(100, 100)], 1=[(1, 1)], 2=[(110, 120), (130, 160), (100, 160), (9, 160)], 3=[(10, 20), (50, 20)]}
当前中心点[(100, 100), (1, 1), (110, 120), (10, 20)]
计算次数2
当前分类{0=[(100, 100)], 1=[(1, 1)], 2=[(110, 120), (130, 160), (100, 160), (9, 160)], 3=[(10, 20), (50, 20)]}
当前中心点[(100, 100), (1, 1), (110, 120), (10, 20)]