source: src/main/java/agents/anac/y2019/harddealer/math3/ml/clustering/DBSCANClusterer.java

Last change on this file was 204, checked in by Katsuhide Fujita, 5 years ago

Fixed errors of ANAC2019 agents

  • Property svn:executable set to *
File size: 8.3 KB
Line 
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17package agents.anac.y2019.harddealer.math3.ml.clustering;
18
19import java.util.ArrayList;
20import java.util.Collection;
21import java.util.HashMap;
22import java.util.HashSet;
23import java.util.List;
24import java.util.Map;
25import java.util.Set;
26
27import agents.anac.y2019.harddealer.math3.exception.NotPositiveException;
28import agents.anac.y2019.harddealer.math3.exception.NullArgumentException;
29import agents.anac.y2019.harddealer.math3.ml.distance.DistanceMeasure;
30import agents.anac.y2019.harddealer.math3.ml.distance.EuclideanDistance;
31import agents.anac.y2019.harddealer.math3.util.MathUtils;
32
33/**
34 * DBSCAN (density-based spatial clustering of applications with noise) algorithm.
35 * <p>
36 * The DBSCAN algorithm forms clusters based on the idea of density connectivity, i.e.
37 * a point p is density connected to another point q, if there exists a chain of
38 * points p<sub>i</sub>, with i = 1 .. n and p<sub>1</sub> = p and p<sub>n</sub> = q,
39 * such that each pair &lt;p<sub>i</sub>, p<sub>i+1</sub>&gt; is directly density-reachable.
40 * A point q is directly density-reachable from point p if it is in the &epsilon;-neighborhood
41 * of this point.
42 * <p>
43 * Any point that is not density-reachable from a formed cluster is treated as noise, and
44 * will thus not be present in the result.
45 * <p>
46 * The algorithm requires two parameters:
47 * <ul>
48 * <li>eps: the distance that defines the &epsilon;-neighborhood of a point
49 * <li>minPoints: the minimum number of density-connected points required to form a cluster
50 * </ul>
51 *
52 * @param <T> type of the points to cluster
53 * @see <a href="http://en.wikipedia.org/wiki/DBSCAN">DBSCAN (wikipedia)</a>
54 * @see <a href="http://www.dbs.ifi.lmu.de/Publikationen/Papers/KDD-96.final.frame.pdf">
55 * A Density-Based Algorithm for Discovering Clusters in Large Spatial Databases with Noise</a>
56 * @since 3.2
57 */
58public class DBSCANClusterer<T extends Clusterable> extends Clusterer<T> {
59
60 /** Maximum radius of the neighborhood to be considered. */
61 private final double eps;
62
63 /** Minimum number of points needed for a cluster. */
64 private final int minPts;
65
66 /** Status of a point during the clustering process. */
67 private enum PointStatus {
68 /** The point has is considered to be noise. */
69 NOISE,
70 /** The point is already part of a cluster. */
71 PART_OF_CLUSTER
72 }
73
74 /**
75 * Creates a new instance of a DBSCANClusterer.
76 * <p>
77 * The euclidean distance will be used as default distance measure.
78 *
79 * @param eps maximum radius of the neighborhood to be considered
80 * @param minPts minimum number of points needed for a cluster
81 * @throws NotPositiveException if {@code eps < 0.0} or {@code minPts < 0}
82 */
83 public DBSCANClusterer(final double eps, final int minPts)
84 throws NotPositiveException {
85 this(eps, minPts, new EuclideanDistance());
86 }
87
88 /**
89 * Creates a new instance of a DBSCANClusterer.
90 *
91 * @param eps maximum radius of the neighborhood to be considered
92 * @param minPts minimum number of points needed for a cluster
93 * @param measure the distance measure to use
94 * @throws NotPositiveException if {@code eps < 0.0} or {@code minPts < 0}
95 */
96 public DBSCANClusterer(final double eps, final int minPts, final DistanceMeasure measure)
97 throws NotPositiveException {
98 super(measure);
99
100 if (eps < 0.0d) {
101 throw new NotPositiveException(eps);
102 }
103 if (minPts < 0) {
104 throw new NotPositiveException(minPts);
105 }
106 this.eps = eps;
107 this.minPts = minPts;
108 }
109
110 /**
111 * Returns the maximum radius of the neighborhood to be considered.
112 * @return maximum radius of the neighborhood
113 */
114 public double getEps() {
115 return eps;
116 }
117
118 /**
119 * Returns the minimum number of points needed for a cluster.
120 * @return minimum number of points needed for a cluster
121 */
122 public int getMinPts() {
123 return minPts;
124 }
125
126 /**
127 * Performs DBSCAN cluster analysis.
128 *
129 * @param points the points to cluster
130 * @return the list of clusters
131 * @throws NullArgumentException if the data points are null
132 */
133 @Override
134 public List<Cluster<T>> cluster(final Collection<T> points) throws NullArgumentException {
135
136 // sanity checks
137 MathUtils.checkNotNull(points);
138
139 final List<Cluster<T>> clusters = new ArrayList<Cluster<T>>();
140 final Map<Clusterable, PointStatus> visited = new HashMap<Clusterable, PointStatus>();
141
142 for (final T point : points) {
143 if (visited.get(point) != null) {
144 continue;
145 }
146 final List<T> neighbors = getNeighbors(point, points);
147 if (neighbors.size() >= minPts) {
148 // DBSCAN does not care about center points
149 final Cluster<T> cluster = new Cluster<T>();
150 clusters.add(expandCluster(cluster, point, neighbors, points, visited));
151 } else {
152 visited.put(point, PointStatus.NOISE);
153 }
154 }
155
156 return clusters;
157 }
158
159 /**
160 * Expands the cluster to include density-reachable items.
161 *
162 * @param cluster Cluster to expand
163 * @param point Point to add to cluster
164 * @param neighbors List of neighbors
165 * @param points the data set
166 * @param visited the set of already visited points
167 * @return the expanded cluster
168 */
169 private Cluster<T> expandCluster(final Cluster<T> cluster,
170 final T point,
171 final List<T> neighbors,
172 final Collection<T> points,
173 final Map<Clusterable, PointStatus> visited) {
174 cluster.addPoint(point);
175 visited.put(point, PointStatus.PART_OF_CLUSTER);
176
177 List<T> seeds = new ArrayList<T>(neighbors);
178 int index = 0;
179 while (index < seeds.size()) {
180 final T current = seeds.get(index);
181 PointStatus pStatus = visited.get(current);
182 // only check non-visited points
183 if (pStatus == null) {
184 final List<T> currentNeighbors = getNeighbors(current, points);
185 if (currentNeighbors.size() >= minPts) {
186 seeds = merge(seeds, currentNeighbors);
187 }
188 }
189
190 if (pStatus != PointStatus.PART_OF_CLUSTER) {
191 visited.put(current, PointStatus.PART_OF_CLUSTER);
192 cluster.addPoint(current);
193 }
194
195 index++;
196 }
197 return cluster;
198 }
199
200 /**
201 * Returns a list of density-reachable neighbors of a {@code point}.
202 *
203 * @param point the point to look for
204 * @param points possible neighbors
205 * @return the List of neighbors
206 */
207 private List<T> getNeighbors(final T point, final Collection<T> points) {
208 final List<T> neighbors = new ArrayList<T>();
209 for (final T neighbor : points) {
210 if (point != neighbor && distance(neighbor, point) <= eps) {
211 neighbors.add(neighbor);
212 }
213 }
214 return neighbors;
215 }
216
217 /**
218 * Merges two lists together.
219 *
220 * @param one first list
221 * @param two second list
222 * @return merged lists
223 */
224 private List<T> merge(final List<T> one, final List<T> two) {
225 final Set<T> oneSet = new HashSet<T>(one);
226 for (T item : two) {
227 if (!oneSet.contains(item)) {
228 one.add(item);
229 }
230 }
231 return one;
232 }
233}
Note: See TracBrowser for help on using the repository browser.