source: src/main/java/agents/anac/y2019/harddealer/math3/stat/clustering/DBSCANClusterer.java

Last change on this file was 204, checked in by Katsuhide Fujita, 5 years ago

Fixed errors of ANAC2019 agents

  • Property svn:executable set to *
File size: 8.1 KB
Line 
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17package agents.anac.y2019.harddealer.math3.stat.clustering;
18
19import java.util.ArrayList;
20import java.util.Collection;
21import java.util.HashMap;
22import java.util.HashSet;
23import java.util.List;
24import java.util.Map;
25import java.util.Set;
26
27import agents.anac.y2019.harddealer.math3.exception.NotPositiveException;
28import agents.anac.y2019.harddealer.math3.exception.NullArgumentException;
29import agents.anac.y2019.harddealer.math3.util.MathUtils;
30
31/**
32 * DBSCAN (density-based spatial clustering of applications with noise) algorithm.
33 * <p>
34 * The DBSCAN algorithm forms clusters based on the idea of density connectivity, i.e.
35 * a point p is density connected to another point q, if there exists a chain of
36 * points p<sub>i</sub>, with i = 1 .. n and p<sub>1</sub> = p and p<sub>n</sub> = q,
37 * such that each pair &lt;p<sub>i</sub>, p<sub>i+1</sub>&gt; is directly density-reachable.
38 * A point q is directly density-reachable from point p if it is in the &epsilon;-neighborhood
39 * of this point.
40 * <p>
41 * Any point that is not density-reachable from a formed cluster is treated as noise, and
42 * will thus not be present in the result.
43 * <p>
44 * The algorithm requires two parameters:
45 * <ul>
46 * <li>eps: the distance that defines the &epsilon;-neighborhood of a point
47 * <li>minPoints: the minimum number of density-connected points required to form a cluster
48 * </ul>
49 * <p>
50 * <b>Note:</b> as DBSCAN is not a centroid-based clustering algorithm, the resulting
51 * {@link Cluster} objects will have no defined center, i.e. {@link Cluster#getCenter()} will
52 * return {@code null}.
53 *
54 * @param <T> type of the points to cluster
55 * @see <a href="http://en.wikipedia.org/wiki/DBSCAN">DBSCAN (wikipedia)</a>
56 * @see <a href="http://www.dbs.ifi.lmu.de/Publikationen/Papers/KDD-96.final.frame.pdf">
57 * A Density-Based Algorithm for Discovering Clusters in Large Spatial Databases with Noise</a>
58 * @since 3.1
59 * @deprecated As of 3.2 (to be removed in 4.0),
60 * use {@link agents.anac.y2019.harddealer.math3.ml.clustering.DBSCANClusterer} instead
61 */
62@Deprecated
63public class DBSCANClusterer<T extends Clusterable<T>> {
64
65 /** Maximum radius of the neighborhood to be considered. */
66 private final double eps;
67
68 /** Minimum number of points needed for a cluster. */
69 private final int minPts;
70
71 /** Status of a point during the clustering process. */
72 private enum PointStatus {
73 /** The point has is considered to be noise. */
74 NOISE,
75 /** The point is already part of a cluster. */
76 PART_OF_CLUSTER
77 }
78
79 /**
80 * Creates a new instance of a DBSCANClusterer.
81 *
82 * @param eps maximum radius of the neighborhood to be considered
83 * @param minPts minimum number of points needed for a cluster
84 * @throws NotPositiveException if {@code eps < 0.0} or {@code minPts < 0}
85 */
86 public DBSCANClusterer(final double eps, final int minPts)
87 throws NotPositiveException {
88 if (eps < 0.0d) {
89 throw new NotPositiveException(eps);
90 }
91 if (minPts < 0) {
92 throw new NotPositiveException(minPts);
93 }
94 this.eps = eps;
95 this.minPts = minPts;
96 }
97
98 /**
99 * Returns the maximum radius of the neighborhood to be considered.
100 *
101 * @return maximum radius of the neighborhood
102 */
103 public double getEps() {
104 return eps;
105 }
106
107 /**
108 * Returns the minimum number of points needed for a cluster.
109 *
110 * @return minimum number of points needed for a cluster
111 */
112 public int getMinPts() {
113 return minPts;
114 }
115
116 /**
117 * Performs DBSCAN cluster analysis.
118 * <p>
119 * <b>Note:</b> as DBSCAN is not a centroid-based clustering algorithm, the resulting
120 * {@link Cluster} objects will have no defined center, i.e. {@link Cluster#getCenter()} will
121 * return {@code null}.
122 *
123 * @param points the points to cluster
124 * @return the list of clusters
125 * @throws NullArgumentException if the data points are null
126 */
127 public List<Cluster<T>> cluster(final Collection<T> points) throws NullArgumentException {
128
129 // sanity checks
130 MathUtils.checkNotNull(points);
131
132 final List<Cluster<T>> clusters = new ArrayList<Cluster<T>>();
133 final Map<Clusterable<T>, PointStatus> visited = new HashMap<Clusterable<T>, PointStatus>();
134
135 for (final T point : points) {
136 if (visited.get(point) != null) {
137 continue;
138 }
139 final List<T> neighbors = getNeighbors(point, points);
140 if (neighbors.size() >= minPts) {
141 // DBSCAN does not care about center points
142 final Cluster<T> cluster = new Cluster<T>(null);
143 clusters.add(expandCluster(cluster, point, neighbors, points, visited));
144 } else {
145 visited.put(point, PointStatus.NOISE);
146 }
147 }
148
149 return clusters;
150 }
151
152 /**
153 * Expands the cluster to include density-reachable items.
154 *
155 * @param cluster Cluster to expand
156 * @param point Point to add to cluster
157 * @param neighbors List of neighbors
158 * @param points the data set
159 * @param visited the set of already visited points
160 * @return the expanded cluster
161 */
162 private Cluster<T> expandCluster(final Cluster<T> cluster,
163 final T point,
164 final List<T> neighbors,
165 final Collection<T> points,
166 final Map<Clusterable<T>, PointStatus> visited) {
167 cluster.addPoint(point);
168 visited.put(point, PointStatus.PART_OF_CLUSTER);
169
170 List<T> seeds = new ArrayList<T>(neighbors);
171 int index = 0;
172 while (index < seeds.size()) {
173 final T current = seeds.get(index);
174 PointStatus pStatus = visited.get(current);
175 // only check non-visited points
176 if (pStatus == null) {
177 final List<T> currentNeighbors = getNeighbors(current, points);
178 if (currentNeighbors.size() >= minPts) {
179 seeds = merge(seeds, currentNeighbors);
180 }
181 }
182
183 if (pStatus != PointStatus.PART_OF_CLUSTER) {
184 visited.put(current, PointStatus.PART_OF_CLUSTER);
185 cluster.addPoint(current);
186 }
187
188 index++;
189 }
190 return cluster;
191 }
192
193 /**
194 * Returns a list of density-reachable neighbors of a {@code point}.
195 *
196 * @param point the point to look for
197 * @param points possible neighbors
198 * @return the List of neighbors
199 */
200 private List<T> getNeighbors(final T point, final Collection<T> points) {
201 final List<T> neighbors = new ArrayList<T>();
202 for (final T neighbor : points) {
203 if (point != neighbor && neighbor.distanceFrom(point) <= eps) {
204 neighbors.add(neighbor);
205 }
206 }
207 return neighbors;
208 }
209
210 /**
211 * Merges two lists together.
212 *
213 * @param one first list
214 * @param two second list
215 * @return merged lists
216 */
217 private List<T> merge(final List<T> one, final List<T> two) {
218 final Set<T> oneSet = new HashSet<T>(one);
219 for (T item : two) {
220 if (!oneSet.contains(item)) {
221 one.add(item);
222 }
223 }
224 return one;
225 }
226}
Note: See TracBrowser for help on using the repository browser.