source: src/main/java/agents/org/apache/commons/math/random/EmpiricalDistributionImpl.java

Last change on this file was 1, checked in by Wouter Pasman, 7 years ago

Initial import : Genius 9.0.0

File size: 16.0 KB
Line 
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18package agents.org.apache.commons.math.random;
19
20import java.io.BufferedReader;
21import java.io.File;
22import java.io.FileReader;
23import java.io.IOException;
24import java.io.InputStreamReader;
25import java.io.Serializable;
26import java.net.URL;
27import java.util.ArrayList;
28import java.util.List;
29
30import agents.org.apache.commons.math.MathRuntimeException;
31import agents.org.apache.commons.math.exception.util.LocalizedFormats;
32import agents.org.apache.commons.math.stat.descriptive.StatisticalSummary;
33import agents.org.apache.commons.math.stat.descriptive.SummaryStatistics;
34import agents.org.apache.commons.math.util.FastMath;
35
36/**
37 * Implements <code>EmpiricalDistribution</code> interface. This implementation
38 * uses what amounts to the
39 * <a href="http://nedwww.ipac.caltech.edu/level5/March02/Silverman/Silver2_6.html">
40 * Variable Kernel Method</a> with Gaussian smoothing:<p>
41 * <strong>Digesting the input file</strong>
42 * <ol><li>Pass the file once to compute min and max.</li>
43 * <li>Divide the range from min-max into <code>binCount</code> "bins."</li>
44 * <li>Pass the data file again, computing bin counts and univariate
45 * statistics (mean, std dev.) for each of the bins </li>
46 * <li>Divide the interval (0,1) into subintervals associated with the bins,
47 * with the length of a bin's subinterval proportional to its count.</li></ol>
48 * <strong>Generating random values from the distribution</strong><ol>
49 * <li>Generate a uniformly distributed value in (0,1) </li>
50 * <li>Select the subinterval to which the value belongs.
51 * <li>Generate a random Gaussian value with mean = mean of the associated
52 * bin and std dev = std dev of associated bin.</li></ol></p><p>
53 *<strong>USAGE NOTES:</strong><ul>
54 *<li>The <code>binCount</code> is set by default to 1000. A good rule of thumb
55 * is to set the bin count to approximately the length of the input file divided
56 * by 10. </li>
57 *<li>The input file <i>must</i> be a plain text file containing one valid numeric
58 * entry per line.</li>
59 * </ul></p>
60 *
61 * @version $Revision: 1003886 $ $Date: 2010-10-02 23:04:44 +0200 (sam. 02 oct. 2010) $
62 */
63public class EmpiricalDistributionImpl implements Serializable, EmpiricalDistribution {
64
65 /** Serializable version identifier */
66 private static final long serialVersionUID = 5729073523949762654L;
67
68 /** List of SummaryStatistics objects characterizing the bins */
69 private final List<SummaryStatistics> binStats;
70
71 /** Sample statistics */
72 private SummaryStatistics sampleStats = null;
73
74 /** Max loaded value */
75 private double max = Double.NEGATIVE_INFINITY;
76
77 /** Min loaded value */
78 private double min = Double.POSITIVE_INFINITY;
79
80 /** Grid size */
81 private double delta = 0d;
82
83 /** number of bins */
84 private final int binCount;
85
86 /** is the distribution loaded? */
87 private boolean loaded = false;
88
89 /** upper bounds of subintervals in (0,1) "belonging" to the bins */
90 private double[] upperBounds = null;
91
92 /** RandomData instance to use in repeated calls to getNext() */
93 private final RandomData randomData = new RandomDataImpl();
94
95 /**
96 * Creates a new EmpiricalDistribution with the default bin count.
97 */
98 public EmpiricalDistributionImpl() {
99 binCount = 1000;
100 binStats = new ArrayList<SummaryStatistics>();
101 }
102
103 /**
104 * Creates a new EmpiricalDistribution with the specified bin count.
105 *
106 * @param binCount number of bins
107 */
108 public EmpiricalDistributionImpl(int binCount) {
109 this.binCount = binCount;
110 binStats = new ArrayList<SummaryStatistics>();
111 }
112
113 /**
114 * Computes the empirical distribution from the provided
115 * array of numbers.
116 *
117 * @param in the input data array
118 */
119 public void load(double[] in) {
120 DataAdapter da = new ArrayDataAdapter(in);
121 try {
122 da.computeStats();
123 fillBinStats(in);
124 } catch (IOException e) {
125 throw new MathRuntimeException(e);
126 }
127 loaded = true;
128
129 }
130
131 /**
132 * Computes the empirical distribution using data read from a URL.
133 * @param url url of the input file
134 *
135 * @throws IOException if an IO error occurs
136 */
137 public void load(URL url) throws IOException {
138 BufferedReader in =
139 new BufferedReader(new InputStreamReader(url.openStream()));
140 try {
141 DataAdapter da = new StreamDataAdapter(in);
142 da.computeStats();
143 if (sampleStats.getN() == 0) {
144 throw MathRuntimeException.createEOFException(LocalizedFormats.URL_CONTAINS_NO_DATA,
145 url);
146 }
147 in = new BufferedReader(new InputStreamReader(url.openStream()));
148 fillBinStats(in);
149 loaded = true;
150 } finally {
151 try {
152 in.close();
153 } catch (IOException ex) {
154 // ignore
155 }
156 }
157 }
158
159 /**
160 * Computes the empirical distribution from the input file.
161 *
162 * @param file the input file
163 * @throws IOException if an IO error occurs
164 */
165 public void load(File file) throws IOException {
166 BufferedReader in = new BufferedReader(new FileReader(file));
167 try {
168 DataAdapter da = new StreamDataAdapter(in);
169 da.computeStats();
170 in = new BufferedReader(new FileReader(file));
171 fillBinStats(in);
172 loaded = true;
173 } finally {
174 try {
175 in.close();
176 } catch (IOException ex) {
177 // ignore
178 }
179 }
180 }
181
182 /**
183 * Provides methods for computing <code>sampleStats</code> and
184 * <code>beanStats</code> abstracting the source of data.
185 */
186 private abstract class DataAdapter{
187
188 /**
189 * Compute bin stats.
190 *
191 * @throws IOException if an error occurs computing bin stats
192 */
193 public abstract void computeBinStats() throws IOException;
194
195 /**
196 * Compute sample statistics.
197 *
198 * @throws IOException if an error occurs computing sample stats
199 */
200 public abstract void computeStats() throws IOException;
201
202 }
203
204 /**
205 * Factory of <code>DataAdapter</code> objects. For every supported source
206 * of data (array of doubles, file, etc.) an instance of the proper object
207 * is returned.
208 */
209 private class DataAdapterFactory{
210 /**
211 * Creates a DataAdapter from a data object
212 *
213 * @param in object providing access to the data
214 * @return DataAdapter instance
215 */
216 public DataAdapter getAdapter(Object in) {
217 if (in instanceof BufferedReader) {
218 BufferedReader inputStream = (BufferedReader) in;
219 return new StreamDataAdapter(inputStream);
220 } else if (in instanceof double[]) {
221 double[] inputArray = (double[]) in;
222 return new ArrayDataAdapter(inputArray);
223 } else {
224 throw MathRuntimeException.createIllegalArgumentException(
225 LocalizedFormats.INPUT_DATA_FROM_UNSUPPORTED_DATASOURCE,
226 in.getClass().getName(),
227 BufferedReader.class.getName(), double[].class.getName());
228 }
229 }
230 }
231 /**
232 * <code>DataAdapter</code> for data provided through some input stream
233 */
234 private class StreamDataAdapter extends DataAdapter{
235
236 /** Input stream providing access to the data */
237 private BufferedReader inputStream;
238
239 /**
240 * Create a StreamDataAdapter from a BufferedReader
241 *
242 * @param in BufferedReader input stream
243 */
244 public StreamDataAdapter(BufferedReader in){
245 super();
246 inputStream = in;
247 }
248
249 /** {@inheritDoc} */
250 @Override
251 public void computeBinStats() throws IOException {
252 String str = null;
253 double val = 0.0d;
254 while ((str = inputStream.readLine()) != null) {
255 val = Double.parseDouble(str);
256 SummaryStatistics stats = binStats.get(findBin(val));
257 stats.addValue(val);
258 }
259
260 inputStream.close();
261 inputStream = null;
262 }
263
264 /** {@inheritDoc} */
265 @Override
266 public void computeStats() throws IOException {
267 String str = null;
268 double val = 0.0;
269 sampleStats = new SummaryStatistics();
270 while ((str = inputStream.readLine()) != null) {
271 val = Double.valueOf(str).doubleValue();
272 sampleStats.addValue(val);
273 }
274 inputStream.close();
275 inputStream = null;
276 }
277 }
278
279 /**
280 * <code>DataAdapter</code> for data provided as array of doubles.
281 */
282 private class ArrayDataAdapter extends DataAdapter {
283
284 /** Array of input data values */
285 private double[] inputArray;
286
287 /**
288 * Construct an ArrayDataAdapter from a double[] array
289 *
290 * @param in double[] array holding the data
291 */
292 public ArrayDataAdapter(double[] in){
293 super();
294 inputArray = in;
295 }
296
297 /** {@inheritDoc} */
298 @Override
299 public void computeStats() throws IOException {
300 sampleStats = new SummaryStatistics();
301 for (int i = 0; i < inputArray.length; i++) {
302 sampleStats.addValue(inputArray[i]);
303 }
304 }
305
306 /** {@inheritDoc} */
307 @Override
308 public void computeBinStats() throws IOException {
309 for (int i = 0; i < inputArray.length; i++) {
310 SummaryStatistics stats =
311 binStats.get(findBin(inputArray[i]));
312 stats.addValue(inputArray[i]);
313 }
314 }
315 }
316
317 /**
318 * Fills binStats array (second pass through data file).
319 *
320 * @param in object providing access to the data
321 * @throws IOException if an IO error occurs
322 */
323 private void fillBinStats(Object in) throws IOException {
324 // Set up grid
325 min = sampleStats.getMin();
326 max = sampleStats.getMax();
327 delta = (max - min)/(Double.valueOf(binCount)).doubleValue();
328
329 // Initialize binStats ArrayList
330 if (!binStats.isEmpty()) {
331 binStats.clear();
332 }
333 for (int i = 0; i < binCount; i++) {
334 SummaryStatistics stats = new SummaryStatistics();
335 binStats.add(i,stats);
336 }
337
338 // Filling data in binStats Array
339 DataAdapterFactory aFactory = new DataAdapterFactory();
340 DataAdapter da = aFactory.getAdapter(in);
341 da.computeBinStats();
342
343 // Assign upperBounds based on bin counts
344 upperBounds = new double[binCount];
345 upperBounds[0] =
346 ((double) binStats.get(0).getN()) / (double) sampleStats.getN();
347 for (int i = 1; i < binCount-1; i++) {
348 upperBounds[i] = upperBounds[i-1] +
349 ((double) binStats.get(i).getN()) / (double) sampleStats.getN();
350 }
351 upperBounds[binCount-1] = 1.0d;
352 }
353
354 /**
355 * Returns the index of the bin to which the given value belongs
356 *
357 * @param value the value whose bin we are trying to find
358 * @return the index of the bin containing the value
359 */
360 private int findBin(double value) {
361 return FastMath.min(
362 FastMath.max((int) FastMath.ceil((value- min) / delta) - 1, 0),
363 binCount - 1);
364 }
365
366 /**
367 * Generates a random value from this distribution.
368 *
369 * @return the random value.
370 * @throws IllegalStateException if the distribution has not been loaded
371 */
372 public double getNextValue() throws IllegalStateException {
373
374 if (!loaded) {
375 throw MathRuntimeException.createIllegalStateException(LocalizedFormats.DISTRIBUTION_NOT_LOADED);
376 }
377
378 // Start with a uniformly distributed random number in (0,1)
379 double x = FastMath.random();
380
381 // Use this to select the bin and generate a Gaussian within the bin
382 for (int i = 0; i < binCount; i++) {
383 if (x <= upperBounds[i]) {
384 SummaryStatistics stats = binStats.get(i);
385 if (stats.getN() > 0) {
386 if (stats.getStandardDeviation() > 0) { // more than one obs
387 return randomData.nextGaussian
388 (stats.getMean(),stats.getStandardDeviation());
389 } else {
390 return stats.getMean(); // only one obs in bin
391 }
392 }
393 }
394 }
395 throw new MathRuntimeException(LocalizedFormats.NO_BIN_SELECTED);
396 }
397
398 /**
399 * Returns a {@link StatisticalSummary} describing this distribution.
400 * <strong>Preconditions:</strong><ul>
401 * <li>the distribution must be loaded before invoking this method</li></ul>
402 *
403 * @return the sample statistics
404 * @throws IllegalStateException if the distribution has not been loaded
405 */
406 public StatisticalSummary getSampleStats() {
407 return sampleStats;
408 }
409
410 /**
411 * Returns the number of bins.
412 *
413 * @return the number of bins.
414 */
415 public int getBinCount() {
416 return binCount;
417 }
418
419 /**
420 * Returns a List of {@link SummaryStatistics} instances containing
421 * statistics describing the values in each of the bins. The list is
422 * indexed on the bin number.
423 *
424 * @return List of bin statistics.
425 */
426 public List<SummaryStatistics> getBinStats() {
427 return binStats;
428 }
429
430 /**
431 * <p>Returns a fresh copy of the array of upper bounds for the bins.
432 * Bins are: <br/>
433 * [min,upperBounds[0]],(upperBounds[0],upperBounds[1]],...,
434 * (upperBounds[binCount-2], upperBounds[binCount-1] = max].</p>
435 *
436 * <p>Note: In versions 1.0-2.0 of commons-math, this method
437 * incorrectly returned the array of probability generator upper
438 * bounds now returned by {@link #getGeneratorUpperBounds()}.</p>
439 *
440 * @return array of bin upper bounds
441 * @since 2.1
442 */
443 public double[] getUpperBounds() {
444 double[] binUpperBounds = new double[binCount];
445 binUpperBounds[0] = min + delta;
446 for (int i = 1; i < binCount - 1; i++) {
447 binUpperBounds[i] = binUpperBounds[i-1] + delta;
448 }
449 binUpperBounds[binCount - 1] = max;
450 return binUpperBounds;
451 }
452
453 /**
454 * <p>Returns a fresh copy of the array of upper bounds of the subintervals
455 * of [0,1] used in generating data from the empirical distribution.
456 * Subintervals correspond to bins with lengths proportional to bin counts.</p>
457 *
458 * <p>In versions 1.0-2.0 of commons-math, this array was (incorrectly) returned
459 * by {@link #getUpperBounds()}.</p>
460 *
461 * @since 2.1
462 * @return array of upper bounds of subintervals used in data generation
463 */
464 public double[] getGeneratorUpperBounds() {
465 int len = upperBounds.length;
466 double[] out = new double[len];
467 System.arraycopy(upperBounds, 0, out, 0, len);
468 return out;
469 }
470
471 /**
472 * Property indicating whether or not the distribution has been loaded.
473 *
474 * @return true if the distribution has been loaded
475 */
476 public boolean isLoaded() {
477 return loaded;
478 }
479}
Note: See TracBrowser for help on using the repository browser.