1 | /*
|
---|
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
|
---|
3 | * contributor license agreements. See the NOTICE file distributed with
|
---|
4 | * this work for additional information regarding copyright ownership.
|
---|
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
|
---|
6 | * (the "License"); you may not use this file except in compliance with
|
---|
7 | * the License. You may obtain a copy of the License at
|
---|
8 | *
|
---|
9 | * http://www.apache.org/licenses/LICENSE-2.0
|
---|
10 | *
|
---|
11 | * Unless required by applicable law or agreed to in writing, software
|
---|
12 | * distributed under the License is distributed on an "AS IS" BASIS,
|
---|
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
---|
14 | * See the License for the specific language governing permissions and
|
---|
15 | * limitations under the License.
|
---|
16 | */
|
---|
17 |
|
---|
18 | package agents.org.apache.commons.math.distribution;
|
---|
19 |
|
---|
20 | import java.io.Serializable;
|
---|
21 |
|
---|
22 | import agents.org.apache.commons.math.MathRuntimeException;
|
---|
23 | import agents.org.apache.commons.math.exception.util.LocalizedFormats;
|
---|
24 | import agents.org.apache.commons.math.util.FastMath;
|
---|
25 | import agents.org.apache.commons.math.util.MathUtils;
|
---|
26 |
|
---|
27 | /**
|
---|
28 | * The default implementation of {@link HypergeometricDistribution}.
|
---|
29 | *
|
---|
30 | * @version $Revision: 1054524 $ $Date: 2011-01-03 05:59:18 +0100 (lun. 03 janv. 2011) $
|
---|
31 | */
|
---|
32 | public class HypergeometricDistributionImpl extends AbstractIntegerDistribution
|
---|
33 | implements HypergeometricDistribution, Serializable {
|
---|
34 |
|
---|
35 | /** Serializable version identifier */
|
---|
36 | private static final long serialVersionUID = -436928820673516179L;
|
---|
37 |
|
---|
38 | /** The number of successes in the population. */
|
---|
39 | private int numberOfSuccesses;
|
---|
40 |
|
---|
41 | /** The population size. */
|
---|
42 | private int populationSize;
|
---|
43 |
|
---|
44 | /** The sample size. */
|
---|
45 | private int sampleSize;
|
---|
46 |
|
---|
47 | /**
|
---|
48 | * Construct a new hypergeometric distribution with the given the population
|
---|
49 | * size, the number of successes in the population, and the sample size.
|
---|
50 | *
|
---|
51 | * @param populationSize the population size.
|
---|
52 | * @param numberOfSuccesses number of successes in the population.
|
---|
53 | * @param sampleSize the sample size.
|
---|
54 | */
|
---|
55 | public HypergeometricDistributionImpl(int populationSize,
|
---|
56 | int numberOfSuccesses, int sampleSize) {
|
---|
57 | super();
|
---|
58 | if (numberOfSuccesses > populationSize) {
|
---|
59 | throw MathRuntimeException
|
---|
60 | .createIllegalArgumentException(
|
---|
61 | LocalizedFormats.NUMBER_OF_SUCCESS_LARGER_THAN_POPULATION_SIZE,
|
---|
62 | numberOfSuccesses, populationSize);
|
---|
63 | }
|
---|
64 | if (sampleSize > populationSize) {
|
---|
65 | throw MathRuntimeException
|
---|
66 | .createIllegalArgumentException(
|
---|
67 | LocalizedFormats.SAMPLE_SIZE_LARGER_THAN_POPULATION_SIZE,
|
---|
68 | sampleSize, populationSize);
|
---|
69 | }
|
---|
70 |
|
---|
71 | setPopulationSizeInternal(populationSize);
|
---|
72 | setSampleSizeInternal(sampleSize);
|
---|
73 | setNumberOfSuccessesInternal(numberOfSuccesses);
|
---|
74 | }
|
---|
75 |
|
---|
76 | /**
|
---|
77 | * For this distribution, X, this method returns P(X ≤ x).
|
---|
78 | *
|
---|
79 | * @param x the value at which the PDF is evaluated.
|
---|
80 | * @return PDF for this distribution.
|
---|
81 | */
|
---|
82 | @Override
|
---|
83 | public double cumulativeProbability(int x) {
|
---|
84 | double ret;
|
---|
85 |
|
---|
86 | int[] domain = getDomain(populationSize, numberOfSuccesses, sampleSize);
|
---|
87 | if (x < domain[0]) {
|
---|
88 | ret = 0.0;
|
---|
89 | } else if (x >= domain[1]) {
|
---|
90 | ret = 1.0;
|
---|
91 | } else {
|
---|
92 | ret = innerCumulativeProbability(domain[0], x, 1, populationSize,
|
---|
93 | numberOfSuccesses, sampleSize);
|
---|
94 | }
|
---|
95 |
|
---|
96 | return ret;
|
---|
97 | }
|
---|
98 |
|
---|
99 | /**
|
---|
100 | * Return the domain for the given hypergeometric distribution parameters.
|
---|
101 | *
|
---|
102 | * @param n the population size.
|
---|
103 | * @param m number of successes in the population.
|
---|
104 | * @param k the sample size.
|
---|
105 | * @return a two element array containing the lower and upper bounds of the
|
---|
106 | * hypergeometric distribution.
|
---|
107 | */
|
---|
108 | private int[] getDomain(int n, int m, int k) {
|
---|
109 | return new int[] { getLowerDomain(n, m, k), getUpperDomain(m, k) };
|
---|
110 | }
|
---|
111 |
|
---|
112 | /**
|
---|
113 | * Access the domain value lower bound, based on <code>p</code>, used to
|
---|
114 | * bracket a PDF root.
|
---|
115 | *
|
---|
116 | * @param p the desired probability for the critical value
|
---|
117 | * @return domain value lower bound, i.e. P(X < <i>lower bound</i>) <
|
---|
118 | * <code>p</code>
|
---|
119 | */
|
---|
120 | @Override
|
---|
121 | protected int getDomainLowerBound(double p) {
|
---|
122 | return getLowerDomain(populationSize, numberOfSuccesses, sampleSize);
|
---|
123 | }
|
---|
124 |
|
---|
125 | /**
|
---|
126 | * Access the domain value upper bound, based on <code>p</code>, used to
|
---|
127 | * bracket a PDF root.
|
---|
128 | *
|
---|
129 | * @param p the desired probability for the critical value
|
---|
130 | * @return domain value upper bound, i.e. P(X < <i>upper bound</i>) >
|
---|
131 | * <code>p</code>
|
---|
132 | */
|
---|
133 | @Override
|
---|
134 | protected int getDomainUpperBound(double p) {
|
---|
135 | return getUpperDomain(sampleSize, numberOfSuccesses);
|
---|
136 | }
|
---|
137 |
|
---|
138 | /**
|
---|
139 | * Return the lowest domain value for the given hypergeometric distribution
|
---|
140 | * parameters.
|
---|
141 | *
|
---|
142 | * @param n the population size.
|
---|
143 | * @param m number of successes in the population.
|
---|
144 | * @param k the sample size.
|
---|
145 | * @return the lowest domain value of the hypergeometric distribution.
|
---|
146 | */
|
---|
147 | private int getLowerDomain(int n, int m, int k) {
|
---|
148 | return FastMath.max(0, m - (n - k));
|
---|
149 | }
|
---|
150 |
|
---|
151 | /**
|
---|
152 | * Access the number of successes.
|
---|
153 | *
|
---|
154 | * @return the number of successes.
|
---|
155 | */
|
---|
156 | public int getNumberOfSuccesses() {
|
---|
157 | return numberOfSuccesses;
|
---|
158 | }
|
---|
159 |
|
---|
160 | /**
|
---|
161 | * Access the population size.
|
---|
162 | *
|
---|
163 | * @return the population size.
|
---|
164 | */
|
---|
165 | public int getPopulationSize() {
|
---|
166 | return populationSize;
|
---|
167 | }
|
---|
168 |
|
---|
169 | /**
|
---|
170 | * Access the sample size.
|
---|
171 | *
|
---|
172 | * @return the sample size.
|
---|
173 | */
|
---|
174 | public int getSampleSize() {
|
---|
175 | return sampleSize;
|
---|
176 | }
|
---|
177 |
|
---|
178 | /**
|
---|
179 | * Return the highest domain value for the given hypergeometric distribution
|
---|
180 | * parameters.
|
---|
181 | *
|
---|
182 | * @param m number of successes in the population.
|
---|
183 | * @param k the sample size.
|
---|
184 | * @return the highest domain value of the hypergeometric distribution.
|
---|
185 | */
|
---|
186 | private int getUpperDomain(int m, int k) {
|
---|
187 | return FastMath.min(k, m);
|
---|
188 | }
|
---|
189 |
|
---|
190 | /**
|
---|
191 | * For this distribution, X, this method returns P(X = x).
|
---|
192 | *
|
---|
193 | * @param x the value at which the PMF is evaluated.
|
---|
194 | * @return PMF for this distribution.
|
---|
195 | */
|
---|
196 | public double probability(int x) {
|
---|
197 | double ret;
|
---|
198 |
|
---|
199 | int[] domain = getDomain(populationSize, numberOfSuccesses, sampleSize);
|
---|
200 | if (x < domain[0] || x > domain[1]) {
|
---|
201 | ret = 0.0;
|
---|
202 | } else {
|
---|
203 | double p = (double) sampleSize / (double) populationSize;
|
---|
204 | double q = (double) (populationSize - sampleSize) / (double) populationSize;
|
---|
205 | double p1 = SaddlePointExpansion.logBinomialProbability(x,
|
---|
206 | numberOfSuccesses, p, q);
|
---|
207 | double p2 =
|
---|
208 | SaddlePointExpansion.logBinomialProbability(sampleSize - x,
|
---|
209 | populationSize - numberOfSuccesses, p, q);
|
---|
210 | double p3 =
|
---|
211 | SaddlePointExpansion.logBinomialProbability(sampleSize, populationSize, p, q);
|
---|
212 | ret = FastMath.exp(p1 + p2 - p3);
|
---|
213 | }
|
---|
214 |
|
---|
215 | return ret;
|
---|
216 | }
|
---|
217 |
|
---|
218 | /**
|
---|
219 | * For the distribution, X, defined by the given hypergeometric distribution
|
---|
220 | * parameters, this method returns P(X = x).
|
---|
221 | *
|
---|
222 | * @param n the population size.
|
---|
223 | * @param m number of successes in the population.
|
---|
224 | * @param k the sample size.
|
---|
225 | * @param x the value at which the PMF is evaluated.
|
---|
226 | * @return PMF for the distribution.
|
---|
227 | */
|
---|
228 | private double probability(int n, int m, int k, int x) {
|
---|
229 | return FastMath.exp(MathUtils.binomialCoefficientLog(m, x) +
|
---|
230 | MathUtils.binomialCoefficientLog(n - m, k - x) -
|
---|
231 | MathUtils.binomialCoefficientLog(n, k));
|
---|
232 | }
|
---|
233 |
|
---|
234 | /**
|
---|
235 | * Modify the number of successes.
|
---|
236 | *
|
---|
237 | * @param num the new number of successes.
|
---|
238 | * @throws IllegalArgumentException if <code>num</code> is negative.
|
---|
239 | * @deprecated as of 2.1 (class will become immutable in 3.0)
|
---|
240 | */
|
---|
241 | @Deprecated
|
---|
242 | public void setNumberOfSuccesses(int num) {
|
---|
243 | setNumberOfSuccessesInternal(num);
|
---|
244 | }
|
---|
245 |
|
---|
246 | /**
|
---|
247 | * Modify the number of successes.
|
---|
248 | *
|
---|
249 | * @param num the new number of successes.
|
---|
250 | * @throws IllegalArgumentException if <code>num</code> is negative.
|
---|
251 | */
|
---|
252 | private void setNumberOfSuccessesInternal(int num) {
|
---|
253 | if (num < 0) {
|
---|
254 | throw MathRuntimeException.createIllegalArgumentException(
|
---|
255 | LocalizedFormats.NEGATIVE_NUMBER_OF_SUCCESSES, num);
|
---|
256 | }
|
---|
257 | numberOfSuccesses = num;
|
---|
258 | }
|
---|
259 |
|
---|
260 | /**
|
---|
261 | * Modify the population size.
|
---|
262 | *
|
---|
263 | * @param size the new population size.
|
---|
264 | * @throws IllegalArgumentException if <code>size</code> is not positive.
|
---|
265 | * @deprecated as of 2.1 (class will become immutable in 3.0)
|
---|
266 | */
|
---|
267 | @Deprecated
|
---|
268 | public void setPopulationSize(int size) {
|
---|
269 | setPopulationSizeInternal(size);
|
---|
270 | }
|
---|
271 |
|
---|
272 | /**
|
---|
273 | * Modify the population size.
|
---|
274 | *
|
---|
275 | * @param size the new population size.
|
---|
276 | * @throws IllegalArgumentException if <code>size</code> is not positive.
|
---|
277 | */
|
---|
278 | private void setPopulationSizeInternal(int size) {
|
---|
279 | if (size <= 0) {
|
---|
280 | throw MathRuntimeException.createIllegalArgumentException(
|
---|
281 | LocalizedFormats.NOT_POSITIVE_POPULATION_SIZE, size);
|
---|
282 | }
|
---|
283 | populationSize = size;
|
---|
284 | }
|
---|
285 |
|
---|
286 | /**
|
---|
287 | * Modify the sample size.
|
---|
288 | *
|
---|
289 | * @param size the new sample size.
|
---|
290 | * @throws IllegalArgumentException if <code>size</code> is negative.
|
---|
291 | * @deprecated as of 2.1 (class will become immutable in 3.0)
|
---|
292 | */
|
---|
293 | @Deprecated
|
---|
294 | public void setSampleSize(int size) {
|
---|
295 | setSampleSizeInternal(size);
|
---|
296 | }
|
---|
297 | /**
|
---|
298 | * Modify the sample size.
|
---|
299 | *
|
---|
300 | * @param size the new sample size.
|
---|
301 | * @throws IllegalArgumentException if <code>size</code> is negative.
|
---|
302 | */
|
---|
303 | private void setSampleSizeInternal(int size) {
|
---|
304 | if (size < 0) {
|
---|
305 | throw MathRuntimeException.createIllegalArgumentException(
|
---|
306 | LocalizedFormats.NOT_POSITIVE_SAMPLE_SIZE, size);
|
---|
307 | }
|
---|
308 | sampleSize = size;
|
---|
309 | }
|
---|
310 |
|
---|
311 | /**
|
---|
312 | * For this distribution, X, this method returns P(X ≥ x).
|
---|
313 | *
|
---|
314 | * @param x the value at which the CDF is evaluated.
|
---|
315 | * @return upper tail CDF for this distribution.
|
---|
316 | * @since 1.1
|
---|
317 | */
|
---|
318 | public double upperCumulativeProbability(int x) {
|
---|
319 | double ret;
|
---|
320 |
|
---|
321 | final int[] domain = getDomain(populationSize, numberOfSuccesses, sampleSize);
|
---|
322 | if (x < domain[0]) {
|
---|
323 | ret = 1.0;
|
---|
324 | } else if (x > domain[1]) {
|
---|
325 | ret = 0.0;
|
---|
326 | } else {
|
---|
327 | ret = innerCumulativeProbability(domain[1], x, -1, populationSize, numberOfSuccesses, sampleSize);
|
---|
328 | }
|
---|
329 |
|
---|
330 | return ret;
|
---|
331 | }
|
---|
332 |
|
---|
333 | /**
|
---|
334 | * For this distribution, X, this method returns P(x0 ≤ X ≤ x1). This
|
---|
335 | * probability is computed by summing the point probabilities for the values
|
---|
336 | * x0, x0 + 1, x0 + 2, ..., x1, in the order directed by dx.
|
---|
337 | *
|
---|
338 | * @param x0 the inclusive, lower bound
|
---|
339 | * @param x1 the inclusive, upper bound
|
---|
340 | * @param dx the direction of summation. 1 indicates summing from x0 to x1.
|
---|
341 | * 0 indicates summing from x1 to x0.
|
---|
342 | * @param n the population size.
|
---|
343 | * @param m number of successes in the population.
|
---|
344 | * @param k the sample size.
|
---|
345 | * @return P(x0 ≤ X ≤ x1).
|
---|
346 | */
|
---|
347 | private double innerCumulativeProbability(int x0, int x1, int dx, int n,
|
---|
348 | int m, int k) {
|
---|
349 | double ret = probability(n, m, k, x0);
|
---|
350 | while (x0 != x1) {
|
---|
351 | x0 += dx;
|
---|
352 | ret += probability(n, m, k, x0);
|
---|
353 | }
|
---|
354 | return ret;
|
---|
355 | }
|
---|
356 |
|
---|
357 | /**
|
---|
358 | * Returns the lower bound for the support for the distribution.
|
---|
359 | *
|
---|
360 | * For population size <code>N</code>,
|
---|
361 | * number of successes <code>m</code>, and
|
---|
362 | * sample size <code>n</code>,
|
---|
363 | * the lower bound of the support is
|
---|
364 | * <code>max(0, n + m - N)</code>
|
---|
365 | *
|
---|
366 | * @return lower bound of the support
|
---|
367 | * @since 2.2
|
---|
368 | */
|
---|
369 | public int getSupportLowerBound() {
|
---|
370 | return FastMath.max(0,
|
---|
371 | getSampleSize() + getNumberOfSuccesses() - getPopulationSize());
|
---|
372 | }
|
---|
373 |
|
---|
374 | /**
|
---|
375 | * Returns the upper bound for the support of the distribution.
|
---|
376 | *
|
---|
377 | * For number of successes <code>m</code> and
|
---|
378 | * sample size <code>n</code>,
|
---|
379 | * the upper bound of the support is
|
---|
380 | * <code>min(m, n)</code>
|
---|
381 | *
|
---|
382 | * @return upper bound of the support
|
---|
383 | * @since 2.2
|
---|
384 | */
|
---|
385 | public int getSupportUpperBound() {
|
---|
386 | return FastMath.min(getNumberOfSuccesses(), getSampleSize());
|
---|
387 | }
|
---|
388 |
|
---|
389 | /**
|
---|
390 | * Returns the mean.
|
---|
391 | *
|
---|
392 | * For population size <code>N</code>,
|
---|
393 | * number of successes <code>m</code>, and
|
---|
394 | * sample size <code>n</code>, the mean is
|
---|
395 | * <code>n * m / N</code>
|
---|
396 | *
|
---|
397 | * @return the mean
|
---|
398 | * @since 2.2
|
---|
399 | */
|
---|
400 | protected double getNumericalMean() {
|
---|
401 | return (double)(getSampleSize() * getNumberOfSuccesses()) / (double)getPopulationSize();
|
---|
402 | }
|
---|
403 |
|
---|
404 | /**
|
---|
405 | * Returns the variance.
|
---|
406 | *
|
---|
407 | * For population size <code>N</code>,
|
---|
408 | * number of successes <code>m</code>, and
|
---|
409 | * sample size <code>n</code>, the variance is
|
---|
410 | * <code>[ n * m * (N - n) * (N - m) ] / [ N^2 * (N - 1) ]</code>
|
---|
411 | *
|
---|
412 | * @return the variance
|
---|
413 | * @since 2.2
|
---|
414 | */
|
---|
415 | public double getNumericalVariance() {
|
---|
416 | final double N = getPopulationSize();
|
---|
417 | final double m = getNumberOfSuccesses();
|
---|
418 | final double n = getSampleSize();
|
---|
419 | return ( n * m * (N - n) * (N - m) ) / ( (N*N * (N - 1)) );
|
---|
420 | }
|
---|
421 | }
|
---|