1 | /*
|
---|
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
|
---|
3 | * contributor license agreements. See the NOTICE file distributed with
|
---|
4 | * this work for additional information regarding copyright ownership.
|
---|
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
|
---|
6 | * (the "License"); you may not use this file except in compliance with
|
---|
7 | * the License. You may obtain a copy of the License at
|
---|
8 | *
|
---|
9 | * http://www.apache.org/licenses/LICENSE-2.0
|
---|
10 | *
|
---|
11 | * Unless required by applicable law or agreed to in writing, software
|
---|
12 | * distributed under the License is distributed on an "AS IS" BASIS,
|
---|
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
---|
14 | * See the License for the specific language governing permissions and
|
---|
15 | * limitations under the License.
|
---|
16 | */
|
---|
17 |
|
---|
18 | package agents.org.apache.commons.math.distribution;
|
---|
19 |
|
---|
20 | import java.io.Serializable;
|
---|
21 |
|
---|
22 | import agents.org.apache.commons.math.MathRuntimeException;
|
---|
23 | import agents.org.apache.commons.math.exception.util.LocalizedFormats;
|
---|
24 | import agents.org.apache.commons.math.util.FastMath;
|
---|
25 |
|
---|
26 | /**
|
---|
27 | * Implementation for the {@link ZipfDistribution}.
|
---|
28 | *
|
---|
29 | * @version $Revision: 1054524 $ $Date: 2011-01-03 05:59:18 +0100 (lun. 03 janv. 2011) $
|
---|
30 | */
|
---|
31 | public class ZipfDistributionImpl extends AbstractIntegerDistribution
|
---|
32 | implements ZipfDistribution, Serializable {
|
---|
33 |
|
---|
34 | /** Serializable version identifier. */
|
---|
35 | private static final long serialVersionUID = -140627372283420404L;
|
---|
36 |
|
---|
37 | /** Number of elements. */
|
---|
38 | private int numberOfElements;
|
---|
39 |
|
---|
40 | /** Exponent parameter of the distribution. */
|
---|
41 | private double exponent;
|
---|
42 |
|
---|
43 | /**
|
---|
44 | * Create a new Zipf distribution with the given number of elements and
|
---|
45 | * exponent. Both values must be positive; otherwise an
|
---|
46 | * <code>IllegalArgumentException</code> is thrown.
|
---|
47 | *
|
---|
48 | * @param numberOfElements the number of elements
|
---|
49 | * @param exponent the exponent
|
---|
50 | * @exception IllegalArgumentException if n ≤ 0 or s ≤ 0.0
|
---|
51 | */
|
---|
52 | public ZipfDistributionImpl(final int numberOfElements, final double exponent)
|
---|
53 | throws IllegalArgumentException {
|
---|
54 | setNumberOfElementsInternal(numberOfElements);
|
---|
55 | setExponentInternal(exponent);
|
---|
56 | }
|
---|
57 |
|
---|
58 | /**
|
---|
59 | * Get the number of elements (e.g. corpus size) for the distribution.
|
---|
60 | *
|
---|
61 | * @return the number of elements
|
---|
62 | */
|
---|
63 | public int getNumberOfElements() {
|
---|
64 | return numberOfElements;
|
---|
65 | }
|
---|
66 |
|
---|
67 | /**
|
---|
68 | * Set the number of elements (e.g. corpus size) for the distribution.
|
---|
69 | * The parameter value must be positive; otherwise an
|
---|
70 | * <code>IllegalArgumentException</code> is thrown.
|
---|
71 | *
|
---|
72 | * @param n the number of elements
|
---|
73 | * @exception IllegalArgumentException if n ≤ 0
|
---|
74 | * @deprecated as of 2.1 (class will become immutable in 3.0)
|
---|
75 | */
|
---|
76 | @Deprecated
|
---|
77 | public void setNumberOfElements(final int n) {
|
---|
78 | setNumberOfElementsInternal(n);
|
---|
79 | }
|
---|
80 | /**
|
---|
81 | * Set the number of elements (e.g. corpus size) for the distribution.
|
---|
82 | * The parameter value must be positive; otherwise an
|
---|
83 | * <code>IllegalArgumentException</code> is thrown.
|
---|
84 | *
|
---|
85 | * @param n the number of elements
|
---|
86 | * @exception IllegalArgumentException if n ≤ 0
|
---|
87 | */
|
---|
88 | private void setNumberOfElementsInternal(final int n)
|
---|
89 | throws IllegalArgumentException {
|
---|
90 | if (n <= 0) {
|
---|
91 | throw MathRuntimeException.createIllegalArgumentException(
|
---|
92 | LocalizedFormats.INSUFFICIENT_DIMENSION, n, 0);
|
---|
93 | }
|
---|
94 | this.numberOfElements = n;
|
---|
95 | }
|
---|
96 |
|
---|
97 | /**
|
---|
98 | * Get the exponent characterising the distribution.
|
---|
99 | *
|
---|
100 | * @return the exponent
|
---|
101 | */
|
---|
102 | public double getExponent() {
|
---|
103 | return exponent;
|
---|
104 | }
|
---|
105 |
|
---|
106 | /**
|
---|
107 | * Set the exponent characterising the distribution.
|
---|
108 | * The parameter value must be positive; otherwise an
|
---|
109 | * <code>IllegalArgumentException</code> is thrown.
|
---|
110 | *
|
---|
111 | * @param s the exponent
|
---|
112 | * @exception IllegalArgumentException if s ≤ 0.0
|
---|
113 | * @deprecated as of 2.1 (class will become immutable in 3.0)
|
---|
114 | */
|
---|
115 | @Deprecated
|
---|
116 | public void setExponent(final double s) {
|
---|
117 | setExponentInternal(s);
|
---|
118 | }
|
---|
119 |
|
---|
120 | /**
|
---|
121 | * Set the exponent characterising the distribution.
|
---|
122 | * The parameter value must be positive; otherwise an
|
---|
123 | * <code>IllegalArgumentException</code> is thrown.
|
---|
124 | *
|
---|
125 | * @param s the exponent
|
---|
126 | * @exception IllegalArgumentException if s ≤ 0.0
|
---|
127 | */
|
---|
128 | private void setExponentInternal(final double s)
|
---|
129 | throws IllegalArgumentException {
|
---|
130 | if (s <= 0.0) {
|
---|
131 | throw MathRuntimeException.createIllegalArgumentException(
|
---|
132 | LocalizedFormats.NOT_POSITIVE_EXPONENT,
|
---|
133 | s);
|
---|
134 | }
|
---|
135 | this.exponent = s;
|
---|
136 | }
|
---|
137 |
|
---|
138 | /**
|
---|
139 | * The probability mass function P(X = x) for a Zipf distribution.
|
---|
140 | *
|
---|
141 | * @param x the value at which the probability density function is evaluated.
|
---|
142 | * @return the value of the probability mass function at x
|
---|
143 | */
|
---|
144 | public double probability(final int x) {
|
---|
145 | if (x <= 0 || x > numberOfElements) {
|
---|
146 | return 0.0;
|
---|
147 | }
|
---|
148 |
|
---|
149 | return (1.0 / FastMath.pow(x, exponent)) / generalizedHarmonic(numberOfElements, exponent);
|
---|
150 |
|
---|
151 | }
|
---|
152 |
|
---|
153 | /**
|
---|
154 | * The probability distribution function P(X <= x) for a Zipf distribution.
|
---|
155 | *
|
---|
156 | * @param x the value at which the PDF is evaluated.
|
---|
157 | * @return Zipf distribution function evaluated at x
|
---|
158 | */
|
---|
159 | @Override
|
---|
160 | public double cumulativeProbability(final int x) {
|
---|
161 | if (x <= 0) {
|
---|
162 | return 0.0;
|
---|
163 | } else if (x >= numberOfElements) {
|
---|
164 | return 1.0;
|
---|
165 | }
|
---|
166 |
|
---|
167 | return generalizedHarmonic(x, exponent) / generalizedHarmonic(numberOfElements, exponent);
|
---|
168 |
|
---|
169 | }
|
---|
170 |
|
---|
171 | /**
|
---|
172 | * Access the domain value lower bound, based on <code>p</code>, used to
|
---|
173 | * bracket a PDF root.
|
---|
174 | *
|
---|
175 | * @param p the desired probability for the critical value
|
---|
176 | * @return domain value lower bound, i.e.
|
---|
177 | * P(X < <i>lower bound</i>) < <code>p</code>
|
---|
178 | */
|
---|
179 | @Override
|
---|
180 | protected int getDomainLowerBound(final double p) {
|
---|
181 | return 0;
|
---|
182 | }
|
---|
183 |
|
---|
184 | /**
|
---|
185 | * Access the domain value upper bound, based on <code>p</code>, used to
|
---|
186 | * bracket a PDF root.
|
---|
187 | *
|
---|
188 | * @param p the desired probability for the critical value
|
---|
189 | * @return domain value upper bound, i.e.
|
---|
190 | * P(X < <i>upper bound</i>) > <code>p</code>
|
---|
191 | */
|
---|
192 | @Override
|
---|
193 | protected int getDomainUpperBound(final double p) {
|
---|
194 | return numberOfElements;
|
---|
195 | }
|
---|
196 |
|
---|
197 |
|
---|
198 | /**
|
---|
199 | * Calculates the Nth generalized harmonic number. See
|
---|
200 | * <a href="http://mathworld.wolfram.com/HarmonicSeries.html">Harmonic
|
---|
201 | * Series</a>.
|
---|
202 | *
|
---|
203 | * @param n the term in the series to calculate (must be ≥ 1)
|
---|
204 | * @param m the exponent; special case m == 1.0 is the harmonic series
|
---|
205 | * @return the nth generalized harmonic number
|
---|
206 | */
|
---|
207 | private double generalizedHarmonic(final int n, final double m) {
|
---|
208 | double value = 0;
|
---|
209 | for (int k = n; k > 0; --k) {
|
---|
210 | value += 1.0 / FastMath.pow(k, m);
|
---|
211 | }
|
---|
212 | return value;
|
---|
213 | }
|
---|
214 |
|
---|
215 | /**
|
---|
216 | * Returns the lower bound of the support for the distribution.
|
---|
217 | *
|
---|
218 | * The lower bound of the support is always 1 no matter the parameters.
|
---|
219 | *
|
---|
220 | * @return lower bound of the support (always 1)
|
---|
221 | * @since 2.2
|
---|
222 | */
|
---|
223 | public int getSupportLowerBound() {
|
---|
224 | return 1;
|
---|
225 | }
|
---|
226 |
|
---|
227 | /**
|
---|
228 | * Returns the upper bound of the support for the distribution.
|
---|
229 | *
|
---|
230 | * The upper bound of the support is the number of elements
|
---|
231 | *
|
---|
232 | * @return upper bound of the support
|
---|
233 | * @since 2.2
|
---|
234 | */
|
---|
235 | public int getSupportUpperBound() {
|
---|
236 | return getNumberOfElements();
|
---|
237 | }
|
---|
238 |
|
---|
239 | /**
|
---|
240 | * Returns the mean.
|
---|
241 | *
|
---|
242 | * For number of elements N and exponent s, the mean is
|
---|
243 | * <code>Hs1 / Hs</code> where
|
---|
244 | * <ul>
|
---|
245 | * <li><code>Hs1 = generalizedHarmonic(N, s - 1)</code></li>
|
---|
246 | * <li><code>Hs = generalizedHarmonic(N, s)</code></li>
|
---|
247 | * </ul>
|
---|
248 | *
|
---|
249 | * @return the mean
|
---|
250 | * @since 2.2
|
---|
251 | */
|
---|
252 | protected double getNumericalMean() {
|
---|
253 | final int N = getNumberOfElements();
|
---|
254 | final double s = getExponent();
|
---|
255 |
|
---|
256 | final double Hs1 = generalizedHarmonic(N, s - 1);
|
---|
257 | final double Hs = generalizedHarmonic(N, s);
|
---|
258 |
|
---|
259 | return Hs1 / Hs;
|
---|
260 | }
|
---|
261 |
|
---|
262 | /**
|
---|
263 | * Returns the variance.
|
---|
264 | *
|
---|
265 | * For number of elements N and exponent s, the mean is
|
---|
266 | * <code>(Hs2 / Hs) - (Hs1^2 / Hs^2)</code> where
|
---|
267 | * <ul>
|
---|
268 | * <li><code>Hs2 = generalizedHarmonic(N, s - 2)</code></li>
|
---|
269 | * <li><code>Hs1 = generalizedHarmonic(N, s - 1)</code></li>
|
---|
270 | * <li><code>Hs = generalizedHarmonic(N, s)</code></li>
|
---|
271 | * </ul>
|
---|
272 | *
|
---|
273 | * @return the variance
|
---|
274 | * @since 2.2
|
---|
275 | */
|
---|
276 | protected double getNumericalVariance() {
|
---|
277 | final int N = getNumberOfElements();
|
---|
278 | final double s = getExponent();
|
---|
279 |
|
---|
280 | final double Hs2 = generalizedHarmonic(N, s - 2);
|
---|
281 | final double Hs1 = generalizedHarmonic(N, s - 1);
|
---|
282 | final double Hs = generalizedHarmonic(N, s);
|
---|
283 |
|
---|
284 | return (Hs2 / Hs) - ((Hs1 * Hs1) / (Hs * Hs));
|
---|
285 | }
|
---|
286 | }
|
---|