source: src/main/java/agents/anac/y2019/harddealer/math3/stat/inference/TTest.java

Last change on this file was 204, checked in by Katsuhide Fujita, 5 years ago

Fixed errors of ANAC2019 agents

  • Property svn:executable set to *
File size: 52.7 KB
Line 
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17package agents.anac.y2019.harddealer.math3.stat.inference;
18
19import agents.anac.y2019.harddealer.math3.distribution.TDistribution;
20import agents.anac.y2019.harddealer.math3.exception.DimensionMismatchException;
21import agents.anac.y2019.harddealer.math3.exception.MathIllegalArgumentException;
22import agents.anac.y2019.harddealer.math3.exception.MaxCountExceededException;
23import agents.anac.y2019.harddealer.math3.exception.NoDataException;
24import agents.anac.y2019.harddealer.math3.exception.NotStrictlyPositiveException;
25import agents.anac.y2019.harddealer.math3.exception.NullArgumentException;
26import agents.anac.y2019.harddealer.math3.exception.NumberIsTooSmallException;
27import agents.anac.y2019.harddealer.math3.exception.OutOfRangeException;
28import agents.anac.y2019.harddealer.math3.exception.util.LocalizedFormats;
29import agents.anac.y2019.harddealer.math3.stat.StatUtils;
30import agents.anac.y2019.harddealer.math3.stat.descriptive.StatisticalSummary;
31import agents.anac.y2019.harddealer.math3.util.FastMath;
32
33/**
34 * An implementation for Student's t-tests.
35 * <p>
36 * Tests can be:<ul>
37 * <li>One-sample or two-sample</li>
38 * <li>One-sided or two-sided</li>
39 * <li>Paired or unpaired (for two-sample tests)</li>
40 * <li>Homoscedastic (equal variance assumption) or heteroscedastic
41 * (for two sample tests)</li>
42 * <li>Fixed significance level (boolean-valued) or returning p-values.
43 * </li></ul></p>
44 * <p>
45 * Test statistics are available for all tests. Methods including "Test" in
46 * their names perform tests, all other methods return t-statistics. Among
47 * the "Test" methods, <code>double-</code>valued methods return p-values;
48 * <code>boolean-</code>valued methods perform fixed significance level tests.
49 * Significance levels are always specified as numbers between 0 and 0.5
50 * (e.g. tests at the 95% level use <code>alpha=0.05</code>).</p>
51 * <p>
52 * Input to tests can be either <code>double[]</code> arrays or
53 * {@link StatisticalSummary} instances.</p><p>
54 * Uses commons-math {@link agents.anac.y2019.harddealer.math3.distribution.TDistribution}
55 * implementation to estimate exact p-values.</p>
56 *
57 */
58public class TTest {
59 /**
60 * Computes a paired, 2-sample t-statistic based on the data in the input
61 * arrays. The t-statistic returned is equivalent to what would be returned by
62 * computing the one-sample t-statistic {@link #t(double, double[])}, with
63 * <code>mu = 0</code> and the sample array consisting of the (signed)
64 * differences between corresponding entries in <code>sample1</code> and
65 * <code>sample2.</code>
66 * <p>
67 * <strong>Preconditions</strong>: <ul>
68 * <li>The input arrays must have the same length and their common length
69 * must be at least 2.
70 * </li></ul></p>
71 *
72 * @param sample1 array of sample data values
73 * @param sample2 array of sample data values
74 * @return t statistic
75 * @throws NullArgumentException if the arrays are <code>null</code>
76 * @throws NoDataException if the arrays are empty
77 * @throws DimensionMismatchException if the length of the arrays is not equal
78 * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
79 */
80 public double pairedT(final double[] sample1, final double[] sample2)
81 throws NullArgumentException, NoDataException,
82 DimensionMismatchException, NumberIsTooSmallException {
83
84 checkSampleData(sample1);
85 checkSampleData(sample2);
86 double meanDifference = StatUtils.meanDifference(sample1, sample2);
87 return t(meanDifference, 0,
88 StatUtils.varianceDifference(sample1, sample2, meanDifference),
89 sample1.length);
90
91 }
92
93 /**
94 * Returns the <i>observed significance level</i>, or
95 * <i> p-value</i>, associated with a paired, two-sample, two-tailed t-test
96 * based on the data in the input arrays.
97 * <p>
98 * The number returned is the smallest significance level
99 * at which one can reject the null hypothesis that the mean of the paired
100 * differences is 0 in favor of the two-sided alternative that the mean paired
101 * difference is not equal to 0. For a one-sided test, divide the returned
102 * value by 2.</p>
103 * <p>
104 * This test is equivalent to a one-sample t-test computed using
105 * {@link #tTest(double, double[])} with <code>mu = 0</code> and the sample
106 * array consisting of the signed differences between corresponding elements of
107 * <code>sample1</code> and <code>sample2.</code></p>
108 * <p>
109 * <strong>Usage Note:</strong><br>
110 * The validity of the p-value depends on the assumptions of the parametric
111 * t-test procedure, as discussed
112 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
113 * here</a></p>
114 * <p>
115 * <strong>Preconditions</strong>: <ul>
116 * <li>The input array lengths must be the same and their common length must
117 * be at least 2.
118 * </li></ul></p>
119 *
120 * @param sample1 array of sample data values
121 * @param sample2 array of sample data values
122 * @return p-value for t-test
123 * @throws NullArgumentException if the arrays are <code>null</code>
124 * @throws NoDataException if the arrays are empty
125 * @throws DimensionMismatchException if the length of the arrays is not equal
126 * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
127 * @throws MaxCountExceededException if an error occurs computing the p-value
128 */
129 public double pairedTTest(final double[] sample1, final double[] sample2)
130 throws NullArgumentException, NoDataException, DimensionMismatchException,
131 NumberIsTooSmallException, MaxCountExceededException {
132
133 double meanDifference = StatUtils.meanDifference(sample1, sample2);
134 return tTest(meanDifference, 0,
135 StatUtils.varianceDifference(sample1, sample2, meanDifference),
136 sample1.length);
137
138 }
139
140 /**
141 * Performs a paired t-test evaluating the null hypothesis that the
142 * mean of the paired differences between <code>sample1</code> and
143 * <code>sample2</code> is 0 in favor of the two-sided alternative that the
144 * mean paired difference is not equal to 0, with significance level
145 * <code>alpha</code>.
146 * <p>
147 * Returns <code>true</code> iff the null hypothesis can be rejected with
148 * confidence <code>1 - alpha</code>. To perform a 1-sided test, use
149 * <code>alpha * 2</code></p>
150 * <p>
151 * <strong>Usage Note:</strong><br>
152 * The validity of the test depends on the assumptions of the parametric
153 * t-test procedure, as discussed
154 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
155 * here</a></p>
156 * <p>
157 * <strong>Preconditions</strong>: <ul>
158 * <li>The input array lengths must be the same and their common length
159 * must be at least 2.
160 * </li>
161 * <li> <code> 0 &lt; alpha &lt; 0.5 </code>
162 * </li></ul></p>
163 *
164 * @param sample1 array of sample data values
165 * @param sample2 array of sample data values
166 * @param alpha significance level of the test
167 * @return true if the null hypothesis can be rejected with
168 * confidence 1 - alpha
169 * @throws NullArgumentException if the arrays are <code>null</code>
170 * @throws NoDataException if the arrays are empty
171 * @throws DimensionMismatchException if the length of the arrays is not equal
172 * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
173 * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
174 * @throws MaxCountExceededException if an error occurs computing the p-value
175 */
176 public boolean pairedTTest(final double[] sample1, final double[] sample2,
177 final double alpha)
178 throws NullArgumentException, NoDataException, DimensionMismatchException,
179 NumberIsTooSmallException, OutOfRangeException, MaxCountExceededException {
180
181 checkSignificanceLevel(alpha);
182 return pairedTTest(sample1, sample2) < alpha;
183
184 }
185
186 /**
187 * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
188 * t statistic </a> given observed values and a comparison constant.
189 * <p>
190 * This statistic can be used to perform a one sample t-test for the mean.
191 * </p><p>
192 * <strong>Preconditions</strong>: <ul>
193 * <li>The observed array length must be at least 2.
194 * </li></ul></p>
195 *
196 * @param mu comparison constant
197 * @param observed array of values
198 * @return t statistic
199 * @throws NullArgumentException if <code>observed</code> is <code>null</code>
200 * @throws NumberIsTooSmallException if the length of <code>observed</code> is &lt; 2
201 */
202 public double t(final double mu, final double[] observed)
203 throws NullArgumentException, NumberIsTooSmallException {
204
205 checkSampleData(observed);
206 // No try-catch or advertised exception because args have just been checked
207 return t(StatUtils.mean(observed), mu, StatUtils.variance(observed),
208 observed.length);
209
210 }
211
212 /**
213 * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
214 * t statistic </a> to use in comparing the mean of the dataset described by
215 * <code>sampleStats</code> to <code>mu</code>.
216 * <p>
217 * This statistic can be used to perform a one sample t-test for the mean.
218 * </p><p>
219 * <strong>Preconditions</strong>: <ul>
220 * <li><code>sampleStats.getN() &ge; 2</code>.
221 * </li></ul></p>
222 *
223 * @param mu comparison constant
224 * @param sampleStats StatisticalSummary holding sample summary statistics
225 * @return t statistic
226 * @throws NullArgumentException if <code>sampleStats</code> is <code>null</code>
227 * @throws NumberIsTooSmallException if the number of samples is &lt; 2
228 */
229 public double t(final double mu, final StatisticalSummary sampleStats)
230 throws NullArgumentException, NumberIsTooSmallException {
231
232 checkSampleData(sampleStats);
233 return t(sampleStats.getMean(), mu, sampleStats.getVariance(),
234 sampleStats.getN());
235
236 }
237
238 /**
239 * Computes a 2-sample t statistic, under the hypothesis of equal
240 * subpopulation variances. To compute a t-statistic without the
241 * equal variances hypothesis, use {@link #t(double[], double[])}.
242 * <p>
243 * This statistic can be used to perform a (homoscedastic) two-sample
244 * t-test to compare sample means.</p>
245 * <p>
246 * The t-statistic is</p>
247 * <p>
248 * &nbsp;&nbsp;<code> t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code>
249 * </p><p>
250 * where <strong><code>n1</code></strong> is the size of first sample;
251 * <strong><code> n2</code></strong> is the size of second sample;
252 * <strong><code> m1</code></strong> is the mean of first sample;
253 * <strong><code> m2</code></strong> is the mean of second sample;
254 *
255 * and <strong><code>var</code></strong> is the pooled variance estimate:
256 * </p><p>
257 * <code>var = ((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1))</code>
258 * </p><p>
259 * with <strong><code>var1</code></strong> the variance of the first sample and
260 * <strong><code>var2</code></strong> the variance of the second sample.
261 * </p><p>
262 * <strong>Preconditions</strong>: <ul>
263 * <li>The observed array lengths must both be at least 2.
264 * </li></ul></p>
265 *
266 * @param sample1 array of sample data values
267 * @param sample2 array of sample data values
268 * @return t statistic
269 * @throws NullArgumentException if the arrays are <code>null</code>
270 * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
271 */
272 public double homoscedasticT(final double[] sample1, final double[] sample2)
273 throws NullArgumentException, NumberIsTooSmallException {
274
275 checkSampleData(sample1);
276 checkSampleData(sample2);
277 // No try-catch or advertised exception because args have just been checked
278 return homoscedasticT(StatUtils.mean(sample1), StatUtils.mean(sample2),
279 StatUtils.variance(sample1), StatUtils.variance(sample2),
280 sample1.length, sample2.length);
281
282 }
283
284 /**
285 * Computes a 2-sample t statistic, without the hypothesis of equal
286 * subpopulation variances. To compute a t-statistic assuming equal
287 * variances, use {@link #homoscedasticT(double[], double[])}.
288 * <p>
289 * This statistic can be used to perform a two-sample t-test to compare
290 * sample means.</p>
291 * <p>
292 * The t-statistic is</p>
293 * <p>
294 * &nbsp;&nbsp; <code> t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code>
295 * </p><p>
296 * where <strong><code>n1</code></strong> is the size of the first sample
297 * <strong><code> n2</code></strong> is the size of the second sample;
298 * <strong><code> m1</code></strong> is the mean of the first sample;
299 * <strong><code> m2</code></strong> is the mean of the second sample;
300 * <strong><code> var1</code></strong> is the variance of the first sample;
301 * <strong><code> var2</code></strong> is the variance of the second sample;
302 * </p><p>
303 * <strong>Preconditions</strong>: <ul>
304 * <li>The observed array lengths must both be at least 2.
305 * </li></ul></p>
306 *
307 * @param sample1 array of sample data values
308 * @param sample2 array of sample data values
309 * @return t statistic
310 * @throws NullArgumentException if the arrays are <code>null</code>
311 * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
312 */
313 public double t(final double[] sample1, final double[] sample2)
314 throws NullArgumentException, NumberIsTooSmallException {
315
316 checkSampleData(sample1);
317 checkSampleData(sample2);
318 // No try-catch or advertised exception because args have just been checked
319 return t(StatUtils.mean(sample1), StatUtils.mean(sample2),
320 StatUtils.variance(sample1), StatUtils.variance(sample2),
321 sample1.length, sample2.length);
322
323 }
324
325 /**
326 * Computes a 2-sample t statistic, comparing the means of the datasets
327 * described by two {@link StatisticalSummary} instances, without the
328 * assumption of equal subpopulation variances. Use
329 * {@link #homoscedasticT(StatisticalSummary, StatisticalSummary)} to
330 * compute a t-statistic under the equal variances assumption.
331 * <p>
332 * This statistic can be used to perform a two-sample t-test to compare
333 * sample means.</p>
334 * <p>
335 * The returned t-statistic is</p>
336 * <p>
337 * &nbsp;&nbsp; <code> t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code>
338 * </p><p>
339 * where <strong><code>n1</code></strong> is the size of the first sample;
340 * <strong><code> n2</code></strong> is the size of the second sample;
341 * <strong><code> m1</code></strong> is the mean of the first sample;
342 * <strong><code> m2</code></strong> is the mean of the second sample
343 * <strong><code> var1</code></strong> is the variance of the first sample;
344 * <strong><code> var2</code></strong> is the variance of the second sample
345 * </p><p>
346 * <strong>Preconditions</strong>: <ul>
347 * <li>The datasets described by the two Univariates must each contain
348 * at least 2 observations.
349 * </li></ul></p>
350 *
351 * @param sampleStats1 StatisticalSummary describing data from the first sample
352 * @param sampleStats2 StatisticalSummary describing data from the second sample
353 * @return t statistic
354 * @throws NullArgumentException if the sample statistics are <code>null</code>
355 * @throws NumberIsTooSmallException if the number of samples is &lt; 2
356 */
357 public double t(final StatisticalSummary sampleStats1,
358 final StatisticalSummary sampleStats2)
359 throws NullArgumentException, NumberIsTooSmallException {
360
361 checkSampleData(sampleStats1);
362 checkSampleData(sampleStats2);
363 return t(sampleStats1.getMean(), sampleStats2.getMean(),
364 sampleStats1.getVariance(), sampleStats2.getVariance(),
365 sampleStats1.getN(), sampleStats2.getN());
366
367 }
368
369 /**
370 * Computes a 2-sample t statistic, comparing the means of the datasets
371 * described by two {@link StatisticalSummary} instances, under the
372 * assumption of equal subpopulation variances. To compute a t-statistic
373 * without the equal variances assumption, use
374 * {@link #t(StatisticalSummary, StatisticalSummary)}.
375 * <p>
376 * This statistic can be used to perform a (homoscedastic) two-sample
377 * t-test to compare sample means.</p>
378 * <p>
379 * The t-statistic returned is</p>
380 * <p>
381 * &nbsp;&nbsp;<code> t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code>
382 * </p><p>
383 * where <strong><code>n1</code></strong> is the size of first sample;
384 * <strong><code> n2</code></strong> is the size of second sample;
385 * <strong><code> m1</code></strong> is the mean of first sample;
386 * <strong><code> m2</code></strong> is the mean of second sample
387 * and <strong><code>var</code></strong> is the pooled variance estimate:
388 * </p><p>
389 * <code>var = ((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1))</code>
390 * </p><p>
391 * with <strong><code>var1</code></strong> the variance of the first sample and
392 * <strong><code>var2</code></strong> the variance of the second sample.
393 * </p><p>
394 * <strong>Preconditions</strong>: <ul>
395 * <li>The datasets described by the two Univariates must each contain
396 * at least 2 observations.
397 * </li></ul></p>
398 *
399 * @param sampleStats1 StatisticalSummary describing data from the first sample
400 * @param sampleStats2 StatisticalSummary describing data from the second sample
401 * @return t statistic
402 * @throws NullArgumentException if the sample statistics are <code>null</code>
403 * @throws NumberIsTooSmallException if the number of samples is &lt; 2
404 */
405 public double homoscedasticT(final StatisticalSummary sampleStats1,
406 final StatisticalSummary sampleStats2)
407 throws NullArgumentException, NumberIsTooSmallException {
408
409 checkSampleData(sampleStats1);
410 checkSampleData(sampleStats2);
411 return homoscedasticT(sampleStats1.getMean(), sampleStats2.getMean(),
412 sampleStats1.getVariance(), sampleStats2.getVariance(),
413 sampleStats1.getN(), sampleStats2.getN());
414
415 }
416
417 /**
418 * Returns the <i>observed significance level</i>, or
419 * <i>p-value</i>, associated with a one-sample, two-tailed t-test
420 * comparing the mean of the input array with the constant <code>mu</code>.
421 * <p>
422 * The number returned is the smallest significance level
423 * at which one can reject the null hypothesis that the mean equals
424 * <code>mu</code> in favor of the two-sided alternative that the mean
425 * is different from <code>mu</code>. For a one-sided test, divide the
426 * returned value by 2.</p>
427 * <p>
428 * <strong>Usage Note:</strong><br>
429 * The validity of the test depends on the assumptions of the parametric
430 * t-test procedure, as discussed
431 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
432 * </p><p>
433 * <strong>Preconditions</strong>: <ul>
434 * <li>The observed array length must be at least 2.
435 * </li></ul></p>
436 *
437 * @param mu constant value to compare sample mean against
438 * @param sample array of sample data values
439 * @return p-value
440 * @throws NullArgumentException if the sample array is <code>null</code>
441 * @throws NumberIsTooSmallException if the length of the array is &lt; 2
442 * @throws MaxCountExceededException if an error occurs computing the p-value
443 */
444 public double tTest(final double mu, final double[] sample)
445 throws NullArgumentException, NumberIsTooSmallException,
446 MaxCountExceededException {
447
448 checkSampleData(sample);
449 // No try-catch or advertised exception because args have just been checked
450 return tTest(StatUtils.mean(sample), mu, StatUtils.variance(sample),
451 sample.length);
452
453 }
454
455 /**
456 * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
457 * two-sided t-test</a> evaluating the null hypothesis that the mean of the population from
458 * which <code>sample</code> is drawn equals <code>mu</code>.
459 * <p>
460 * Returns <code>true</code> iff the null hypothesis can be
461 * rejected with confidence <code>1 - alpha</code>. To
462 * perform a 1-sided test, use <code>alpha * 2</code></p>
463 * <p>
464 * <strong>Examples:</strong><br><ol>
465 * <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at
466 * the 95% level, use <br><code>tTest(mu, sample, 0.05) </code>
467 * </li>
468 * <li>To test the (one-sided) hypothesis <code> sample mean &lt; mu </code>
469 * at the 99% level, first verify that the measured sample mean is less
470 * than <code>mu</code> and then use
471 * <br><code>tTest(mu, sample, 0.02) </code>
472 * </li></ol></p>
473 * <p>
474 * <strong>Usage Note:</strong><br>
475 * The validity of the test depends on the assumptions of the one-sample
476 * parametric t-test procedure, as discussed
477 * <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a>
478 * </p><p>
479 * <strong>Preconditions</strong>: <ul>
480 * <li>The observed array length must be at least 2.
481 * </li></ul></p>
482 *
483 * @param mu constant value to compare sample mean against
484 * @param sample array of sample data values
485 * @param alpha significance level of the test
486 * @return true if the null hypothesis can be rejected with confidence 1 - alpha
487 * @throws NullArgumentException if the sample array is <code>null</code>
488 * @throws NumberIsTooSmallException if the length of the array is &lt; 2
489 * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
490 * @throws MaxCountExceededException if an error occurs computing the p-value
491 */
492 public boolean tTest(final double mu, final double[] sample, final double alpha)
493 throws NullArgumentException, NumberIsTooSmallException,
494 OutOfRangeException, MaxCountExceededException {
495
496 checkSignificanceLevel(alpha);
497 return tTest(mu, sample) < alpha;
498
499 }
500
501 /**
502 * Returns the <i>observed significance level</i>, or
503 * <i>p-value</i>, associated with a one-sample, two-tailed t-test
504 * comparing the mean of the dataset described by <code>sampleStats</code>
505 * with the constant <code>mu</code>.
506 * <p>
507 * The number returned is the smallest significance level
508 * at which one can reject the null hypothesis that the mean equals
509 * <code>mu</code> in favor of the two-sided alternative that the mean
510 * is different from <code>mu</code>. For a one-sided test, divide the
511 * returned value by 2.</p>
512 * <p>
513 * <strong>Usage Note:</strong><br>
514 * The validity of the test depends on the assumptions of the parametric
515 * t-test procedure, as discussed
516 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
517 * here</a></p>
518 * <p>
519 * <strong>Preconditions</strong>: <ul>
520 * <li>The sample must contain at least 2 observations.
521 * </li></ul></p>
522 *
523 * @param mu constant value to compare sample mean against
524 * @param sampleStats StatisticalSummary describing sample data
525 * @return p-value
526 * @throws NullArgumentException if <code>sampleStats</code> is <code>null</code>
527 * @throws NumberIsTooSmallException if the number of samples is &lt; 2
528 * @throws MaxCountExceededException if an error occurs computing the p-value
529 */
530 public double tTest(final double mu, final StatisticalSummary sampleStats)
531 throws NullArgumentException, NumberIsTooSmallException,
532 MaxCountExceededException {
533
534 checkSampleData(sampleStats);
535 return tTest(sampleStats.getMean(), mu, sampleStats.getVariance(),
536 sampleStats.getN());
537
538 }
539
540 /**
541 * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
542 * two-sided t-test</a> evaluating the null hypothesis that the mean of the
543 * population from which the dataset described by <code>stats</code> is
544 * drawn equals <code>mu</code>.
545 * <p>
546 * Returns <code>true</code> iff the null hypothesis can be rejected with
547 * confidence <code>1 - alpha</code>. To perform a 1-sided test, use
548 * <code>alpha * 2.</code></p>
549 * <p>
550 * <strong>Examples:</strong><br><ol>
551 * <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at
552 * the 95% level, use <br><code>tTest(mu, sampleStats, 0.05) </code>
553 * </li>
554 * <li>To test the (one-sided) hypothesis <code> sample mean &lt; mu </code>
555 * at the 99% level, first verify that the measured sample mean is less
556 * than <code>mu</code> and then use
557 * <br><code>tTest(mu, sampleStats, 0.02) </code>
558 * </li></ol></p>
559 * <p>
560 * <strong>Usage Note:</strong><br>
561 * The validity of the test depends on the assumptions of the one-sample
562 * parametric t-test procedure, as discussed
563 * <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a>
564 * </p><p>
565 * <strong>Preconditions</strong>: <ul>
566 * <li>The sample must include at least 2 observations.
567 * </li></ul></p>
568 *
569 * @param mu constant value to compare sample mean against
570 * @param sampleStats StatisticalSummary describing sample data values
571 * @param alpha significance level of the test
572 * @return true if the null hypothesis can be rejected with confidence 1 - alpha
573 * @throws NullArgumentException if <code>sampleStats</code> is <code>null</code>
574 * @throws NumberIsTooSmallException if the number of samples is &lt; 2
575 * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
576 * @throws MaxCountExceededException if an error occurs computing the p-value
577 */
578 public boolean tTest(final double mu, final StatisticalSummary sampleStats,
579 final double alpha)
580 throws NullArgumentException, NumberIsTooSmallException,
581 OutOfRangeException, MaxCountExceededException {
582
583 checkSignificanceLevel(alpha);
584 return tTest(mu, sampleStats) < alpha;
585
586 }
587
588 /**
589 * Returns the <i>observed significance level</i>, or
590 * <i>p-value</i>, associated with a two-sample, two-tailed t-test
591 * comparing the means of the input arrays.
592 * <p>
593 * The number returned is the smallest significance level
594 * at which one can reject the null hypothesis that the two means are
595 * equal in favor of the two-sided alternative that they are different.
596 * For a one-sided test, divide the returned value by 2.</p>
597 * <p>
598 * The test does not assume that the underlying population variances are
599 * equal and it uses approximated degrees of freedom computed from the
600 * sample data to compute the p-value. The t-statistic used is as defined in
601 * {@link #t(double[], double[])} and the Welch-Satterthwaite approximation
602 * to the degrees of freedom is used,
603 * as described
604 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
605 * here.</a> To perform the test under the assumption of equal subpopulation
606 * variances, use {@link #homoscedasticTTest(double[], double[])}.</p>
607 * <p>
608 * <strong>Usage Note:</strong><br>
609 * The validity of the p-value depends on the assumptions of the parametric
610 * t-test procedure, as discussed
611 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
612 * here</a></p>
613 * <p>
614 * <strong>Preconditions</strong>: <ul>
615 * <li>The observed array lengths must both be at least 2.
616 * </li></ul></p>
617 *
618 * @param sample1 array of sample data values
619 * @param sample2 array of sample data values
620 * @return p-value for t-test
621 * @throws NullArgumentException if the arrays are <code>null</code>
622 * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
623 * @throws MaxCountExceededException if an error occurs computing the p-value
624 */
625 public double tTest(final double[] sample1, final double[] sample2)
626 throws NullArgumentException, NumberIsTooSmallException,
627 MaxCountExceededException {
628
629 checkSampleData(sample1);
630 checkSampleData(sample2);
631 // No try-catch or advertised exception because args have just been checked
632 return tTest(StatUtils.mean(sample1), StatUtils.mean(sample2),
633 StatUtils.variance(sample1), StatUtils.variance(sample2),
634 sample1.length, sample2.length);
635
636 }
637
638 /**
639 * Returns the <i>observed significance level</i>, or
640 * <i>p-value</i>, associated with a two-sample, two-tailed t-test
641 * comparing the means of the input arrays, under the assumption that
642 * the two samples are drawn from subpopulations with equal variances.
643 * To perform the test without the equal variances assumption, use
644 * {@link #tTest(double[], double[])}.
645 * <p>
646 * The number returned is the smallest significance level
647 * at which one can reject the null hypothesis that the two means are
648 * equal in favor of the two-sided alternative that they are different.
649 * For a one-sided test, divide the returned value by 2.</p>
650 * <p>
651 * A pooled variance estimate is used to compute the t-statistic. See
652 * {@link #homoscedasticT(double[], double[])}. The sum of the sample sizes
653 * minus 2 is used as the degrees of freedom.</p>
654 * <p>
655 * <strong>Usage Note:</strong><br>
656 * The validity of the p-value depends on the assumptions of the parametric
657 * t-test procedure, as discussed
658 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
659 * here</a></p>
660 * <p>
661 * <strong>Preconditions</strong>: <ul>
662 * <li>The observed array lengths must both be at least 2.
663 * </li></ul></p>
664 *
665 * @param sample1 array of sample data values
666 * @param sample2 array of sample data values
667 * @return p-value for t-test
668 * @throws NullArgumentException if the arrays are <code>null</code>
669 * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
670 * @throws MaxCountExceededException if an error occurs computing the p-value
671 */
672 public double homoscedasticTTest(final double[] sample1, final double[] sample2)
673 throws NullArgumentException, NumberIsTooSmallException,
674 MaxCountExceededException {
675
676 checkSampleData(sample1);
677 checkSampleData(sample2);
678 // No try-catch or advertised exception because args have just been checked
679 return homoscedasticTTest(StatUtils.mean(sample1),
680 StatUtils.mean(sample2),
681 StatUtils.variance(sample1),
682 StatUtils.variance(sample2),
683 sample1.length, sample2.length);
684
685 }
686
687 /**
688 * Performs a
689 * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
690 * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code>
691 * and <code>sample2</code> are drawn from populations with the same mean,
692 * with significance level <code>alpha</code>. This test does not assume
693 * that the subpopulation variances are equal. To perform the test assuming
694 * equal variances, use
695 * {@link #homoscedasticTTest(double[], double[], double)}.
696 * <p>
697 * Returns <code>true</code> iff the null hypothesis that the means are
698 * equal can be rejected with confidence <code>1 - alpha</code>. To
699 * perform a 1-sided test, use <code>alpha * 2</code></p>
700 * <p>
701 * See {@link #t(double[], double[])} for the formula used to compute the
702 * t-statistic. Degrees of freedom are approximated using the
703 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
704 * Welch-Satterthwaite approximation.</a></p>
705 * <p>
706 * <strong>Examples:</strong><br><ol>
707 * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
708 * the 95% level, use
709 * <br><code>tTest(sample1, sample2, 0.05). </code>
710 * </li>
711 * <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2 </code>,
712 * at the 99% level, first verify that the measured mean of <code>sample 1</code>
713 * is less than the mean of <code>sample 2</code> and then use
714 * <br><code>tTest(sample1, sample2, 0.02) </code>
715 * </li></ol></p>
716 * <p>
717 * <strong>Usage Note:</strong><br>
718 * The validity of the test depends on the assumptions of the parametric
719 * t-test procedure, as discussed
720 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
721 * here</a></p>
722 * <p>
723 * <strong>Preconditions</strong>: <ul>
724 * <li>The observed array lengths must both be at least 2.
725 * </li>
726 * <li> <code> 0 < alpha < 0.5 </code>
727 * </li></ul></p>
728 *
729 * @param sample1 array of sample data values
730 * @param sample2 array of sample data values
731 * @param alpha significance level of the test
732 * @return true if the null hypothesis can be rejected with
733 * confidence 1 - alpha
734 * @throws NullArgumentException if the arrays are <code>null</code>
735 * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
736 * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
737 * @throws MaxCountExceededException if an error occurs computing the p-value
738 */
739 public boolean tTest(final double[] sample1, final double[] sample2,
740 final double alpha)
741 throws NullArgumentException, NumberIsTooSmallException,
742 OutOfRangeException, MaxCountExceededException {
743
744 checkSignificanceLevel(alpha);
745 return tTest(sample1, sample2) < alpha;
746
747 }
748
749 /**
750 * Performs a
751 * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
752 * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code>
753 * and <code>sample2</code> are drawn from populations with the same mean,
754 * with significance level <code>alpha</code>, assuming that the
755 * subpopulation variances are equal. Use
756 * {@link #tTest(double[], double[], double)} to perform the test without
757 * the assumption of equal variances.
758 * <p>
759 * Returns <code>true</code> iff the null hypothesis that the means are
760 * equal can be rejected with confidence <code>1 - alpha</code>. To
761 * perform a 1-sided test, use <code>alpha * 2.</code> To perform the test
762 * without the assumption of equal subpopulation variances, use
763 * {@link #tTest(double[], double[], double)}.</p>
764 * <p>
765 * A pooled variance estimate is used to compute the t-statistic. See
766 * {@link #t(double[], double[])} for the formula. The sum of the sample
767 * sizes minus 2 is used as the degrees of freedom.</p>
768 * <p>
769 * <strong>Examples:</strong><br><ol>
770 * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
771 * the 95% level, use <br><code>tTest(sample1, sample2, 0.05). </code>
772 * </li>
773 * <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2, </code>
774 * at the 99% level, first verify that the measured mean of
775 * <code>sample 1</code> is less than the mean of <code>sample 2</code>
776 * and then use
777 * <br><code>tTest(sample1, sample2, 0.02) </code>
778 * </li></ol></p>
779 * <p>
780 * <strong>Usage Note:</strong><br>
781 * The validity of the test depends on the assumptions of the parametric
782 * t-test procedure, as discussed
783 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
784 * here</a></p>
785 * <p>
786 * <strong>Preconditions</strong>: <ul>
787 * <li>The observed array lengths must both be at least 2.
788 * </li>
789 * <li> <code> 0 < alpha < 0.5 </code>
790 * </li></ul></p>
791 *
792 * @param sample1 array of sample data values
793 * @param sample2 array of sample data values
794 * @param alpha significance level of the test
795 * @return true if the null hypothesis can be rejected with
796 * confidence 1 - alpha
797 * @throws NullArgumentException if the arrays are <code>null</code>
798 * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
799 * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
800 * @throws MaxCountExceededException if an error occurs computing the p-value
801 */
802 public boolean homoscedasticTTest(final double[] sample1, final double[] sample2,
803 final double alpha)
804 throws NullArgumentException, NumberIsTooSmallException,
805 OutOfRangeException, MaxCountExceededException {
806
807 checkSignificanceLevel(alpha);
808 return homoscedasticTTest(sample1, sample2) < alpha;
809
810 }
811
812 /**
813 * Returns the <i>observed significance level</i>, or
814 * <i>p-value</i>, associated with a two-sample, two-tailed t-test
815 * comparing the means of the datasets described by two StatisticalSummary
816 * instances.
817 * <p>
818 * The number returned is the smallest significance level
819 * at which one can reject the null hypothesis that the two means are
820 * equal in favor of the two-sided alternative that they are different.
821 * For a one-sided test, divide the returned value by 2.</p>
822 * <p>
823 * The test does not assume that the underlying population variances are
824 * equal and it uses approximated degrees of freedom computed from the
825 * sample data to compute the p-value. To perform the test assuming
826 * equal variances, use
827 * {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}.</p>
828 * <p>
829 * <strong>Usage Note:</strong><br>
830 * The validity of the p-value depends on the assumptions of the parametric
831 * t-test procedure, as discussed
832 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
833 * here</a></p>
834 * <p>
835 * <strong>Preconditions</strong>: <ul>
836 * <li>The datasets described by the two Univariates must each contain
837 * at least 2 observations.
838 * </li></ul></p>
839 *
840 * @param sampleStats1 StatisticalSummary describing data from the first sample
841 * @param sampleStats2 StatisticalSummary describing data from the second sample
842 * @return p-value for t-test
843 * @throws NullArgumentException if the sample statistics are <code>null</code>
844 * @throws NumberIsTooSmallException if the number of samples is &lt; 2
845 * @throws MaxCountExceededException if an error occurs computing the p-value
846 */
847 public double tTest(final StatisticalSummary sampleStats1,
848 final StatisticalSummary sampleStats2)
849 throws NullArgumentException, NumberIsTooSmallException,
850 MaxCountExceededException {
851
852 checkSampleData(sampleStats1);
853 checkSampleData(sampleStats2);
854 return tTest(sampleStats1.getMean(), sampleStats2.getMean(),
855 sampleStats1.getVariance(), sampleStats2.getVariance(),
856 sampleStats1.getN(), sampleStats2.getN());
857
858 }
859
860 /**
861 * Returns the <i>observed significance level</i>, or
862 * <i>p-value</i>, associated with a two-sample, two-tailed t-test
863 * comparing the means of the datasets described by two StatisticalSummary
864 * instances, under the hypothesis of equal subpopulation variances. To
865 * perform a test without the equal variances assumption, use
866 * {@link #tTest(StatisticalSummary, StatisticalSummary)}.
867 * <p>
868 * The number returned is the smallest significance level
869 * at which one can reject the null hypothesis that the two means are
870 * equal in favor of the two-sided alternative that they are different.
871 * For a one-sided test, divide the returned value by 2.</p>
872 * <p>
873 * See {@link #homoscedasticT(double[], double[])} for the formula used to
874 * compute the t-statistic. The sum of the sample sizes minus 2 is used as
875 * the degrees of freedom.</p>
876 * <p>
877 * <strong>Usage Note:</strong><br>
878 * The validity of the p-value depends on the assumptions of the parametric
879 * t-test procedure, as discussed
880 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
881 * </p><p>
882 * <strong>Preconditions</strong>: <ul>
883 * <li>The datasets described by the two Univariates must each contain
884 * at least 2 observations.
885 * </li></ul></p>
886 *
887 * @param sampleStats1 StatisticalSummary describing data from the first sample
888 * @param sampleStats2 StatisticalSummary describing data from the second sample
889 * @return p-value for t-test
890 * @throws NullArgumentException if the sample statistics are <code>null</code>
891 * @throws NumberIsTooSmallException if the number of samples is &lt; 2
892 * @throws MaxCountExceededException if an error occurs computing the p-value
893 */
894 public double homoscedasticTTest(final StatisticalSummary sampleStats1,
895 final StatisticalSummary sampleStats2)
896 throws NullArgumentException, NumberIsTooSmallException,
897 MaxCountExceededException {
898
899 checkSampleData(sampleStats1);
900 checkSampleData(sampleStats2);
901 return homoscedasticTTest(sampleStats1.getMean(),
902 sampleStats2.getMean(),
903 sampleStats1.getVariance(),
904 sampleStats2.getVariance(),
905 sampleStats1.getN(), sampleStats2.getN());
906
907 }
908
909 /**
910 * Performs a
911 * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
912 * two-sided t-test</a> evaluating the null hypothesis that
913 * <code>sampleStats1</code> and <code>sampleStats2</code> describe
914 * datasets drawn from populations with the same mean, with significance
915 * level <code>alpha</code>. This test does not assume that the
916 * subpopulation variances are equal. To perform the test under the equal
917 * variances assumption, use
918 * {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}.
919 * <p>
920 * Returns <code>true</code> iff the null hypothesis that the means are
921 * equal can be rejected with confidence <code>1 - alpha</code>. To
922 * perform a 1-sided test, use <code>alpha * 2</code></p>
923 * <p>
924 * See {@link #t(double[], double[])} for the formula used to compute the
925 * t-statistic. Degrees of freedom are approximated using the
926 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
927 * Welch-Satterthwaite approximation.</a></p>
928 * <p>
929 * <strong>Examples:</strong><br><ol>
930 * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
931 * the 95%, use
932 * <br><code>tTest(sampleStats1, sampleStats2, 0.05) </code>
933 * </li>
934 * <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2 </code>
935 * at the 99% level, first verify that the measured mean of
936 * <code>sample 1</code> is less than the mean of <code>sample 2</code>
937 * and then use
938 * <br><code>tTest(sampleStats1, sampleStats2, 0.02) </code>
939 * </li></ol></p>
940 * <p>
941 * <strong>Usage Note:</strong><br>
942 * The validity of the test depends on the assumptions of the parametric
943 * t-test procedure, as discussed
944 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
945 * here</a></p>
946 * <p>
947 * <strong>Preconditions</strong>: <ul>
948 * <li>The datasets described by the two Univariates must each contain
949 * at least 2 observations.
950 * </li>
951 * <li> <code> 0 < alpha < 0.5 </code>
952 * </li></ul></p>
953 *
954 * @param sampleStats1 StatisticalSummary describing sample data values
955 * @param sampleStats2 StatisticalSummary describing sample data values
956 * @param alpha significance level of the test
957 * @return true if the null hypothesis can be rejected with
958 * confidence 1 - alpha
959 * @throws NullArgumentException if the sample statistics are <code>null</code>
960 * @throws NumberIsTooSmallException if the number of samples is &lt; 2
961 * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
962 * @throws MaxCountExceededException if an error occurs computing the p-value
963 */
964 public boolean tTest(final StatisticalSummary sampleStats1,
965 final StatisticalSummary sampleStats2,
966 final double alpha)
967 throws NullArgumentException, NumberIsTooSmallException,
968 OutOfRangeException, MaxCountExceededException {
969
970 checkSignificanceLevel(alpha);
971 return tTest(sampleStats1, sampleStats2) < alpha;
972
973 }
974
975 //----------------------------------------------- Protected methods
976
977 /**
978 * Computes approximate degrees of freedom for 2-sample t-test.
979 *
980 * @param v1 first sample variance
981 * @param v2 second sample variance
982 * @param n1 first sample n
983 * @param n2 second sample n
984 * @return approximate degrees of freedom
985 */
986 protected double df(double v1, double v2, double n1, double n2) {
987 return (((v1 / n1) + (v2 / n2)) * ((v1 / n1) + (v2 / n2))) /
988 ((v1 * v1) / (n1 * n1 * (n1 - 1d)) + (v2 * v2) /
989 (n2 * n2 * (n2 - 1d)));
990 }
991
992 /**
993 * Computes t test statistic for 1-sample t-test.
994 *
995 * @param m sample mean
996 * @param mu constant to test against
997 * @param v sample variance
998 * @param n sample n
999 * @return t test statistic
1000 */
1001 protected double t(final double m, final double mu,
1002 final double v, final double n) {
1003 return (m - mu) / FastMath.sqrt(v / n);
1004 }
1005
1006 /**
1007 * Computes t test statistic for 2-sample t-test.
1008 * <p>
1009 * Does not assume that subpopulation variances are equal.</p>
1010 *
1011 * @param m1 first sample mean
1012 * @param m2 second sample mean
1013 * @param v1 first sample variance
1014 * @param v2 second sample variance
1015 * @param n1 first sample n
1016 * @param n2 second sample n
1017 * @return t test statistic
1018 */
1019 protected double t(final double m1, final double m2,
1020 final double v1, final double v2,
1021 final double n1, final double n2) {
1022 return (m1 - m2) / FastMath.sqrt((v1 / n1) + (v2 / n2));
1023 }
1024
1025 /**
1026 * Computes t test statistic for 2-sample t-test under the hypothesis
1027 * of equal subpopulation variances.
1028 *
1029 * @param m1 first sample mean
1030 * @param m2 second sample mean
1031 * @param v1 first sample variance
1032 * @param v2 second sample variance
1033 * @param n1 first sample n
1034 * @param n2 second sample n
1035 * @return t test statistic
1036 */
1037 protected double homoscedasticT(final double m1, final double m2,
1038 final double v1, final double v2,
1039 final double n1, final double n2) {
1040 final double pooledVariance = ((n1 - 1) * v1 + (n2 -1) * v2 ) / (n1 + n2 - 2);
1041 return (m1 - m2) / FastMath.sqrt(pooledVariance * (1d / n1 + 1d / n2));
1042 }
1043
1044 /**
1045 * Computes p-value for 2-sided, 1-sample t-test.
1046 *
1047 * @param m sample mean
1048 * @param mu constant to test against
1049 * @param v sample variance
1050 * @param n sample n
1051 * @return p-value
1052 * @throws MaxCountExceededException if an error occurs computing the p-value
1053 * @throws MathIllegalArgumentException if n is not greater than 1
1054 */
1055 protected double tTest(final double m, final double mu,
1056 final double v, final double n)
1057 throws MaxCountExceededException, MathIllegalArgumentException {
1058
1059 final double t = FastMath.abs(t(m, mu, v, n));
1060 // pass a null rng to avoid unneeded overhead as we will not sample from this distribution
1061 final TDistribution distribution = new TDistribution(null, n - 1);
1062 return 2.0 * distribution.cumulativeProbability(-t);
1063
1064 }
1065
1066 /**
1067 * Computes p-value for 2-sided, 2-sample t-test.
1068 * <p>
1069 * Does not assume subpopulation variances are equal. Degrees of freedom
1070 * are estimated from the data.</p>
1071 *
1072 * @param m1 first sample mean
1073 * @param m2 second sample mean
1074 * @param v1 first sample variance
1075 * @param v2 second sample variance
1076 * @param n1 first sample n
1077 * @param n2 second sample n
1078 * @return p-value
1079 * @throws MaxCountExceededException if an error occurs computing the p-value
1080 * @throws NotStrictlyPositiveException if the estimated degrees of freedom is not
1081 * strictly positive
1082 */
1083 protected double tTest(final double m1, final double m2,
1084 final double v1, final double v2,
1085 final double n1, final double n2)
1086 throws MaxCountExceededException, NotStrictlyPositiveException {
1087
1088 final double t = FastMath.abs(t(m1, m2, v1, v2, n1, n2));
1089 final double degreesOfFreedom = df(v1, v2, n1, n2);
1090 // pass a null rng to avoid unneeded overhead as we will not sample from this distribution
1091 final TDistribution distribution = new TDistribution(null, degreesOfFreedom);
1092 return 2.0 * distribution.cumulativeProbability(-t);
1093
1094 }
1095
1096 /**
1097 * Computes p-value for 2-sided, 2-sample t-test, under the assumption
1098 * of equal subpopulation variances.
1099 * <p>
1100 * The sum of the sample sizes minus 2 is used as degrees of freedom.</p>
1101 *
1102 * @param m1 first sample mean
1103 * @param m2 second sample mean
1104 * @param v1 first sample variance
1105 * @param v2 second sample variance
1106 * @param n1 first sample n
1107 * @param n2 second sample n
1108 * @return p-value
1109 * @throws MaxCountExceededException if an error occurs computing the p-value
1110 * @throws NotStrictlyPositiveException if the estimated degrees of freedom is not
1111 * strictly positive
1112 */
1113 protected double homoscedasticTTest(double m1, double m2,
1114 double v1, double v2,
1115 double n1, double n2)
1116 throws MaxCountExceededException, NotStrictlyPositiveException {
1117
1118 final double t = FastMath.abs(homoscedasticT(m1, m2, v1, v2, n1, n2));
1119 final double degreesOfFreedom = n1 + n2 - 2;
1120 // pass a null rng to avoid unneeded overhead as we will not sample from this distribution
1121 final TDistribution distribution = new TDistribution(null, degreesOfFreedom);
1122 return 2.0 * distribution.cumulativeProbability(-t);
1123
1124 }
1125
1126 /**
1127 * Check significance level.
1128 *
1129 * @param alpha significance level
1130 * @throws OutOfRangeException if the significance level is out of bounds.
1131 */
1132 private void checkSignificanceLevel(final double alpha)
1133 throws OutOfRangeException {
1134
1135 if (alpha <= 0 || alpha > 0.5) {
1136 throw new OutOfRangeException(LocalizedFormats.SIGNIFICANCE_LEVEL,
1137 alpha, 0.0, 0.5);
1138 }
1139
1140 }
1141
1142 /**
1143 * Check sample data.
1144 *
1145 * @param data Sample data.
1146 * @throws NullArgumentException if {@code data} is {@code null}.
1147 * @throws NumberIsTooSmallException if there is not enough sample data.
1148 */
1149 private void checkSampleData(final double[] data)
1150 throws NullArgumentException, NumberIsTooSmallException {
1151
1152 if (data == null) {
1153 throw new NullArgumentException();
1154 }
1155 if (data.length < 2) {
1156 throw new NumberIsTooSmallException(
1157 LocalizedFormats.INSUFFICIENT_DATA_FOR_T_STATISTIC,
1158 data.length, 2, true);
1159 }
1160
1161 }
1162
1163 /**
1164 * Check sample data.
1165 *
1166 * @param stat Statistical summary.
1167 * @throws NullArgumentException if {@code data} is {@code null}.
1168 * @throws NumberIsTooSmallException if there is not enough sample data.
1169 */
1170 private void checkSampleData(final StatisticalSummary stat)
1171 throws NullArgumentException, NumberIsTooSmallException {
1172
1173 if (stat == null) {
1174 throw new NullArgumentException();
1175 }
1176 if (stat.getN() < 2) {
1177 throw new NumberIsTooSmallException(
1178 LocalizedFormats.INSUFFICIENT_DATA_FOR_T_STATISTIC,
1179 stat.getN(), 2, true);
1180 }
1181
1182 }
1183
1184}
Note: See TracBrowser for help on using the repository browser.