Context Navigation

← Previous Revision
Next Revision →
Blame
Revision Log

TTest.java

Last change on this file was 204, checked in by Katsuhide Fujita, 6 years ago
Fixed errors of ANAC2019 agents
Property svn:executable set to ``*
File size: 52.7 KB

Line
1	/*
2	* Licensed to the Apache Software Foundation (ASF) under one or more
3	* contributor license agreements. See the NOTICE file distributed with
4	* this work for additional information regarding copyright ownership.
5	* The ASF licenses this file to You under the Apache License, Version 2.0
6	* (the "License"); you may not use this file except in compliance with
7	* the License. You may obtain a copy of the License at
8	*
9	* http://www.apache.org/licenses/LICENSE-2.0
10	*
11	* Unless required by applicable law or agreed to in writing, software
12	* distributed under the License is distributed on an "AS IS" BASIS,
13	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14	* See the License for the specific language governing permissions and
15	* limitations under the License.
16	*/
17	package agents.anac.y2019.harddealer.math3.stat.inference;
18
19	import agents.anac.y2019.harddealer.math3.distribution.TDistribution;
20	import agents.anac.y2019.harddealer.math3.exception.DimensionMismatchException;
21	import agents.anac.y2019.harddealer.math3.exception.MathIllegalArgumentException;
22	import agents.anac.y2019.harddealer.math3.exception.MaxCountExceededException;
23	import agents.anac.y2019.harddealer.math3.exception.NoDataException;
24	import agents.anac.y2019.harddealer.math3.exception.NotStrictlyPositiveException;
25	import agents.anac.y2019.harddealer.math3.exception.NullArgumentException;
26	import agents.anac.y2019.harddealer.math3.exception.NumberIsTooSmallException;
27	import agents.anac.y2019.harddealer.math3.exception.OutOfRangeException;
28	import agents.anac.y2019.harddealer.math3.exception.util.LocalizedFormats;
29	import agents.anac.y2019.harddealer.math3.stat.StatUtils;
30	import agents.anac.y2019.harddealer.math3.stat.descriptive.StatisticalSummary;
31	import agents.anac.y2019.harddealer.math3.util.FastMath;
32
33	/**
34	* An implementation for Student's t-tests.
35	* <p>
36	* Tests can be:<ul>
37	* <li>One-sample or two-sample</li>
38	* <li>One-sided or two-sided</li>
39	* <li>Paired or unpaired (for two-sample tests)</li>
40	* <li>Homoscedastic (equal variance assumption) or heteroscedastic
41	* (for two sample tests)</li>
42	* <li>Fixed significance level (boolean-valued) or returning p-values.
43	* </li></ul></p>
44	* <p>
45	* Test statistics are available for all tests. Methods including "Test" in
46	* their names perform tests, all other methods return t-statistics. Among
47	* the "Test" methods, <code>double-</code>valued methods return p-values;
48	* <code>boolean-</code>valued methods perform fixed significance level tests.
49	* Significance levels are always specified as numbers between 0 and 0.5
50	* (e.g. tests at the 95% level use <code>alpha=0.05</code>).</p>
51	* <p>
52	* Input to tests can be either <code>double[]</code> arrays or
53	* {@link StatisticalSummary} instances.</p><p>
54	* Uses commons-math {@link agents.anac.y2019.harddealer.math3.distribution.TDistribution}
55	* implementation to estimate exact p-values.</p>
56	*
57	*/
58	public class TTest {
59	/**
60	* Computes a paired, 2-sample t-statistic based on the data in the input
61	* arrays. The t-statistic returned is equivalent to what would be returned by
62	* computing the one-sample t-statistic {@link #t(double, double[])}, with
63	* <code>mu = 0</code> and the sample array consisting of the (signed)
64	* differences between corresponding entries in <code>sample1</code> and
65	* <code>sample2.</code>
66	* <p>
67	* <strong>Preconditions</strong>: <ul>
68	* <li>The input arrays must have the same length and their common length
69	* must be at least 2.
70	* </li></ul></p>
71	*
72	* @param sample1 array of sample data values
73	* @param sample2 array of sample data values
74	* @return t statistic
75	* @throws NullArgumentException if the arrays are <code>null</code>
76	* @throws NoDataException if the arrays are empty
77	* @throws DimensionMismatchException if the length of the arrays is not equal
78	* @throws NumberIsTooSmallException if the length of the arrays is < 2
79	*/
80	public double pairedT(final double[] sample1, final double[] sample2)
81	throws NullArgumentException, NoDataException,
82	DimensionMismatchException, NumberIsTooSmallException {
83
84	checkSampleData(sample1);
85	checkSampleData(sample2);
86	double meanDifference = StatUtils.meanDifference(sample1, sample2);
87	return t(meanDifference, 0,
88	StatUtils.varianceDifference(sample1, sample2, meanDifference),
89	sample1.length);
90
91	}
92
93	/**
94	* Returns the <i>observed significance level</i>, or
95	* <i> p-value</i>, associated with a paired, two-sample, two-tailed t-test
96	* based on the data in the input arrays.
97	* <p>
98	* The number returned is the smallest significance level
99	* at which one can reject the null hypothesis that the mean of the paired
100	* differences is 0 in favor of the two-sided alternative that the mean paired
101	* difference is not equal to 0. For a one-sided test, divide the returned
102	* value by 2.</p>
103	* <p>
104	* This test is equivalent to a one-sample t-test computed using
105	* {@link #tTest(double, double[])} with <code>mu = 0</code> and the sample
106	* array consisting of the signed differences between corresponding elements of
107	* <code>sample1</code> and <code>sample2.</code></p>
108	* <p>
109	* <strong>Usage Note:</strong><br>
110	* The validity of the p-value depends on the assumptions of the parametric
111	* t-test procedure, as discussed
112	* <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
113	* here</a></p>
114	* <p>
115	* <strong>Preconditions</strong>: <ul>
116	* <li>The input array lengths must be the same and their common length must
117	* be at least 2.
118	* </li></ul></p>
119	*
120	* @param sample1 array of sample data values
121	* @param sample2 array of sample data values
122	* @return p-value for t-test
123	* @throws NullArgumentException if the arrays are <code>null</code>
124	* @throws NoDataException if the arrays are empty
125	* @throws DimensionMismatchException if the length of the arrays is not equal
126	* @throws NumberIsTooSmallException if the length of the arrays is < 2
127	* @throws MaxCountExceededException if an error occurs computing the p-value
128	*/
129	public double pairedTTest(final double[] sample1, final double[] sample2)
130	throws NullArgumentException, NoDataException, DimensionMismatchException,
131	NumberIsTooSmallException, MaxCountExceededException {
132
133	double meanDifference = StatUtils.meanDifference(sample1, sample2);
134	return tTest(meanDifference, 0,
135	StatUtils.varianceDifference(sample1, sample2, meanDifference),
136	sample1.length);
137
138	}
139
140	/**
141	* Performs a paired t-test evaluating the null hypothesis that the
142	* mean of the paired differences between <code>sample1</code> and
143	* <code>sample2</code> is 0 in favor of the two-sided alternative that the
144	* mean paired difference is not equal to 0, with significance level
145	* <code>alpha</code>.
146	* <p>
147	* Returns <code>true</code> iff the null hypothesis can be rejected with
148	* confidence <code>1 - alpha</code>. To perform a 1-sided test, use
149	* <code>alpha * 2</code></p>
150	* <p>
151	* <strong>Usage Note:</strong><br>
152	* The validity of the test depends on the assumptions of the parametric
153	* t-test procedure, as discussed
154	* <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
155	* here</a></p>
156	* <p>
157	* <strong>Preconditions</strong>: <ul>
158	* <li>The input array lengths must be the same and their common length
159	* must be at least 2.
160	* </li>
161	* <li> <code> 0 < alpha ≤ 0.5 </code>
162	* </li></ul></p>
163	*
164	* @param sample1 array of sample data values
165	* @param sample2 array of sample data values
166	* @param alpha significance level of the test
167	* @return true if the null hypothesis can be rejected with
168	* confidence 1 - alpha
169	* @throws NullArgumentException if the arrays are <code>null</code>
170	* @throws NoDataException if the arrays are empty
171	* @throws DimensionMismatchException if the length of the arrays is not equal
172	* @throws NumberIsTooSmallException if the length of the arrays is < 2
173	* @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
174	* @throws MaxCountExceededException if an error occurs computing the p-value
175	*/
176	public boolean pairedTTest(final double[] sample1, final double[] sample2,
177	final double alpha)
178	throws NullArgumentException, NoDataException, DimensionMismatchException,
179	NumberIsTooSmallException, OutOfRangeException, MaxCountExceededException {
180
181	checkSignificanceLevel(alpha);
182	return pairedTTest(sample1, sample2) < alpha;
183
184	}
185
186	/**
187	* Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
188	* t statistic </a> given observed values and a comparison constant.
189	* <p>
190	* This statistic can be used to perform a one sample t-test for the mean.
191	* </p><p>
192	* <strong>Preconditions</strong>: <ul>
193	* <li>The observed array length must be at least 2.
194	* </li></ul></p>
195	*
196	* @param mu comparison constant
197	* @param observed array of values
198	* @return t statistic
199	* @throws NullArgumentException if <code>observed</code> is <code>null</code>
200	* @throws NumberIsTooSmallException if the length of <code>observed</code> is < 2
201	*/
202	public double t(final double mu, final double[] observed)
203	throws NullArgumentException, NumberIsTooSmallException {
204
205	checkSampleData(observed);
206	// No try-catch or advertised exception because args have just been checked
207	return t(StatUtils.mean(observed), mu, StatUtils.variance(observed),
208	observed.length);
209
210	}
211
212	/**
213	* Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
214	* t statistic </a> to use in comparing the mean of the dataset described by
215	* <code>sampleStats</code> to <code>mu</code>.
216	* <p>
217	* This statistic can be used to perform a one sample t-test for the mean.
218	* </p><p>
219	* <strong>Preconditions</strong>: <ul>
220	* <li><code>sampleStats.getN() ≥ 2</code>.
221	* </li></ul></p>
222	*
223	* @param mu comparison constant
224	* @param sampleStats StatisticalSummary holding sample summary statistics
225	* @return t statistic
226	* @throws NullArgumentException if <code>sampleStats</code> is <code>null</code>
227	* @throws NumberIsTooSmallException if the number of samples is < 2
228	*/
229	public double t(final double mu, final StatisticalSummary sampleStats)
230	throws NullArgumentException, NumberIsTooSmallException {
231
232	checkSampleData(sampleStats);
233	return t(sampleStats.getMean(), mu, sampleStats.getVariance(),
234	sampleStats.getN());
235
236	}
237
238	/**
239	* Computes a 2-sample t statistic, under the hypothesis of equal
240	* subpopulation variances. To compute a t-statistic without the
241	* equal variances hypothesis, use {@link #t(double[], double[])}.
242	* <p>
243	* This statistic can be used to perform a (homoscedastic) two-sample
244	* t-test to compare sample means.</p>
245	* <p>
246	* The t-statistic is</p>
247	* <p>
248	*   <code> t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code>
249	* </p><p>
250	* where <strong><code>n1</code></strong> is the size of first sample;
251	* <strong><code> n2</code></strong> is the size of second sample;
252	* <strong><code> m1</code></strong> is the mean of first sample;
253	* <strong><code> m2</code></strong> is the mean of second sample
254	*
255	* and <strong><code>var</code></strong> is the pooled variance estimate:
256	* </p><p>
257	* <code>var = ((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1))</code>
258	* </p><p>
259	* with <strong><code>var1</code></strong> the variance of the first sample and
260	* <strong><code>var2</code></strong> the variance of the second sample.
261	* </p><p>
262	* <strong>Preconditions</strong>: <ul>
263	* <li>The observed array lengths must both be at least 2.
264	* </li></ul></p>
265	*
266	* @param sample1 array of sample data values
267	* @param sample2 array of sample data values
268	* @return t statistic
269	* @throws NullArgumentException if the arrays are <code>null</code>
270	* @throws NumberIsTooSmallException if the length of the arrays is < 2
271	*/
272	public double homoscedasticT(final double[] sample1, final double[] sample2)
273	throws NullArgumentException, NumberIsTooSmallException {
274
275	checkSampleData(sample1);
276	checkSampleData(sample2);
277	// No try-catch or advertised exception because args have just been checked
278	return homoscedasticT(StatUtils.mean(sample1), StatUtils.mean(sample2),
279	StatUtils.variance(sample1), StatUtils.variance(sample2),
280	sample1.length, sample2.length);
281
282	}
283
284	/**
285	* Computes a 2-sample t statistic, without the hypothesis of equal
286	* subpopulation variances. To compute a t-statistic assuming equal
287	* variances, use {@link #homoscedasticT(double[], double[])}.
288	* <p>
289	* This statistic can be used to perform a two-sample t-test to compare
290	* sample means.</p>
291	* <p>
292	* The t-statistic is</p>
293	* <p>
294	*    <code> t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code>
295	* </p><p>
296	* where <strong><code>n1</code></strong> is the size of the first sample
297	* <strong><code> n2</code></strong> is the size of the second sample;
298	* <strong><code> m1</code></strong> is the mean of the first sample;
299	* <strong><code> m2</code></strong> is the mean of the second sample;
300	* <strong><code> var1</code></strong> is the variance of the first sample;
301	* <strong><code> var2</code></strong> is the variance of the second sample;
302	* </p><p>
303	* <strong>Preconditions</strong>: <ul>
304	* <li>The observed array lengths must both be at least 2.
305	* </li></ul></p>
306	*
307	* @param sample1 array of sample data values
308	* @param sample2 array of sample data values
309	* @return t statistic
310	* @throws NullArgumentException if the arrays are <code>null</code>
311	* @throws NumberIsTooSmallException if the length of the arrays is < 2
312	*/
313	public double t(final double[] sample1, final double[] sample2)
314	throws NullArgumentException, NumberIsTooSmallException {
315
316	checkSampleData(sample1);
317	checkSampleData(sample2);
318	// No try-catch or advertised exception because args have just been checked
319	return t(StatUtils.mean(sample1), StatUtils.mean(sample2),
320	StatUtils.variance(sample1), StatUtils.variance(sample2),
321	sample1.length, sample2.length);
322
323	}
324
325	/**
326	* Computes a 2-sample t statistic, comparing the means of the datasets
327	* described by two {@link StatisticalSummary} instances, without the
328	* assumption of equal subpopulation variances. Use
329	* {@link #homoscedasticT(StatisticalSummary, StatisticalSummary)} to
330	* compute a t-statistic under the equal variances assumption.
331	* <p>
332	* This statistic can be used to perform a two-sample t-test to compare
333	* sample means.</p>
334	* <p>
335	* The returned t-statistic is</p>
336	* <p>
337	*    <code> t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code>
338	* </p><p>
339	* where <strong><code>n1</code></strong> is the size of the first sample;
340	* <strong><code> n2</code></strong> is the size of the second sample;
341	* <strong><code> m1</code></strong> is the mean of the first sample;
342	* <strong><code> m2</code></strong> is the mean of the second sample
343	* <strong><code> var1</code></strong> is the variance of the first sample;
344	* <strong><code> var2</code></strong> is the variance of the second sample
345	* </p><p>
346	* <strong>Preconditions</strong>: <ul>
347	* <li>The datasets described by the two Univariates must each contain
348	* at least 2 observations.
349	* </li></ul></p>
350	*
351	* @param sampleStats1 StatisticalSummary describing data from the first sample
352	* @param sampleStats2 StatisticalSummary describing data from the second sample
353	* @return t statistic
354	* @throws NullArgumentException if the sample statistics are <code>null</code>
355	* @throws NumberIsTooSmallException if the number of samples is < 2
356	*/
357	public double t(final StatisticalSummary sampleStats1,
358	final StatisticalSummary sampleStats2)
359	throws NullArgumentException, NumberIsTooSmallException {
360
361	checkSampleData(sampleStats1);
362	checkSampleData(sampleStats2);
363	return t(sampleStats1.getMean(), sampleStats2.getMean(),
364	sampleStats1.getVariance(), sampleStats2.getVariance(),
365	sampleStats1.getN(), sampleStats2.getN());
366
367	}
368
369	/**
370	* Computes a 2-sample t statistic, comparing the means of the datasets
371	* described by two {@link StatisticalSummary} instances, under the
372	* assumption of equal subpopulation variances. To compute a t-statistic
373	* without the equal variances assumption, use
374	* {@link #t(StatisticalSummary, StatisticalSummary)}.
375	* <p>
376	* This statistic can be used to perform a (homoscedastic) two-sample
377	* t-test to compare sample means.</p>
378	* <p>
379	* The t-statistic returned is</p>
380	* <p>
381	*   <code> t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code>
382	* </p><p>
383	* where <strong><code>n1</code></strong> is the size of first sample;
384	* <strong><code> n2</code></strong> is the size of second sample;
385	* <strong><code> m1</code></strong> is the mean of first sample;
386	* <strong><code> m2</code></strong> is the mean of second sample
387	* and <strong><code>var</code></strong> is the pooled variance estimate:
388	* </p><p>
389	* <code>var = ((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1))</code>
390	* </p><p>
391	* with <strong><code>var1</code></strong> the variance of the first sample and
392	* <strong><code>var2</code></strong> the variance of the second sample.
393	* </p><p>
394	* <strong>Preconditions</strong>: <ul>
395	* <li>The datasets described by the two Univariates must each contain
396	* at least 2 observations.
397	* </li></ul></p>
398	*
399	* @param sampleStats1 StatisticalSummary describing data from the first sample
400	* @param sampleStats2 StatisticalSummary describing data from the second sample
401	* @return t statistic
402	* @throws NullArgumentException if the sample statistics are <code>null</code>
403	* @throws NumberIsTooSmallException if the number of samples is < 2
404	*/
405	public double homoscedasticT(final StatisticalSummary sampleStats1,
406	final StatisticalSummary sampleStats2)
407	throws NullArgumentException, NumberIsTooSmallException {
408
409	checkSampleData(sampleStats1);
410	checkSampleData(sampleStats2);
411	return homoscedasticT(sampleStats1.getMean(), sampleStats2.getMean(),
412	sampleStats1.getVariance(), sampleStats2.getVariance(),
413	sampleStats1.getN(), sampleStats2.getN());
414
415	}
416
417	/**
418	* Returns the <i>observed significance level</i>, or
419	* <i>p-value</i>, associated with a one-sample, two-tailed t-test
420	* comparing the mean of the input array with the constant <code>mu</code>.
421	* <p>
422	* The number returned is the smallest significance level
423	* at which one can reject the null hypothesis that the mean equals
424	* <code>mu</code> in favor of the two-sided alternative that the mean
425	* is different from <code>mu</code>. For a one-sided test, divide the
426	* returned value by 2.</p>
427	* <p>
428	* <strong>Usage Note:</strong><br>
429	* The validity of the test depends on the assumptions of the parametric
430	* t-test procedure, as discussed
431	* <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
432	* </p><p>
433	* <strong>Preconditions</strong>: <ul>
434	* <li>The observed array length must be at least 2.
435	* </li></ul></p>
436	*
437	* @param mu constant value to compare sample mean against
438	* @param sample array of sample data values
439	* @return p-value
440	* @throws NullArgumentException if the sample array is <code>null</code>
441	* @throws NumberIsTooSmallException if the length of the array is < 2
442	* @throws MaxCountExceededException if an error occurs computing the p-value
443	*/
444	public double tTest(final double mu, final double[] sample)
445	throws NullArgumentException, NumberIsTooSmallException,
446	MaxCountExceededException {
447
448	checkSampleData(sample);
449	// No try-catch or advertised exception because args have just been checked
450	return tTest(StatUtils.mean(sample), mu, StatUtils.variance(sample),
451	sample.length);
452
453	}
454
455	/**
456	* Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
457	* two-sided t-test</a> evaluating the null hypothesis that the mean of the population from
458	* which <code>sample</code> is drawn equals <code>mu</code>.
459	* <p>
460	* Returns <code>true</code> iff the null hypothesis can be
461	* rejected with confidence <code>1 - alpha</code>. To
462	* perform a 1-sided test, use <code>alpha * 2</code></p>
463	* <p>
464	* <strong>Examples:</strong><br><ol>
465	* <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at
466	* the 95% level, use <br><code>tTest(mu, sample, 0.05) </code>
467	* </li>
468	* <li>To test the (one-sided) hypothesis <code> sample mean < mu </code>
469	* at the 99% level, first verify that the measured sample mean is less
470	* than <code>mu</code> and then use
471	* <br><code>tTest(mu, sample, 0.02) </code>
472	* </li></ol></p>
473	* <p>
474	* <strong>Usage Note:</strong><br>
475	* The validity of the test depends on the assumptions of the one-sample
476	* parametric t-test procedure, as discussed
477	* <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a>
478	* </p><p>
479	* <strong>Preconditions</strong>: <ul>
480	* <li>The observed array length must be at least 2.
481	* </li></ul></p>
482	*
483	* @param mu constant value to compare sample mean against
484	* @param sample array of sample data values
485	* @param alpha significance level of the test
486	* @return true if the null hypothesis can be rejected with confidence 1 - alpha
487	* @throws NullArgumentException if the sample array is <code>null</code>
488	* @throws NumberIsTooSmallException if the length of the array is < 2
489	* @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
490	* @throws MaxCountExceededException if an error occurs computing the p-value
491	*/
492	public boolean tTest(final double mu, final double[] sample, final double alpha)
493	throws NullArgumentException, NumberIsTooSmallException,
494	OutOfRangeException, MaxCountExceededException {
495
496	checkSignificanceLevel(alpha);
497	return tTest(mu, sample) < alpha;
498
499	}
500
501	/**
502	* Returns the <i>observed significance level</i>, or
503	* <i>p-value</i>, associated with a one-sample, two-tailed t-test
504	* comparing the mean of the dataset described by <code>sampleStats</code>
505	* with the constant <code>mu</code>.
506	* <p>
507	* The number returned is the smallest significance level
508	* at which one can reject the null hypothesis that the mean equals
509	* <code>mu</code> in favor of the two-sided alternative that the mean
510	* is different from <code>mu</code>. For a one-sided test, divide the
511	* returned value by 2.</p>
512	* <p>
513	* <strong>Usage Note:</strong><br>
514	* The validity of the test depends on the assumptions of the parametric
515	* t-test procedure, as discussed
516	* <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
517	* here</a></p>
518	* <p>
519	* <strong>Preconditions</strong>: <ul>
520	* <li>The sample must contain at least 2 observations.
521	* </li></ul></p>
522	*
523	* @param mu constant value to compare sample mean against
524	* @param sampleStats StatisticalSummary describing sample data
525	* @return p-value
526	* @throws NullArgumentException if <code>sampleStats</code> is <code>null</code>
527	* @throws NumberIsTooSmallException if the number of samples is < 2
528	* @throws MaxCountExceededException if an error occurs computing the p-value
529	*/
530	public double tTest(final double mu, final StatisticalSummary sampleStats)
531	throws NullArgumentException, NumberIsTooSmallException,
532	MaxCountExceededException {
533
534	checkSampleData(sampleStats);
535	return tTest(sampleStats.getMean(), mu, sampleStats.getVariance(),
536	sampleStats.getN());
537
538	}
539
540	/**
541	* Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
542	* two-sided t-test</a> evaluating the null hypothesis that the mean of the
543	* population from which the dataset described by <code>stats</code> is
544	* drawn equals <code>mu</code>.
545	* <p>
546	* Returns <code>true</code> iff the null hypothesis can be rejected with
547	* confidence <code>1 - alpha</code>. To perform a 1-sided test, use
548	* <code>alpha * 2.</code></p>
549	* <p>
550	* <strong>Examples:</strong><br><ol>
551	* <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at
552	* the 95% level, use <br><code>tTest(mu, sampleStats, 0.05) </code>
553	* </li>
554	* <li>To test the (one-sided) hypothesis <code> sample mean < mu </code>
555	* at the 99% level, first verify that the measured sample mean is less
556	* than <code>mu</code> and then use
557	* <br><code>tTest(mu, sampleStats, 0.02) </code>
558	* </li></ol></p>
559	* <p>
560	* <strong>Usage Note:</strong><br>
561	* The validity of the test depends on the assumptions of the one-sample
562	* parametric t-test procedure, as discussed
563	* <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a>
564	* </p><p>
565	* <strong>Preconditions</strong>: <ul>
566	* <li>The sample must include at least 2 observations.
567	* </li></ul></p>
568	*
569	* @param mu constant value to compare sample mean against
570	* @param sampleStats StatisticalSummary describing sample data values
571	* @param alpha significance level of the test
572	* @return true if the null hypothesis can be rejected with confidence 1 - alpha
573	* @throws NullArgumentException if <code>sampleStats</code> is <code>null</code>
574	* @throws NumberIsTooSmallException if the number of samples is < 2
575	* @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
576	* @throws MaxCountExceededException if an error occurs computing the p-value
577	*/
578	public boolean tTest(final double mu, final StatisticalSummary sampleStats,
579	final double alpha)
580	throws NullArgumentException, NumberIsTooSmallException,
581	OutOfRangeException, MaxCountExceededException {
582
583	checkSignificanceLevel(alpha);
584	return tTest(mu, sampleStats) < alpha;
585
586	}
587
588	/**
589	* Returns the <i>observed significance level</i>, or
590	* <i>p-value</i>, associated with a two-sample, two-tailed t-test
591	* comparing the means of the input arrays.
592	* <p>
593	* The number returned is the smallest significance level
594	* at which one can reject the null hypothesis that the two means are
595	* equal in favor of the two-sided alternative that they are different.
596	* For a one-sided test, divide the returned value by 2.</p>
597	* <p>
598	* The test does not assume that the underlying population variances are
599	* equal and it uses approximated degrees of freedom computed from the
600	* sample data to compute the p-value. The t-statistic used is as defined in
601	* {@link #t(double[], double[])} and the Welch-Satterthwaite approximation
602	* to the degrees of freedom is used,
603	* as described
604	* <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
605	* here.</a> To perform the test under the assumption of equal subpopulation
606	* variances, use {@link #homoscedasticTTest(double[], double[])}.</p>
607	* <p>
608	* <strong>Usage Note:</strong><br>
609	* The validity of the p-value depends on the assumptions of the parametric
610	* t-test procedure, as discussed
611	* <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
612	* here</a></p>
613	* <p>
614	* <strong>Preconditions</strong>: <ul>
615	* <li>The observed array lengths must both be at least 2.
616	* </li></ul></p>
617	*
618	* @param sample1 array of sample data values
619	* @param sample2 array of sample data values
620	* @return p-value for t-test
621	* @throws NullArgumentException if the arrays are <code>null</code>
622	* @throws NumberIsTooSmallException if the length of the arrays is < 2
623	* @throws MaxCountExceededException if an error occurs computing the p-value
624	*/
625	public double tTest(final double[] sample1, final double[] sample2)
626	throws NullArgumentException, NumberIsTooSmallException,
627	MaxCountExceededException {
628
629	checkSampleData(sample1);
630	checkSampleData(sample2);
631	// No try-catch or advertised exception because args have just been checked
632	return tTest(StatUtils.mean(sample1), StatUtils.mean(sample2),
633	StatUtils.variance(sample1), StatUtils.variance(sample2),
634	sample1.length, sample2.length);
635
636	}
637
638	/**
639	* Returns the <i>observed significance level</i>, or
640	* <i>p-value</i>, associated with a two-sample, two-tailed t-test
641	* comparing the means of the input arrays, under the assumption that
642	* the two samples are drawn from subpopulations with equal variances.
643	* To perform the test without the equal variances assumption, use
644	* {@link #tTest(double[], double[])}.
645	* <p>
646	* The number returned is the smallest significance level
647	* at which one can reject the null hypothesis that the two means are
648	* equal in favor of the two-sided alternative that they are different.
649	* For a one-sided test, divide the returned value by 2.</p>
650	* <p>
651	* A pooled variance estimate is used to compute the t-statistic. See
652	* {@link #homoscedasticT(double[], double[])}. The sum of the sample sizes
653	* minus 2 is used as the degrees of freedom.</p>
654	* <p>
655	* <strong>Usage Note:</strong><br>
656	* The validity of the p-value depends on the assumptions of the parametric
657	* t-test procedure, as discussed
658	* <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
659	* here</a></p>
660	* <p>
661	* <strong>Preconditions</strong>: <ul>
662	* <li>The observed array lengths must both be at least 2.
663	* </li></ul></p>
664	*
665	* @param sample1 array of sample data values
666	* @param sample2 array of sample data values
667	* @return p-value for t-test
668	* @throws NullArgumentException if the arrays are <code>null</code>
669	* @throws NumberIsTooSmallException if the length of the arrays is < 2
670	* @throws MaxCountExceededException if an error occurs computing the p-value
671	*/
672	public double homoscedasticTTest(final double[] sample1, final double[] sample2)
673	throws NullArgumentException, NumberIsTooSmallException,
674	MaxCountExceededException {
675
676	checkSampleData(sample1);
677	checkSampleData(sample2);
678	// No try-catch or advertised exception because args have just been checked
679	return homoscedasticTTest(StatUtils.mean(sample1),
680	StatUtils.mean(sample2),
681	StatUtils.variance(sample1),
682	StatUtils.variance(sample2),
683	sample1.length, sample2.length);
684
685	}
686
687	/**
688	* Performs a
689	* <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
690	* two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code>
691	* and <code>sample2</code> are drawn from populations with the same mean,
692	* with significance level <code>alpha</code>. This test does not assume
693	* that the subpopulation variances are equal. To perform the test assuming
694	* equal variances, use
695	* {@link #homoscedasticTTest(double[], double[], double)}.
696	* <p>
697	* Returns <code>true</code> iff the null hypothesis that the means are
698	* equal can be rejected with confidence <code>1 - alpha</code>. To
699	* perform a 1-sided test, use <code>alpha * 2</code></p>
700	* <p>
701	* See {@link #t(double[], double[])} for the formula used to compute the
702	* t-statistic. Degrees of freedom are approximated using the
703	* <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
704	* Welch-Satterthwaite approximation.</a></p>
705	* <p>
706	* <strong>Examples:</strong><br><ol>
707	* <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
708	* the 95% level, use
709	* <br><code>tTest(sample1, sample2, 0.05). </code>
710	* </li>
711	* <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2 </code>,
712	* at the 99% level, first verify that the measured mean of <code>sample 1</code>
713	* is less than the mean of <code>sample 2</code> and then use
714	* <br><code>tTest(sample1, sample2, 0.02) </code>
715	* </li></ol></p>
716	* <p>
717	* <strong>Usage Note:</strong><br>
718	* The validity of the test depends on the assumptions of the parametric
719	* t-test procedure, as discussed
720	* <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
721	* here</a></p>
722	* <p>
723	* <strong>Preconditions</strong>: <ul>
724	* <li>The observed array lengths must both be at least 2.
725	* </li>
726	* <li> <code> 0 < alpha < 0.5 </code>
727	* </li></ul></p>
728	*
729	* @param sample1 array of sample data values
730	* @param sample2 array of sample data values
731	* @param alpha significance level of the test
732	* @return true if the null hypothesis can be rejected with
733	* confidence 1 - alpha
734	* @throws NullArgumentException if the arrays are <code>null</code>
735	* @throws NumberIsTooSmallException if the length of the arrays is < 2
736	* @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
737	* @throws MaxCountExceededException if an error occurs computing the p-value
738	*/
739	public boolean tTest(final double[] sample1, final double[] sample2,
740	final double alpha)
741	throws NullArgumentException, NumberIsTooSmallException,
742	OutOfRangeException, MaxCountExceededException {
743
744	checkSignificanceLevel(alpha);
745	return tTest(sample1, sample2) < alpha;
746
747	}
748
749	/**
750	* Performs a
751	* <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
752	* two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code>
753	* and <code>sample2</code> are drawn from populations with the same mean,
754	* with significance level <code>alpha</code>, assuming that the
755	* subpopulation variances are equal. Use
756	* {@link #tTest(double[], double[], double)} to perform the test without
757	* the assumption of equal variances.
758	* <p>
759	* Returns <code>true</code> iff the null hypothesis that the means are
760	* equal can be rejected with confidence <code>1 - alpha</code>. To
761	* perform a 1-sided test, use <code>alpha * 2.</code> To perform the test
762	* without the assumption of equal subpopulation variances, use
763	* {@link #tTest(double[], double[], double)}.</p>
764	* <p>
765	* A pooled variance estimate is used to compute the t-statistic. See
766	* {@link #t(double[], double[])} for the formula. The sum of the sample
767	* sizes minus 2 is used as the degrees of freedom.</p>
768	* <p>
769	* <strong>Examples:</strong><br><ol>
770	* <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
771	* the 95% level, use <br><code>tTest(sample1, sample2, 0.05). </code>
772	* </li>
773	* <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2, </code>
774	* at the 99% level, first verify that the measured mean of
775	* <code>sample 1</code> is less than the mean of <code>sample 2</code>
776	* and then use
777	* <br><code>tTest(sample1, sample2, 0.02) </code>
778	* </li></ol></p>
779	* <p>
780	* <strong>Usage Note:</strong><br>
781	* The validity of the test depends on the assumptions of the parametric
782	* t-test procedure, as discussed
783	* <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
784	* here</a></p>
785	* <p>
786	* <strong>Preconditions</strong>: <ul>
787	* <li>The observed array lengths must both be at least 2.
788	* </li>
789	* <li> <code> 0 < alpha < 0.5 </code>
790	* </li></ul></p>
791	*
792	* @param sample1 array of sample data values
793	* @param sample2 array of sample data values
794	* @param alpha significance level of the test
795	* @return true if the null hypothesis can be rejected with
796	* confidence 1 - alpha
797	* @throws NullArgumentException if the arrays are <code>null</code>
798	* @throws NumberIsTooSmallException if the length of the arrays is < 2
799	* @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
800	* @throws MaxCountExceededException if an error occurs computing the p-value
801	*/
802	public boolean homoscedasticTTest(final double[] sample1, final double[] sample2,
803	final double alpha)
804	throws NullArgumentException, NumberIsTooSmallException,
805	OutOfRangeException, MaxCountExceededException {
806
807	checkSignificanceLevel(alpha);
808	return homoscedasticTTest(sample1, sample2) < alpha;
809
810	}
811
812	/**
813	* Returns the <i>observed significance level</i>, or
814	* <i>p-value</i>, associated with a two-sample, two-tailed t-test
815	* comparing the means of the datasets described by two StatisticalSummary
816	* instances.
817	* <p>
818	* The number returned is the smallest significance level
819	* at which one can reject the null hypothesis that the two means are
820	* equal in favor of the two-sided alternative that they are different.
821	* For a one-sided test, divide the returned value by 2.</p>
822	* <p>
823	* The test does not assume that the underlying population variances are
824	* equal and it uses approximated degrees of freedom computed from the
825	* sample data to compute the p-value. To perform the test assuming
826	* equal variances, use
827	* {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}.</p>
828	* <p>
829	* <strong>Usage Note:</strong><br>
830	* The validity of the p-value depends on the assumptions of the parametric
831	* t-test procedure, as discussed
832	* <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
833	* here</a></p>
834	* <p>
835	* <strong>Preconditions</strong>: <ul>
836	* <li>The datasets described by the two Univariates must each contain
837	* at least 2 observations.
838	* </li></ul></p>
839	*
840	* @param sampleStats1 StatisticalSummary describing data from the first sample
841	* @param sampleStats2 StatisticalSummary describing data from the second sample
842	* @return p-value for t-test
843	* @throws NullArgumentException if the sample statistics are <code>null</code>
844	* @throws NumberIsTooSmallException if the number of samples is < 2
845	* @throws MaxCountExceededException if an error occurs computing the p-value
846	*/
847	public double tTest(final StatisticalSummary sampleStats1,
848	final StatisticalSummary sampleStats2)
849	throws NullArgumentException, NumberIsTooSmallException,
850	MaxCountExceededException {
851
852	checkSampleData(sampleStats1);
853	checkSampleData(sampleStats2);
854	return tTest(sampleStats1.getMean(), sampleStats2.getMean(),
855	sampleStats1.getVariance(), sampleStats2.getVariance(),
856	sampleStats1.getN(), sampleStats2.getN());
857
858	}
859
860	/**
861	* Returns the <i>observed significance level</i>, or
862	* <i>p-value</i>, associated with a two-sample, two-tailed t-test
863	* comparing the means of the datasets described by two StatisticalSummary
864	* instances, under the hypothesis of equal subpopulation variances. To
865	* perform a test without the equal variances assumption, use
866	* {@link #tTest(StatisticalSummary, StatisticalSummary)}.
867	* <p>
868	* The number returned is the smallest significance level
869	* at which one can reject the null hypothesis that the two means are
870	* equal in favor of the two-sided alternative that they are different.
871	* For a one-sided test, divide the returned value by 2.</p>
872	* <p>
873	* See {@link #homoscedasticT(double[], double[])} for the formula used to
874	* compute the t-statistic. The sum of the sample sizes minus 2 is used as
875	* the degrees of freedom.</p>
876	* <p>
877	* <strong>Usage Note:</strong><br>
878	* The validity of the p-value depends on the assumptions of the parametric
879	* t-test procedure, as discussed
880	* <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
881	* </p><p>
882	* <strong>Preconditions</strong>: <ul>
883	* <li>The datasets described by the two Univariates must each contain
884	* at least 2 observations.
885	* </li></ul></p>
886	*
887	* @param sampleStats1 StatisticalSummary describing data from the first sample
888	* @param sampleStats2 StatisticalSummary describing data from the second sample
889	* @return p-value for t-test
890	* @throws NullArgumentException if the sample statistics are <code>null</code>
891	* @throws NumberIsTooSmallException if the number of samples is < 2
892	* @throws MaxCountExceededException if an error occurs computing the p-value
893	*/
894	public double homoscedasticTTest(final StatisticalSummary sampleStats1,
895	final StatisticalSummary sampleStats2)
896	throws NullArgumentException, NumberIsTooSmallException,
897	MaxCountExceededException {
898
899	checkSampleData(sampleStats1);
900	checkSampleData(sampleStats2);
901	return homoscedasticTTest(sampleStats1.getMean(),
902	sampleStats2.getMean(),
903	sampleStats1.getVariance(),
904	sampleStats2.getVariance(),
905	sampleStats1.getN(), sampleStats2.getN());
906
907	}
908
909	/**
910	* Performs a
911	* <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
912	* two-sided t-test</a> evaluating the null hypothesis that
913	* <code>sampleStats1</code> and <code>sampleStats2</code> describe
914	* datasets drawn from populations with the same mean, with significance
915	* level <code>alpha</code>. This test does not assume that the
916	* subpopulation variances are equal. To perform the test under the equal
917	* variances assumption, use
918	* {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}.
919	* <p>
920	* Returns <code>true</code> iff the null hypothesis that the means are
921	* equal can be rejected with confidence <code>1 - alpha</code>. To
922	* perform a 1-sided test, use <code>alpha * 2</code></p>
923	* <p>
924	* See {@link #t(double[], double[])} for the formula used to compute the
925	* t-statistic. Degrees of freedom are approximated using the
926	* <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
927	* Welch-Satterthwaite approximation.</a></p>
928	* <p>
929	* <strong>Examples:</strong><br><ol>
930	* <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
931	* the 95%, use
932	* <br><code>tTest(sampleStats1, sampleStats2, 0.05) </code>
933	* </li>
934	* <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2 </code>
935	* at the 99% level, first verify that the measured mean of
936	* <code>sample 1</code> is less than the mean of <code>sample 2</code>
937	* and then use
938	* <br><code>tTest(sampleStats1, sampleStats2, 0.02) </code>
939	* </li></ol></p>
940	* <p>
941	* <strong>Usage Note:</strong><br>
942	* The validity of the test depends on the assumptions of the parametric
943	* t-test procedure, as discussed
944	* <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
945	* here</a></p>
946	* <p>
947	* <strong>Preconditions</strong>: <ul>
948	* <li>The datasets described by the two Univariates must each contain
949	* at least 2 observations.
950	* </li>
951	* <li> <code> 0 < alpha < 0.5 </code>
952	* </li></ul></p>
953	*
954	* @param sampleStats1 StatisticalSummary describing sample data values
955	* @param sampleStats2 StatisticalSummary describing sample data values
956	* @param alpha significance level of the test
957	* @return true if the null hypothesis can be rejected with
958	* confidence 1 - alpha
959	* @throws NullArgumentException if the sample statistics are <code>null</code>
960	* @throws NumberIsTooSmallException if the number of samples is < 2
961	* @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
962	* @throws MaxCountExceededException if an error occurs computing the p-value
963	*/
964	public boolean tTest(final StatisticalSummary sampleStats1,
965	final StatisticalSummary sampleStats2,
966	final double alpha)
967	throws NullArgumentException, NumberIsTooSmallException,
968	OutOfRangeException, MaxCountExceededException {
969
970	checkSignificanceLevel(alpha);
971	return tTest(sampleStats1, sampleStats2) < alpha;
972
973	}
974
975	//----------------------------------------------- Protected methods
976
977	/**
978	* Computes approximate degrees of freedom for 2-sample t-test.
979	*
980	* @param v1 first sample variance
981	* @param v2 second sample variance
982	* @param n1 first sample n
983	* @param n2 second sample n
984	* @return approximate degrees of freedom
985	*/
986	protected double df(double v1, double v2, double n1, double n2) {
987	return (((v1 / n1) + (v2 / n2)) * ((v1 / n1) + (v2 / n2))) /
988	((v1 * v1) / (n1 * n1 * (n1 - 1d)) + (v2 * v2) /
989	(n2 * n2 * (n2 - 1d)));
990	}
991
992	/**
993	* Computes t test statistic for 1-sample t-test.
994	*
995	* @param m sample mean
996	* @param mu constant to test against
997	* @param v sample variance
998	* @param n sample n
999	* @return t test statistic
1000	*/
1001	protected double t(final double m, final double mu,
1002	final double v, final double n) {
1003	return (m - mu) / FastMath.sqrt(v / n);
1004	}
1005
1006	/**
1007	* Computes t test statistic for 2-sample t-test.
1008	* <p>
1009	* Does not assume that subpopulation variances are equal.</p>
1010	*
1011	* @param m1 first sample mean
1012	* @param m2 second sample mean
1013	* @param v1 first sample variance
1014	* @param v2 second sample variance
1015	* @param n1 first sample n
1016	* @param n2 second sample n
1017	* @return t test statistic
1018	*/
1019	protected double t(final double m1, final double m2,
1020	final double v1, final double v2,
1021	final double n1, final double n2) {
1022	return (m1 - m2) / FastMath.sqrt((v1 / n1) + (v2 / n2));
1023	}
1024
1025	/**
1026	* Computes t test statistic for 2-sample t-test under the hypothesis
1027	* of equal subpopulation variances.
1028	*
1029	* @param m1 first sample mean
1030	* @param m2 second sample mean
1031	* @param v1 first sample variance
1032	* @param v2 second sample variance
1033	* @param n1 first sample n
1034	* @param n2 second sample n
1035	* @return t test statistic
1036	*/
1037	protected double homoscedasticT(final double m1, final double m2,
1038	final double v1, final double v2,
1039	final double n1, final double n2) {
1040	final double pooledVariance = ((n1 - 1) * v1 + (n2 -1) * v2 ) / (n1 + n2 - 2);
1041	return (m1 - m2) / FastMath.sqrt(pooledVariance * (1d / n1 + 1d / n2));
1042	}
1043
1044	/**
1045	* Computes p-value for 2-sided, 1-sample t-test.
1046	*
1047	* @param m sample mean
1048	* @param mu constant to test against
1049	* @param v sample variance
1050	* @param n sample n
1051	* @return p-value
1052	* @throws MaxCountExceededException if an error occurs computing the p-value
1053	* @throws MathIllegalArgumentException if n is not greater than 1
1054	*/
1055	protected double tTest(final double m, final double mu,
1056	final double v, final double n)
1057	throws MaxCountExceededException, MathIllegalArgumentException {
1058
1059	final double t = FastMath.abs(t(m, mu, v, n));
1060	// pass a null rng to avoid unneeded overhead as we will not sample from this distribution
1061	final TDistribution distribution = new TDistribution(null, n - 1);
1062	return 2.0 * distribution.cumulativeProbability(-t);
1063
1064	}
1065
1066	/**
1067	* Computes p-value for 2-sided, 2-sample t-test.
1068	* <p>
1069	* Does not assume subpopulation variances are equal. Degrees of freedom
1070	* are estimated from the data.</p>
1071	*
1072	* @param m1 first sample mean
1073	* @param m2 second sample mean
1074	* @param v1 first sample variance
1075	* @param v2 second sample variance
1076	* @param n1 first sample n
1077	* @param n2 second sample n
1078	* @return p-value
1079	* @throws MaxCountExceededException if an error occurs computing the p-value
1080	* @throws NotStrictlyPositiveException if the estimated degrees of freedom is not
1081	* strictly positive
1082	*/
1083	protected double tTest(final double m1, final double m2,
1084	final double v1, final double v2,
1085	final double n1, final double n2)
1086	throws MaxCountExceededException, NotStrictlyPositiveException {
1087
1088	final double t = FastMath.abs(t(m1, m2, v1, v2, n1, n2));
1089	final double degreesOfFreedom = df(v1, v2, n1, n2);
1090	// pass a null rng to avoid unneeded overhead as we will not sample from this distribution
1091	final TDistribution distribution = new TDistribution(null, degreesOfFreedom);
1092	return 2.0 * distribution.cumulativeProbability(-t);
1093
1094	}
1095
1096	/**
1097	* Computes p-value for 2-sided, 2-sample t-test, under the assumption
1098	* of equal subpopulation variances.
1099	* <p>
1100	* The sum of the sample sizes minus 2 is used as degrees of freedom.</p>
1101	*
1102	* @param m1 first sample mean
1103	* @param m2 second sample mean
1104	* @param v1 first sample variance
1105	* @param v2 second sample variance
1106	* @param n1 first sample n
1107	* @param n2 second sample n
1108	* @return p-value
1109	* @throws MaxCountExceededException if an error occurs computing the p-value
1110	* @throws NotStrictlyPositiveException if the estimated degrees of freedom is not
1111	* strictly positive
1112	*/
1113	protected double homoscedasticTTest(double m1, double m2,
1114	double v1, double v2,
1115	double n1, double n2)
1116	throws MaxCountExceededException, NotStrictlyPositiveException {
1117
1118	final double t = FastMath.abs(homoscedasticT(m1, m2, v1, v2, n1, n2));
1119	final double degreesOfFreedom = n1 + n2 - 2;
1120	// pass a null rng to avoid unneeded overhead as we will not sample from this distribution
1121	final TDistribution distribution = new TDistribution(null, degreesOfFreedom);
1122	return 2.0 * distribution.cumulativeProbability(-t);
1123
1124	}
1125
1126	/**
1127	* Check significance level.
1128	*
1129	* @param alpha significance level
1130	* @throws OutOfRangeException if the significance level is out of bounds.
1131	*/
1132	private void checkSignificanceLevel(final double alpha)
1133	throws OutOfRangeException {
1134
1135	if (alpha <= 0 \|\| alpha > 0.5) {
1136	throw new OutOfRangeException(LocalizedFormats.SIGNIFICANCE_LEVEL,
1137	alpha, 0.0, 0.5);
1138	}
1139
1140	}
1141
1142	/**
1143	* Check sample data.
1144	*
1145	* @param data Sample data.
1146	* @throws NullArgumentException if {@code data} is {@code null}.
1147	* @throws NumberIsTooSmallException if there is not enough sample data.
1148	*/
1149	private void checkSampleData(final double[] data)
1150	throws NullArgumentException, NumberIsTooSmallException {
1151
1152	if (data == null) {
1153	throw new NullArgumentException();
1154	}
1155	if (data.length < 2) {
1156	throw new NumberIsTooSmallException(
1157	LocalizedFormats.INSUFFICIENT_DATA_FOR_T_STATISTIC,
1158	data.length, 2, true);
1159	}
1160
1161	}
1162
1163	/**
1164	* Check sample data.
1165	*
1166	* @param stat Statistical summary.
1167	* @throws NullArgumentException if {@code data} is {@code null}.
1168	* @throws NumberIsTooSmallException if there is not enough sample data.
1169	*/
1170	private void checkSampleData(final StatisticalSummary stat)
1171	throws NullArgumentException, NumberIsTooSmallException {
1172
1173	if (stat == null) {
1174	throw new NullArgumentException();
1175	}
1176	if (stat.getN() < 2) {
1177	throw new NumberIsTooSmallException(
1178	LocalizedFormats.INSUFFICIENT_DATA_FOR_T_STATISTIC,
1179	stat.getN(), 2, true);
1180	}
1181
1182	}
1183
1184	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: src/main/java/agents/anac/y2019/harddealer/math3/stat/inference/TTest.java

Download in other formats: