001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018 package org.apache.commons.math.stat.descriptive;
019
020 import junit.framework.Test;
021 import junit.framework.TestCase;
022 import junit.framework.TestSuite;
023
024 import java.util.Collection;
025 import java.util.ArrayList;
026
027 import org.apache.commons.math.random.RandomData;
028 import org.apache.commons.math.random.RandomDataImpl;
029 import org.apache.commons.math.TestUtils;
030
031
032 /**
033 * Test cases for {@link AggregateSummaryStatistics}
034 *
035 */
036 public class AggregateSummaryStatisticsTest extends TestCase {
037
038 /**
039 * Creates and returns a {@code Test} representing all the test cases in this
040 * class
041 *
042 * @return a {@code Test} representing all the test cases in this class
043 */
044 public static Test suite() {
045 TestSuite suite = new TestSuite(AggregateSummaryStatisticsTest.class);
046 suite.setName("AggregateSummaryStatistics tests");
047 return suite;
048 }
049
050 /**
051 * Tests the standard aggregation behavior
052 */
053 public void testAggregation() {
054 AggregateSummaryStatistics aggregate = new AggregateSummaryStatistics();
055 SummaryStatistics setOneStats = aggregate.createContributingStatistics();
056 SummaryStatistics setTwoStats = aggregate.createContributingStatistics();
057
058 assertNotNull("The set one contributing stats are null", setOneStats);
059 assertNotNull("The set two contributing stats are null", setTwoStats);
060 assertNotSame("Contributing stats objects are the same", setOneStats, setTwoStats);
061
062 setOneStats.addValue(2);
063 setOneStats.addValue(3);
064 setOneStats.addValue(5);
065 setOneStats.addValue(7);
066 setOneStats.addValue(11);
067 assertEquals("Wrong number of set one values", 5, setOneStats.getN());
068 assertEquals("Wrong sum of set one values", 28.0, setOneStats.getSum());
069
070 setTwoStats.addValue(2);
071 setTwoStats.addValue(4);
072 setTwoStats.addValue(8);
073 assertEquals("Wrong number of set two values", 3, setTwoStats.getN());
074 assertEquals("Wrong sum of set two values", 14.0, setTwoStats.getSum());
075
076 assertEquals("Wrong number of aggregate values", 8, aggregate.getN());
077 assertEquals("Wrong aggregate sum", 42.0, aggregate.getSum());
078 }
079
080 /**
081 * Verify that aggregating over a partition gives the same results
082 * as direct computation.
083 *
084 * 1) Randomly generate a dataset of 10-100 values
085 * from [-100, 100]
086 * 2) Divide the dataset it into 2-5 partitions
087 * 3) Create an AggregateSummaryStatistic and ContributingStatistics
088 * for each partition
089 * 4) Compare results from the AggregateSummaryStatistic with values
090 * returned by a single SummaryStatistics instance that is provided
091 * the full dataset
092 */
093 public void testAggregationConsistency() throws Exception {
094
095 // Generate a random sample and random partition
096 double[] totalSample = generateSample();
097 double[][] subSamples = generatePartition(totalSample);
098 int nSamples = subSamples.length;
099
100 // Create aggregator and total stats for comparison
101 AggregateSummaryStatistics aggregate = new AggregateSummaryStatistics();
102 SummaryStatistics totalStats = new SummaryStatistics();
103
104 // Create array of component stats
105 SummaryStatistics componentStats[] = new SummaryStatistics[nSamples];
106
107 for (int i = 0; i < nSamples; i++) {
108
109 // Make componentStats[i] a contributing statistic to aggregate
110 componentStats[i] = aggregate.createContributingStatistics();
111
112 // Add values from subsample
113 for (int j = 0; j < subSamples[i].length; j++) {
114 componentStats[i].addValue(subSamples[i][j]);
115 }
116 }
117
118 // Compute totalStats directly
119 for (int i = 0; i < totalSample.length; i++) {
120 totalStats.addValue(totalSample[i]);
121 }
122
123 /*
124 * Compare statistics in totalStats with aggregate.
125 * Note that guaranteed success of this comparison depends on the
126 * fact that <aggregate> gets values in exactly the same order
127 * as <totalStats>.
128 *
129 */
130 assertEquals(totalStats.getSummary(), aggregate.getSummary());
131
132 }
133
134 /**
135 * Test aggregate function by randomly generating a dataset of 10-100 values
136 * from [-100, 100], dividing it into 2-5 partitions, computing stats for each
137 * partition and comparing the result of aggregate(...) applied to the collection
138 * of per-partition SummaryStatistics with a single SummaryStatistics computed
139 * over the full sample.
140 *
141 * @throws Exception
142 */
143 public void testAggregate() throws Exception {
144
145 // Generate a random sample and random partition
146 double[] totalSample = generateSample();
147 double[][] subSamples = generatePartition(totalSample);
148 int nSamples = subSamples.length;
149
150 // Compute combined stats directly
151 SummaryStatistics totalStats = new SummaryStatistics();
152 for (int i = 0; i < totalSample.length; i++) {
153 totalStats.addValue(totalSample[i]);
154 }
155
156 // Now compute subsample stats individually and aggregate
157 SummaryStatistics[] subSampleStats = new SummaryStatistics[nSamples];
158 for (int i = 0; i < nSamples; i++) {
159 subSampleStats[i] = new SummaryStatistics();
160 }
161 Collection<SummaryStatistics> aggregate = new ArrayList<SummaryStatistics>();
162 for (int i = 0; i < nSamples; i++) {
163 for (int j = 0; j < subSamples[i].length; j++) {
164 subSampleStats[i].addValue(subSamples[i][j]);
165 }
166 aggregate.add(subSampleStats[i]);
167 }
168
169 // Compare values
170 StatisticalSummary aggregatedStats = AggregateSummaryStatistics.aggregate(aggregate);
171 assertEquals(totalStats.getSummary(), aggregatedStats, 10E-12);
172 }
173
174
175 public void testAggregateDegenerate() throws Exception {
176 double[] totalSample = {1, 2, 3, 4, 5};
177 double[][] subSamples = {{1}, {2}, {3}, {4}, {5}};
178
179 // Compute combined stats directly
180 SummaryStatistics totalStats = new SummaryStatistics();
181 for (int i = 0; i < totalSample.length; i++) {
182 totalStats.addValue(totalSample[i]);
183 }
184
185 // Now compute subsample stats individually and aggregate
186 SummaryStatistics[] subSampleStats = new SummaryStatistics[5];
187 for (int i = 0; i < 5; i++) {
188 subSampleStats[i] = new SummaryStatistics();
189 }
190 Collection<SummaryStatistics> aggregate = new ArrayList<SummaryStatistics>();
191 for (int i = 0; i < 5; i++) {
192 for (int j = 0; j < subSamples[i].length; j++) {
193 subSampleStats[i].addValue(subSamples[i][j]);
194 }
195 aggregate.add(subSampleStats[i]);
196 }
197
198 // Compare values
199 StatisticalSummaryValues aggregatedStats = AggregateSummaryStatistics.aggregate(aggregate);
200 assertEquals(totalStats.getSummary(), aggregatedStats, 10E-12);
201 }
202
203 public void testAggregateSpecialValues() throws Exception {
204 double[] totalSample = {Double.POSITIVE_INFINITY, 2, 3, Double.NaN, 5};
205 double[][] subSamples = {{Double.POSITIVE_INFINITY, 2}, {3}, {Double.NaN}, {5}};
206
207 // Compute combined stats directly
208 SummaryStatistics totalStats = new SummaryStatistics();
209 for (int i = 0; i < totalSample.length; i++) {
210 totalStats.addValue(totalSample[i]);
211 }
212
213 // Now compute subsample stats individually and aggregate
214 SummaryStatistics[] subSampleStats = new SummaryStatistics[5];
215 for (int i = 0; i < 4; i++) {
216 subSampleStats[i] = new SummaryStatistics();
217 }
218 Collection<SummaryStatistics> aggregate = new ArrayList<SummaryStatistics>();
219 for (int i = 0; i < 4; i++) {
220 for (int j = 0; j < subSamples[i].length; j++) {
221 subSampleStats[i].addValue(subSamples[i][j]);
222 }
223 aggregate.add(subSampleStats[i]);
224 }
225
226 // Compare values
227 StatisticalSummaryValues aggregatedStats = AggregateSummaryStatistics.aggregate(aggregate);
228 assertEquals(totalStats.getSummary(), aggregatedStats, 10E-12);
229
230 }
231
232 /**
233 * Verifies that a StatisticalSummary and a StatisticalSummaryValues are equal up
234 * to delta, with NaNs, infinities returned in the same spots. For max, min, n, values
235 * have to agree exactly, delta is used only for sum, mean, variance, std dev.
236 */
237 protected static void assertEquals(StatisticalSummary expected, StatisticalSummary observed, double delta) {
238 TestUtils.assertEquals(expected.getMax(), observed.getMax(), 0);
239 TestUtils.assertEquals(expected.getMin(), observed.getMin(), 0);
240 assertEquals(expected.getN(), observed.getN());
241 TestUtils.assertEquals(expected.getSum(), observed.getSum(), delta);
242 TestUtils.assertEquals(expected.getMean(), observed.getMean(), delta);
243 TestUtils.assertEquals(expected.getStandardDeviation(), observed.getStandardDeviation(), delta);
244 TestUtils.assertEquals(expected.getVariance(), observed.getVariance(), delta);
245 }
246
247
248 /**
249 * Generates a random sample of double values.
250 * Sample size is random, between 10 and 100 and values are
251 * uniformly distributed over [-100, 100].
252 *
253 * @return array of random double values
254 */
255 private double[] generateSample() {
256 final RandomData randomData = new RandomDataImpl();
257 final int sampleSize = randomData.nextInt(10,100);
258 double[] out = new double[sampleSize];
259 for (int i = 0; i < out.length; i++) {
260 out[i] = randomData.nextUniform(-100, 100);
261 }
262 return out;
263 }
264
265 /**
266 * Generates a partition of <sample> into up to 5 sequentially selected
267 * subsamples with randomly selected partition points.
268 *
269 * @param sample array to partition
270 * @return rectangular array with rows = subsamples
271 */
272 private double[][] generatePartition(double[] sample) {
273 final int length = sample.length;
274 final double[][] out = new double[5][];
275 final RandomData randomData = new RandomDataImpl();
276 int cur = 0;
277 int offset = 0;
278 int sampleCount = 0;
279 for (int i = 0; i < 5; i++) {
280 if (cur == length || offset == length) {
281 break;
282 }
283 final int next = (i == 4 || cur == length - 1) ? length - 1 : randomData.nextInt(cur, length - 1);
284 final int subLength = next - cur + 1;
285 out[i] = new double[subLength];
286 System.arraycopy(sample, offset, out[i], 0, subLength);
287 cur = next + 1;
288 sampleCount++;
289 offset += subLength;
290 }
291 if (sampleCount < 5) {
292 double[][] out2 = new double[sampleCount][];
293 for (int j = 0; j < sampleCount; j++) {
294 final int curSize = out[j].length;
295 out2[j] = new double[curSize];
296 System.arraycopy(out[j], 0, out2[j], 0, curSize);
297 }
298 return out2;
299 } else {
300 return out;
301 }
302 }
303
304 }