001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.commons.math.stat.regression;
018
019 import java.util.Random;
020
021 import junit.framework.Test;
022 import junit.framework.TestCase;
023 import junit.framework.TestSuite;
024 /**
025 * Test cases for the TestStatistic class.
026 *
027 * @version $Revision: 764749 $ $Date: 2009-04-14 07:51:40 -0400 (Tue, 14 Apr 2009) $
028 */
029
030 public final class SimpleRegressionTest extends TestCase {
031
032 /*
033 * NIST "Norris" refernce data set from
034 * http://www.itl.nist.gov/div898/strd/lls/data/LINKS/DATA/Norris.dat
035 * Strangely, order is {y,x}
036 */
037 private double[][] data = { { 0.1, 0.2 }, {338.8, 337.4 }, {118.1, 118.2 },
038 {888.0, 884.6 }, {9.2, 10.1 }, {228.1, 226.5 }, {668.5, 666.3 }, {998.5, 996.3 },
039 {449.1, 448.6 }, {778.9, 777.0 }, {559.2, 558.2 }, {0.3, 0.4 }, {0.1, 0.6 }, {778.1, 775.5 },
040 {668.8, 666.9 }, {339.3, 338.0 }, {448.9, 447.5 }, {10.8, 11.6 }, {557.7, 556.0 },
041 {228.3, 228.1 }, {998.0, 995.8 }, {888.8, 887.6 }, {119.6, 120.2 }, {0.3, 0.3 },
042 {0.6, 0.3 }, {557.6, 556.8 }, {339.3, 339.1 }, {888.0, 887.2 }, {998.5, 999.0 },
043 {778.9, 779.0 }, {10.2, 11.1 }, {117.6, 118.3 }, {228.9, 229.2 }, {668.4, 669.1 },
044 {449.2, 448.9 }, {0.2, 0.5 }
045 };
046
047 /*
048 * Correlation example from
049 * http://www.xycoon.com/correlation.htm
050 */
051 private double[][] corrData = { { 101.0, 99.2 }, {100.1, 99.0 }, {100.0, 100.0 },
052 {90.6, 111.6 }, {86.5, 122.2 }, {89.7, 117.6 }, {90.6, 121.1 }, {82.8, 136.0 },
053 {70.1, 154.2 }, {65.4, 153.6 }, {61.3, 158.5 }, {62.5, 140.6 }, {63.6, 136.2 },
054 {52.6, 168.0 }, {59.7, 154.3 }, {59.5, 149.0 }, {61.3, 165.5 }
055 };
056
057 /*
058 * From Moore and Mcabe, "Introduction to the Practice of Statistics"
059 * Example 10.3
060 */
061 private double[][] infData = { { 15.6, 5.2 }, {26.8, 6.1 }, {37.8, 8.7 }, {36.4, 8.5 },
062 {35.5, 8.8 }, {18.6, 4.9 }, {15.3, 4.5 }, {7.9, 2.5 }, {0.0, 1.1 }
063 };
064
065 /*
066 * Points to remove in the remove tests
067 */
068 private double[][] removeSingle = {infData[1]};
069 private double[][] removeMultiple = { infData[1], infData[2] };
070 private double removeX = infData[0][0];
071 private double removeY = infData[0][1];
072
073
074 /*
075 * Data with bad linear fit
076 */
077 private double[][] infData2 = { { 1, 1 }, {2, 0 }, {3, 5 }, {4, 2 },
078 {5, -1 }, {6, 12 }
079 };
080
081 public SimpleRegressionTest(String name) {
082 super(name);
083 }
084
085 public static Test suite() {
086 TestSuite suite = new TestSuite(SimpleRegressionTest.class);
087 suite.setName("BivariateRegression Tests");
088 return suite;
089 }
090
091 public void testNorris() {
092 SimpleRegression regression = new SimpleRegression();
093 for (int i = 0; i < data.length; i++) {
094 regression.addData(data[i][1], data[i][0]);
095 }
096 // Tests against certified values from
097 // http://www.itl.nist.gov/div898/strd/lls/data/LINKS/DATA/Norris.dat
098 assertEquals("slope", 1.00211681802045, regression.getSlope(), 10E-12);
099 assertEquals("slope std err", 0.429796848199937E-03,
100 regression.getSlopeStdErr(),10E-12);
101 assertEquals("number of observations", 36, regression.getN());
102 assertEquals( "intercept", -0.262323073774029,
103 regression.getIntercept(),10E-12);
104 assertEquals("std err intercept", 0.232818234301152,
105 regression.getInterceptStdErr(),10E-12);
106 assertEquals("r-square", 0.999993745883712,
107 regression.getRSquare(), 10E-12);
108 assertEquals("SSR", 4255954.13232369,
109 regression.getRegressionSumSquares(), 10E-9);
110 assertEquals("MSE", 0.782864662630069,
111 regression.getMeanSquareError(), 10E-10);
112 assertEquals("SSE", 26.6173985294224,
113 regression.getSumSquaredErrors(),10E-9);
114 // ------------ End certified data tests
115
116 assertEquals( "predict(0)", -0.262323073774029,
117 regression.predict(0), 10E-12);
118 assertEquals("predict(1)", 1.00211681802045 - 0.262323073774029,
119 regression.predict(1), 10E-12);
120 }
121
122 public void testCorr() {
123 SimpleRegression regression = new SimpleRegression();
124 regression.addData(corrData);
125 assertEquals("number of observations", 17, regression.getN());
126 assertEquals("r-square", .896123, regression.getRSquare(), 10E-6);
127 assertEquals("r", -0.94663767742, regression.getR(), 1E-10);
128 }
129
130 public void testNaNs() {
131 SimpleRegression regression = new SimpleRegression();
132 assertTrue("intercept not NaN", Double.isNaN(regression.getIntercept()));
133 assertTrue("slope not NaN", Double.isNaN(regression.getSlope()));
134 assertTrue("slope std err not NaN", Double.isNaN(regression.getSlopeStdErr()));
135 assertTrue("intercept std err not NaN", Double.isNaN(regression.getInterceptStdErr()));
136 assertTrue("MSE not NaN", Double.isNaN(regression.getMeanSquareError()));
137 assertTrue("e not NaN", Double.isNaN(regression.getR()));
138 assertTrue("r-square not NaN", Double.isNaN(regression.getRSquare()));
139 assertTrue( "RSS not NaN", Double.isNaN(regression.getRegressionSumSquares()));
140 assertTrue("SSE not NaN",Double.isNaN(regression.getSumSquaredErrors()));
141 assertTrue("SSTO not NaN", Double.isNaN(regression.getTotalSumSquares()));
142 assertTrue("predict not NaN", Double.isNaN(regression.predict(0)));
143
144 regression.addData(1, 2);
145 regression.addData(1, 3);
146
147 // No x variation, so these should still blow...
148 assertTrue("intercept not NaN", Double.isNaN(regression.getIntercept()));
149 assertTrue("slope not NaN", Double.isNaN(regression.getSlope()));
150 assertTrue("slope std err not NaN", Double.isNaN(regression.getSlopeStdErr()));
151 assertTrue("intercept std err not NaN", Double.isNaN(regression.getInterceptStdErr()));
152 assertTrue("MSE not NaN", Double.isNaN(regression.getMeanSquareError()));
153 assertTrue("e not NaN", Double.isNaN(regression.getR()));
154 assertTrue("r-square not NaN", Double.isNaN(regression.getRSquare()));
155 assertTrue("RSS not NaN", Double.isNaN(regression.getRegressionSumSquares()));
156 assertTrue("SSE not NaN", Double.isNaN(regression.getSumSquaredErrors()));
157 assertTrue("predict not NaN", Double.isNaN(regression.predict(0)));
158
159 // but SSTO should be OK
160 assertTrue("SSTO NaN", !Double.isNaN(regression.getTotalSumSquares()));
161
162 regression = new SimpleRegression();
163
164 regression.addData(1, 2);
165 regression.addData(3, 3);
166
167 // All should be OK except MSE, s(b0), s(b1) which need one more df
168 assertTrue("interceptNaN", !Double.isNaN(regression.getIntercept()));
169 assertTrue("slope NaN", !Double.isNaN(regression.getSlope()));
170 assertTrue ("slope std err not NaN", Double.isNaN(regression.getSlopeStdErr()));
171 assertTrue("intercept std err not NaN", Double.isNaN(regression.getInterceptStdErr()));
172 assertTrue("MSE not NaN", Double.isNaN(regression.getMeanSquareError()));
173 assertTrue("r NaN", !Double.isNaN(regression.getR()));
174 assertTrue("r-square NaN", !Double.isNaN(regression.getRSquare()));
175 assertTrue("RSS NaN", !Double.isNaN(regression.getRegressionSumSquares()));
176 assertTrue("SSE NaN", !Double.isNaN(regression.getSumSquaredErrors()));
177 assertTrue("SSTO NaN", !Double.isNaN(regression.getTotalSumSquares()));
178 assertTrue("predict NaN", !Double.isNaN(regression.predict(0)));
179
180 regression.addData(1, 4);
181
182 // MSE, MSE, s(b0), s(b1) should all be OK now
183 assertTrue("MSE NaN", !Double.isNaN(regression.getMeanSquareError()));
184 assertTrue("slope std err NaN", !Double.isNaN(regression.getSlopeStdErr()));
185 assertTrue("intercept std err NaN", !Double.isNaN(regression.getInterceptStdErr()));
186 }
187
188 public void testClear() {
189 SimpleRegression regression = new SimpleRegression();
190 regression.addData(corrData);
191 assertEquals("number of observations", 17, regression.getN());
192 regression.clear();
193 assertEquals("number of observations", 0, regression.getN());
194 regression.addData(corrData);
195 assertEquals("r-square", .896123, regression.getRSquare(), 10E-6);
196 regression.addData(data);
197 assertEquals("number of observations", 53, regression.getN());
198 }
199
200 public void testInference() throws Exception {
201 //---------- verified against R, version 1.8.1 -----
202 // infData
203 SimpleRegression regression = new SimpleRegression();
204 regression.addData(infData);
205 assertEquals("slope std err", 0.011448491,
206 regression.getSlopeStdErr(), 1E-10);
207 assertEquals("std err intercept", 0.286036932,
208 regression.getInterceptStdErr(),1E-8);
209 assertEquals("significance", 4.596e-07,
210 regression.getSignificance(),1E-8);
211 assertEquals("slope conf interval half-width", 0.0270713794287,
212 regression.getSlopeConfidenceInterval(),1E-8);
213 // infData2
214 regression = new SimpleRegression();
215 regression.addData(infData2);
216 assertEquals("slope std err", 1.07260253,
217 regression.getSlopeStdErr(), 1E-8);
218 assertEquals("std err intercept",4.17718672,
219 regression.getInterceptStdErr(),1E-8);
220 assertEquals("significance", 0.261829133982,
221 regression.getSignificance(),1E-11);
222 assertEquals("slope conf interval half-width", 2.97802204827,
223 regression.getSlopeConfidenceInterval(),1E-8);
224 //------------- End R-verified tests -------------------------------
225
226 //FIXME: get a real example to test against with alpha = .01
227 assertTrue("tighter means wider",
228 regression.getSlopeConfidenceInterval() < regression.getSlopeConfidenceInterval(0.01));
229
230 try {
231 regression.getSlopeConfidenceInterval(1);
232 fail("expecting IllegalArgumentException for alpha = 1");
233 } catch (IllegalArgumentException ex) {
234 // ignored
235 }
236
237 }
238
239 public void testPerfect() throws Exception {
240 SimpleRegression regression = new SimpleRegression();
241 int n = 100;
242 for (int i = 0; i < n; i++) {
243 regression.addData(((double) i) / (n - 1), i);
244 }
245 assertEquals(0.0, regression.getSignificance(), 1.0e-5);
246 assertTrue(regression.getSlope() > 0.0);
247 assertTrue(regression.getSumSquaredErrors() >= 0.0);
248 }
249
250 public void testPerfectNegative() throws Exception {
251 SimpleRegression regression = new SimpleRegression();
252 int n = 100;
253 for (int i = 0; i < n; i++) {
254 regression.addData(- ((double) i) / (n - 1), i);
255 }
256
257 assertEquals(0.0, regression.getSignificance(), 1.0e-5);
258 assertTrue(regression.getSlope() < 0.0);
259 }
260
261 public void testRandom() throws Exception {
262 SimpleRegression regression = new SimpleRegression();
263 Random random = new Random(1);
264 int n = 100;
265 for (int i = 0; i < n; i++) {
266 regression.addData(((double) i) / (n - 1), random.nextDouble());
267 }
268
269 assertTrue( 0.0 < regression.getSignificance()
270 && regression.getSignificance() < 1.0);
271 }
272
273
274 // Jira MATH-85 = Bugzilla 39432
275 public void testSSENonNegative() {
276 double[] y = { 8915.102, 8919.302, 8923.502 };
277 double[] x = { 1.107178495E2, 1.107264895E2, 1.107351295E2 };
278 SimpleRegression reg = new SimpleRegression();
279 for (int i = 0; i < x.length; i++) {
280 reg.addData(x[i], y[i]);
281 }
282 assertTrue(reg.getSumSquaredErrors() >= 0.0);
283 }
284
285 // Test remove X,Y (single observation)
286 public void testRemoveXY() throws Exception {
287 // Create regression with inference data then remove to test
288 SimpleRegression regression = new SimpleRegression();
289 regression.addData(infData);
290 regression.removeData(removeX, removeY);
291 regression.addData(removeX, removeY);
292 // Use the inference assertions to make sure that everything worked
293 assertEquals("slope std err", 0.011448491,
294 regression.getSlopeStdErr(), 1E-10);
295 assertEquals("std err intercept", 0.286036932,
296 regression.getInterceptStdErr(),1E-8);
297 assertEquals("significance", 4.596e-07,
298 regression.getSignificance(),1E-8);
299 assertEquals("slope conf interval half-width", 0.0270713794287,
300 regression.getSlopeConfidenceInterval(),1E-8);
301 }
302
303
304 // Test remove single observation in array
305 public void testRemoveSingle() throws Exception {
306 // Create regression with inference data then remove to test
307 SimpleRegression regression = new SimpleRegression();
308 regression.addData(infData);
309 regression.removeData(removeSingle);
310 regression.addData(removeSingle);
311 // Use the inference assertions to make sure that everything worked
312 assertEquals("slope std err", 0.011448491,
313 regression.getSlopeStdErr(), 1E-10);
314 assertEquals("std err intercept", 0.286036932,
315 regression.getInterceptStdErr(),1E-8);
316 assertEquals("significance", 4.596e-07,
317 regression.getSignificance(),1E-8);
318 assertEquals("slope conf interval half-width", 0.0270713794287,
319 regression.getSlopeConfidenceInterval(),1E-8);
320 }
321
322 // Test remove multiple observations
323 public void testRemoveMultiple() throws Exception {
324 // Create regression with inference data then remove to test
325 SimpleRegression regression = new SimpleRegression();
326 regression.addData(infData);
327 regression.removeData(removeMultiple);
328 regression.addData(removeMultiple);
329 // Use the inference assertions to make sure that everything worked
330 assertEquals("slope std err", 0.011448491,
331 regression.getSlopeStdErr(), 1E-10);
332 assertEquals("std err intercept", 0.286036932,
333 regression.getInterceptStdErr(),1E-8);
334 assertEquals("significance", 4.596e-07,
335 regression.getSignificance(),1E-8);
336 assertEquals("slope conf interval half-width", 0.0270713794287,
337 regression.getSlopeConfidenceInterval(),1E-8);
338 }
339
340 // Remove observation when empty
341 public void testRemoveObsFromEmpty() {
342 SimpleRegression regression = new SimpleRegression();
343 regression.removeData(removeX, removeY);
344 assertEquals(regression.getN(), 0);
345 }
346
347 // Remove single observation to empty
348 public void testRemoveObsFromSingle() {
349 SimpleRegression regression = new SimpleRegression();
350 regression.addData(removeX, removeY);
351 regression.removeData(removeX, removeY);
352 assertEquals(regression.getN(), 0);
353 }
354
355 // Remove multiple observations to empty
356 public void testRemoveMultipleToEmpty() {
357 SimpleRegression regression = new SimpleRegression();
358 regression.addData(removeMultiple);
359 regression.removeData(removeMultiple);
360 assertEquals(regression.getN(), 0);
361 }
362
363 // Remove multiple observations past empty (i.e. size of array > n)
364 public void testRemoveMultiplePastEmpty() {
365 SimpleRegression regression = new SimpleRegression();
366 regression.addData(removeX, removeY);
367 regression.removeData(removeMultiple);
368 assertEquals(regression.getN(), 0);
369 }
370 }