1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.math.stat.correlation;
18
19 import org.apache.commons.math.MathException;
20 import org.apache.commons.math.MathRuntimeException;
21 import org.apache.commons.math.distribution.TDistribution;
22 import org.apache.commons.math.distribution.TDistributionImpl;
23 import org.apache.commons.math.linear.RealMatrix;
24 import org.apache.commons.math.linear.BlockRealMatrix;
25 import org.apache.commons.math.stat.regression.SimpleRegression;
26
27 /**
28 * Computes Pearson's product-moment correlation coefficients for pairs of arrays
29 * or columns of a matrix.
30 *
31 * <p>The constructors that take <code>RealMatrix</code> or
32 * <code>double[][]</code> arguments generate correlation matrices. The
33 * columns of the input matrices are assumed to represent variable values.
34 * Correlations are given by the formula</p>
35 * <code>cor(X, Y) = Σ[(x<sub>i</sub> - E(X))(y<sub>i</sub> - E(Y))] / [(n - 1)s(X)s(Y)]</code>
36 * where <code>E(X)</code> is the mean of <code>X</code>, <code>E(Y)</code>
37 * is the mean of the <code>Y</code> values and s(X), s(Y) are standard deviations.
38 *
39 * @version $Revision: 799857 $ $Date: 2009-08-01 09:07:12 -0400 (Sat, 01 Aug 2009) $
40 * @since 2.0
41 */
42 public class PearsonsCorrelation {
43
44 /** correlation matrix */
45 private final RealMatrix correlationMatrix;
46
47 /** number of observations */
48 private final int nObs;
49
50 /**
51 * Create a PearsonsCorrelation instance without data
52 */
53 public PearsonsCorrelation() {
54 super();
55 correlationMatrix = null;
56 nObs = 0;
57 }
58
59 /**
60 * Create a PearsonsCorrelation from a rectangular array
61 * whose columns represent values of variables to be correlated.
62 *
63 * @param data rectangular array with columns representing variables
64 * @throws IllegalArgumentException if the input data array is not
65 * rectangular with at least two rows and two columns.
66 */
67 public PearsonsCorrelation(double[][] data) {
68 this(new BlockRealMatrix(data));
69 }
70
71 /**
72 * Create a PearsonsCorrelation from a RealMatrix whose columns
73 * represent variables to be correlated.
74 *
75 * @param matrix matrix with columns representing variables to correlate
76 */
77 public PearsonsCorrelation(RealMatrix matrix) {
78 checkSufficientData(matrix);
79 nObs = matrix.getRowDimension();
80 correlationMatrix = computeCorrelationMatrix(matrix);
81 }
82
83 /**
84 * Create a PearsonsCorrelation from a {@link Covariance}. The correlation
85 * matrix is computed by scaling the Covariance's covariance matrix.
86 * The Covariance instance must have been created from a data matrix with
87 * columns representing variable values.
88 *
89 * @param covariance Covariance instance
90 */
91 public PearsonsCorrelation(Covariance covariance) {
92 RealMatrix covarianceMatrix = covariance.getCovarianceMatrix();
93 if (covarianceMatrix == null) {
94 throw MathRuntimeException.createIllegalArgumentException("covariance matrix is null");
95 }
96 nObs = covariance.getN();
97 correlationMatrix = covarianceToCorrelation(covarianceMatrix);
98 }
99
100 /**
101 * Create a PearsonsCorrelation from a covariance matrix. The correlation
102 * matrix is computed by scaling the covariance matrix.
103 *
104 * @param covarianceMatrix covariance matrix
105 * @param numberOfObservations the number of observations in the dataset used to compute
106 * the covariance matrix
107 */
108 public PearsonsCorrelation(RealMatrix covarianceMatrix, int numberOfObservations) {
109 nObs = numberOfObservations;
110 correlationMatrix = covarianceToCorrelation(covarianceMatrix);
111
112 }
113
114 /**
115 * Returns the correlation matrix
116 *
117 * @return correlation matrix
118 */
119 public RealMatrix getCorrelationMatrix() {
120 return correlationMatrix;
121 }
122
123 /**
124 * Returns a matrix of standard errors associated with the estimates
125 * in the correlation matrix.<br/>
126 * <code>getCorrelationStandardErrors().getEntry(i,j)</code> is the standard
127 * error associated with <code>getCorrelationMatrix.getEntry(i,j)</code>
128 * <p>The formula used to compute the standard error is <br/>
129 * <code>SE<sub>r</sub> = ((1 - r<sup>2</sup>) / (n - 2))<sup>1/2</sup></code>
130 * where <code>r</code> is the estimated correlation coefficient and
131 * <code>n</code> is the number of observations in the source dataset.</p>
132 *
133 * @return matrix of correlation standard errors
134 */
135 public RealMatrix getCorrelationStandardErrors() {
136 int nVars = correlationMatrix.getColumnDimension();
137 double[][] out = new double[nVars][nVars];
138 for (int i = 0; i < nVars; i++) {
139 for (int j = 0; j < nVars; j++) {
140 double r = correlationMatrix.getEntry(i, j);
141 out[i][j] = Math.sqrt((1 - r * r) /(nObs - 2));
142 }
143 }
144 return new BlockRealMatrix(out);
145 }
146
147 /**
148 * Returns a matrix of p-values associated with the (two-sided) null
149 * hypothesis that the corresponding correlation coefficient is zero.
150 * <p><code>getCorrelationPValues().getEntry(i,j)</code> is the probability
151 * that a random variable distributed as <code>t<sub>n-2</sub></code> takes
152 * a value with absolute value greater than or equal to <br>
153 * <code>|r|((n - 2) / (1 - r<sup>2</sup>))<sup>1/2</sup></code></p>
154 * <p>The values in the matrix are sometimes referred to as the
155 * <i>significance</i> of the corresponding correlation coefficients.</p>
156 *
157 * @return matrix of p-values
158 * @throws MathException if an error occurs estimating probabilities
159 */
160 public RealMatrix getCorrelationPValues() throws MathException {
161 TDistribution tDistribution = new TDistributionImpl(nObs - 2);
162 int nVars = correlationMatrix.getColumnDimension();
163 double[][] out = new double[nVars][nVars];
164 for (int i = 0; i < nVars; i++) {
165 for (int j = 0; j < nVars; j++) {
166 if (i == j) {
167 out[i][j] = 0d;
168 } else {
169 double r = correlationMatrix.getEntry(i, j);
170 double t = Math.abs(r * Math.sqrt((nObs - 2)/(1 - r * r)));
171 out[i][j] = 2 * (1 - tDistribution.cumulativeProbability(t));
172 }
173 }
174 }
175 return new BlockRealMatrix(out);
176 }
177
178
179 /**
180 * Computes the correlation matrix for the columns of the
181 * input matrix.
182 *
183 * @param matrix matrix with columns representing variables to correlate
184 * @return correlation matrix
185 */
186 public RealMatrix computeCorrelationMatrix(RealMatrix matrix) {
187 int nVars = matrix.getColumnDimension();
188 RealMatrix outMatrix = new BlockRealMatrix(nVars, nVars);
189 for (int i = 0; i < nVars; i++) {
190 for (int j = 0; j < i; j++) {
191 double corr = correlation(matrix.getColumn(i), matrix.getColumn(j));
192 outMatrix.setEntry(i, j, corr);
193 outMatrix.setEntry(j, i, corr);
194 }
195 outMatrix.setEntry(i, i, 1d);
196 }
197 return outMatrix;
198 }
199
200 /**
201 * Computes the correlation matrix for the columns of the
202 * input rectangular array. The colums of the array represent values
203 * of variables to be correlated.
204 *
205 * @param data matrix with columns representing variables to correlate
206 * @return correlation matrix
207 */
208 public RealMatrix computeCorrelationMatrix(double[][] data) {
209 return computeCorrelationMatrix(new BlockRealMatrix(data));
210 }
211
212 /**
213 * Computes the Pearson's product-moment correlation coefficient between the two arrays.
214 *
215 * </p>Throws IllegalArgumentException if the arrays do not have the same length
216 * or their common length is less than 2</p>
217 *
218 * @param xArray first data array
219 * @param yArray second data array
220 * @return Returns Pearson's correlation coefficient for the two arrays
221 * @throws IllegalArgumentException if the arrays lengths do not match or
222 * there is insufficient data
223 */
224 public double correlation(final double[] xArray, final double[] yArray) throws IllegalArgumentException {
225 SimpleRegression regression = new SimpleRegression();
226 if(xArray.length == yArray.length && xArray.length > 1) {
227 for(int i=0; i<xArray.length; i++) {
228 regression.addData(xArray[i], yArray[i]);
229 }
230 return regression.getR();
231 }
232 else {
233 throw MathRuntimeException.createIllegalArgumentException(
234 "invalid array dimensions. xArray has size {0}; yArray has {1} elements",
235 xArray.length, yArray.length);
236 }
237 }
238
239 /**
240 * Derives a correlation matrix from a covariance matrix.
241 *
242 * <p>Uses the formula <br/>
243 * <code>r(X,Y) = cov(X,Y)/s(X)s(Y)</code> where
244 * <code>r(·,·)</code> is the correlation coefficient and
245 * <code>s(·)</code> means standard deviation.</p>
246 *
247 * @param covarianceMatrix the covariance matrix
248 * @return correlation matrix
249 */
250 public RealMatrix covarianceToCorrelation(RealMatrix covarianceMatrix) {
251 int nVars = covarianceMatrix.getColumnDimension();
252 RealMatrix outMatrix = new BlockRealMatrix(nVars, nVars);
253 for (int i = 0; i < nVars; i++) {
254 double sigma = Math.sqrt(covarianceMatrix.getEntry(i, i));
255 outMatrix.setEntry(i, i, 1d);
256 for (int j = 0; j < i; j++) {
257 double entry = covarianceMatrix.getEntry(i, j) /
258 (sigma * Math.sqrt(covarianceMatrix.getEntry(j, j)));
259 outMatrix.setEntry(i, j, entry);
260 outMatrix.setEntry(j, i, entry);
261 }
262 }
263 return outMatrix;
264 }
265
266 /**
267 * Throws IllegalArgumentException of the matrix does not have at least
268 * two columns and two rows
269 *
270 * @param matrix matrix to check for sufficiency
271 */
272 private void checkSufficientData(final RealMatrix matrix) {
273 int nRows = matrix.getRowDimension();
274 int nCols = matrix.getColumnDimension();
275 if (nRows < 2 || nCols < 2) {
276 throw MathRuntimeException.createIllegalArgumentException(
277 "insufficient data: only {0} rows and {1} columns.",
278 nRows, nCols);
279 }
280 }
281 }