1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.commons.math.distribution;
19
20 import java.io.Serializable;
21
22 import org.apache.commons.math.MathRuntimeException;
23
24 /**
25 * Implementation for the {@link ZipfDistribution}.
26 *
27 * @version $Revision: 762087 $ $Date: 2009-04-05 10:20:18 -0400 (Sun, 05 Apr 2009) $
28 */
29 public class ZipfDistributionImpl extends AbstractIntegerDistribution
30 implements ZipfDistribution, Serializable {
31
32 /** Serializable version identifier. */
33 private static final long serialVersionUID = -140627372283420404L;
34
35 /** Number of elements. */
36 private int numberOfElements;
37
38 /** Exponent parameter of the distribution. */
39 private double exponent;
40
41 /**
42 * Create a new Zipf distribution with the given number of elements and
43 * exponent. Both values must be positive; otherwise an
44 * <code>IllegalArgumentException</code> is thrown.
45 *
46 * @param numberOfElements the number of elements
47 * @param exponent the exponent
48 * @exception IllegalArgumentException if n ≤ 0 or s ≤ 0.0
49 */
50 public ZipfDistributionImpl(final int numberOfElements, final double exponent)
51 throws IllegalArgumentException {
52 setNumberOfElements(numberOfElements);
53 setExponent(exponent);
54 }
55
56 /**
57 * Get the number of elements (e.g. corpus size) for the distribution.
58 *
59 * @return the number of elements
60 */
61 public int getNumberOfElements() {
62 return numberOfElements;
63 }
64
65 /**
66 * Set the number of elements (e.g. corpus size) for the distribution.
67 * The parameter value must be positive; otherwise an
68 * <code>IllegalArgumentException</code> is thrown.
69 *
70 * @param n the number of elements
71 * @exception IllegalArgumentException if n ≤ 0
72 */
73 public void setNumberOfElements(final int n)
74 throws IllegalArgumentException {
75 if (n <= 0) {
76 throw MathRuntimeException.createIllegalArgumentException(
77 "invalid number of elements {0} (must be positive)",
78 n);
79 }
80 this.numberOfElements = n;
81 }
82
83 /**
84 * Get the exponent characterising the distribution.
85 *
86 * @return the exponent
87 */
88 public double getExponent() {
89 return exponent;
90 }
91
92 /**
93 * Set the exponent characterising the distribution.
94 * The parameter value must be positive; otherwise an
95 * <code>IllegalArgumentException</code> is thrown.
96 *
97 * @param s the exponent
98 * @exception IllegalArgumentException if s ≤ 0.0
99 */
100 public void setExponent(final double s)
101 throws IllegalArgumentException {
102 if (s <= 0.0) {
103 throw MathRuntimeException.createIllegalArgumentException(
104 "invalid exponent {0} (must be positive)",
105 s);
106 }
107 this.exponent = s;
108 }
109
110 /**
111 * The probability mass function P(X = x) for a Zipf distribution.
112 *
113 * @param x the value at which the probability density function is evaluated.
114 * @return the value of the probability mass function at x
115 */
116 public double probability(final int x) {
117 if (x <= 0 || x > getNumberOfElements()) {
118 return 0.0;
119 }
120
121 return (1.0 / Math.pow(x, exponent)) / generalizedHarmonic(numberOfElements, exponent);
122
123 }
124
125 /**
126 * The probability distribution function P(X <= x) for a Zipf distribution.
127 *
128 * @param x the value at which the PDF is evaluated.
129 * @return Zipf distribution function evaluated at x
130 */
131 @Override
132 public double cumulativeProbability(final int x) {
133 if (x <= 0) {
134 return 0.0;
135 } else if (x >= getNumberOfElements()) {
136 return 1.0;
137 }
138
139 return generalizedHarmonic(x, exponent) / generalizedHarmonic(numberOfElements, exponent);
140
141 }
142
143 /**
144 * Access the domain value lower bound, based on <code>p</code>, used to
145 * bracket a PDF root.
146 *
147 * @param p the desired probability for the critical value
148 * @return domain value lower bound, i.e.
149 * P(X < <i>lower bound</i>) < <code>p</code>
150 */
151 @Override
152 protected int getDomainLowerBound(final double p) {
153 return 0;
154 }
155
156 /**
157 * Access the domain value upper bound, based on <code>p</code>, used to
158 * bracket a PDF root.
159 *
160 * @param p the desired probability for the critical value
161 * @return domain value upper bound, i.e.
162 * P(X < <i>upper bound</i>) > <code>p</code>
163 */
164 @Override
165 protected int getDomainUpperBound(final double p) {
166 return numberOfElements;
167 }
168
169
170 /**
171 * Calculates the Nth generalized harmonic number. See
172 * <a href="http://mathworld.wolfram.com/HarmonicSeries.html">Harmonic
173 * Series</a>.
174 *
175 * @param n the term in the series to calculate (must be ≥ 1)
176 * @param m the exponent; special case m == 1.0 is the harmonic series
177 * @return the nth generalized harmonic number
178 */
179 private double generalizedHarmonic(final int n, final double m) {
180 double value = 0;
181 for (int k = n; k > 0; --k) {
182 value += 1.0 / Math.pow(k, m);
183 }
184 return value;
185 }
186
187 }