Edinburgh Speech Tools 2.4-release
 
Loading...
Searching...
No Matches
EST_multistats.cc
1/*************************************************************************/
2/* */
3/* Centre for Speech Technology Research */
4/* University of Edinburgh, UK */
5/* Copyright (c) 1995,1996 */
6/* All Rights Reserved. */
7/* */
8/* Permission is hereby granted, free of charge, to use and distribute */
9/* this software and its documentation without restriction, including */
10/* without limitation the rights to use, copy, modify, merge, publish, */
11/* distribute, sublicense, and/or sell copies of this work, and to */
12/* permit persons to whom this work is furnished to do so, subject to */
13/* the following conditions: */
14/* 1. The code must retain the above copyright notice, this list of */
15/* conditions and the following disclaimer. */
16/* 2. Any modifications must be clearly marked as such. */
17/* 3. Original authors' names are not deleted. */
18/* 4. The authors' names are not used to endorse or promote products */
19/* derived from this software without specific prior written */
20/* permission. */
21/* */
22/* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23/* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24/* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25/* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26/* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27/* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28/* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29/* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30/* THIS SOFTWARE. */
31/* */
32/*************************************************************************/
33/* Author : Paul Taylor */
34/* Date : July 1995 */
35/*-----------------------------------------------------------------------*/
36/* Basic Multivariate Statistical Functions */
37/* */
38/*=======================================================================*/
39#include <cmath>
40#include "EST_multistats.h"
41
42float mean(EST_FVector &v)
43{
44 int i;
45 float u = 0.0;
46
47 for (i = 0; i < v.n(); ++i)
48 u += v(i);
49
50 return u / v.n();
51}
52
53float sum(EST_FVector &v)
54{
55 int i;
56 float u = 0.0;
57
58 for (i = 0; i < v.n(); ++i)
59 u += v(i);
60
61 return u;
62}
63
65{
66 EST_FVector u(m.num_columns());
67 int i, j;
68
69 for (i = 0; i < m.num_columns(); ++i)
70 {
71 u[i] = 0.0;
72 for (j = 0; j < m.num_rows(); ++j)
73 u[i] += m(j, i);
74 u[i] /= m.num_rows();
75 }
76
77 return u;
78}
79
80EST_FVector sample_variance(EST_FMatrix &m)
81{
82 EST_FVector v(m.num_columns());
83 EST_FVector u(m.num_columns());
84 int i, j;
85
86 u = mean(m);
87
88 for (j = 0; j < m.num_columns(); ++j)
89 {
90 v[j] = 0.0;
91 for (i = 0; i < m.num_rows(); ++i)
92 v[j] += pow(m(i, j) - u(j), float(2.0));
93 v[j] /= m.num_rows() - 1; // sample variance
94 }
95
96 return v;
97}
98
99EST_FVector sample_stdev(EST_FMatrix &m)
100{
101 EST_FVector v;
102 v = sample_variance(m);
103 int j;
104
105 for (j = 0; j < v.n(); ++j)
106 v[j] = sqrt(v(j));
107
108 return v;
109}
110
111EST_FMatrix sample_covariance(EST_FMatrix &m)
112{
113 EST_FVector u(m.num_columns());
114 EST_FMatrix c(m.num_columns(), m.num_columns());
115 int i, j, k;
116
117 u = mean(m);
118
119 for (j = 0; j < m.num_columns(); ++j)
120 for (k = 0; k < m.num_columns(); ++k)
121 {
122 c(j, k) = 0.0;
123 for (i = 0; i < m.num_rows(); ++i)
124 c(j, k) += (m(i, j) - u(j)) * (m(i, k) - u(k));
125 c(j, k) /= m.num_rows();
126 }
127
128
129 return c;
130}
131
132EST_FMatrix sample_correlation(EST_FMatrix &m)
133{
134 EST_FMatrix r(m.num_columns(), m.num_columns());
135
136 EST_FVector s = sample_stdev(m);
137 EST_FMatrix c = sample_covariance(m);
138
139 int j, k;
140
141 for (j = 0; j < m.num_columns(); ++j)
142 for (k = 0; k < m.num_columns(); ++k)
143 r(j, k) = c(j, k)/(s(j) * s(k));
144
145 return r;
146}
147
148EST_FMatrix euclidean_distance(EST_FMatrix &m)
149{
150 // nowt yet
151 return m;
152}
153
154// normalise matrix m, by subtracting vector "sub" from each column and
155// then dividing each column by vector div;
156
158{
159 int i, j;
160 EST_FMatrix z(m.num_rows(), m.num_columns());
161
162 for (j = 0; j < m.num_columns(); ++j)
163 for (i = 0; i < m.num_rows(); ++i)
164 z(i, j) = (m(i, j) - sub(j)) / div(j);
165
166 return z;
167}
168
169// calculate penrose distance for matrix of means gu of p variables
170// (columns) in g populations (columns), and variance gv.
171
172EST_FMatrix penrose_distance(EST_FMatrix &gu, EST_FVector &gv)
173{
174 int i, j, k;
175 int p = gu.num_columns();
176 int n = gu.num_rows();
177 EST_FMatrix P(n, n);
178
179 cout << "pop mean " << gu;
180
181 for (i = 0; i < n; ++i)
182 for (j = 0; j < n; ++j)
183 {
184 P(i, j) = 0.0;
185 for (k = 0; k < p; ++k)
186 P(i, j) += pow(gu(i, k) - gu(j, k), float(2.0)) / gv(k);
187 P(i, j) /= p;
188 }
189 return P;
190}
191
192float single_mahal(EST_FMatrix &ui, EST_FMatrix &uj, EST_FMatrix &invv)
193{
194 float e;
195 EST_FMatrix a, b,c,d;
196
197 a = ui - uj;
198 transpose(a,b);
199 multiply(b,invv,c);
200 multiply(c,a,d);
201 e = d(0,0);
202 return e;
203}
204
205EST_FMatrix mahalanobis_distance(EST_FMatrix &gu, EST_FMatrix &covar)
206{
207 int i, j;
208 int n = gu.num_rows();
209 EST_FMatrix P(n, n), invv, ui, uj;
210
211 inverse(covar,invv); // should be inverse!!
212
213 for (i = 0; i < n; ++i)
214 for (j = 0; j < n; ++j)
215 {
216 transpose(row(gu, i),ui);
217 transpose(row(gu, j),uj);
218 P(i, j) = single_mahal(ui, uj, invv);
219 }
220
221 return P;
222}
223
224float simple_penrose(EST_FVector &ui, EST_FVector &uj, EST_FVector &v)
225{
226 int k;
227 int n = uj.n();
228 float P = 0.0;
229
230 for (k = 0; k < n; ++k)
231 P += pow(ui(k) - uj(k), float(2.0)) / v(k);
232 P /= n;
233
234 return P;
235}
236
237EST_FMatrix population_mean(EST_FMatrix *in, int num_pop)
238{
239 int i, k;
240 EST_FMatrix pmean(num_pop, in[0].num_columns());
241 EST_FVector u(in[0].num_columns());
242
243 for (i = 0; i < num_pop; ++i)
244 {
245 u = mean(in[i]);
246 for (k =0; k < in[i].num_columns(); ++k)
247 pmean(i, k) = u(k);
248 }
249 return pmean;
250}
251
252EST_FMatrix add_populations(EST_FMatrix *in, int num_pop)
253{
254 int i, j, k, l, n;
255
256 l = 0;
257 for (i = 0; i < num_pop; ++i)
258 l += in[i].num_rows();
259 n = in[0].num_columns();
260
261 EST_FMatrix msum(l, n);
262
263 for (k = l = 0; k < num_pop; ++k)
264 for (j =0; j < n; ++j, ++l)
265 for (i = 0; i < in[i].num_rows(); ++i)
266 msum(l, j) = in[k](i, j);
267
268 return msum;
269}
270
INLINE int n() const
number of items in vector.