Edinburgh Speech Tools 2.4-release
 
Loading...
Searching...
No Matches
EST_FeatureData.cc
1/************************************************************************/
2/* */
3/* Centre for Speech Technology Research */
4/* University of Edinburgh, UK */
5/* Copyright (c) 1996,1997 */
6/* All Rights Reserved. */
7/* */
8/* Permission is hereby granted, free of charge, to use and distribute */
9/* this software and its documentation without restriction, including */
10/* without limitation the rights to use, copy, modify, merge, publish, */
11/* distribute, sublicense, and/or sell copies of this work, and to */
12/* permit persons to whom this work is furnished to do so, subject to */
13/* the following conditions: */
14/* 1. The code must retain the above copyright notice, this list of */
15/* conditions and the following disclaimer. */
16/* 2. Any modifications must be clearly marked as such. */
17/* 3. Original authors' names are not deleted. */
18/* 4. The authors' names are not used to endorse or promote products */
19/* derived from this software without specific prior written */
20/* permission. */
21/* */
22/* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23/* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24/* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25/* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26/* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27/* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28/* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29/* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30/* THIS SOFTWARE. */
31/* */
32/************************************************************************/
33/* */
34/* Author: Paul Taylor Caley */
35/* Date: July 1998 */
36/* -------------------------------------------------------------------- */
37/* Feature Data Class */
38/* */
39/************************************************************************/
40
41#include "EST_TMatrix.h"
42#include "EST_Val.h"
43#include "EST_FeatureData.h"
44#include "EST_string_aux.h"
45#include "EST_Token.h"
46#include "EST_FileType.h"
47#include "EST_error.h"
48#include <iostream>
49#include <fstream>
50
51#include "EST_THash.h"
52
53
54EST_FeatureData::EST_FeatureData()
55{
56 default_vals();
57}
58
59
60
61EST_FeatureData::EST_FeatureData(const EST_FeatureData &a)
62{
63 default_vals();
64 copy(a);
65}
66
67EST_FeatureData::~EST_FeatureData(void)
68{
69}
70
71int EST_FeatureData::num_samples() const
72{
73 return fd.num_rows();
74}
75
76int EST_FeatureData::num_features() const
77{
78 return fd.num_columns();
79}
80
81
82void EST_FeatureData::default_vals()
83{
84/* cout << "Default values\n";
85 p_sub_fd = false;
86 p_info = new EST_FeatureInfo;
87*/
88}
89
90void EST_FeatureData::set_num_samples(int num_samples, bool preserve)
91{
92 fd.resize(num_samples, fd.num_columns(), preserve);
93}
94
95void EST_FeatureData::resize(int num_samples, int num_features, bool preserve)
96{
97 // If enlargement is required, give new features dummy names
98 // and set their types to <STRING>. If preserve is set to 0
99 // rename all features this way.
100
101 if (num_features > fd.num_columns())
102 {
103 int i;
104 if (preserve)
105 i = fd.num_columns();
106 else
107 i = 0;
108 for (; i < num_features; ++i)
109 info.set("unnamed_" + itoString(i), "<STRING>");
110 }
111
112 fd.resize(num_samples, num_features, preserve);
113}
114
115void EST_FeatureData::resize(int num_samples, EST_Features &f, bool preserve)
116{
117 fd.resize(num_samples, f.length(), preserve);
118 info = f;
119}
120
121EST_String EST_FeatureData::type(const EST_String &feature_name)
122{
123 EST_String t = info.S(feature_name);
124
125 if (t.contains("<", 0)) // i.e. a predefined type
126 return t;
127
128 return "undef";
129}
130
131EST_StrList EST_FeatureData::values(const EST_String &feature_name)
132{
133 EST_StrList v;
134 EST_String t = info.S(feature_name);
135
136 // check for infinite set:
137 if ((t == "<FLOAT>") || (t == "<INT>") || (t == "<STRING>"))
138 return v;
139
140 StringtoStrList(t, v);
141 return v;
142}
143
144int EST_FeatureData::feature_position(const EST_String &feature_name)
145{
146 int i;
147
149
150 for (i = 0, p.begin(info); p; ++p, ++i)
151 {
152// cout << "looking at " << info.fname(p) << endl;
153// cout << "i = " << i << endl;
154 if (p->k == feature_name)
155 return i;
156 }
157
158 EST_error("No such feature %s\n", (const char *) feature_name);
159 return 0;
160}
161
162int EST_FeatureData::update_values(const EST_String &feature_name, int max)
163{
164 // This should be converted back to Hash tables once extra
165 // iteration functions are added the EST_Hash.
166 int i, col;
167 EST_Features values;
168 EST_String v;
169
170// EST_TStringHash<int> values(max);
171
172 col = feature_position(feature_name);
173
174 for (i = 0; i < num_samples(); ++i)
175 values.set(fd.a(i, col).string(), 1);
176
177 // check to see if there are more types than allowed, if so
178 // just set to open set STRING
179 if (values.length() > max)
180 v = "<STRING>";
181 else
182 {
184 for(p.begin(values); p; ++p)
185 v += p->k + " ";
186 }
187
188 info.set(feature_name, v);
189
190 return values.length();
191}
192
193EST_FeatureData & EST_FeatureData::copy(const EST_FeatureData &a)
194{
195 (void) a;
196/* // copy on a sub can't alter header information
197 if (!p_sub_fd)
198 {
199 delete p_info;
200 *p_info = *(a.p_info);
201 }
202 // but data can be copied so long as no resizing is involved.
203 EST_ValMatrix::operator=(a);
204*/
205 return *this;
206}
207
208/*void EST_FeatureData::a(int i, int j)
209{
210 return EST_ValMatrix::a(i, j);
211}
212*/
213/*
214EST_Val &EST_FeatureData::operator()(int i, int j)
215{
216 return a(i, j);
217}
218
219EST_Val &EST_FeatureData::operator()(int s, const EST_String &f)
220{
221 int i = info().field_index(f);
222 return a(s, i);
223}
224
225EST_FeatureData &EST_FeatureData::operator=(const EST_FeatureData &f)
226{
227 return copy(f);
228}
229
230*/
231EST_Val &EST_FeatureData::a(int i, const EST_String &f)
232{
233 (void)f;
234 return fd.a(i, 0);
235}
236
237EST_Val &EST_FeatureData::a(int i, int j)
238{
239 return fd.a(i, j);
240}
241const EST_Val &EST_FeatureData::a(int i, const EST_String &f) const
242{
243 (void)f;
244 return fd.a(i, 0);
245}
246
247const EST_Val &EST_FeatureData::a(int i, int j) const
248{
249 return fd.a(i, j);
250}
251
252
253/*
254void EST_FeatureData::sub_samples(EST_FeatureData &f, int start, int num)
255{
256 sub_matrix(f, start, num);
257 f.p_info = p_info;
258 f.p_sub_fd = true;
259}
260
261void EST_FeatureData::extract_named_fields(const EST_String &fields)
262{
263 EST_FeatureData n;
264 // there must be a more efficient way than a copy?
265 extract_named_fields(n, fields);
266 *this = n;
267}
268
269void EST_FeatureData::extract_named_fields(const EST_StrList &fields)
270{
271 EST_FeatureData n;
272 // there must be a more efficient way than a copy?
273 extract_named_fields(n, fields);
274 *this = n;
275}
276
277void EST_FeatureData::extract_numbered_fields(const EST_String &fields)
278{
279 EST_FeatureData n;
280 // there must be a more efficient way than a copy?
281 extract_numbered_fields(n, fields);
282 *this = n;
283}
284
285void EST_FeatureData::extract_numbered_fields(const EST_IList &fields)
286{
287 EST_FeatureData n;
288 // there must be a more efficient way than a copy?
289 extract_numbered_fields(n, fields);
290 *this = n;
291}
292
293
294void EST_FeatureData::extract_named_fields(EST_FeatureData &f,
295 const EST_String &fields) const
296{
297 EST_StrList s;
298
299 StringtoStrList(fields, s);
300 extract_named_fields(f, s);
301}
302void EST_FeatureData::extract_named_fields(EST_FeatureData &f,
303 const EST_StrList &n_fields) const
304{
305 EST_Litem *p;
306 EST_StrList n_types;
307 int i, j;
308
309 info().extract_named_fields(*(f.p_info), n_fields);
310
311 for (p = n_fields.head(), i = 0; i < f.num_fields(); ++i, p = p->next())
312 for (j = 0; j < f.num_samples(); ++j)
313 f(j, i) = a(j, n_fields(p));
314
315}
316
317void EST_FeatureData::extract_numbered_fields(EST_FeatureData &f,
318 const EST_IList &fields) const
319{
320 EST_Litem *p;
321 EST_StrList n_fields;
322 int i, j;
323
324 for (p = fields.head(); p; p = p->next())
325 n_fields.append(info().field_name(fields(p)));
326
327 info().extract_named_fields(*(f.p_info), n_fields);
328
329 for (p = fields.head(), i = 0; i < f.num_fields(); ++i, p = p->next())
330 for (j = 0; j < f.num_samples(); ++j)
331 f(j, i) = a(j, fields(p));
332
333}
334
335void EST_FeatureData::extract_numbered_fields(EST_FeatureData &f,
336 const EST_String &fields) const
337{
338 EST_StrList s;
339 EST_IList il;
340
341 StringtoStrList(fields, s);
342 StrListtoIList(s, il);
343 extract_numbered_fields(f, il);
344}
345*/
346
347EST_write_status save_est(const EST_FeatureData &f, const EST_String &filename)
348{
349 (void)f;
350 (void)filename;
351/*
352 ostream *outf;
353 EST_Litem *s, *e;
354 int i;
355 if (filename == "-")
356 outf = &cout;
357 else
358 outf = new ofstream(filename);
359
360 if (!(*outf))
361 return write_fail;
362
363 outf->precision(5);
364 outf->setf(ios::scientific, ios::floatfield);
365 outf->width(8);
366
367 *outf << "EST_File feature_data\n"; // EST header identifier
368 *outf << "DataType ascii\n";
369 *outf << "NumSamples " << f.num_samples() << endl;
370 *outf << "NumFields " << f.num_fields() << endl;
371 *outf << "FieldNames " << f.info().field_names();
372 *outf << "FieldTypes " << f.info().field_types();
373 if (f.info().group_start.length() > 0)
374 for (s = f.info().group_start.head(), e = f.info().group_end.head();
375 s; s = s->next(), e = e->next())
376 *outf << "Group " << f.info().group_start.key(s) << " " <<
377 f.info().group_start.val(s) << " " << f.info().group_end.val(e) << endl;
378
379 for (i = 0; i < f.num_fields(); ++i)
380 if (f.info().field_values(i).length() > 0)
381 *outf << "Field_" << i << "_Values "
382 << f.info().field_values(i) << endl;
383
384 *outf << "EST_Header_End\n"; // EST end of header identifier
385
386// *outf << ((EST_ValMatrix ) f);
387 *outf << f;
388 */
389
390 return write_ok;
391}
392
393
394EST_write_status EST_FeatureData::save(const EST_String &filename,
395 const EST_String &file_type) const
396{
397 if ((file_type == "est") || (file_type == ""))
398 return save_est(*this, filename);
399/* else if (file_type = "octave")
400 return save_octave(*this, filename);
401 else if (file_type = "ascii")
402 return save_ascii(*this, filename);
403*/
404
405 cerr << "Can't save feature data in format \"" << file_type << endl;
406 return write_fail;
407}
408
409
410
411EST_read_status EST_FeatureData::load(const EST_String &filename)
412{
413 int i, j;
414 EST_Option hinfo;
415 EST_String k, v;
416 EST_read_status r;
417 bool ascii;
419 EST_EstFileType t;
420 int ns, nf;
421
422 if (((filename == "-") ? ts.open(cin) : ts.open(filename)) != 0)
423 {
424 cerr << "Can't open track file " << filename << endl;
425 return misc_read_error;
426 }
427 // set up the character constant values for this stream
428 ts.set_SingleCharSymbols(";");
429 ts.set_quotes('"','\\');
430
431 if ((r = read_est_header(ts, hinfo, ascii, t)) != format_ok)
432 {
433 cerr << "Error reading est header of file " << filename << endl;
434 return r;
435 }
436
437 if (t != est_file_feature_data)
438 {
439 cerr << "Not a EST Feature Data file: " << filename << endl;
440 return misc_read_error;
441 }
442
443 ns = hinfo.ival("NumSamples");
444 nf = hinfo.ival("NumFeatures");
445
446 cout << "ns: " << ns << endl;
447 cout << "nf: " << nf << endl;
448 resize(ns, nf);
449
450 info.clear(); // because resize will make default names
451
452 for (i = 0; i < nf; ++i)
453 {
454 k = "Feature_" + itoString(i+1);
455 if (hinfo.present(k))
456 {
457 v = hinfo.val(k);
458 info.set(v.before(" "), v.after(" "));
459 cout << "value: " << v.after(" ") << endl;
460 }
461 else
462 EST_error("No feature definition given for feature %d\n", i);
463 }
464
465 for (i = 0; i < ns; ++i)
466 {
468 for (p.begin(info), j = 0; j < nf; ++j, ++p)
469 {
470 if (p->k == "<FLOAT>")
471 a(i, j) = atof(ts.get().string());
472 else if (p->k == "<BOOL>")
473 a(i, j) = atoi(ts.get().string());
474 else if (p->k == "<INT>")
475 a(i, j) = atoi(ts.get().string());
476 else
477 a(i, j) = ts.get().string();
478 }
479 }
480
481 return format_ok;
482}
483
484/*ostream& operator << (ostream &st, const EST_FeatureInfo &a)
485{
486
487// st << a.field_names() << endl;
488// st << a.field_types() << endl;
489
490 return st;
491}
492*/
493
494ostream& operator << (ostream &st, const EST_FeatureData &d)
495{
496 int i, j;
497 EST_String t;
498 EST_Val v;
499
500// st << a;
501
502// EST_ValMatrix::operator<<(st, (EST_ValMatrix)a);
503
504 for (i = 0; i < d.num_samples(); ++i)
505 {
506 for (j = 0; j < d.num_features(); ++j)
507 {
508 v = d.a(i, j);
509 st << v << " ";
510// cout << "field type " << a.info().field_type(j) << endl;
511/* else if (a.info().field_type(j) == "float")
512 st << a.a(i, j);
513 else if (a.info().field_type(j) == "int")
514 st << a.a(i, j);
515
516 else if (a.info().field_type(j) == "string")
517 {
518 // st << "\"" << a.a(i, j) << "\"";
519 t = a.a(i, j);
520 t.gsub(" ", "_");
521 st << t;
522 }
523*/
524 }
525 st << endl;
526 }
527
528 return st;
529}
void set(const EST_String &name, int ival)
const EST_String S(const EST_String &path) const
int ival(const EST_String &rkey, int m=1) const
Definition EST_Option.cc:76
EST_String before(int pos, int len=0) const
Part before position.
Definition EST_String.h:286
int length(void) const
Length of string ({not} length of underlying chunk)
Definition EST_String.h:241
int contains(const char *s, int pos=-1) const
Does it contain this substring?
Definition EST_String.h:375
EST_String after(int pos, int len=1) const
Part after pos+len.
Definition EST_String.h:318
void begin(const Container &over)
Set the iterator ready to run over this container.
const V & val(const K &rkey, bool m=0) const
return value according to key (const)
Definition EST_TKVL.cc:145
const int present(const K &rkey) const
Returns true if key is present.
Definition EST_TKVL.cc:222
int num_columns() const
return number of columns
int num_rows() const
return number of rows
void resize(int rows, int cols, int set=1)
void set_SingleCharSymbols(const EST_String &sc)
set which characters are to be treated as single character symbols
Definition EST_Token.h:338
void set_quotes(char q, char e)
set characters to be used as quotes and escape, and set quote mode
Definition EST_Token.h:347
int open(const EST_String &filename)
open a \Ref{EST_TokenStream} for a file.
Definition EST_Token.cc:200
EST_TokenStream & get(EST_Token &t)
get next token in stream
Definition EST_Token.cc:486
const EST_String & string(void) const
Definition EST_Val.h:150