Edinburgh Speech Tools 2.4-release
 
Loading...
Searching...
No Matches
EST_lattice_io.cc
1/*************************************************************************/
2/* */
3/* Centre for Speech Technology Research */
4/* University of Edinburgh, UK */
5/* Copyright (c) 1995,1996 */
6/* All Rights Reserved. */
7/* */
8/* Permission is hereby granted, free of charge, to use and distribute */
9/* this software and its documentation without restriction, including */
10/* without limitation the rights to use, copy, modify, merge, publish, */
11/* distribute, sublicense, and/or sell copies of this work, and to */
12/* permit persons to whom this work is furnished to do so, subject to */
13/* the following conditions: */
14/* 1. The code must retain the above copyright notice, this list of */
15/* conditions and the following disclaimer. */
16/* 2. Any modifications must be clearly marked as such. */
17/* 3. Original authors' names are not deleted. */
18/* 4. The authors' names are not used to endorse or promote products */
19/* derived from this software without specific prior written */
20/* permission. */
21/* */
22/* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23/* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24/* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25/* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26/* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27/* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28/* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29/* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30/* THIS SOFTWARE. */
31/* */
32/*************************************************************************/
33/* Author : Simon King */
34/* Date : November 1996 */
35/*-----------------------------------------------------------------------*/
36/* Lattice/Finite State Network i/o functions */
37/* */
38/*=======================================================================*/
39
40#include <fstream>
41#include <cstdlib>
42#include "EST_lattice.h"
43#include "EST_types.h"
44#include "EST_Token.h"
45#include "EST_StringTrie.h"
46
47bool save(Lattice &lattice, EST_String filename)
48{
51 int acount=0,ncount=0;
52 int i,from,to;
53 Lattice::symbol_t *symbol;
55
56 if (filename == "-")
57 outf = &cout;
58 else
59 outf = new ofstream(filename);
60
61 if (!(*outf))
62 {
63 cerr << "lattice save: can't open lattice output file \""
64 << filename << "\"" << endl;
65 return false;
66 }
67
68 // count
69 for (n_ptr = lattice.nodes.head(); n_ptr != 0; n_ptr = n_ptr->next()){
70 ncount++;
71 for (a_ptr = lattice.nodes(n_ptr)->arcs_out.head();
72 a_ptr != 0; a_ptr = a_ptr->next())
73 acount++;
74 }
75
76 // size line (duh!)
77 *outf << "# " << "Generated by Edinburgh Speech Tools" << endl << "#" << endl;
78 *outf << "# Header" << endl;
79 *outf << "VERSION=1.1" << endl << "#" << endl;
80 *outf << "# Size line" << endl;
81 *outf << "N=" << ncount << " L=" << acount << endl;
82 *outf << "#" << endl;
83
84
85 // to do : for HTK name nodes, not arcs
86 // since all arcs in have same label
87
88 // nodes
89 for(i=0;i<=1;i++){
90
91 if(i==0)
92 *outf << "# Nodes" << endl;
93 else
94 *outf << "# Arcs" << endl;
95
96 ncount=0;
97 acount=0;
98
99 for (n_ptr = lattice.nodes.head(); n_ptr != 0; n_ptr = n_ptr->next()){
100
101 from=lattice.node_index(lattice.nodes(n_ptr));
102
103 if(i==0){
104 *outf << "I=" << from << endl;
105
106 }else
107 for (a_ptr = lattice.nodes(n_ptr)->arcs_out.head();
108 a_ptr != 0; a_ptr = a_ptr->next()){
109
110 to = lattice.node_index(lattice.nodes(n_ptr)->arcs_out(a_ptr)->to);
111
112 symbol = lattice.alphabet_index_to_symbol(lattice.nodes(n_ptr)->arcs_out(a_ptr)->label);
113
114 if(lattice.nodes(n_ptr)->arcs_out(a_ptr)->label == lattice.e_move_symbol_index){
115 *outf << "J=" << acount++ << " S=" << from << " E=" << to
116 << " W=!NULL" << endl;
117
118 }else{
119 *outf << "J=" << acount++ << " S=" << from << " E=" << to
120 << " l=" << lattice.qmap_index_to_value(symbol->qmap_index)
121 << " W=" << lattice.nmap_index_to_name(symbol->nmap_index)
122 << endl;
123 }
124 }
125 }
126 }
127
128 return true;
129}
130
131bool
132load(Lattice &lattice,EST_String filename)
133{
134
135 EST_String name, next_token, str;
137 EST_Token t;
138 int i,j;
139 // temporary storage
140 struct arc_t{
141 int start;
142 int end;
143 float logprob;
145 };
146
147 arc_t *temp_arcs = NULL;
148 int arcindex=0;
149 int nodeindex=0;
150
151 // nodes can have labels too - but this is not yet supported
152
153
154
155 if (((filename == "-") ? ts.open(cin) : ts.open(filename)) != 0)
156 {
157 cerr << "Can't open lattice input file " << filename << endl;
158 return false;
159 }
160
161 // read file into a arrays, make alphabet, then make lattice
162
163 // find 'size' line
164
165 int numnodes=0;
166 int numarcs=0;
167
168 int narcs=-1;
169 int nnodes=-1;
170 int nodenum=-1;
171 int arcnum=-1;
172 int startnode=-1;
173 int endnode=-1;
174 float logprob = 0.0;
175 EST_String word="";
176
177 while(!ts.eof())
178 {
179
180 str = ts.get().string();
181
182 if(!str.contains("="))
183 continue;
184
185 EST_String left=str.before("=");
186 EST_String right=str.after("=");
187
188 if(left == "N")
189 nnodes=atoi(right);
190 else if(left == "L")
191 narcs=atoi(right);
192 else if(left == "I")
193 nodenum=atoi(right);
194 else if(left == "J")
195 arcnum=atoi(right);
196 else if(left == "S")
197 startnode=atoi(right);
198 else if(left == "E")
199 endnode=atoi(right);
200 else if(left == "l")
201 logprob=atof(right);
202 else if(left == "W")
203 word=right;
204
205
206 if(ts.eoln()){
207
208 // do something
209
210 if( (narcs>0) && (nnodes>0) ){
211 // it's the size line
212 if(temp_arcs != NULL){
213 cerr << "Error in lattice file : 2 size lines found"
214 << " : line " << ts.linenum() << endl;
215 ts.close();
216 return false;
217 }else{
220 temp_arcs = new arc_t[numarcs];
221 cerr << "size : " << numnodes << " " << numarcs << endl;
222 }
223
224 }else if(nodenum>=0){
225
226 if(arcnum>0){
227 cerr << "Error in lattice file at line "
228 << ts.linenum() << endl;
229 ts.close();
230 return false;
231 }
232
233 if(nodenum>=numnodes){
234 cerr << "Error in lattice file at line "
235 << ts.linenum()
236 << " : node index (" << nodenum << ") out of range"
237 << endl;
238 ts.close();
239 return false;
240 }
241
242 nodeindex++;
243
244 }else if(arcnum>=0){
245 if(nodenum>0){
246 cerr << "Error in lattice file at line "
247 << ts.linenum() << endl;
248 ts.close();
249 return false;
250 }
251
252 if(arcnum>=numarcs){
253 cerr << "Error in lattice file at line "
254 << ts.linenum()
255 << " : arc index (" << arcnum << ") out of range"
256 << endl;
257 ts.close();
258 return false;
259 }
260
261 if((startnode<0) || (startnode>=numnodes)){
262 cerr << "Error in lattice file at line " << ts.linenum()
263 << endl
264 << " arc starts at out of range node "
265 << startnode << endl;
266 return false;
267 }
268
269 if((endnode<0) || (endnode>=numnodes)){
270 cerr << "Error in lattice file at line " << ts.linenum()
271 << endl
272 << " arc ends at out of range node "
273 << endnode << endl;
274 return false;
275 }
276
277 // make arc
280 temp_arcs[arcindex].logprob = logprob;
281 temp_arcs[arcindex].word = word;
282 arcindex++;
283
284 }
285
286 narcs=-1;
287 nnodes=-1;
288 nodenum=-1;
289 arcnum=-1;
290 startnode=-1;
291 endnode=-1;
292 logprob=-1;
293 word="";
294
295 }
296
297 }
298
299 if(arcindex != numarcs){
300 cerr << "Error in lattice file at line "
301 << "found " << arcindex << " arcs, but expected "
302 << numarcs << endl;
303 return false;
304 }
305
306 if(nodeindex != numnodes){
307 cerr << "Error in lattice file at line "
308 << "found " << nodeindex << " nodes, but expected "
309 << numnodes << endl;
310 return false;
311 }
312
313
314 // make nmap
317
318 for(i=0;i<numarcs;i++){
319
320 if(seen_before.lookup(temp_arcs[i].word) != (void *)(1)){
321 seen_before.add(temp_arcs[i].word,(void *)(1));
322 list_nmap.append(temp_arcs[i].word);
323 }
324 }
325 qsort(list_nmap);
326
327 //cerr << "here is the list nmap" << list_nmap << endl;
328
329 i=0;
331 bool flag;
332 for(l_ptr=list_nmap.head();l_ptr != 0; l_ptr=l_ptr->next())
333 i++;
334
335 // transfer to array
336 lattice.nmap.resize(i);
337 i=0;
338 for(l_ptr=list_nmap.head();l_ptr != 0; l_ptr=l_ptr->next())
339 lattice.nmap[i++] = list_nmap(l_ptr);
340
341 list_nmap.clear();
342 cerr << "Built nmap with " << i << " entries" << endl;
343
344
345 // make qmap
346 // should be a separate fn
347
349
350 float error_margin = 1.0e-02; // temporary hack
351
352 for(i=0;i<numarcs;i++){
353
354 flag = false;
355 for(l_ptr=list_qmap.head();l_ptr != 0; l_ptr=l_ptr->next())
357 flag = true;
358 break;
359 }
360
361 if(!flag)
362 list_qmap.append(temp_arcs[i].logprob);
363
364 }
365
366 // special zero (within error_margin) entry, if not already there
367 flag = false;
368 for(l_ptr=list_qmap.head();l_ptr != 0; l_ptr=l_ptr->next())
370 flag = true;
371 break;
372 }
373
374 if(!flag)
375 list_qmap.append(0);
376
377 qsort(list_qmap);
378
379 i=0;
380 for(l_ptr=list_qmap.head();l_ptr != 0; l_ptr=l_ptr->next())
381 i++;
382
383 // transfer to array
384 lattice.qmap.resize(i);
385 i=0;
386 for(l_ptr=list_qmap.head();l_ptr != 0; l_ptr=l_ptr->next())
387 lattice.qmap[i++] = list_qmap(l_ptr);
388
389 list_qmap.clear();
390 cerr << "Built qmap with " << i << " entries" << endl;
391
392
393 // make alphabet
394
395 // temporary list
396 bool **used; // index nmap,qmap
397 int nl = lattice.nmap.n();
398 int ql = lattice.qmap.n();
399 used = new bool*[nl];
400 for(i=0;i<nl;i++)
401 used[i] = new bool[ql];
402
403 for(i=0;i<nl;i++)
404 for(j=0;j<ql;j++)
405 used[i][j] = false;
406
407 // get all combinations of word and log probability actually used
408 for(i=0;i<numarcs;i++){
409
410 //cerr << "arc " << i << " " << temp_arcs[i].logprob
411 //<< " " << temp_arcs[i].word << endl;
412
413 used[lattice.nmap_name_to_index(temp_arcs[i].word)][lattice.qmap_value_to_index(temp_arcs[i].logprob)] = true;
414 }
415
416 int count = 0;
417 for(i=0;i<nl;i++)
418 for(j=0;j<ql;j++)
419 if(used[i][j])
420 count++;
421
422 lattice.alphabet.resize(count);
423 count=0;
425 // ordered this way so already sorted, first by q then by n
426 for(j=0;j<ql;j++)
427 for(i=0;i<nl;i++)
428 if(used[i][j]){
430 sym->nmap_index=i;
431 sym->qmap_index=j;
432 lattice.alphabet[count++] = *sym;
433
434 }
435
436 cerr << "Alphabet has " << count << " symbols " << endl;
437
438 // make lattice itself
439
440 // nodes
441 for(i=0;i<numnodes;i++){
444 lattice.nodes.append(new_node);
445
446 }
447
448 // arcs
450 for(j=0;j<numarcs;j++){
451
452 // find from and to nodes by counting down node list
453
454 // from node
455
456 for (n_ptr =lattice. nodes.head(),count=0;
457 count<temp_arcs[j].start;
458 n_ptr = n_ptr->next(),count++){
459
460 if(n_ptr == NULL){
461 cerr << "Couldn't find 'from' node ";
462 return false;
463 }
464 }
466
467 // double check
468 if(from_node == NULL){
469 cerr << "Couldn't find from node, index "
470 << temp_arcs[j].start << endl;
471 return false;
472 }
473
474
475 for (n_ptr = lattice.nodes.head(),count=0;
476 count<temp_arcs[j].end;
477 n_ptr = n_ptr->next(),count++){
478
479 if(n_ptr == NULL){
480 cerr << "Couldn't find 'to' node ";
481 return false;
482 }
483 }
485
486 // double check
487 if(to_node == NULL){
488 cerr << "Couldn't find to node, index "
489 << temp_arcs[j].end << endl;
490 return false;
491 }
492
493
494 int word_index = lattice.nmap_name_to_index(temp_arcs[j].word);
495
496 // get arc symbol
497 int symbol = lattice.alphabet_index_lookup(word_index,
498 lattice.qmap_value_to_index(temp_arcs[j].logprob));
499 if(symbol < 0){
500 cerr << "Couldn't lookup symbol in alphabet !" << endl;
501 return false;
502 }
503
505 new_arc->label = symbol;
506 new_arc->to = to_node;
507
508 if(to_node->name.head() == NULL)
509 to_node->name.append(word_index); // only name of first arc in .. !
510
511 from_node->arcs_out.append(new_arc);
512
513 }
514
515 // find final nodes
516 for (n_ptr = lattice.nodes.head(),count=0;
517 n_ptr!= NULL;
518 n_ptr = n_ptr->next()){
519
520 if(lattice.nodes(n_ptr)->arcs_out.head() == NULL){
521 lattice.final_nodes.append(lattice.nodes(n_ptr));
522 count++;
523 }
524 }
525
526 cerr << "found " << count << " final nodes" << endl;
527
528
529
530 cerr << "Lattice loaded !" << endl;
531
532 delete [] used;
533 delete [] temp_arcs;
534
535 return true;
536}
537
EST_String before(int pos, int len=0) const
Part before position.
Definition EST_String.h:286
int contains(const char *s, int pos=-1) const
Does it contain this substring?
Definition EST_String.h:375
EST_String after(int pos, int len=1) const
Part after pos+len.
Definition EST_String.h:318