Edinburgh Speech Tools 2.4-release
 
Loading...
Searching...
No Matches
tlcompile.cc
1/*************************************************************************/
2/* */
3/* Centre for Speech Technology Research */
4/* University of Edinburgh, UK */
5/* Copyright (c) 1999 */
6/* All Rights Reserved. */
7/* */
8/* Permission is hereby granted, free of charge, to use and distribute */
9/* this software and its documentation without restriction, including */
10/* without limitation the rights to use, copy, modify, merge, publish, */
11/* distribute, sublicense, and/or sell copies of this work, and to */
12/* permit persons to whom this work is furnished to do so, subject to */
13/* the following conditions: */
14/* 1. The code must retain the above copyright notice, this list of */
15/* conditions and the following disclaimer. */
16/* 2. Any modifications must be clearly marked as such. */
17/* 3. Original authors' names are not deleted. */
18/* 4. The authors' names are not used to endorse or promote products */
19/* derived from this software without specific prior written */
20/* permission. */
21/* */
22/* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23/* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24/* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25/* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26/* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27/* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28/* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29/* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30/* THIS SOFTWARE. */
31/* */
32/*************************************************************************/
33/* Author : Alan W Black */
34/* Date : September 1999 */
35/*-----------------------------------------------------------------------*/
36/* */
37/* Building tree lexicons from rules. This could almost be done by the */
38/* regular grammar compiler but for efficiently reasons we have a */
39/* specific compiler. As the ends of the trees are never minimized */
40/* Actually it will take full context free grammars and convert them */
41/* up to a specified rewrite depth */
42/* */
43/* Based loosely on "Finite State Machines from Features Grammars" by */
44/* Black, International Workshop of Parsing Technologies, CMU 89 */
45/* */
46/*=======================================================================*/
47#include <iostream>
48#include "EST_cutils.h"
49#include "EST_THash.h"
50#include "EST_WFST.h"
51
52static LISP tl_find_l_w(LISP rules);
53static int production_index(LISP state,
55 int proposed);
56
57void tlcompile(LISP tl, EST_WFST &all_wfst)
58{
59 // Build a transducer from given tree-lexicon.
61// LISP sets=siod_nth(2,tl);
62 LISP rules=siod_nth(3,tl);
63
64 l_w = tl_find_l_w(rules);
65 letters = car(l_w); // or phones or "bits" or whatever
66 words = cdr(l_w); // the things you are indexing
67
68 all_wfst.build_tree_lex(letters,words,rules);
69
70}
71
72static LISP tl_find_l_w(LISP rules)
73{
74 // Find the alphabets used in the rules
75 LISP letters=NIL,words=NIL,r,s;
76
77 for (r=rules; r != NIL; r=cdr(r))
78 {
79 for (s = car(r); s != NIL; s=cdr(s))
80 {
81 if (streq("->",get_c_string(car(s))))
82 {
83 if (!siod_member_str(get_c_string(car(cdr(s))),words))
84 words = cons(car(cdr(s)),words);
85 break;
86 }
87 else if (!siod_member_str(get_c_string(car(s)),letters))
88 {
89 letters = cons(car(s),letters);
90 }
91 }
92 }
93
94 return cons(letters,words);
95}
96
97
98void EST_WFST::build_tree_lex(LISP inalpha, LISP outalpha,
99 LISP wlist)
100{
101 // Build a determinized tree lexicon for given list
102 LISP w,l;
103 int cs,ns;
104 EST_WFST_Transition *trans;
105 EST_TStringHash<int> index(100);
106 int fff;
107
108 clear();
110 int i_epsilon = in_epsilon();
111 int o_epsilon = out_epsilon();
112 float weight;
113
114 // Create a starting state and add it to this WFST
115 p_start_state = add_state(wfst_nonfinal);
116 fff = add_state(wfst_final);
117
118 for (w=wlist; w; w=cdr(w))
119 {
120// lprint(car(w));
121 weight = get_c_float(car(siod_last(car(w))));
122 for (cs=p_start_state,l=car(w); l; l=cdr(l))
123 {
124 if (streq("->",get_c_string(car(l))))
125 {
126 // reached word
128 p_out_symbols.name(get_c_string(car(cdr(l)))));
129 if (trans == 0)
130 {
131 p_states[cs]
132 ->add_transition(weight,
134 p_out_symbols.name(get_c_string(car(cdr(l)))));
135 }
136// else // duplicate word
137// {
138// cerr << "WFST: tlcompile, duplicate word ignored\n";
139// }
140 break;
141 }
142 else
143 {
144 trans = find_transition(cs,
145 p_in_symbols.name(get_c_string(car(l))),
146 o_epsilon);
147 if (trans == 0)
148 {
149 ns = production_index(cdr(l),index,p_num_states);
150 if (ns == p_num_states)
151 ns = add_state(wfst_nonfinal);
152 p_states[cs]
153 ->add_transition(weight,
154 ns,
155 p_in_symbols.name(get_c_string(car(l))),
156 o_epsilon);
157 }
158 else // increment count
159 {
160 ns = trans->state();
161 trans->set_weight(trans->weight()+weight);
162 }
163 }
164 cs = ns;
165 }
166 }
167
168 // normalized transition weights into probabilities
170}
171
172static int production_index(LISP state,
174 int proposed)
175{
176 // Returns proposed if ms is not already in index, otherwise
177 // returns the value that was proposed when it was first new.
178
179 // I'll have to make this more efficient in future.
181 LISP p;
182 int ns,found;
183
184 for (p=state; p != NIL; p = cdr(p))
185 istring += EST_String(get_c_string(car(p))) + " ";
186
187 ns = index.val(istring,found);
188 if (found)
189 return ns;
190 else
191 {
192 index.add_item(istring,proposed);
193 return proposed;
194 }
195}
const EST_String & name(const int n) const
The name given the index.
int add_state(enum wfst_state_type state_type)
Add a new state, returns new name.
Definition EST_WFST.cc:652
EST_WFST_Transition * find_transition(int state, int in, int out) const
Find (first) transition given in and out symbols.
Definition EST_WFST.cc:267
void init(int init_num_states=10)
Clear with (estimation of number of states required)
Definition EST_WFST.cc:145
void clear()
clear removing existing states if any
Definition EST_WFST.cc:115
int out_epsilon() const
Internal index for output epsilon.
Definition EST_WFST.h:220
int in_epsilon() const
Internal index for input epsilon.
Definition EST_WFST.h:218
void stop_cumulate()
Stop cumulation and calculate probabilities on transitions.
Definition EST_WFST.cc:685