Edinburgh Speech Tools 2.4-release
 
Loading...
Searching...
No Matches
ltscompile.cc
1/*************************************************************************/
2/* */
3/* Centre for Speech Technology Research */
4/* University of Edinburgh, UK */
5/* Copyright (c) 1996 */
6/* All Rights Reserved. */
7/* */
8/* Permission is hereby granted, free of charge, to use and distribute */
9/* this software and its documentation without restriction, including */
10/* without limitation the rights to use, copy, modify, merge, publish, */
11/* distribute, sublicense, and/or sell copies of this work, and to */
12/* permit persons to whom this work is furnished to do so, subject to */
13/* the following conditions: */
14/* 1. The code must retain the above copyright notice, this list of */
15/* conditions and the following disclaimer. */
16/* 2. Any modifications must be clearly marked as such. */
17/* 3. Original authors' names are not deleted. */
18/* 4. The authors' names are not used to endorse or promote products */
19/* derived from this software without specific prior written */
20/* permission. */
21/* */
22/* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23/* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24/* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25/* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26/* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27/* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28/* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29/* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30/* THIS SOFTWARE. */
31/* */
32/*************************************************************************/
33/* Author : Alan W Black */
34/* Date : December 1997 */
35/*-----------------------------------------------------------------------*/
36/* */
37/* A LTS rule compiler, where rules are contextual rewrite rules. Rules */
38/* are of the for LC [ x ] RC => y where LC and RC are regexs on the */
39/* tape only and x and y are simple strings on symbols. That is the */
40/* standard form of LTS rules used in Festival. */
41/* */
42/*=======================================================================*/
43#include <iostream>
44#include "EST_cutils.h"
45#include "EST_WFST.h"
46
47static LISP lts_find_feasible_pairs(LISP rules);
48static LISP make_fp(LISP in, LISP out);
49static LISP find_outs(LISP rule);
50static LISP find_ins(LISP rule);
51static LISP add_alpha(LISP n, LISP existing);
52static LISP lts_find_alphabets(LISP rules);
53static void ltsrule_compile(LISP inalpha, LISP outalpha,
54 LISP fp, LISP sets, LISP rule,
56static LISP analyse_rule(LISP rule);
57static LISP expand_sets(LISP l, LISP fp, LISP sets);
58static LISP expand_set(LISP p, LISP fp, LISP sets);
59static LISP find_notMAP(LISP MAP,LISP fp);
60
61void ltscompile(LISP lts_rules, EST_WFST &all_wfst)
62{
63 // Build a transducer from given LTS rules. Because the interpretation
64 // of these rules is normally ordered and the WFST is not, the
65 // complement of each cumulative WFST must be generated before
66 // adding the next rule
67 LISP r;
68 LISP fp; // feasible pairs, those pairs with rules (rather than IxO)
70 LISP sets=siod_nth(2,lts_rules);
71 LISP rules=siod_nth(3,lts_rules);
73
74 alphas = lts_find_alphabets(lts_rules);
75 inalpha = car(alphas);
76 outalpha = cdr(alphas);
77
78 fp = lts_find_feasible_pairs(rules);
79
80 // set up an empty WFST, accepts nothing
81 all_wfst.build_from_regex(inalpha,outalpha,NIL);
82 // matches things not matched by rules, everything to begin with
83 nots.build_from_regex(inalpha,outalpha,
84 cons(rintern("*"),
85 cons(cons(rintern("or"),fp),NIL)));
86 nots.save("-");
87
88 for (r=rules; r != NIL; r=cdr(r))
89 {
90 EST_WFST a, not_a,b,c,d;
91
92 // all = all u (all' n r)
93 ltsrule_compile(inalpha,outalpha,fp,sets,car(r),a,not_a);
94 pprint(car(r));
95 a.save("-");
96 c.intersection(a,nots);
97 c.save("-");
98
99 // Add for next rule
100 b.uunion(nots,not_a);
101 not_a.save("-");
102 b.save("-");
103 nots = b;
104
105 d.uunion(all_wfst,c);
106 all_wfst = d;
107 all_wfst.save("-");
108 }
109}
110
111static LISP lts_find_alphabets(LISP rules)
112{
113 // Find the alphabets used in the rules
114 LISP r;
115 LISP in=NIL, out=NIL;
116
117 for (r=siod_nth(3,rules); r != NIL; r=cdr(r))
118 {
119 in = add_alpha(find_ins(car(r)),in);
120 out = add_alpha(find_outs(car(r)),out);
121 }
122
123 return cons(in,out);
124}
125
126static LISP add_alpha(LISP n, LISP existing)
127{
128 // Add values in n if not already in existing
129 LISP t;
131
132 for (t=n; t != NIL; t=cdr(t))
133 if (!siod_member_str(get_c_string(car(t)),e))
134 e = cons(car(t),e);
135
136 return e;
137}
138
139static LISP find_ins(LISP rule)
140{
141 // find all symbols in [] in rule
142 LISP c;
143 int state=FALSE;
144 LISP ins = NIL;
145
146 for (c=rule; c != NIL; c=cdr(c))
147 {
148 if (streq("[",get_c_string(car(c))))
149 state=TRUE;
150 else if (streq("]",get_c_string(car(c))))
151 break;
152 else if (state)
153 ins = cons(car(c),ins);
154 }
155 return reverse(ins);
156}
157
158static LISP find_outs(LISP rule)
159{
160 // find all symbols after = rule
161 LISP c;
162 int state=FALSE;
163 LISP outs = NIL;
164
165 for (c=rule; c != NIL; c=cdr(c))
166 {
167 if (streq("=",get_c_string(car(c))))
168 state=TRUE;
169 else if (state)
170 outs = cons(car(c),outs);
171 }
172 return reverse(outs);
173}
174
175static LISP lts_find_feasible_pairs(LISP rules)
176{
177 // Find the set of pairs that have rules associated with them
178 // This effectively defines the transducer alphabet.
179 // We take the surface part in [] and the part after the = and
180 // linearly match them to form a set of pairs, padded with epsilon
181 // if necessary.
182 LISP fp = NIL;
183 LISP r;
184
185 for (r=rules; r != NIL; r=cdr(r))
186 {
187 LISP in = find_ins(car(r));
188 LISP out = find_outs(car(r));
189
190 LISP pairs = make_fp(in,out);
191
192 for (LISP p=pairs; p != NIL; p=cdr(p))
193 {
194 if (!siod_member_str(get_c_string(car(p)),fp))
195 fp = cons(car(p),fp);
196 }
197 }
198 return fp;
199}
200
201static LISP make_fp(LISP in, LISP out)
202{
203 // Returns a list of pairs by matching each member of in to out
204 // padding the shorted one with _epsilon_ if necessary
205 LISP i,o;
206 LISP fp=NIL;
207 EST_String is,os;
208 int m;
209
210 if (siod_llength(in) > siod_llength(out))
211 m = siod_llength(in);
212 else
213 m = siod_llength(out);
214
215 for (i=in,o=out ; m > 0; --m,i=cdr(i),o=cdr(o))
216 {
217 if (i == NIL)
218 is = "__epsilon__";
219 else
220 is = get_c_string(car(i));
221 if (o == NIL)
222 os = "__epsilon__";
223 else
224 os = get_c_string(car(o));
225 fp = cons(strintern(is+"/"+os),fp);
226 }
227 return reverse(fp);
228}
229
230static void ltsrule_compile(LISP inalpha, LISP outalpha,
231 LISP fp, LISP sets, LISP rule,
233{
234 // Return two regexs, one matching with rewrites and another
235 // that matches things this rule doesn't match.
236 LISP LC,MAP,RC,notMAP,r;
237
238 r = analyse_rule(rule);
239 LC = siod_nth(0,r);
240 MAP = siod_nth(1,r);
241 RC = siod_nth(2,r);
242
243 LC = expand_sets(LC,fp,sets);
244 RC = expand_sets(RC,fp,sets);
245 notMAP = find_notMAP(MAP,fp);
246
247
248 LISP kk = cons(LC,cons(MAP,cons(RC,NIL)));
249 cout << "kk rule" << endl;
250 pprint(kk);
251 a.kkrule_compile(inalpha,outalpha,fp,kk,NIL);
252
253 // (or (* <fp>) (not <rule>)) ;; everything except the rule
254 LISP regex_r = cons(rintern("and"),append(LC,append(MAP,RC)));
255// LISP nn = cons(rintern("or"),
256// cons(cons(rintern("*"),cons(cons(rintern("or"),fp),NIL)),
257// cons(cons(rintern("not"),cons(regex_r,NIL)),
258// NIL)));
259 LISP nn = cons(rintern("not"),cons(regex_r,NIL));
260 not_a.build_from_regex(inalpha,outalpha,nn);
261
262}
263
264static LISP analyse_rule(LISP rule)
265{
266 // return the left context, map and right context;
267 LISP LC=NIL, RC=NIL, in=NIL, out=NIL;
268 LISP l;
269 int state=0;
270
271 for (l=rule; l != NIL; l=cdr(l))
272 {
273 if ((state==0) && (!streq("[",get_c_string(car(l)))))
274 LC = cons(car(l),LC);
275 else if ((state==0) && (streq("[",get_c_string(car(l)))))
276 state = 1;
277 else if ((state==1) && (!streq("]",get_c_string(car(l)))))
278 in = cons(car(l),in);
279 else if ((state==1) && (streq("]",get_c_string(car(l)))))
280 state = 2;
281 else if ((state==2) && (!streq("=",get_c_string(car(l)))))
282 RC = cons(car(l),RC);
283 else if ((state==2) && (streq("=",get_c_string(car(l)))))
284 state = 3;
285 else if (state == 3)
286 {
287 out = l;
288 break;
289 }
290 }
291
292 return cons(reverse(LC),
293 cons(make_fp(reverse(in),out),
294 cons(reverse(RC),NIL)));
295
296}
297
298static LISP expand_sets(LISP l, LISP fp, LISP sets)
299{
300 // Expand sets in l and fix regex characters
301 LISP r,es=NIL;
302
303 for (r=l; r != NIL; r=cdr(r))
304 {
305 LISP s = expand_set(car(r),fp,sets);
306 if (cdr(r) && (streq("*",get_c_string(car(cdr(r))))))
307 {
308 es = cons(cons(rintern("*"),s),es);
309 r=cdr(r);
310 }
311 else if (cdr(r) && (streq("+",get_c_string(car(cdr(r))))))
312 {
313 es = cons(cons(rintern("+"),s),es);
314 r=cdr(r);
315 }
316 else
317 es = cons(cons(rintern("and"),s),es);
318 }
319 return reverse(es);
320}
321
322static LISP expand_set(LISP p, LISP fp, LISP sets)
323{
324 // expand p with respect to sets and feasible pairs
325 LISP set = siod_assoc_str(get_c_string(p),sets);
326 LISP s,f;
327 LISP r=NIL;
328
329 if (set == NIL)
330 set = cons(p,NIL);
331
332 for (s=set; s != NIL; s=cdr(s))
333 {
334 for (f=fp; f != NIL; f=cdr(f))
335 {
336 EST_String ss = get_c_string(car(s));
337 EST_String sf = get_c_string(car(f));
338
339 if (sf.contains(ss+"/"))
340 r = cons(car(f),r);
341 }
342 }
343
344 return reverse(r);
345}
346
347static LISP find_notMAP(LISP MAP,LISP fp)
348{
349 // Returns REGEX that matches everything except MAP, this doesn't
350 // try all possible epsilons though
351 LISP r,notrp=NIL,m,np;
352 EST_String s,l,p,sr,lr,rr;
353
354 for (m=MAP; m != NIL; m=cdr(m))
355 {
356 p = get_c_string(car(m));
357 if (p.contains("/"))
358 {
359 s = p.before("/");
360 l = p.after("/");
361 }
362 else
363 {
364 s = p;
365 l = p;
366 }
367
368 for (np=NIL,r=fp; r != NIL; r = cdr(r))
369 {
370 rr = get_c_string(car(r));
371 if (rr.contains("/"))
372 {
373 sr = rr.before("/");
374 lr = rr.after("/");
375 }
376 else
377 {
378 sr = rr;
379 lr = rr;
380 }
381 if ((s == sr) && (l != lr))
382 np = cons(car(r),np);
383 }
384 notrp = cons(cons(rintern("or"),np),notrp);
385 }
386
387 return reverse(notrp);
388}
389
EST_String before(int pos, int len=0) const
Part before position.
Definition EST_String.h:286
int contains(const char *s, int pos=-1) const
Does it contain this substring?
Definition EST_String.h:375
EST_String after(int pos, int len=1) const
Part after pos+len.
Definition EST_String.h:318
void uunion(EST_TList< EST_WFST > &wl)
EST_write_status save(const EST_String &filename, const EST_String type="ascii")
?
Definition EST_WFST.cc:349
void intersection(EST_TList< EST_WFST > &wl)
Definition wfst_ops.cc:356