Edinburgh Speech Tools 2.4-release
 
Loading...
Searching...
No Matches
srpd1.3.cc
1/*************************************************************************/
2/* */
3/* Centre for Speech Technology Research */
4/* University of Edinburgh, UK */
5/* Copyright (c) 1997 */
6/* All Rights Reserved. */
7/* */
8/* Permission is hereby granted, free of charge, to use and distribute */
9/* this software and its documentation without restriction, including */
10/* without limitation the rights to use, copy, modify, merge, publish, */
11/* distribute, sublicense, and/or sell copies of this work, and to */
12/* permit persons to whom this work is furnished to do so, subject to */
13/* the following conditions: */
14/* 1. The code must retain the above copyright notice, this list of */
15/* conditions and the following disclaimer. */
16/* 2. Any modifications must be clearly marked as such. */
17/* 3. Original authors' names are not deleted. */
18/* 4. The authors' names are not used to endorse or promote products */
19/* derived from this software without specific prior written */
20/* permission. */
21/* */
22/* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23/* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24/* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25/* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26/* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27/* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28/* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29/* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30/* THIS SOFTWARE. */
31/* */
32/*************************************************************************/
33/* Author : Paul Bagshaw */
34/* Date : 1993 */
35/*************************************************************************/
36/* */
37/* The above copyright was given by Paul Bagshaw, he retains */
38/* his original rights */
39/* */
40/*************************************************************************/
41 /****************************************************************************
42 * *
43 * Pitch Determination Algorithm. *
44 * *
45 * Super Resolution Pitch Determinator with No Headers (SRPD_HD). *
46 * *
47 * Analysis synchronised with cepstral analysis, pitch biasing option, and *
48 * optimised for minimum gross pitch errors and accurate voiced/unvoiced *
49 * classification. All known bugs resolved! *
50 * *
51 * 4th February 1992: *
52 * Additional option [-w] added to give an artificial frame length, thus *
53 * allowing the output data to be synchronised with other signal processing *
54 * algorithms such as cepstral analysis and formant tracking. *
55 * *
56 * Y. Medan, E. Yair, and D. Chazan, "Super resolution pitch determination *
57 * of speech signals," IEEE Trans. Signal Processing Vol.39 No.1 *
58 * pp.40-48 (1991). *
59 * *
60 * Implementation by Paul Bagshaw, Centre for Speech Technology Research, *
61 * University of Edinburgh, 80 South Bridge, Edinburgh EH1 1HN. *
62 * *
63 *****************************************************************************/
64
65/************************
66 * include header files *
67 ************************/
68
69#include <cmath>
70#include <cstdlib>
71#include <iostream>
72#include "srpd.h"
73#include "EST_cutils.h"
74#include "EST_Wave.h"
75
/* Fallback definition of MAXSHORT, used only when no earlier header has
 * defined it.  In this file it seeds the running min/max amplitude
 * searches over `short` sample data. */
76#ifndef MAXSHORT
77#define MAXSHORT 32767
78#endif
79
/*
 * super_resolution_pda: analyse one segment of samples and update the
 * pitch status for it.
 *
 * Reads samples from seg.data around index paras->Nmax, fills
 * p_cc->coeff with normalised cross-correlation coefficients for the
 * candidate periods paras->Nmin .. paras->Nmax (in steps of paras->L),
 * and writes the outcome into p_status:
 *   - SILENT   : low-amplitude segment; all coefficients are zeroed and
 *                pitch_freq is set to BREAK_NUMBER.
 *   - UNVOICED : no significant peak was collected; pitch_freq is set to
 *                BREAK_NUMBER.
 *   - VOICED   : pitch_freq = sample_freq / (N_ + beta), where N_ is the
 *                selected integer period and beta its fractional part.
 * p_status->s_h is set to SEND or HOLD and p_status->cc_max / threshold
 * are updated along the way.
 *
 * The function keeps state between calls in function-local statics
 * (zx_lft_N, zx_rht_N, prev_pf), so successive calls influence each
 * other through the pitch-bias decision.
 *
 * NOTE(review): this listing is missing some source lines.  The second
 * line of the parameter list is not visible (p_cc and p_status are
 * presumably declared there), nor are the declarations of x_index,
 * y_index and z_index.  Further gaps are flagged inline below.
 */
80void super_resolution_pda (struct Srpd_Op *paras, SEGMENT_ seg,
82{
83
84 static int zx_lft_N, zx_rht_N;
85 static double prev_pf = BREAK_NUMBER;
86
87 int n, j, k, N0 = 0, N1, N2, N_, q, lower_found = 0, score = 1, apply_bias;
89 int zx_rate = 0, zx_at_N0 = 0, prev_sign;
90 int seg1_zxs = 0, seg2_zxs = 0, total_zxs;
91 short prev_seg1, prev_seg2;
92 short x_max = -MAXSHORT, x_min = MAXSHORT;
93 short y_max = -MAXSHORT, y_min = MAXSHORT;
94 double xx = 0.0, yy = 0.0, zz = 0.0, xy = 0.0, yz = 0.0, xz = 0.0;
95 double max_cc = 0.0, coefficient, coeff_weight;
96 double xx_N, yy_N, xy_N, y1y1_N, xy1_N, yy1_N, beta;
97 LIST_ *sig_pks_hd, *sig_pks_tl, *sig_peak, *head, *tail;
98
99 sig_pks_hd = head = NULL;
100 sig_pks_tl = tail = NULL;
101 /* set correlation coefficient threshold */
102 if (p_status->v_uv == UNVOICED || p_status->v_uv == SILENT)
103 p_status->threshold = paras->Thigh;
104 else /* p_status->v_uv == VOICED */
105 p_status->threshold = (paras->Tmin > paras->Tmax_ratio *
106 p_status->cc_max) ? paras->Tmin : paras->Tmax_ratio *
107 p_status->cc_max;
108 /* determine if a bias should be applied */
/* The bias is enabled only with peak tracking on, a previous pitch value
 * available, the previous segment VOICED and not held, and the current
 * pitch within (0.625, 1.75) times the previous one. */
109 if (paras->peak_tracking && prev_pf != BREAK_NUMBER &&
110 p_status->v_uv == VOICED && p_status->s_h != HOLD &&
111 p_status->pitch_freq < 1.75 * prev_pf &&
112 p_status->pitch_freq > 0.625 * prev_pf)
113 apply_bias = 1;
114 else
115 apply_bias = 0;
116 /* consider first two segments of period n = Nmin */
117 prev_seg1 = seg.data[paras->Nmax - paras->Nmin] < 0 ? -1 : 1;
118 prev_seg2 = seg.data[paras->Nmax] < 0 ? -1 : 1;
119 for (j = 0; j < paras->Nmin; j += paras->L) {
120 /* find max and min amplitudes in x and y segments */
121 x_index = paras->Nmax - paras->Nmin + j;
122 y_index = paras->Nmax + j;
123 if (seg.data[x_index] > x_max) x_max = seg.data[x_index];
124 if (seg.data[x_index] < x_min) x_min = seg.data[x_index];
125 if (seg.data[y_index] > y_max) y_max = seg.data[y_index];
126 if (seg.data[y_index] < y_min) y_min = seg.data[y_index];
127 /* does new sample in x or y segment represent an input zero-crossing */
128 if (seg.data[x_index] * prev_seg1 < 0) {
129 prev_seg1 *= -1;
130 seg1_zxs++;
131 }
132 if (seg.data[y_index] * prev_seg2 < 0) {
133 prev_seg2 *= -1;
134 seg2_zxs++;
135 }
136 /* calculate parts for first correlation coefficient */
137 xx += (double) seg.data[x_index] * seg.data[x_index];
138 yy += (double) seg.data[y_index] * seg.data[y_index];
139 xy += (double) seg.data[x_index] * seg.data[y_index];
140 }
141 /* low amplitude segment represents silence */
/* Early exit: nothing has been added to the peak list yet, so there is
 * nothing to free on this path. */
142 if (abs (x_max) + abs (x_min) < 2 * paras->Tsilent ||
143 abs (y_max) + abs (y_min) < 2 * paras->Tsilent) {
144 for (q = 0; q < p_cc->size; p_cc->coeff[q++] = 0.0);
145 prev_pf = p_status->pitch_freq;
146 p_status->pitch_freq = BREAK_NUMBER;
147 p_status->v_uv = SILENT;
148 p_status->s_h = SEND;
149 p_status->cc_max = 0.0;
150 return;
151 }
152 /* determine first correlation coefficients, for period n = Nmin */
/* NOTE(review): xx and yy are not tested for zero before the division
 * here; confirm the silence test above rules that case out. */
153 p_cc->coeff[0] = p_status->cc_max = xy / sqrt (xx) / sqrt (yy);
154 for (q = 1; q < p_cc->size && q < paras->L; p_cc->coeff[q++] = 0.0);
/* NOTE(review): a source line is missing from the listing here.
 * total_zxs is incremented and tested below but never assigned in the
 * visible code; presumably it is initialised on the missing line. */
156 prev_sign = p_cc->coeff[0] < 0.0 ? -1 : 1;
157 prev_seg1 = seg.data[paras->Nmax - paras->Nmin] < 0 ? -1 : 1;
158 /* iteratively determine correlation coefficient for next possible period */
/* j continues from the value left by the first loop, so y_index keeps
 * extending the y segment to the right while x_index extends the x
 * segment to the left. */
159 for (n = paras->Nmin + paras->L; n <= paras->Nmax; n += paras->L,
160 j += paras->L) {
161 x_index = paras->Nmax - n;
162 y_index = paras->Nmax + j;
163 /* does new samples in x or y segment represent an input zero-crossing */
164 if (seg.data[x_index] * prev_seg1 < 0) {
165 prev_seg1 *= -1;
166 total_zxs++;
167 }
168 if (seg.data[y_index] * prev_seg2 < 0) {
169 prev_seg2 *= -1;
170 total_zxs++;
171 }
172 /* determine next coefficient */
/* xx and yy are updated incrementally; xy is recomputed from scratch
 * over the decimated samples for every candidate period n. */
173 xx += (double) seg.data[x_index] * seg.data[x_index];
174 yy += (double) seg.data[y_index] * seg.data[y_index];
175 for (k = 0, xy = 0.0; k < n; k += paras->L)
176 xy += (double) seg.data[paras->Nmax - n + k] * seg.data[paras->Nmax + k];
177 p_cc->coeff[n - paras->Nmin] = xy / sqrt (xx) / sqrt (yy);
178 if (p_cc->coeff[n - paras->Nmin] > p_status->cc_max)
179 p_status->cc_max = p_cc->coeff[n - paras->Nmin];
180 /* set unknown coefficients to zero */
181 for (q = n - paras->Nmin + 1;
182 q < p_cc->size && q < n - paras->Nmin + paras->L;
183 p_cc->coeff[q++] = 0.0);
184 /* is there a slope with positive gradient in the coefficients track yet */
185 if (p_cc->coeff[n - paras->Nmin] > p_cc->coeff[n - paras->Nmin - paras->L])
186 lower_found = 1;
187 /* has new coefficient resulted in a zero-crossing */
188 if (p_cc->coeff[n - paras->Nmin] * prev_sign < 0.0) {
189 prev_sign *= -1;
190 zx_rate++;
191 }
192 /* does the new coefficient represent a pitch period candidate */
/* A pending candidate N0 is committed to the peak list (score 1) once
 * the coefficient track has made a further zero-crossing. */
193 if (N0 != 0 && zx_rate > zx_at_N0) {
194 add_to_list (&sig_pks_hd, &sig_pks_tl, N0, 1);
195 N0 = 0;
196 max_cc = 0.0;
197 }
/* With bias active, coefficients for periods strictly between the peak
 * boundaries stored by the previous call count double against the
 * threshold. */
198 if (apply_bias && n > zx_lft_N && n < zx_rht_N)
199 coeff_weight = 2.0;
200 else
201 coeff_weight = 1.0;
202 if (p_cc->coeff[n - paras->Nmin] > max_cc && total_zxs > 3 && lower_found) {
203 max_cc = p_cc->coeff[n - paras->Nmin];
204 if (max_cc * coeff_weight >= p_status->threshold) {
/* NOTE(review): a source line is missing from the listing here.
 * zx_at_N0 is never assigned in the visible code; presumably it is
 * recorded on the missing line. */
206 N0 = n;
207 }
208 }
209 }
210 /* unvoiced if no significant peak found in coefficients track */
/* NOTE(review): a candidate still pending in N0 when the loop above ends
 * is not added to the list; confirm this is intended. */
211 if (sig_pks_hd == NULL) {
212 prev_pf = p_status->pitch_freq;
213 p_status->pitch_freq = BREAK_NUMBER;
214 p_status->v_uv = UNVOICED;
215 p_status->s_h = SEND;
216 return;
217 }
218 /* find which significant peak in list corresponds to true pitch period */
/* NOTE(review): a source line is missing from the listing here.
 * sig_peak is read by the loop below without a visible assignment;
 * presumably it is set to the list head on the missing line. */
/* Each candidate is re-checked by correlating the y segment with the
 * segment one candidate period further on; candidates that pass the
 * threshold get score 2, and head/tail track the first and last of
 * them. */
220 while (sig_peak != NULL) {
221 yy = zz = yz = 0.0;
222 for (j = 0; j < sig_peak->N0; j++) {
223 y_index = paras->Nmax + j;
224 z_index = paras->Nmax + sig_peak->N0 + j;
225 yy += (double) seg.data[y_index] * seg.data[y_index];
226 zz += (double) seg.data[z_index] * seg.data[z_index];
227 yz += (double) seg.data[y_index] * seg.data[z_index];
228 }
229 if (yy == 0.0 || zz == 0.0)
230 coefficient = 0.0;
231 else
232 coefficient = yz / sqrt (yy) / sqrt (zz);
233 if (apply_bias && sig_peak->N0 > zx_lft_N && sig_peak->N0 < zx_rht_N)
234 coeff_weight = 2.0;
235 else
236 coeff_weight = 1.0;
237 if (coefficient * coeff_weight >= p_status->threshold) {
238 sig_peak->score = 2;
239 if (head == NULL) {
240 head = sig_peak;
241 score = 2;
242 }
243 tail = sig_peak;
244 }
245 sig_peak = sig_peak->next_item;
246 }
/* If no candidate reached score 2, fall back to the whole list. */
247 if (head == NULL) head = sig_pks_hd;
248 if (tail == NULL) tail = sig_pks_tl;
249 N0 = head->N0;
/* With more than one candidate in play, compare them against the x
 * segment of length tail->N0.  A later candidate replaces the current
 * choice only when its coefficient scaled by paras->Tdh exceeds the best
 * coefficient so far. */
250 if (tail != head) {
251 xx = 0.0;
252 for (j = 0; j < tail->N0; j++)
253 xx += (double) seg.data[paras->Nmax - tail->N0 + j] *
254 seg.data[paras->Nmax - tail->N0 + j];
255 sig_peak = head;
256 while (sig_peak != NULL) {
257 if (sig_peak->score == score) {
258 xz = zz = 0.0;
259 for (j = 0; j < tail->N0; j++) {
260 z_index = paras->Nmax + sig_peak->N0 + j;
261 xz += (double) seg.data[paras->Nmax - tail->N0 + j] *
262 seg.data[z_index];
263 zz += (double) seg.data[z_index] * seg.data[z_index];
264 }
265 coefficient = xz / sqrt (xx) / sqrt (zz);
266 if (sig_peak == head)
267 max_cc = coefficient;
268 else if (coefficient * paras->Tdh > max_cc) {
269 N0 = sig_peak->N0;
270 max_cc = coefficient;
271 }
272 }
273 sig_peak = sig_peak->next_item;
274 }
275 }
276 p_status->cc_max = p_cc->coeff[N0 - paras->Nmin];
277 /* voiced segment period now found */
278 if ((tail == head && score == 1 && p_status->v_uv != VOICED) ||
279 p_cc->coeff[N0 - paras->Nmin] < p_status->threshold)
280 p_status->s_h = HOLD;
281 else
282 p_status->s_h = SEND;
283 /* find left and right boundaries of peak in coefficients track */
/* The boundaries are the nearest periods on each side of N0 whose
 * coefficient is negative (0 when none is found).  They are stored in
 * the statics for the bias test on the next call. */
284 zx_lft_N = zx_rht_N = 0;
285 for (q = N0; q >= paras->Nmin; q -= paras->L)
286 if (p_cc->coeff[q - paras->Nmin] < 0.0) {
287 zx_lft_N = q;
288 break;
289 }
290 for (q = N0; q <= paras->Nmax; q += paras->L)
291 if (p_cc->coeff[q - paras->Nmin] < 0.0) {
292 zx_rht_N = q;
293 break;
294 }
295 /* define small region around peak */
296 if (N0 - paras->L < paras->Nmin) {
297 N1 = N0;
298 N2 = N0 + 2 * paras->L;
299 }
300 else if (N0 + paras->L > paras->Nmax) {
301 N1 = N0 - 2 * paras->L;
302 N2 = N0;
303 }
304 else {
305 N1 = N0 - paras->L;
306 N2 = N0 + paras->L;
307 }
308 /* compensate for decimation factor L */
/* When decimating (L != 1), recompute the coefficients for every period
 * in [N1, N2] using all samples, and move N0 to the largest of them. */
309 if (paras->L != 1) {
310 xx = yy = xy = 0.0;
311 for (j = 0; j < N1; j++) {
312 x_index = paras->Nmax - N1 + j;
313 y_index = paras->Nmax + j;
314 xx += (double) seg.data[x_index] * seg.data[x_index];
315 xy += (double) seg.data[x_index] * seg.data[y_index];
316 yy += (double) seg.data[y_index] * seg.data[y_index];
317 }
318 p_cc->coeff[N1 - paras->Nmin] = p_status->cc_max =
319 xy / sqrt (xx) / sqrt (yy);
320 N0 = N1;
321 for (n = N1 + 1; n <= N2; n++, j++) {
322 xx += (double) seg.data[paras->Nmax - n] * seg.data[paras->Nmax - n];
323 yy += (double) seg.data[paras->Nmax + j] * seg.data[paras->Nmax + j];
324 for (k = 0, xy = 0.0; k < n; k++)
325 xy += (double) seg.data[paras->Nmax - n + k] * seg.data[paras->Nmax + k];
326 p_cc->coeff[n - paras->Nmin] = xy / sqrt (xx) / sqrt (yy);
327 if (p_cc->coeff[n - paras->Nmin] > p_status->cc_max) {
328 p_status->cc_max = p_cc->coeff[n - paras->Nmin];
329 N0 = n;
330 }
331 }
332 }
333 /* compensate for finite resolution in estimating pitch */
/* N_ is the integer period used for interpolation: N0 itself, or N0 - 1
 * when the coefficient at N0 - 1 is closer to the peak value than the
 * one at N0 + 1 (or when N0 sits at the upper edge). */
334 if (N0 - 1 < paras->Nmin || N0 == N1) N_ = N0;
335 else if (N0 + 1 > paras->Nmax || N0 == N2) N_ = N0 - 1;
336 else if (p_cc->coeff[N0 - paras->Nmin] - p_cc->coeff[N0 - paras->Nmin - 1] <
337 p_cc->coeff[N0 - paras->Nmin] - p_cc->coeff[N0 - paras->Nmin + 1])
338 N_ = N0 - 1;
339 else
340 N_ = N0;
341 xx_N = yy_N = xy_N = y1y1_N = xy1_N = yy1_N = 0.0;
/* Accumulate the energy and cross terms for the y segment and for the y
 * segment shifted by one sample (y_index + 1); beta is computed from
 * these sums. */
342 for (j = 0; j < N_; j++) {
343 x_index = paras->Nmax - N_ + j;
344 y_index = paras->Nmax + j;
345 xx_N += (double) seg.data[x_index] * seg.data[x_index];
346 yy_N += (double) seg.data[y_index] * seg.data[y_index];
347 xy_N += (double) seg.data[x_index] * seg.data[y_index];
348 y1y1_N += (double) seg.data[y_index + 1] * seg.data[y_index + 1];
349 xy1_N += (double) seg.data[x_index] * seg.data[y_index + 1];
350 yy1_N += (double) seg.data[y_index] * seg.data[y_index + 1];
351 }
/* NOTE(review): the denominator below is not checked for zero. */
352 beta = (xy1_N * yy_N - xy_N * yy1_N) /
353 (xy1_N * (yy_N - yy1_N) + xy_N * (y1y1_N - yy1_N));
/* A beta outside [0, 1) moves N_ by one sample and drops the fractional
 * part; cc_max is refined only when beta lies inside the interval. */
354 if (beta < 0.0) {
355 N_--;
356 beta = 0.0;
357 }
358 else if (beta >= 1.0) {
359 N_++;
360 beta = 0.0;
361 }
362 else
363 p_status->cc_max = ((1.0 - beta) * xy_N + beta * xy1_N) /
364 sqrt (xx_N * ((1.0 - beta) * (1.0 - beta) * yy_N +
365 2.0 * beta * (1.0 - beta) * yy1_N +
366 beta * beta * y1y1_N));
367 prev_pf = p_status->pitch_freq;
368 p_status->pitch_freq = (double) (paras->sample_freq) / (double) (N_ + beta);
369 p_status->v_uv = VOICED;
/* Release the candidate list built during this call. */
370 free_list (&sig_pks_hd);
371 return;
372
373}
374
375/************* * LEVEL TWO * ************/
376
/*
 * add_to_list: allocate a new LIST_ node holding N_val and score_val,
 * with next_item set to NULL, and attach it to the list described by
 * *p_list_hd / *p_list_tl.
 *
 * NOTE(review): several source lines are missing from this listing: the
 * local declarations (new_node, last_node), the statement executed when
 * the list is empty, and whatever follows the else branch.  From the
 * visible code, a non-empty list gets the new node linked after
 * last_node; presumably the head and tail pointers are updated on the
 * missing lines.  Verify against the full source.
 */
377void add_to_list (LIST_ **p_list_hd, LIST_ **p_list_tl, int N_val,
378 int score_val)
379{
380
382
383 new_node = walloc(LIST_ ,1);
385 new_node->N0 = N_val;
386 new_node->score = score_val;
387 new_node->next_item = NULL;
388 if (*p_list_hd == NULL)
390 else
391 last_node->next_item = new_node;
393
394}
395
396/********************
397 * define functions *
398 ********************/
399
400/************* * LEVEL ONE * ************/
401
402void error (error_flags err_type)
403{
404
405 char prog[15]; /* program file name */
406
407 strcpy (prog, "srpd");
408 fprintf (stderr, "%s: ", prog);
409 switch (err_type) {
410 case CANT_WRITE:
411 fprintf (stderr, "cannot write to output file");
412 break;
413 case DECI_FCTR:
414 fprintf (stderr, "decimation factor not set");
415 break;
416 case INSUF_MEM:
417 fprintf (stderr, "insufficient memory available");
418 break;
419 case FILE_ERR:
420 perror ("");
421 break;
422 case FILE_SEEK:
423 fprintf (stderr, "improper fseek () to reposition a stream");
424 break;
425 case LEN_OOR:
426 fprintf (stderr, "artificial frame length set out of range");
427 break;
428 case MAX_FREQ:
429 fprintf (stderr, "maximum pitch frequency value (Hz) not set");
430 break;
431 case MIN_FREQ:
432 fprintf (stderr, "minimum pitch frequency value (Hz) not set");
433 break;
434 case MISUSE:
435 fprintf (stderr, "usage: %s -i lpf_sample_file ", prog);
436 fprintf (stderr, "-o pitch_file [options]\n");
437 fprintf (stderr, "\nOptions {with default values}\n");
438 fprintf (stderr, "-a form pitch_file in ascii format\n");
439 fprintf (stderr, "-l 'lower pitch frequency limit' {%f (Hz)}\n",
440 DEFAULT_MIN_PITCH);
441 fprintf (stderr, "-u 'upper pitch frequency limit' {%f (Hz)}\n",
442 DEFAULT_MAX_PITCH);
443 fprintf (stderr, "-d 'decimation factor' {%d (samples)}\n",
444 DEFAULT_DECIMATION);
445 fprintf (stderr, "-n 'noise floor (abs. amplitude)' {%d}\n",
446 DEFAULT_TSILENT);
447 fprintf (stderr, "-h 'unvoiced to voiced coeff threshold' {%f}\n",
448 DEFAULT_THIGH);
449 fprintf (stderr, "-m 'min. voiced to unvoiced coeff threshold' {%f}\n",
450 DEFAULT_TMIN);
451 fprintf (stderr, "-r 'voiced to unvoiced coeff threshold ratio' {%f}\n",
452 DEFAULT_TMAX_RATIO);
453 fprintf (stderr, "-t 'anti pitch doubling/halving threshold' {%f}\n",
454 DEFAULT_TDH);
455 fprintf (stderr, "-p perform peak tracking\n");
456 fprintf (stderr, "-f 'sampling frequency' {%d (Hz)}\n", DEFAULT_SF);
457 fprintf (stderr, "-s 'frame shift' {%f (ms)}\n", DEFAULT_SHIFT);
458 fprintf (stderr, "-w 'artificial frame length' {%f (ms)}\n",
459 DEFAULT_LENGTH);
460 break;
461 case NOISE_FLOOR:
462 fprintf (stderr, "noise floor set below minimum amplitude");
463 break;
464 case SAMPLE_FREQ:
465 fprintf (stderr, "attempt to set sampling frequency negative");
466 break;
467 case SFT_OOR:
468 fprintf (stderr, "frame shift set out of range");
469 break;
470 case THR_DH:
471 fprintf (stderr, "anti pitch doubling/halving threshold not set");
472 break;
473 case THR_HIGH:
474 fprintf (stderr, "unvoiced to voiced coeff threshold not set");
475 break;
476 case THR_MAX_RTO:
477 fprintf (stderr, "voiced to unvoiced coeff threshold ratio not set");
478 break;
479 case THR_MIN:
480 fprintf (stderr, "minimum voiced to unvoiced coeff threshold not set");
481 break;
482 default:
483 fprintf (stderr, "undefined error, %u occurred", err_type);
484 break;
485 }
486 fprintf (stderr, "\n");
487 exit (-1);
488
489}
490
491void initialise_parameters (struct Srpd_Op *p_par)
492{
493 p_par->L = DEFAULT_DECIMATION;
494 p_par->min_pitch = DEFAULT_MIN_PITCH;
495 p_par->max_pitch = DEFAULT_MAX_PITCH;
496 p_par->shift = DEFAULT_SHIFT;
497 p_par->length = DEFAULT_LENGTH;
498 p_par->Tsilent = DEFAULT_TSILENT;
499 p_par->Tmin = DEFAULT_TMIN;
500 p_par->Tmax_ratio = DEFAULT_TMAX_RATIO;
501 p_par->Thigh = DEFAULT_THIGH;
502 p_par->Tdh = DEFAULT_TDH;
503 p_par->make_ascii = 0;
504 p_par->peak_tracking = 0;
505 p_par->sample_freq = DEFAULT_SF;
506 /* p_par->Nmax and p_par->Nmin cannot be initialised */
507 return;
508
509}
510
/*
 * initialise_structures: derive the period limits from the pitch limits
 * and allocate the working buffers.
 *
 *  - p_par->Nmax / Nmin are the longest / shortest candidate periods in
 *    samples (sample_freq / min_pitch rounded up, sample_freq / max_pitch
 *    rounded down); min_pitch and max_pitch are then recomputed from
 *    those integer periods.
 *  - p_seg gets a buffer of 3 * Nmax shorts, plus the frame shift and
 *    frame length converted from milliseconds to samples.
 *  - p_cc gets one double per period in Nmin..Nmax inclusive.
 *
 * Both buffers come from walloc and are released by end_structure_use.
 *
 * NOTE(review): the second line of the parameter list is missing from
 * this listing; p_cc is presumably declared there.
 * NOTE(review): Nmin is 0 when max_pitch exceeds sample_freq, which makes
 * the max_pitch recomputation divide by zero; confirm callers validate
 * the limits first.
 */
511void initialise_structures (struct Srpd_Op *p_par, SEGMENT_ *p_seg,
513{
514 p_par->Nmax = (int) ceil((float)p_par->sample_freq / p_par->min_pitch);
515 p_par->Nmin = (int) floor((float)p_par->sample_freq / p_par->max_pitch);
516 p_par->min_pitch = (float)p_par->sample_freq / (float)p_par->Nmax;
517 p_par->max_pitch = (float)p_par->sample_freq / (float)p_par->Nmin;
518
519 p_seg->size = 3 * p_par->Nmax;
520 p_seg->shift = (int) rint( p_par->shift / 1000.0 * (float)p_par->sample_freq );
521 p_seg->length = (int) rint( p_par->length / 1000.0 * (float)p_par->sample_freq );
522 p_seg->data = walloc(short,p_seg->size);
523
524 p_cc->size = p_par->Nmax - p_par->Nmin + 1;
525 p_cc->coeff = walloc(double,p_cc->size);
526
527 return;
528}
529
530
531void initialise_status (struct Srpd_Op *paras, STATUS_ *p_status)
532{
533
534 p_status->pitch_freq = BREAK_NUMBER;
535 p_status->v_uv = SILENT;
536 p_status->s_h = SEND; /* SENT */
537 p_status->cc_max = 0.0;
538 p_status->threshold = paras->Thigh;
539 return;
540
541}
542
543void end_structure_use(SEGMENT_ *p_seg, CROSS_CORR_ *p_cc)
544{
545
546 wfree (p_seg->data);
547 wfree (p_cc->coeff);
548 return;
549
550}
551
/* Reader state values shared by read_next_segment and
 * read_next_wave_segment: BEGINNING while leading padding is handled,
 * MIDDLE_ while segments are read, END once the data has run short. */
552#define BEGINNING 1
553#define MIDDLE_ 2
554#define END 3
555
/*
 * read_next_segment: fetch the next analysis segment of raw shorts from
 * voxfile into p_seg->data.
 *
 * Progress is kept in function-local statics (status, padding,
 * tracklen), so the function is driven by calling it repeatedly:
 *   returns 1  after reading a full segment of p_seg->size samples; the
 *              file is then repositioned p_seg->shift samples past the
 *              start of that segment;
 *   returns 2  for calls consumed as leading padding, and for calls made
 *              after a short read while tracklen is still positive;
 *   returns 0  once tracklen is exhausted.
 * Seek failures are reported through error (FILE_SEEK).
 *
 * NOTE(review): unlike read_next_wave_segment, the visible code never
 * resets the static state when it returns 0, so it appears usable for
 * one file per program run only.
 * NOTE(review): the cout statements look like leftover debug output.
 */
556int read_next_segment (FILE *voxfile, struct Srpd_Op *paras, SEGMENT_ *p_seg)
557{
558
559 static int status = BEGINNING, padding= -1, tracklen = 0;
560
561 int samples_read = 0;
562 long init_file_position, offset;
563
564 if (status == BEGINNING) {
/* padding == -1 marks the very first call: measure the file, compute the
 * number of frames, and position the stream for the first segment. */
565 if (padding == -1) {
/* whence value 2: presumably SEEK_END (likewise 1 = SEEK_CUR and
 * 0 = SEEK_SET below); confirm for the target platform. */
566 if (fseek (voxfile, 0L, 2)) error (FILE_SEEK);
567 tracklen = ((ftell (voxfile) / sizeof (short)) - p_seg->length) /
568 p_seg->shift + 1;
569 cout << "track len " << tracklen;
570 rewind (voxfile);
571 if (paras->Nmax < p_seg->length / 2) {
572 offset = (long) (p_seg->length / 2 - paras->Nmax) * sizeof (short);
573 if (fseek (voxfile, offset, 1)) error (FILE_SEEK);
574 padding = 0;
575 }
576 else {
577 if ((paras->Nmax - p_seg->length / 2) % p_seg->shift != 0) {
578 offset = (long) (p_seg->shift - ((paras->Nmax - p_seg->length / 2) %
579 p_seg->shift)) * sizeof (short);
580 if (fseek (voxfile, offset, 1)) error (FILE_SEEK);
581 }
/* number of leading frames to report as padding: the lead-in length
 * divided by the shift, rounded up */
582 padding = (paras->Nmax - p_seg->length / 2) / p_seg->shift +
583 ((paras->Nmax - p_seg->length / 2) % p_seg->shift == 0 ? 0 : 1);
584 }
585 }
586 cout << "padding " << padding << endl;
587 if (padding-- == 0)
588 status = MIDDLE_;
589 else if (tracklen-- <= 0)
590 return (0);
591 else
592 return (2);
593 }
594 cout << "tl " << tracklen << endl;
595 if (status == MIDDLE_) {
596 if (tracklen > 0) {
/* NOTE(review): a source line is missing from the listing here.
 * init_file_position is used below without a visible assignment;
 * presumably the current file position is recorded on the missing
 * line. */
598 offset = (long) (p_seg->shift * sizeof (short));
599 samples_read = fread ((short *) p_seg->data, sizeof (short),
600 p_seg->size, voxfile);
601 if (samples_read == p_seg->size) {
602 if (fseek (voxfile, init_file_position + offset, 0)) error (FILE_SEEK);
603 tracklen--;
604 return (1);
605 }
606 else {
/* short read: switch to END and fall through to the END handling below */
607 status = END;
608 }
609 }
610 else
611 return (0);
612 }
613 if (status == END) {
614 if (tracklen-- > 0)
615 return (2);
616 else
617 return (0);
618 }
619 return (0);
620
621}
622
623int read_next_wave_segment(EST_Wave &sig, Srpd_Op *paras, SEGMENT_ *p_seg)
624{
625 static int status = BEGINNING, padding = -1, tracklen = 0;
626 int i;
627 long offset;
628 static int wave_pos;
629
630
631 //printf("read: size %d shift %d length %d\n", p_seg->size, p_seg->shift, p_seg->length);
632
633 if (status == BEGINNING)
634 {
635 if (padding == -1)
636 {
637 tracklen = (sig.num_samples() - p_seg->length)
638 / p_seg->shift + 1;
639 if (paras->Nmax < p_seg->length / 2)
640 {
641 offset = p_seg->length / 2 - paras->Nmax;
642 wave_pos = offset;
643 padding = 0;
644 }
645 else
646 {
647 if ((paras->Nmax - p_seg->length / 2) % p_seg->shift != 0) {
648 offset = p_seg->shift - ((paras->Nmax - p_seg->length / 2)%
649 p_seg->shift);
650 wave_pos = offset;
651 }
652 padding = (paras->Nmax - p_seg->length / 2) / p_seg->shift +
653 ((paras->Nmax - p_seg->length / 2)
654 % p_seg->shift == 0 ? 0 : 1);
655 }
656 }
657 if (padding-- == 0)
658 status = MIDDLE_;
659 else if (tracklen-- <= 0) {
660 status = BEGINNING;
661 padding = -1;
662 tracklen = 0;
663 return (0);
664 }
665 else
666 return (2);
667 }
668 if (status == MIDDLE_)
669 {
670 if (tracklen > 0)
671 {
672 offset = p_seg->shift;
673 for (i = 0; (i < p_seg->size) && (i+wave_pos)<sig.num_samples();
674 ++i)
675 p_seg->data[i] = sig.a(i + wave_pos,0);
676 for ( ; i < p_seg->size; ++i)
677 p_seg->data[i] = 0;
678
679 if (wave_pos <= sig.num_samples())
680 {
681 wave_pos += offset;
682 tracklen--;
683 return (1);
684 }
685 else
686 status = END;
687 }
688 else {
689 status = BEGINNING;
690 padding = -1;
691 tracklen = 0;
692 return (0);
693 }
694 }
695 if (status == END)
696 {
697 if (tracklen-- > 0)
698 return (2);
699 else {
700 status = BEGINNING;
701 padding = -1;
702 tracklen = 0;
703 return (0);
704 }
705 }
706 status = BEGINNING;
707 padding = -1;
708 tracklen = 0;
709 return (0);
710}
711
712void write_track(STATUS_ status, struct Srpd_Op paras, FILE *outfile)
713{
714 if (paras.make_ascii)
715 {
716 if (fprintf(outfile,"%7g\n",status.pitch_freq) != 8)
717 error(CANT_WRITE);
718 }
719 else
720 if (!fwrite ((double *) &status.pitch_freq, sizeof (double), 1, outfile))
721 error (CANT_WRITE);
722 return;
723
724}
725
726void free_list (LIST_ **p_list_hd)
727{
728
729 LIST_ *next;
730
731 while (*p_list_hd != NULL) {
732 next = (*p_list_hd)->next_item;
733 wfree (*p_list_hd);
734 *p_list_hd = next;
735 }
736
737}
Definition srpd.h:105
Definition srpd.h:99
Definition srpd.h:86