45#include "EST_String.h"
47#include "EST_simplestats.h"
56const int est_64to32(
void *c)
68 for (i=0,d=0,
x=1; i<24; i++)
79#define tprob_int(X) (est_64to32(X))
82EST_DiscreteProbDistribution::EST_DiscreteProbDistribution(
const EST_Discrete *d,
85 type = tprob_discrete;
100 num_samples = b.num_samples;
101 discrete = b.discrete;
122 type = tprob_discrete;
127 for (i=0; i<icounts.
length(); i++)
137 type = tprob_discrete;
141 for (i=0; i<icounts.
length(); i++)
147 icounts[tprob_int(i)] += count;
148 num_samples += count;
154 num_samples += count;
161 if (type == tprob_discrete)
163 int idx = discrete->index(s);
164 icounts[
idx] += count;
168 for (p=scounts.
list.head(); p != 0; p=p->next())
170 if (scounts.
list(p).k == s)
172 scounts.
list(p).v += count;
179 num_samples += count;
187 if (type == tprob_discrete)
190 for (i=0; i < icounts.
length(); i++)
205 *prob = probability(
pt);
206 return discrete->name(
pt);
212 for (p=scounts.
list.head(); p != 0; p=p->next())
213 if (scounts.
list(p).v > max)
216 max = scounts.
list(p).v;
227 *prob = (
double)max/num_samples;
228 return scounts.
list(t).k;
233double EST_DiscreteProbDistribution::probability(
const EST_String &s)
const
235 if (frequency(s) == 0.0)
238 return (
double)frequency(s)/num_samples;
241double EST_DiscreteProbDistribution::probability(
const int i)
const
243 if (frequency(i) == 0.0)
246 return (
double)frequency(i)/num_samples;
249double EST_DiscreteProbDistribution::frequency(
const EST_String &s)
const
251 if (type == tprob_discrete)
252 return icounts.
a_no_check(discrete->index(s));
257double EST_DiscreteProbDistribution::frequency(
const int i)
const
259 if (type == tprob_discrete)
263 cerr <<
"ProbDistribution: can't access string type pd with int\n";
270 if (type == tprob_discrete)
272 num_samples -= icounts.
a_no_check(discrete->index(s));
278 num_samples -= scounts.
val_def(s,0);
286 if (type == tprob_discrete)
288 num_samples -= icounts[i];
294 cerr <<
"ProbDistribution: can't access string type pd with int\n";
301 if (type == tprob_discrete)
303 num_samples -= icounts[tprob_int(i)];
305 icounts[tprob_int(i)] = c;
309 cerr <<
"ProbDistribution: can't access string type pd with int\n";
317 if (type == tprob_discrete)
325 if (type == tprob_discrete)
328 cerr <<
"ProbDistribution: can't access string type pd with int\n";
333 if (type == tprob_discrete)
334 icounts[tprob_int(i)] = c;
336 cerr <<
"ProbDistribution: can't access string type pd with int\n";
346 if (type == tprob_discrete)
348 for (i=0; i < icounts.
length(); i++)
350 double prob = icounts.
a_no_check(i)/num_samples;
352 e += prob *
log(prob);
357 for (p=scounts.
list.head(); p != 0; p=p->next())
359 double prob = scounts.
list(p).v/num_samples;
361 e += prob *
log(prob);
372 if (type == tprob_discrete)
375 return scounts.
list.head();
380 if (type == tprob_discrete)
381 return (tprob_int(
idx) >= icounts.
length());
388 if (type == tprob_discrete)
396 if (type == tprob_discrete)
397 return discrete->name(tprob_int(
idx));
404 if (type == tprob_discrete)
406 s = discrete->name(tprob_int(
idx));
407 freq = icounts(tprob_int(
idx));
418 if (type == tprob_discrete)
420 prob = probability(tprob_int(
idx));
421 s = discrete->name(tprob_int(
idx));
442 s <<
"(" << name <<
"=" << prob <<
") ";
446 << pd.
samples() <<
" sum=" << sum <<
")";
EST_Litem * item_next(EST_Litem *idx) const
Used for iterating through members of the distribution.
void item_freq(EST_Litem *idx, EST_String &s, double &freq) const
During iteration returns name and frequency given index
EST_Litem * item_start() const
Used for iterating through members of the distribution.
void item_prob(EST_Litem *idx, EST_String &s, double &prob) const
During iteration returns name and probability given index.
const EST_String & most_probable(double *prob=NULL) const
Return the most probable member of the distribution.
const EST_String & item_name(EST_Litem *idx) const
During iteration returns name given index.
double samples(void) const
Total number of examples found.
void clear(void)
Reset, clearing all counts and vocabulary.
void override_frequency(const EST_String &s, double c)
Sets the frequency of named item, without modifying {\tt num_samples}.
double entropy(void) const
void copy(const EST_DiscreteProbDistribution &b)
Copy all data from another DPD to this.
void cumulate(const EST_String &s, double count=1)
Add this observation, may specify number of occurrences.
void set_frequency(const EST_String &s, double c)
int item_end(EST_Litem *idx) const
Used for iterating through members of the distribution.
const int length(void) const
The number of members in the discrete.
static const EST_String Empty
Constant empty string.
int add_item(const K &rkey, const V &rval, int no_search=0)
add key-val pair to list
EST_TList< EST_TKVI< K, V > > list
Linked list of key-val pairs. Don't use this as it will be made private in the future.
const V & val_def(const K &rkey, const V &def) const
value or default
void resize(int n, int set=1)
resize vector
INLINE int length() const
number of items in vector.
INLINE const T & a_no_check(int n) const
read-only const access operator: without bounds checking