19#include "GenotypeLists.h"
20#include "MemoryInfo.h"
27bool Pedigree::sexAsCovariate =
false;
28String Pedigree::missing(
"-99.999");
30Pedigree::Pedigree() : pd()
32 haveTwins = count = 0;
34 persons =
new Person *[size];
36 families =
new Family * [1];
43 for (
int i = 0; i < count; i++)
46 for (
int i = 0; i < familyCount; i++)
58 QuickSort(persons, count,
sizeof(
Person *),
59 COMPAREFUNC Pedigree::ComparePersons);
67 for (
int i = 1; i < count; i++)
68 if (ComparePersons((
const Person **) &persons[i-1],
69 (
const Person **) &persons[i]) == 0)
71 printf(
"Family %s: Person %s is duplicated\n",
72 (
const char *) persons[i]->famid,
73 (
const char *) persons[i]->pid);
81 ComparePersons((
const Person **) &persons[i-1],
82 (
const Person **) &persons[i]) == 0);
86 for (
int i = 0; i < count; i++)
88 persons[i]->serial = i;
89 persons[i]->father = FindPerson(persons[i]->famid, persons[i]->fatid);
90 persons[i]->mother = FindPerson(persons[i]->famid, persons[i]->motid);
92 problem |= !persons[i]->CheckParents();
94 persons[i]->AssessStatus();
97 haveTwins |= persons[i]->zygosity;
101 error(
"Please correct problems with pedigree structure\n");
107void Pedigree::MakeSibships()
110 for (
int i = 0; i < count; i++)
111 sibs[i] = persons[i];
113 QuickSort(sibs, count,
sizeof(
Person *),
114 COMPAREFUNC Pedigree::CompareParents);
116 for (
int first = 0; first < count; first++)
117 if (!sibs[first]->isFounder())
119 int last = first + 1;
121 if (sibs[first]-> mother != sibs[last]->mother ||
122 sibs[first]-> father != sibs[last]->father)
127 for (
int j = first; j <= last; j++)
129 if (sibs[j]->sibCount)
delete [] sibs[j]->sibs;
130 sibs[j]->sibCount = last - first + 1;
131 sibs[j]->sibs =
new Person * [sibs[j]->sibCount];
132 for (
int k = first; k <= last; k++)
133 sibs[j]->sibs[k - first] = sibs[k];
140void Pedigree::MakeFamilies()
142 for (
int i = 0; i < familyCount; i++)
147 families =
new Family * [count];
149 for (
int first=0; first < count; first++)
153 if (SlowCompare(persons[first]->famid, persons[last]->famid) == 0)
157 families[familyCount] =
new Family(*
this, first, --last, familyCount);
174 int result = SlowCompare(key->famid, (**p).famid);
179 return SlowCompare(key->pid, (**p).pid);
184 return SlowCompare(key->famid, (**f).famid);
187Person * Pedigree::FindPerson(
const char * famid,
const char * pid)
194 (&key, persons, count,
sizeof(
Person *),
195 COMPAREFUNC CompareKeyToPerson);
197 return (result == NULL) ? (
Person *) NULL : *result;
200Person * Pedigree::FindPerson(
const char *famid,
const char *pid,
int universe)
207 (&key, persons, universe,
sizeof(
Person *),
208 COMPAREFUNC CompareKeyToPerson);
210 return (result == NULL) ? (
Person *) NULL : *result;
213Family * Pedigree::FindFamily(
const char * famid)
219 (&key, families, familyCount,
sizeof(
Family *),
220 COMPAREFUNC CompareKeyToFamily);
222 return (result == NULL) ? (
Family *) NULL : *result;
225int Pedigree::CountAlleles(
int marker)
227 return ::CountAlleles(*
this, marker);
230void Pedigree::LumpAlleles(
double min,
bool reorder)
233 printf(
"Lumping alleles with frequencies of %.2f or less...\n\n", min);
235 for (
int m=0; m < markerCount; m++)
236 ::LumpAlleles(*
this, m, min, reorder);
239void Pedigree::EstimateFrequencies(
int estimator,
bool quiet)
241 bool estimated =
false;
244 const char * estimators[] =
245 {
"using all genotypes",
"using founder genotypes",
"assumed equal" };
247 bool condensed = markerCount > 100;
248 int grain = markerCount / 50, estimates = 0;
250 for (
int m=0; m < markerCount; m++)
251 if (::EstimateFrequencies(*
this, m, estimator))
255 printf(
"Estimating allele frequencies... [%s]\n ",
256 estimators[estimator]), estimated =
true;
260 if (estimates++ % grain == 0)
268 if (line + markerNames[m].Length() + 1 > 79)
269 printf(
"\n "), line = 3;
271 printf(
"%s ", (
const char *) markerNames[m]);
272 line += markerNames[m].Length() + 1;
276 printf(condensed ?
"\nDone estimating frequencies for %d markers\n\n" :
"\n\n", estimates);
279int Pedigree::ComparePersons(
const Person ** p1,
const Person ** p2)
281 int result = SlowCompare((**p1).famid, (**p2).famid);
283 if (result != 0)
return result;
285 return SlowCompare((**p1).pid, (**p2).pid);
288int Pedigree::CompareParents(
const Person ** p1,
const Person ** p2)
290 int result = SlowCompare((**p1).famid, (**p2).famid);
292 if (result)
return result;
294 result = SlowCompare((**p1).fatid, (**p2).fatid);
296 if (result)
return result;
298 return SlowCompare((**p1).motid, (**p2).motid);
306 if (temp == NULL) error(
"Out of memory");
308 for (
int i=0; i<count; i++)
309 temp[i] = persons[i];
315void Pedigree::Add(
Person & rhs)
320 persons[count] =
new Person();
321 persons[count++]->Copy(rhs);
324void Pedigree::WriteDataFile(FILE * output)
330 fprintf(output,
" Z Zygosity\n");
332 for (
int m = 0; m < markerCount; m++)
333 fprintf(output,
" M %s\n", (
const char *) markerNames[m]);
335 for (
int t = 0; t < traitCount; t++)
336 fprintf(output,
" T %s\n", (
const char *) traitNames[t]);
338 for (
int a = 0; a < affectionCount; a++)
339 fprintf(output,
" A %s\n", (
const char *) affectionNames[a]);
341 for (
int c = 0; c < covariateCount; c++)
342 fprintf(output,
" C %s\n", (
const char *) covariateNames[c]);
344 for (
int s = 0; s < stringCount; s++)
345 fprintf(output,
" $ %s\n", (
const char *) stringNames[s]);
347 fprintf(output,
" E END-OF-DATA \n");
350void Pedigree::WritePedigreeFile(FILE * output)
354 for (
int i = 0; i < markerCount; i++)
355 info[i] = GetMarkerInfo(i);
357 for (
int i = 0; i < count; i++)
358 WriteRecodedPerson(output, i, info);
359 fprintf(output,
"end\n");
364void Pedigree::WritePerson(FILE * output,
int person,
const char * famid,
365 const char * pid,
const char * fatid,
const char * motid)
367 WriteRecodedPerson(output, person, NULL, famid, pid, fatid, motid);
370void Pedigree::WriteRecodedPerson(
371 FILE * output,
int person,
MarkerInfo ** markerInfo,
372 const char * famid,
const char * pid,
const char * fatid,
375 Person * p = persons[person];
377 if (famid == NULL) famid = p->famid;
378 if (pid == NULL) pid = p->pid;
379 if (fatid == NULL) fatid = p->fatid;
380 if (motid == NULL) motid = p->motid;
385 fprintf(output,
"%s\t%s\t%s\t%s\t%d\t",
386 famid, pid, fatid, motid, p->sex);
388 const char * twinCodes[] = {
"0",
"MZ",
"DZ"};
392 if (p->zygosity <= 2)
393 fprintf(output,
"%s\t", twinCodes[p->zygosity]);
395 fprintf(output,
"%d\t", p->zygosity);
398 for (
int m = 0; m < markerCount; m++)
399 if (markerInfo == NULL)
400 fprintf(output, markerCount < 20 ?
"%3d/%3d\t" :
"%d/%d\t",
401 p->markers[m][0], p->markers[m][1]);
403 fprintf(output, markerCount < 20 ?
"%3s/%3s\t" :
"%s/%s\t",
404 (const char *) markerInfo[m]->GetAlleleLabel(p->markers[m][0]),
405 (const char *) markerInfo[m]->GetAlleleLabel(p->markers[m][1]));
407 for (
int t = 0; t < traitCount; t++)
408 if (p->isPhenotyped(t))
409 fprintf(output,
"%.3f\t", p->traits[t]);
411 fprintf(output,
"x\t");
413 for (
int a = 0; a < affectionCount; a++)
414 if (p->isDiagnosed(a))
415 fprintf(output,
"%d\t", p->affections[a]);
417 fprintf(output,
"x\t");
419 for (
int c = 0; c < covariateCount; c++)
420 if (p->isControlled(c))
421 fprintf(output,
"%.3f\t", p->covariates[c]);
423 fprintf(output,
"x\t");
425 for (
int s = 0; s < stringCount; s++)
426 if (!p->strings[s].IsEmpty())
427 fprintf(output,
"%s\t", (
const char *) p->strings[s]);
429 fprintf(output,
".\t");
431 fprintf(output,
"\n");
434void Pedigree::WriteDataFile(
const char * output)
436 FILE * f = fopen(output,
"wt");
437 if (f == NULL) error(
"Couldn't open data file %s", output);
442void Pedigree::WritePedigreeFile(
const char * output)
444 FILE * f = fopen(output,
"wt");
445 if (f == NULL) error(
"Couldn't open pedigree file %s", output);
446 WritePedigreeFile(f);
450void Pedigree::PrepareDichotomization()
453 for (
int t = 0; t < traitCount; t++)
455 String new_affection = traitNames[t] +
"*";
456 GetAffectionID(new_affection);
460int Pedigree::Dichotomize(
int t,
double mean)
462 String new_affection = traitNames[t] +
"*";
464 int af = GetAffectionID(new_affection);
470 for (
int i = 0; i < count; i++)
471 if (persons[i]->isPhenotyped(t) &&
472 !persons[i]->isFounder())
474 mean += persons[i]->traits[t];
478 if (!dcount)
return af;
483 printf(
"Dichotomizing %s around mean of %.3f ...\n",
484 (
const char *) traitNames[t], mean);
486 for (
int i = 0; i < count; i++)
487 if (persons[i]->isPhenotyped(t) && !persons[i]->isFounder())
488 persons[i]->affections[af] = persons[i]->traits[t] > mean ? 2 : 1;
490 persons[i]->affections[af] = 0;
497void Pedigree::DichotomizeAll(
double mean)
499 for (
int t = 0; t < traitCount; t++)
500 Dichotomize(t, mean);
503bool Pedigree::InheritanceCheck(
bool abortIfInconsistent)
507 if (haveTwins) fail |= TwinCheck();
510 fail |= SexLinkedCheck();
512 fail |= AutosomalCheck();
514 if (fail && abortIfInconsistent)
515 error(
"Mendelian inheritance errors detected\n");
520bool Pedigree::AutosomalCheck()
523 IntArray haplos, genos, counts, failedFamilies;
528 for (
int m = 0; m < markerCount; m++)
533 int alleleCount = CountAlleles(m);
534 int genoCount = alleleCount * (alleleCount + 1) / 2;
537 haplos.Dimension(alleleCount + 1);
540 genos.Dimension(genoCount + 1);
543 failedFamilies.Dimension(familyCount);
544 failedFamilies.Zero();
546 counts.Dimension(alleleCount + 1);
548 for (
int f = 0; f < familyCount; f++)
549 for (
int i = families[f]->first; i <= families[f]->last; i++)
550 if (!persons[i]->isFounder() && persons[i]->sibs[0] == persons[i])
553 Alleles fat = persons[i]->father->markers[m];
554 Alleles mot = persons[i]->mother->markers[m];
555 bool fgeno = fat.isKnown();
556 bool mgeno = mot.isKnown();
559 int haplo = 0, homo = 0, diplo = 0;
565 bool too_many_genos =
false;
567 for (
int j = 0; j < persons[i]->sibCount; j++)
568 if (persons[i]->sibs[j]->isGenotyped(m))
570 Alleles geno = persons[i]->sibs[j]->markers[m];
572 int fat1 = fat.hasAllele(geno.one);
573 int fat2 = fat.hasAllele(geno.two);
574 int mot1 = mot.hasAllele(geno.one);
575 int mot2 = mot.hasAllele(geno.two);
577 if ((fgeno && mgeno && !((fat1 && mot2) || (fat2 && mot1))) ||
578 (fgeno && !(fat1 || fat2)) || (mgeno && !(mot1 || mot2)))
580 printf(
"%s - Fam %s: Child %s [%s/%s] has ",
581 (
const char *) markerNames[m],
582 (
const char *) persons[i]->sibs[j]->famid,
583 (
const char *) persons[i]->sibs[j]->pid,
584 (
const char *) info->GetAlleleLabel(geno.one),
585 (
const char *) info->GetAlleleLabel(geno.two));
587 if (!fgeno || !mgeno)
588 printf(
"%s [%s/%s]\n",
589 fgeno ?
"father" :
"mother",
590 (const char *) info->GetAlleleLabel(fgeno ? fat.one : mot.one),
591 (const char *) info->GetAlleleLabel(fgeno ? fat.two : mot.two));
593 printf(
"parents [%s/%s]*[%s/%s]\n",
594 (
const char *) info->GetAlleleLabel(fat.one),
595 (
const char *) info->GetAlleleLabel(fat.two),
596 (
const char *) info->GetAlleleLabel(mot.one),
597 (
const char *) info->GetAlleleLabel(mot.two));
600 failedFamilies[f] =
true;
604 if (haplos[geno.one] != i)
607 haplos[geno.one] = i;
609 if (haplos[geno.two] != i)
612 haplos[geno.two] = i;
615 int index = geno.SequenceCoded();
617 if (genos[index] != i)
622 if (geno.isHomozygous())
626 if (counts[geno.one] > 2) too_many_genos =
true;
627 if (counts[geno.two] > 2) too_many_genos =
true;
634 if (haplos[fat.one] != i)
639 if (haplos[fat.two] != i)
644 homo += fat.isHomozygous();
649 if (haplos[mot.one] != i)
654 if (haplos[mot.two] != i)
659 homo += mot.isHomozygous();
662 if (diplo > 4 || haplo + homo > 4 || (haplo == 4 && too_many_genos))
664 printf(
"%s - Fam %s: ",
665 (
const char *) markerNames[m],
666 (
const char *) persons[i]->famid);
667 if (persons[i]->father->markers[m].isKnown())
668 printf(
"Father %s [%s/%s] has children [",
669 (
const char *) persons[i]->father->pid,
670 (
const char *) info->GetAlleleLabel(fat.one),
671 (
const char *) info->GetAlleleLabel(fat.two));
672 else if (persons[i]->mother->markers[m].isKnown())
673 printf(
"Mother %s [%s/%s] has children [",
674 (
const char *) persons[i]->mother->pid,
675 (
const char *) info->GetAlleleLabel(mot.one),
676 (
const char *) info->GetAlleleLabel(mot.two));
678 printf(
"Couple %s * %s has children [",
679 (
const char *) persons[i]->mother->pid,
680 (
const char *) persons[i]->father->pid);
682 for (
int j = 0; j < persons[i]->sibCount; j++)
683 printf(
"%s%s/%s", j == 0 ?
"" :
" ",
684 (const char *) info->GetAlleleLabel(persons[i]->sibs[j]->markers[m].one),
685 (const char *) info->GetAlleleLabel(persons[i]->sibs[j]->markers[m].two));
689 failedFamilies[f] =
true;
693 for (
int f = 0; f < familyCount; f++)
694 if (!failedFamilies[f] &&
695 (families[f]->count > families[f]->founders + 1) &&
696 !families[f]->isNuclear())
697 fail |= !GenotypeList::EliminateGenotypes(*
this, families[f], m);
701 printf(
"\nMendelian inheritance errors detected\n");
706bool Pedigree::SexLinkedCheck()
712 IntArray failedFamilies(familyCount);
715 for (
int m = 0; m < markerCount; m++)
719 failedFamilies.Zero();
722 for (
int f = 0; f < familyCount; f++)
723 for (
int i = families[f]->first; i <= families[f]->last; i++)
724 if (persons[i]->sex == SEX_MALE && persons[i]->markers[m].isKnown() &&
725 !persons[i]->markers[m].isHomozygous())
727 printf(
"%s - Fam %s: Male %s has two X alleles [%s/%s]\n",
728 (
const char *) markerNames[m],
729 (
const char *) persons[i]->famid, (
const char *) persons[i]->pid,
730 (
const char *) info->GetAlleleLabel(persons[i]->markers[m].one),
731 (
const char *) info->GetAlleleLabel(persons[i]->markers[m].two));
734 persons[i]->markers[m][0] = persons[i]->markers[m][1] = 0;
737 failedFamilies[f] =
true;
742 for (
int f = 0; f < familyCount; f++)
743 for (
int i = families[f]->first; i <= families[f]->last; i++)
744 if (!persons[i]->isFounder() && persons[i]->sibs[0] == persons[i])
747 Alleles fat = persons[i]->father->markers[m];
748 Alleles mot = persons[i]->mother->markers[m];
750 bool fgeno = fat.isKnown();
751 bool mgeno = mot.isKnown();
757 bool mother_ok =
true;
761 for (
int j = 0; j < persons[i]->sibCount; j++)
762 if (persons[i]->sibs[j]->isGenotyped(m))
764 Alleles geno = persons[i]->sibs[j]->markers[m];
766 bool fat1 = fat.hasAllele(geno.one);
767 bool fat2 = fat.hasAllele(geno.two);
768 bool mot1 = mot.hasAllele(geno.one);
769 bool mot2 = mot.hasAllele(geno.two);
771 int sex = persons[i]->sibs[j]->sex;
777 printf(
"%s - Fam %s: Child %s [%s/Y] has mother [%s/%s]\n",
778 (
const char *) markerNames[m],
779 (
const char *) persons[i]->famid,
780 (
const char *) persons[i]->sibs[j]->pid,
781 (
const char *) info->GetAlleleLabel(geno.one),
782 (
const char *) info->GetAlleleLabel(mot.one),
783 (
const char *) info->GetAlleleLabel(mot.two));
785 failedFamilies[f] =
true;
788 mother_ok &= inferred_mother.AddAllele(geno.one);
790 if (sex == SEX_FEMALE)
792 if ((fgeno && mgeno && !((fat1 && mot2) || (fat2 && mot1))) ||
793 (fgeno && !(fat1 || fat2)) || (mgeno && !(mot1 || mot2)))
795 printf(
"%s - Fam %s: Child %s [%s/%s] has ",
796 (
const char *) markerNames[m],
797 (
const char *) persons[i]->famid,
798 (
const char *) persons[i]->sibs[j]->pid,
799 (
const char *) info->GetAlleleLabel(geno.one),
800 (
const char *) info->GetAlleleLabel(geno.two));
803 printf(
"mother [%s/%s]\n",
804 (
const char *) info->GetAlleleLabel(mot.one),
805 (
const char *) info->GetAlleleLabel(mot.two));
807 printf(
"father [%s/Y]\n",
808 (
const char *) info->GetAlleleLabel(fat.one));
810 printf(
"parents [%s/Y]*[%s/%s]\n",
811 (
const char *) info->GetAlleleLabel(fat.one),
812 (
const char *) info->GetAlleleLabel(mot.one),
813 (
const char *) info->GetAlleleLabel(mot.two));
816 failedFamilies[f] =
true;
821 inferred_father = first_sister = geno;
822 else if (first_sister != geno)
824 inferred_father.Intersect(geno);
826 mother_ok &= inferred_mother.AddAllele(
827 geno.otherAllele(inferred_father.one));
828 mother_ok &= inferred_mother.AddAllele(
829 first_sister.otherAllele(inferred_father.one));
832 if (!fgeno && (mot1 ^ mot2))
833 inferred_father.Intersect(mot1 ? geno.two : geno.one);
835 if (!mgeno && (fat1 ^ fat2))
836 mother_ok &= inferred_mother.AddAllele(fat1 ? geno.two : geno.one);
841 if (!mother_ok || (sisters && !inferred_father.isKnown()))
843 printf(
"%s - Fam %s: ",
844 (
const char *) markerNames[m],
845 (
const char *) persons[i]->famid);
847 printf(
"Father %s [%s/Y] has children [",
848 (
const char *) persons[i]->father->pid,
849 (
const char *) info->GetAlleleLabel(fat.one));
851 printf(
"Mother %s [%s/%s] has children [",
852 (
const char *) persons[i]->mother->pid,
853 (
const char *) info->GetAlleleLabel(mot.one),
854 (
const char *) info->GetAlleleLabel(mot.two));
856 printf(
"Couple %s * %s has children [",
857 (
const char *) persons[i]->mother->pid,
858 (
const char *) persons[i]->father->pid);
860 for (
int j = 0; j < persons[i]->sibCount; j++)
862 persons[i]->sibs[j]->sex == SEX_MALE ?
"%s%s/Y" :
"%s%s/%s",
864 (const char *) info->GetAlleleLabel(persons[i]->sibs[j]->markers[m].one),
865 (const char *) info->GetAlleleLabel(persons[i]->sibs[j]->markers[m].two));
868 failedFamilies[f] =
true;
872 for (
int f = 0; f < familyCount; f++)
873 if (!failedFamilies[f] &&
874 (families[f]->count > families[f]->founders + 1) &&
875 !families[f]->isNuclear())
876 fail |= !GenotypeList::EliminateGenotypes(*
this, families[f], m);
880 printf(
"\nMendelian inheritance errors detected\n");
885void Pedigree::ExtractFamily(
int id,
Pedigree & single_fam_ped)
887 for (
int i = families[
id]->first; i <= families[id]->last; i++)
888 single_fam_ped.Add(*persons[i]);
890 single_fam_ped.Sort();
893void Pedigree::ExtractOnAffection(
int a,
Pedigree & new_ped,
int target_status)
895 for (
int i = 0; i < count; i++)
896 if (persons[i]->affections[a] == target_status)
897 new_ped.Add(*persons[i]);
901 blank_person.CopyIDs(*persons[i]);
902 new_ped.Add(blank_person);
908void Pedigree::Filter(
IntArray & filter)
910 if (filter.Length() != count)
911 error(
"Pedigree:Size of pedigree filter doesn't match number of persons in pedigree");
913 for (
int i = 0; i < count; i++)
916 persons[i]->WipePhenotypes();
917 persons[i]->filter =
true;
921void Pedigree::AddPerson(
const char * famid,
const char * pid,
922 const char * fatid,
const char * motid,
923 int sex,
bool delay_sort)
925 if (count == size) Grow();
927 persons[count] =
new Person;
929 persons[count]->famid = famid;
930 persons[count]->pid = pid;
931 persons[count]->fatid = fatid;
932 persons[count]->motid = motid;
933 persons[count]->sex = sex;
937 if (!delay_sort) Sort();
940void Pedigree::ShowMemoryInfo()
942 unsigned int bytes = 0;
944 for (
int i = 0; i < count; i++)
945 bytes += persons[i]->famid.BufferSize() + persons[i]->pid.BufferSize() +
946 persons[i]->fatid.BufferSize() + persons[i]->motid.BufferSize();
948 bytes += count * (markerCount *
sizeof(
Alleles) + traitCount *
sizeof(
double) +
949 covariateCount *
sizeof(double) + affectionCount *
sizeof(
char) +
952 printf(
" %40s %s\n",
"Pedigree file ...", (
const char *) MemoryInfo(bytes));