libStatGen Software 1
Loading...
Searching...
No Matches
SamTags Class Reference

Class for parsing/creating/operating on SAM/BAM record tags. More...

#include <SamTags.h>

Constants for parsing tags.

static const char * BQ_TAG = "BQ"
 
static const char BQ_TAG_TYPE = 'Z'
 
static const char * MD_TAG = "MD"
 
static const char MD_TAG_TYPE = 'Z'
 
static const char * ORIG_POS_TAG = "OP"
 
static const char ORIG_POS_TAG_TYPE = 'i'
 
static const char * ORIG_CIGAR_TAG = "OC"
 
static const char ORIG_CIGAR_TAG_TYPE = 'Z'
 
static const char * ORIG_QUAL_TAG = "OQ"
 
static const char ORIG_QUAL_TAG_TYPE = 'Z'
 
static bool createMDTag (String &outputMDtag, SamRecord &inputRec, GenomeSequence &genome)
 Create the MD tag for the specified input record and the genome.
 
static bool isMDTagCorrect (SamRecord &inputRec, GenomeSequence &genome)
 Check to see if the MD tag in the record is accurate.
 
static bool updateMDTag (SamRecord &inputRec, GenomeSequence &genome)
 

Detailed Description

Class for parsing/creating/operating on SAM/BAM record tags.

Definition at line 26 of file SamTags.h.

Member Function Documentation

◆ createMDTag()

bool SamTags::createMDTag ( String outputMDtag,
SamRecord inputRec,
GenomeSequence genome 
)
static

Create the MD tag for the specified input record and the genome.

Returns
returns true if an MD tag was created, false if one could not be created.

Definition at line 34 of file SamTags.cpp.

36{
37 outputMDtag.Clear();
38 // Get the cigar to use for determing alignment.
39 Cigar* cigarInfo = inputRec.getCigarInfo();
40 if(cigarInfo == NULL)
41 {
42 throw(std::runtime_error("Cannot createMDTag - failed to get the cigar"));
43 return(false);
44 }
45 int32_t queryIndex = Cigar::INDEX_NA;
46
47 // get where this read starts on the reference.
48 uint32_t startOfReadOnRefIndex =
49 genome.getGenomePosition(inputRec.getReferenceName());
50 if(startOfReadOnRefIndex == (uint32_t)INVALID_CHROMOSOME_INDEX)
51 {
52 // Failed to find the reference for this chromosome, so return false.
53 return(false);
54 }
55 startOfReadOnRefIndex += inputRec.get0BasedPosition();
56
57 // Track the number of consecutive matches.
58 int32_t matchCount = 0;
59 // Track if it is currently in a deletion so it knows when not to add
60 // a '^'.
61 bool currentDeletion = false;
62
63 // Loop through the Reference indices (ignores insertions/pads/clips).
64 for(int refOffset = 0;
65 refOffset < cigarInfo->getExpectedReferenceBaseCount();
66 ++refOffset)
67 {
68 // Get the query index for this reference position..
69 queryIndex = cigarInfo->getQueryIndex(refOffset);
70
71 char refBase = genome[startOfReadOnRefIndex + refOffset];
72
73 if(queryIndex != Cigar::INDEX_NA)
74 {
75 // Both the reference and the read have a base, so get the bases.
76 char readBase = inputRec.getSequence(queryIndex);
77 currentDeletion = false;
78
79 // If neither base is unknown and they are the same, count it
80 // as a match.
81 if(!BaseUtilities::isAmbiguous(readBase) &&
82 !BaseUtilities::isAmbiguous(refBase) &&
83 (BaseUtilities::areEqual(readBase, refBase)))
84 {
85 // Match, so update counter.
86 ++matchCount;
87 }
88 else
89 {
90 // Mismatch, so output the number of matches if any.
91 if(matchCount != 0)
92 {
93 outputMDtag += matchCount;
94 matchCount = 0;
95 }
96 outputMDtag += refBase;
97 }
98 }
99 else
100 {
101 // This reference position is not in the query, so it is a deletion.
102 // Deletion, so output the number of matches if any.
103 if(matchCount != 0)
104 {
105 outputMDtag += matchCount;
106 matchCount = 0;
107 }
108
109 if(!currentDeletion)
110 {
111 // Not currently in a deletion, so add the ^
112 outputMDtag += '^';
113 }
114 // Add the deleted base.
115 outputMDtag += refBase;
116 currentDeletion = true;
117 }
118 }
119
120 // output the match count at the end.
121 outputMDtag += matchCount;
122 return(true);
123}
static bool isAmbiguous(char base)
Returns whether or not the specified bases is an indicator for ambiguity.
static bool areEqual(char base1, char base2)
Returns whether or not two bases are equal (case insensitive), if one of the bases is '=',...
This class represents the CIGAR without any methods to set the cigar (see CigarRoller for that).
Definition Cigar.h:84
static const int32_t INDEX_NA
Value associated with an index that is not applicable/does not exist, used for converting between que...
Definition Cigar.h:492
int getExpectedReferenceBaseCount() const
Return the number of bases in the reference that this CIGAR "spans".
Definition Cigar.cpp:120
int32_t getQueryIndex(int32_t refOffset)
Return the query index associated with the specified reference offset or INDEX_NA based on this cigar...
Definition Cigar.cpp:202
genomeIndex_t getGenomePosition(const char *chromosomeName, unsigned int chromosomeIndex) const
given a chromosome name and position, return the genome position
const char * getReferenceName()
Get the reference sequence name (RNAME) of the record.
Cigar * getCigarInfo()
Returns a pointer to the Cigar object associated with this record.
int32_t get0BasedPosition()
Get the 0-based(BAM) leftmost position of the record.
const char * getSequence()
Returns the SAM formatted sequence string (SEQ), translating the base as specified by setSequenceTran...

References BaseUtilities::areEqual(), SamRecord::get0BasedPosition(), SamRecord::getCigarInfo(), Cigar::getExpectedReferenceBaseCount(), GenomeSequence::getGenomePosition(), Cigar::getQueryIndex(), SamRecord::getReferenceName(), SamRecord::getSequence(), Cigar::INDEX_NA, and BaseUtilities::isAmbiguous().

Referenced by isMDTagCorrect(), and SamValidator::isValidTags().

◆ isMDTagCorrect()

bool SamTags::isMDTagCorrect ( SamRecord inputRec,
GenomeSequence genome 
)
static

Check to see if the MD tag in the record is accurate.

Definition at line 126 of file SamTags.cpp.

127{
128 String calcMDtag;
129 if(!createMDTag(calcMDtag, inputRec, genome))
130 {
131 // Could not generate the MD tag, so just return that it is incorrect.
132 return(false);
133 }
134
135 const String* origMDtag = inputRec.getStringTag(MD_TAG);
136
137 if(origMDtag == NULL)
138 {
139 // There was no tag.
140 // if there is not a new tag, then they are the same and true
141 // should be returned. If there is a new tag, then the old one was
142 // wrong so false should be returned. So just return the result of
143 // IsEmpty.
144 return(calcMDtag.IsEmpty());
145 }
146 else
147 {
148 // origMDtag is not NULL, so just compare the two tags.
149 return(calcMDtag == *origMDtag);
150 }
151}
const String * getStringTag(const char *tag)
Get the string value for the specified tag.
static bool createMDTag(String &outputMDtag, SamRecord &inputRec, GenomeSequence &genome)
Create the MD tag for the specified input record and the genome.
Definition SamTags.cpp:34

References createMDTag(), and SamRecord::getStringTag().

Referenced by SamValidator::isValidTags().

◆ updateMDTag()

bool SamTags::updateMDTag ( SamRecord inputRec,
GenomeSequence genome 
)
static

Definition at line 155 of file SamTags.cpp.

156{
157 // Calculate the new MD tag.
158 String calcMDtag;
159 createMDTag(calcMDtag, inputRec, genome);
160
161 // Add the MD tag. If it is already there and is different it will
162 // replace it. If it is already there and it is the same, it won't
163 // do anything.
164 return(inputRec.addTag(MD_TAG, MD_TAG_TYPE, calcMDtag.c_str()));
165}
bool addTag(const char *tag, char vtype, const char *value)
Add the specified tag,vtype,value to the record.

Member Data Documentation

◆ BQ_TAG

const char * SamTags::BQ_TAG = "BQ"
static

Definition at line 32 of file SamTags.h.

◆ BQ_TAG_TYPE

const char SamTags::BQ_TAG_TYPE = 'Z'
static

Definition at line 33 of file SamTags.h.

◆ MD_TAG

const char * SamTags::MD_TAG = "MD"
static

Definition at line 34 of file SamTags.h.

◆ MD_TAG_TYPE

const char SamTags::MD_TAG_TYPE = 'Z'
static

Definition at line 35 of file SamTags.h.

◆ ORIG_CIGAR_TAG

const char * SamTags::ORIG_CIGAR_TAG = "OC"
static

Definition at line 38 of file SamTags.h.

◆ ORIG_CIGAR_TAG_TYPE

const char SamTags::ORIG_CIGAR_TAG_TYPE = 'Z'
static

Definition at line 39 of file SamTags.h.

◆ ORIG_POS_TAG

const char * SamTags::ORIG_POS_TAG = "OP"
static

Definition at line 36 of file SamTags.h.

◆ ORIG_POS_TAG_TYPE

const char SamTags::ORIG_POS_TAG_TYPE = 'i'
static

Definition at line 37 of file SamTags.h.

◆ ORIG_QUAL_TAG

const char * SamTags::ORIG_QUAL_TAG = "OQ"
static

Definition at line 40 of file SamTags.h.

◆ ORIG_QUAL_TAG_TYPE

const char SamTags::ORIG_QUAL_TAG_TYPE = 'Z'
static

Definition at line 41 of file SamTags.h.


The documentation for this class was generated from the following files: