20#include "SamQuerySeqWithRefHelper.h"
21#include "BaseUtilities.h"
24SamQuerySeqWithRefIter::SamQuerySeqWithRefIter(
SamRecord& record,
28 myRefSequence(refSequence),
30 myStartOfReadOnRefIndex(INVALID_GENOME_INDEX),
34 myCigar = myRecord.getCigarInfo();
35 myStartOfReadOnRefIndex =
38 if(myStartOfReadOnRefIndex != INVALID_GENOME_INDEX)
42 myStartOfReadOnRefIndex += myRecord.get0BasedPosition();
47 myQueryIndex = myRecord.getReadLength() - 1;
52SamQuerySeqWithRefIter::~SamQuerySeqWithRefIter()
69 myStartOfReadOnRefIndex =
72 if(myStartOfReadOnRefIndex != INVALID_GENOME_INDEX)
106 if(!SamFlag::isMapped(myRecord.
getFlag()))
116 throw(std::runtime_error(
"Cannot determine matches/mismatches since failed to retrieve the cigar"));
122 if(myStartOfReadOnRefIndex == INVALID_GENOME_INDEX)
140 int32_t refOffset = myCigar->
getRefOffset(myQueryIndex);
152 char refBase = myRefSequence[myStartOfReadOnRefIndex + refOffset];
173 matchMismatchInfo.
setType(SamSingleBaseMatchInfo::MATCH);
181 matchMismatchInfo.
setType(SamSingleBaseMatchInfo::MISMATCH);
193void SamQuerySeqWithRefIter::nextIndex()
206SamSingleBaseMatchInfo::SamSingleBaseMatchInfo()
213SamSingleBaseMatchInfo::~SamSingleBaseMatchInfo()
226 return(myQueryIndex);
238 myQueryIndex = queryIndex;
244 int32_t seq0BasedPos,
246 const char* referenceName,
248 std::string& updatedSeq)
250 updatedSeq = currentSeq;
252 int32_t seqLength = updatedSeq.length();
253 int32_t queryIndex = 0;
255 uint32_t startOfReadOnRefIndex =
258 if(startOfReadOnRefIndex == INVALID_GENOME_INDEX)
264 startOfReadOnRefIndex += seq0BasedPos;
267 while(queryIndex < seqLength)
276 char readBase = currentSeq[queryIndex];
277 char refBase = refSequence[startOfReadOnRefIndex + refOffset];
286 updatedSeq[queryIndex] =
'=';
297 int32_t seq0BasedPos,
299 const char* referenceName,
301 std::string& updatedSeq)
303 updatedSeq = currentSeq;
305 int32_t seqLength = updatedSeq.length();
306 int32_t queryIndex = 0;
308 uint32_t startOfReadOnRefIndex =
311 if(startOfReadOnRefIndex == INVALID_GENOME_INDEX)
317 startOfReadOnRefIndex += seq0BasedPos;
320 while(queryIndex < seqLength)
329 char readBase = currentSeq[queryIndex];
330 char refBase = refSequence[startOfReadOnRefIndex + refOffset];
339 updatedSeq[queryIndex] = refBase;
static bool isAmbiguous(char base)
Returns whether or not the specified bases is an indicator for ambiguity.
static bool areEqual(char base1, char base2)
Returns whether or not two bases are equal (case insensitive), if one of the bases is '=',...
This class represents the CIGAR without any methods to set the cigar (see CigarRoller for that).
static const int32_t INDEX_NA
Value associated with an index that is not applicable/does not exist, used for converting between que...
int32_t getRefOffset(int32_t queryIndex)
Return the reference offset associated with the specified query index or INDEX_NA based on this cigar...
Create/Access/Modify/Load Genome Sequences stored as binary mapped files.
genomeIndex_t getGenomePosition(const char *chromosomeName, unsigned int chromosomeIndex) const
given a chromosome name and position, return the genome position
bool reset(bool forward=true)
Reset to start at the beginning of the record.
bool getNextMatchMismatch(SamSingleBaseMatchInfo &matchMismatchInfo)
Returns information for the next position where the query and the reference match or mismatch.
static void seqWithoutEquals(const char *currentSeq, int32_t seq0BasedPos, Cigar &cigar, const char *referenceName, const GenomeSequence &refSequence, std::string &updatedSeq)
Gets the sequence converting '=' to the appropriate base using the reference.
static void seqWithEquals(const char *currentSeq, int32_t seq0BasedPos, Cigar &cigar, const char *referenceName, const GenomeSequence &refSequence, std::string &updatedSeq)
Gets the sequence with '=' in any position where the sequence matches the reference.
Class providing an easy to use interface to get/set/operate on the fields in a SAM/BAM record.
const char * getReferenceName()
Get the reference sequence name (RNAME) of the record.
@ NONE
Leave the sequence as is.
Cigar * getCigarInfo()
Returns a pointer to the Cigar object associated with this record.
uint16_t getFlag()
Get the flag (FLAG).
int32_t getReadLength()
Get the length of the read.
int32_t get0BasedPosition()
Get the 0-based(BAM) leftmost position of the record.
const char * getSequence()
Returns the SAM formatted sequence string (SEQ), translating the base as specified by setSequenceTran...
This class contains the match/mismatch information between the reference and a read for a single base...
void setQueryIndex(int32_t queryIndex)
Set the query index for this object.
void setType(Type newType)
Set the type (match/mismatch/unkown) for this object.
Type
More types can be added later as needed.
int32_t getQueryIndex()
Get the query index for this object.
Type getType()
Get the type (match/mismatch/unknown) for this object.