libStatGen Software 1
Loading...
Searching...
No Matches
BamIndex Class Reference
Inheritance diagram for BamIndex:
Collaboration diagram for BamIndex:

Public Member Functions

virtual void resetIndex ()
 Reset the member data for a new index file.
 
SamStatus::Status readIndex (const char *filename)
 
bool getChunksForRegion (int32_t refID, int32_t start, int32_t end, SortedChunkList &chunkList)
 Get the list of chunks associated with this region.
 
uint64_t getMaxOffset () const
 
bool getReferenceMinMax (int32_t refID, uint64_t &minOffset, uint64_t &maxOffset) const
 Get the minimum and maximum file offsets for the specfied reference ID.
 
int32_t getNumMappedReads (int32_t refID)
 Get the number of mapped reads for this reference id.
 
int32_t getNumUnMappedReads (int32_t refID)
 Get the number of unmapped reads for this reference id.
 
void printIndex (int32_t refID, bool summary=false)
 Print the index information.
 
- Public Member Functions inherited from IndexBase
int32_t getNumRefs () const
 Get the number of references in this index.
 
bool getMinOffsetFromLinearIndex (int32_t refID, uint32_t position, uint64_t &minOffset) const
 

Static Public Attributes

static const int32_t UNKNOWN_NUM_READS = -1
 The number used for an unknown number of reads.
 
static const int32_t REF_ID_UNMAPPED = -1
 The number used for the reference id of unmapped reads.
 
static const int32_t REF_ID_ALL = -2
 The number used to indicate that all reference ids should be used.
 

Additional Inherited Members

- Static Protected Member Functions inherited from IndexBase
static void getBinsForRegion (uint32_t start, uint32_t end, bool binMap[MAX_NUM_BINS+1])
 
- Protected Attributes inherited from IndexBase
int32_t n_ref
 
std::vector< ReferencemyRefs
 
- Static Protected Attributes inherited from IndexBase
static const uint32_t MAX_NUM_BINS = 37450
 
static const uint32_t MAX_POSITION = 536870911
 
static const uint32_t LINEAR_INDEX_SHIFT = 14
 

Detailed Description

Definition at line 31 of file BamIndex.h.

Constructor & Destructor Documentation

◆ BamIndex()

BamIndex::BamIndex ( )

Definition at line 21 of file BamIndex.cpp.

22 : IndexBase(),
23 maxOverallOffset(0),
24 myUnMappedNumReads(-1)
25{
26}

◆ ~BamIndex()

BamIndex::~BamIndex ( )
virtual

Definition at line 29 of file BamIndex.cpp.

30{
31}

Member Function Documentation

◆ getChunksForRegion()

bool BamIndex::getChunksForRegion ( int32_t  refID,
int32_t  start,
int32_t  end,
SortedChunkList chunkList 
)

Get the list of chunks associated with this region.

For an entire reference ID, set start and end to -1. To start at the beginning of the region, set start to 0/-1. To go to the end of the region, set end to -1.

Definition at line 218 of file BamIndex.cpp.

220{
221 chunkList.clear();
222
223 // If start is >= to end, there will be no sections, return no
224 // regions.
225 if((start >= end) && (end != -1))
226 {
227 std::cerr << "Warning, requesting region where start <= end, so "
228 << "no values will be returned.\n";
229 return(false);
230 }
231
232 // Handle REF_ID_UNMAPPED. This uses a default chunk which covers
233 // from the max offset to the end of the file.
234 if(refID == REF_ID_UNMAPPED)
235 {
236 Chunk refChunk;
237 // The start of the unmapped region is the max offset found
238 // in the index file.
239 refChunk.chunk_beg = getMaxOffset();
240 // The end of the unmapped region is the end of the file, so
241 // set chunk end to the max value.
242 refChunk.chunk_end = Chunk::MAX_CHUNK_VALUE;
243 return(chunkList.insert(refChunk));
244 }
245
246 if((refID < 0) || (refID >= n_ref))
247 {
248 // The specified refID is out of range, return false.
249 std::cerr << "Warning, requesting refID is out of range, so "
250 << "no values will be returned.\n";
251 return(false);
252 }
253
254 const Reference* ref = &(myRefs[refID]);
255
256 // Handle where start/end are defaults.
257 if(start == -1)
258 {
259 if(end == -1)
260 {
261 // This is whole chromosome, so take a shortcut.
262 if(ref->maxChunkOffset == 0)
263 {
264 // No chunks for this region, but this is not an error.
265 return(true);
266 }
267 Chunk refChunk;
268 refChunk.chunk_beg = ref->minChunkOffset;
269 refChunk.chunk_end = ref->maxChunkOffset;
270 return(chunkList.insert(refChunk));
271 }
272 else
273 {
274 start = 0;
275 }
276 }
277 if(end == -1)
278 {
279 // MAX_POSITION is inclusive, but end is exclusive, so add 1.
280 end = MAX_POSITION + 1;
281 }
282
283 // Determine the minimum offset for the given start position. This
284 // is done by using the linear index for the specified start position.
285 uint64_t minOffset = 0;
286 getMinOffsetFromLinearIndex(refID, start, minOffset);
287
288 bool binInRangeMap[MAX_NUM_BINS+1];
289
290 getBinsForRegion(start, end, binInRangeMap);
291
292 // Loop through the bins in the ref and if they are in the region, get the chunks.
293 for(int i = 0; i < ref->n_bin; ++i)
294 {
295 const Bin* bin = &(ref->bins[i]);
296 if(binInRangeMap[bin->bin] == false)
297 {
298 // This bin is not in the region, so check the next one.
299 continue;
300 }
301
302 // Add each chunk in the bin to the map.
303 for(int chunkIndex = 0; chunkIndex < bin->n_chunk; chunkIndex++)
304 {
305 // If the end of the chunk is less than the minimum offset
306 // for the 16K block that starts our region, then no
307 // records in this chunk will cross our region, so do
308 // not add it to the chunks we need to use.
309 if(bin->chunks[chunkIndex].chunk_end < minOffset)
310 {
311 continue;
312 }
313 // Add the chunk to the map.
314 if(!chunkList.insert(bin->chunks[chunkIndex]))
315 {
316 // Failed to add to the map, return false.
317 std::cerr << "Warning, Failed to add a chunk, so "
318 << "no values will be returned.\n";
319 return(false);
320 }
321 }
322 }
323
324 // Now that all chunks have been added to the list,
325 // handle overlapping chunks.
326 return(chunkList.mergeOverlapping());
327}
static const int32_t REF_ID_UNMAPPED
The number used for the reference id of unmapped reads.
Definition BamIndex.h:86

References REF_ID_UNMAPPED.

◆ getMaxOffset()

uint64_t BamIndex::getMaxOffset ( ) const

Definition at line 331 of file BamIndex.cpp.

332{
333 return(maxOverallOffset);
334}

◆ getNumMappedReads()

int32_t BamIndex::getNumMappedReads ( int32_t  refID)

Get the number of mapped reads for this reference id.

Returns -1 for out of range refIDs.

Parameters
refIDreference ID for which to extract the number of mapped reads.
Returns
number of mapped reads for the specified reference id.

Definition at line 355 of file BamIndex.cpp.

356{
357 // If it is the reference id of unmapped reads, return
358 // that there are no mapped reads.
359 if(refID == REF_ID_UNMAPPED)
360 {
361 // These are by definition all unmapped reads.
362 return(0);
363 }
364
365 if((refID < 0) || (refID >= (int32_t)myRefs.size()))
366 {
367 // Reference ID is out of range for this index file.
368 return(-1);
369 }
370
371 // Get this reference.
372 return(myRefs[refID].n_mapped);
373}

References REF_ID_UNMAPPED.

Referenced by SamFile::getNumMappedReadsFromIndex(), and SamFile::getNumMappedReadsFromIndex().

◆ getNumUnMappedReads()

int32_t BamIndex::getNumUnMappedReads ( int32_t  refID)

Get the number of unmapped reads for this reference id.

Returns -1 for out of range refIDs.

Parameters
refIDreference ID for which to extract the number of unmapped reads.
Returns
number of unmapped reads for the specified reference id

Definition at line 377 of file BamIndex.cpp.

378{
379 // If it is the reference id of unmapped reads, return
380 // that value.
381 if(refID == REF_ID_UNMAPPED)
382 {
383 return(myUnMappedNumReads);
384 }
385
386 if((refID < 0) || (refID >= (int32_t)myRefs.size()))
387 {
388 // Reference ID is out of range for this index file.
389 return(-1);
390 }
391
392 // Get this reference.
393 return(myRefs[refID].n_unmapped);
394}

References REF_ID_UNMAPPED.

Referenced by SamFile::getNumUnMappedReadsFromIndex(), and SamFile::getNumUnMappedReadsFromIndex().

◆ getReferenceMinMax()

bool BamIndex::getReferenceMinMax ( int32_t  refID,
uint64_t &  minOffset,
uint64_t &  maxOffset 
) const

Get the minimum and maximum file offsets for the specfied reference ID.

Parameters
refIDthe reference ID to locate in the file.
minOffsetreturns the min file offset for the specified reference
maxOffsetreturns the max file offset for the specified reference
Returns
whether or not the reference was found in the file

Definition at line 337 of file BamIndex.cpp.

340{
341 if((refID < 0) || (refID >= (int32_t)myRefs.size()))
342 {
343 // Reference ID is out of range for this index file.
344 return(false);
345 }
346
347 // Get this reference.
348 minOffset = myRefs[refID].minChunkOffset;
349 maxOffset = myRefs[refID].maxChunkOffset;
350 return(true);
351}

◆ printIndex()

void BamIndex::printIndex ( int32_t  refID,
bool  summary = false 
)

Print the index information.

Parameters
refIDreference ID for which to print info for. -1 means print for all references.
summarywhether or not to just print a summary (defaults to false). The summary just contains summary info for each reference and not every bin/chunk.

Definition at line 398 of file BamIndex.cpp.

399{
400 std::cout << "BAM Index: " << std::endl;
401 std::cout << "# Reference Sequences: " << n_ref << std::endl;
402
403 unsigned int startRef = 0;
404 unsigned int endRef = myRefs.size() - 1;
405 std::vector<Reference> refsToProcess;
406 if(refID != -1)
407 {
408 // Set start and end ref to the specified reference id.
409 startRef = refID;
410 endRef = refID;
411 }
412
413 // Print out the information for each bin.
414 for(unsigned int i = startRef; i <= endRef; ++i)
415 {
416 std::cout << std::dec
417 << "\tReference ID: " << std::setw(4) << i
418 << "; #Bins: "<< std::setw(6) << myRefs[i].n_bin
419 << "; #Linear Index Entries: "
420 << std::setw(6) << myRefs[i].n_intv
421 << "; Min Chunk Offset: "
422 << std::setw(18) << std::hex << std::showbase << myRefs[i].minChunkOffset
423 << "; Max Chunk Offset: "
424 << std::setw(18) << myRefs[i].maxChunkOffset
425 << std::dec;
426 // Print the mapped/unmapped if set.
427 if(myRefs[i].n_mapped != Reference::UNKNOWN_MAP_INFO)
428 {
429 std::cout << "; " << myRefs[i].n_mapped << " Mapped Reads";
430 }
431 if(myRefs[i].n_mapped != Reference::UNKNOWN_MAP_INFO)
432 {
433 std::cout << "; " << myRefs[i].n_unmapped << " Unmapped Reads";
434 }
435 std::cout << std::endl;
436
437 // Only print more details if not summary.
438 if(!summary)
439 {
440 std::vector<Bin>::iterator binIter;
441 for(binIter = myRefs[i].bins.begin();
442 binIter != myRefs[i].bins.end();
443 ++binIter)
444 {
445 Bin* binPtr = &(*binIter);
446 if(binPtr->bin == Bin::NOT_USED_BIN)
447 {
448 // This bin is not used, continue.
449 continue;
450 }
451 // Print the bin info.
452 std::cout << "\t\t\tBin Name: " << binPtr->bin << std::endl;
453 std::cout << "\t\t\t# Chunks: " << binPtr->n_chunk << std::endl;
454 std::cout << std::hex << std::showbase;
455
456 for(int chunkIndex = 0; chunkIndex < binPtr->n_chunk;
457 ++chunkIndex)
458 {
459 // If this is the last chunk of the MAX_NUM_BINS - it
460 // contains a mapped/unmapped count rather than the regular
461 // chunk addresses.
462 if((binPtr->bin != MAX_NUM_BINS) ||
463 (chunkIndex != (binPtr->n_chunk - 1)))
464 {
465 std::cout << "\t\t\t\tchunk_beg: "
466 << binPtr->chunks[chunkIndex].chunk_beg
467 << std::endl;
468 std::cout << "\t\t\t\tchunk_end: "
469 << binPtr->chunks[chunkIndex].chunk_end
470 << std::endl;
471 }
472 }
473 }
474 std::cout << std::dec;
475
476 // Print the linear index.
477 for(int linearIndex = 0; linearIndex < myRefs[i].n_intv;
478 ++linearIndex)
479 {
480 if(myRefs[i].ioffsets[linearIndex] != 0)
481 {
482 std::cout << "\t\t\tLinearIndex["
483 << std::dec << linearIndex << "] Offset: "
484 << std::hex << myRefs[i].ioffsets[linearIndex]
485 << std::endl;
486 }
487 }
488 }
489 }
490}

◆ readIndex()

SamStatus::Status BamIndex::readIndex ( const char *  filename)
virtual
Parameters
filenamethe bam index file to be read.
Returns
the status of the read.

Implements IndexBase.

Definition at line 45 of file BamIndex.cpp.

46{
47 // Reset the index from anything that may previously be set.
48 resetIndex();
49
50 IFILE indexFile = ifopen(filename, "rb");
51
52 // Failed to open the index file.
53 if(indexFile == NULL)
54 {
55 return(SamStatus::FAIL_IO);
56 }
57
58 // generate the bam index structure.
59
60 // Read the magic string.
61 char magic[4];
62 if(ifread(indexFile, magic, 4) != 4)
63 {
64 // Failed to read the magic
65 ifclose(indexFile);
66 return(SamStatus::FAIL_IO);
67 }
68
69 // If this is not an index file, set num references to 0.
70 if (magic[0] != 'B' || magic[1] != 'A' || magic[2] != 'I' || magic[3] != 1)
71 {
72 // Not a BAM Index file.
73 ifclose(indexFile);
75 }
76
77 // It is a bam index file.
78 // Read the number of reference sequences.
79 if(ifread(indexFile, &n_ref, 4) != 4)
80 {
81 // Failed to read.
82 ifclose(indexFile);
83 return(SamStatus::FAIL_IO);
84 }
85
86 // Size the references.
87 myRefs.resize(n_ref);
88
89 for(int refIndex = 0; refIndex < n_ref; refIndex++)
90 {
91 // Read each reference.
92 Reference* ref = &(myRefs[refIndex]);
93
94 // Read the number of bins.
95 if(ifread(indexFile, &(ref->n_bin), 4) != 4)
96 {
97 // Failed to read the number of bins.
98 // Return failure.
99 ifclose(indexFile);
100 return(SamStatus::FAIL_PARSE);
101 }
102
103 // If there are no bins, then there are no
104 // mapped/unmapped reads.
105 if(ref->n_bin == 0)
106 {
107 ref->n_mapped = 0;
108 ref->n_unmapped = 0;
109 }
110
111 // Resize the bins so they can be indexed by bin number.
112 ref->bins.resize(ref->n_bin + 1);
113
114 // Read each bin.
115 for(int binIndex = 0; binIndex < ref->n_bin; binIndex++)
116 {
117 uint32_t binNumber;
118
119 // Read in the bin number.
120 if(ifread(indexFile, &(binNumber), 4) != 4)
121 {
122 // Failed to read the bin number.
123 // Return failure.
124 ifclose(indexFile);
125 return(SamStatus::FAIL_IO);
126 }
127
128 // Add the bin to the reference and get the
129 // pointer back so the values can be set in it.
130 Bin* binPtr = &(ref->bins[binIndex]);
131 binPtr->bin = binNumber;
132
133 // Read in the number of chunks.
134 if(ifread(indexFile, &(binPtr->n_chunk), 4) != 4)
135 {
136 // Failed to read number of chunks.
137 // Return failure.
138 ifclose(indexFile);
139 return(SamStatus::FAIL_IO);
140 }
141
142 // Read in the chunks.
143 // Allocate space for the chunks.
144 uint32_t sizeOfChunkList = binPtr->n_chunk * sizeof(Chunk);
145 binPtr->chunks = (Chunk*)malloc(sizeOfChunkList);
146 if(ifread(indexFile, binPtr->chunks, sizeOfChunkList) != sizeOfChunkList)
147 {
148 // Failed to read the chunks.
149 // Return failure.
150 ifclose(indexFile);
151 return(SamStatus::FAIL_IO);
152 }
153
154 // Determine the min/max for this bin if it is not the max bin.
155 if(binPtr->bin != MAX_NUM_BINS)
156 {
157 for(int i = 0; i < binPtr->n_chunk; i++)
158 {
159 if(binPtr->chunks[i].chunk_beg < ref->minChunkOffset)
160 {
161 ref->minChunkOffset = binPtr->chunks[i].chunk_beg;
162 }
163 if(binPtr->chunks[i].chunk_end > ref->maxChunkOffset)
164 {
165 ref->maxChunkOffset = binPtr->chunks[i].chunk_end;
166 }
167 if(binPtr->chunks[i].chunk_end > maxOverallOffset)
168 {
169 maxOverallOffset = binPtr->chunks[i].chunk_end;
170 }
171 }
172 }
173 else
174 {
175 // Mapped/unmapped are the last chunk of the
176 // MAX BIN
177 ref->n_mapped = binPtr->chunks[binPtr->n_chunk - 1].chunk_beg;
178 ref->n_unmapped = binPtr->chunks[binPtr->n_chunk - 1].chunk_end;
179 }
180 }
181
182 // Read the number of intervals.
183 if(ifread(indexFile, &(ref->n_intv), 4) != 4)
184 {
185 // Failed to read, set to 0.
186 ref->n_intv = 0;
187 // Return failure.
188 ifclose(indexFile);
189 return(SamStatus::FAIL_IO);
190 }
191
192 // Allocate space for the intervals and read them.
193 uint32_t linearIndexSize = ref->n_intv * sizeof(uint64_t);
194 ref->ioffsets = (uint64_t*)malloc(linearIndexSize);
195 if(ifread(indexFile, ref->ioffsets, linearIndexSize) != linearIndexSize)
196 {
197 // Failed to read the linear index.
198 // Return failure.
199 ifclose(indexFile);
200 return(SamStatus::FAIL_IO);
201 }
202 }
203
204 int32_t numUnmapped = 0;
205 if(ifread(indexFile, &numUnmapped, sizeof(int32_t)) == sizeof(int32_t))
206 {
207 myUnMappedNumReads = numUnmapped;
208 }
209
210 // Successfully read the bam index file.
211 ifclose(indexFile);
212 return(SamStatus::SUCCESS);
213}
IFILE ifopen(const char *filename, const char *mode, InputFile::ifileCompression compressionMode=InputFile::DEFAULT)
Open a file with the specified name and mode, using a filename of "-" to indicate stdin/stdout.
Definition InputFile.h:562
int ifclose(IFILE &file)
Close the file.
Definition InputFile.h:580
unsigned int ifread(IFILE file, void *buffer, unsigned int size)
Read up to size bytes from the file into the buffer.
Definition InputFile.h:600
virtual void resetIndex()
Reset the member data for a new index file.
Definition BamIndex.cpp:35
Class for easily reading/writing files without having to worry about file type (uncompressed,...
Definition InputFile.h:37
@ SUCCESS
method completed successfully.
@ FAIL_IO
method failed due to an I/O issue.
@ FAIL_PARSE
failed to parse a record/header - invalid format.

References StatGenStatus::FAIL_IO, StatGenStatus::FAIL_PARSE, ifclose(), ifopen(), ifread(), resetIndex(), and StatGenStatus::SUCCESS.

Referenced by SamFile::ReadBamIndex().

◆ resetIndex()

void BamIndex::resetIndex ( )
virtual

Reset the member data for a new index file.

Reimplemented from IndexBase.

Definition at line 35 of file BamIndex.cpp.

36{
38
39 maxOverallOffset = 0;
40 myUnMappedNumReads = -1;
41}
virtual void resetIndex()
Reset the member data for a new index file.

References IndexBase::resetIndex().

Referenced by readIndex().

Member Data Documentation

◆ REF_ID_ALL

const int32_t BamIndex::REF_ID_ALL = -2
static

The number used to indicate that all reference ids should be used.

Definition at line 89 of file BamIndex.h.

Referenced by SamFile::resetFile(), and SamFile::SetReadSection().

◆ REF_ID_UNMAPPED

const int32_t BamIndex::REF_ID_UNMAPPED = -1
static

◆ UNKNOWN_NUM_READS

const int32_t BamIndex::UNKNOWN_NUM_READS = -1
static

The number used for an unknown number of reads.

Definition at line 83 of file BamIndex.h.


The documentation for this class was generated from the following files: