libStatGen Software 1
Loading...
Searching...
No Matches
InputFile.cpp
1/*
2 * Copyright (C) 2010-2012 Regents of the University of Michigan
3 *
4 * This program is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18#include "InputFile.h"
19#include "StringBasics.h"
20#include "GzipHeader.h"
21#include "BgzfFileType.h"
22#include "BgzfFileTypeRecovery.h"
23#include "GzipFileType.h"
24#include "UncompressedFileType.h"
25
26#include <stdarg.h>
27
28InputFile::InputFile(const char * filename, const char * mode,
29 InputFile::ifileCompression compressionMode)
30{
31 // XXX duplicate code
32 myAttemptRecovery = false;
33 myFileTypePtr = NULL;
34 myBufferIndex = 0;
35 myCurrentBufferSize = 0;
36 myAllocatedBufferSize = DEFAULT_BUFFER_SIZE;
37 myFileBuffer = new char[myAllocatedBufferSize];
38 myFileName.clear();
39
40 openFile(filename, mode, compressionMode);
41}
42
43
44int InputFile::readTilChar(const std::string& stopChars, std::string& stringRef)
45{
46 int charRead = 0;
47 size_t pos = std::string::npos;
48 // Loop until the character was not found in the stop characters.
49 while(pos == std::string::npos)
50 {
51 charRead = ifgetc();
52
53 // First Check for EOF. If EOF is found, just return -1
54 if(charRead == EOF)
55 {
56 return(-1);
57 }
58
59 // Try to find the character in the stopChars.
60 pos = stopChars.find(charRead);
61
62 if(pos == std::string::npos)
63 {
64 // Didn't find a stop character and it is not an EOF,
65 // so add it to the string.
66 stringRef += charRead;
67 }
68 }
69 return(pos);
70}
71
72
73int InputFile::readTilChar(const std::string& stopChars)
74{
75 int charRead = 0;
76 size_t pos = std::string::npos;
77 // Loop until the character was not found in the stop characters.
78 while(pos == std::string::npos)
79 {
80 charRead = ifgetc();
81
82 // First Check for EOF. If EOF is found, just return -1
83 if(charRead == EOF)
84 {
85 return(-1);
86 }
87
88 // Try to find the character in the stopChars.
89 pos = stopChars.find(charRead);
90 }
91 return(pos);
92}
93
94
96{
97 int charRead = 0;
98 // Loop until the character was not found in the stop characters.
99 while((charRead != EOF) && (charRead != '\n'))
100 {
101 charRead = ifgetc();
102 }
103 // First Check for EOF. If EOF is found, just return -1
104 if(charRead == EOF)
105 {
106 return(-1);
107 }
108 return(0);
109}
110
111
112int InputFile::readLine(std::string& line)
113{
114 int charRead = 0;
115 while(!ifeof())
116 {
117 charRead = ifgetc();
118 if(charRead == EOF)
119 {
120 return(-1);
121 }
122 if(charRead == '\n')
123 {
124 return(0);
125 }
126 line += charRead;
127 }
128 // Should never get here.
129 return(-1);
130}
131
132
133int InputFile::readTilTab(std::string& field)
134{
135 int charRead = 0;
136 while(!ifeof())
137 {
138 charRead = ifgetc();
139 if(charRead == EOF)
140 {
141 return(-1);
142 }
143 if(charRead == '\n')
144 {
145 return(0);
146 }
147 if(charRead == '\t')
148 {
149 return(1);
150 }
151 field += charRead;
152 }
153 return(-1);
154}
155
156
157#ifdef __ZLIB_AVAILABLE__
158
159// Open a file. Called by the constructor.
160// Returns true if the file was successfully opened, false otherwise.
161bool InputFile::openFile(const char * filename, const char * mode,
162 InputFile::ifileCompression compressionMode)
163{
164 //
165 // if recovering, we don't want to issue big readaheads, since
166 // that interferes with the decompression - we only want to
167 // decompress one at a time, and handle the exceptions immediately
168 // rather than at some indeterminate point in time.
169 //
170 if(myAttemptRecovery) {
171 bufferReads(1);
172 }
173 // If a file is for write, just open a new file.
174 if (mode[0] == 'w' || mode[0] == 'W')
175 {
176 openFileUsingMode(filename, mode, compressionMode);
177 }
178 else
179 {
180 // Check if reading from stdin.
181 if((strcmp(filename, "-") == 0) || (strcmp(filename, "-.gz") == 0))
182 {
183 // Reading from stdin, open it based on the
184 // compression mode.
185 openFileUsingMode(filename, mode, compressionMode);
186 }
187 else
188 {
189 // Not from stdin, so determine the file type.
190
191 // Open the file read only to determine file type.
192 UncompressedFileType file(filename, "r");
193 // If the file could not be opened, either create a new one or
194 // return failure.
195 if (!file.isOpen())
196 {
197 // If the mode is for read, then the file must exist, otherwise,
198 // create a new file.
199 if (mode[0] == 'r' || mode[0] == 'R')
200 {
201 // File must exist.
202 if (myFileTypePtr != NULL)
203 {
204 delete myFileTypePtr;
205 myFileTypePtr = NULL;
206 }
207 // Return false, was not opened.
208 return false;
209 }
210 else
211 {
212 openFileUsingMode(filename, mode, compressionMode);
213 }
214 }
215 else
216 {
217 // File was successfully opened, so try to determine the
218 // filetype from the file.
219 // Read the file to see if it a gzip file.
220 GzipHeader gzipHeader;
221 bool isGzip = gzipHeader.readHeader(file);
222
223 // The file header has been read, so close the file, so it can
224 // be re-opened as the correct type.
225 file.close();
226
227 if (isGzip)
228 {
229 // This file is a gzip file.
230 // Check to see if it is BGZF Compression.
231 if (gzipHeader.isBgzfFile())
232 {
233 // This file has BGZF Compression, so set the file
234 // pointer.
235 if(myAttemptRecovery) {
236 // NB: this reader will throw std::runtime_error when it recovers
237 myFileTypePtr = new BgzfFileTypeRecovery(filename, mode);
238 } else {
239 // use the standard bgzf reader (samtools)
240 myFileTypePtr = new BgzfFileType(filename, mode);
241 }
242 }
243 else
244 {
245 // Not BGZF, just a normal gzip.
246 myFileTypePtr = new GzipFileType(filename, mode);
247 }
248 }
249 else
250 {
251 // The file is a uncompressed, uncompressed file,
252 // so set the myFileTypePtr accordingly.
253 myFileTypePtr = new UncompressedFileType(filename, mode);
254 }
255 }
256 }
257 }
258 if(myFileTypePtr == NULL)
259 {
260 return(false);
261 }
262 if (!myFileTypePtr->isOpen())
263 {
264 // The file was not opened, so delete the pointer and set to null.
265 delete myFileTypePtr;
266 myFileTypePtr = NULL;
267 return false;
268 }
269
270 if(myAllocatedBufferSize == 1)
271 {
272 myFileTypePtr->setBuffered(false);
273 }
274 else
275 {
276 myFileTypePtr->setBuffered(true);
277 }
278 myFileName = filename;
279 return true;
280}
281
282
283// Open a file. This method will open a file with the specified name and
284// mode with the fileTypePtr associated with the specified compressionMode.
285void InputFile::openFileUsingMode(const char * filename, const char * mode,
286 ifileCompression compressionMode)
287{
288 switch (compressionMode)
289 {
290 case GZIP:
291 // Gzipped.
292 myFileTypePtr = new GzipFileType(filename, mode);
293 break;
294 case BGZF:
295 //
296 // BGZF compression - recovery is possible, so use
297 // Bgzf recovery reader if asked.
298 //
299 if(myAttemptRecovery && ((mode[0] == 'r') || (mode[0] == 'R')))
300 {
301 // NB: this reader will throw std::runtime_error when it recovers
302 myFileTypePtr = new BgzfFileTypeRecovery(filename, mode);
303 }
304 else
305 {
306 myFileTypePtr = new BgzfFileType(filename, mode);
307 }
308 break;
309 case UNCOMPRESSED:
310 myFileTypePtr = new UncompressedFileType(filename, mode);
311 break;
313 default:
314 // Check the extension. If it is ".gz", treat as gzip.
315 // otherwise treat it as UNCOMPRESSED.
316 int lastchar = 0;
317 while (filename[lastchar] != 0) lastchar++;
318 if ((lastchar >= 3 &&
319 filename[lastchar - 3] == '.' &&
320 filename[lastchar - 2] == 'g' &&
321 filename[lastchar - 1] == 'z'))
322 {
323 // .gz files files should be gzipped.
324 myFileTypePtr = new GzipFileType(filename, mode);
325 }
326 else
327 {
328 // Create an uncompressed file.
329 myFileTypePtr = new UncompressedFileType(filename, mode);
330 }
331 break;
332 }
333
334 if(myFileTypePtr == NULL)
335 {
336 return;
337 }
338 if(myAllocatedBufferSize == 1)
339 {
340 myFileTypePtr->setBuffered(false);
341 }
342 else
343 {
344 myFileTypePtr->setBuffered(true);
345 }
346}
347
348#else
349
350// No zlib, so just treat all files as std files.
351// Open a file. Called by the constructor.
352// Returns true if the file was successfully opened, false otherwise.
353bool InputFile::openFile(const char * filename, const char * mode,
354 InputFile::ifileCompression compressionMode)
355{
356 // No zlib, so it is a uncompressed, uncompressed file.
357 myFileTypePtr = new UncompressedFileType(filename, mode);
358
359 if(myFileTypePtr == NULL)
360 {
361 return(false);
362 }
363 if (!myFileTypePtr->isOpen())
364 {
365 // The file was not opened, so delete the pointer and set to null.
366 delete myFileTypePtr;
367 myFileTypePtr = NULL;
368 return false;
369 }
370 if(myAllocatedBufferSize == 1)
371 {
372 myFileTypePtr->setBuffered(false);
373 }
374 else
375 {
376 myFileTypePtr->setBuffered(true);
377 }
378 myFileName = filename;
379 return true;
380}
381
382#endif
383
384
386{
387 delete myFileTypePtr;
388 myFileTypePtr = NULL;
389
390 if(myFileBuffer != NULL)
391 {
392 delete[] myFileBuffer;
393 myFileBuffer = NULL;
394 }
395}
396
397
398int ifprintf(IFILE output, const char * format, ...)
399{
400 String buffer;
401
402 va_list ap;
403 va_start(ap, format);
404
405 buffer.vprintf(format, ap);
406
407 va_end(ap);
408
409 return ::ifwrite(output, (const char *) buffer, buffer.Length());
410}
411
412
413InputFile& operator << (InputFile& stream, double num)
414{
415 String val;
416 val = num;
417 stream << val;
418 return(stream);
419}
420
421
422InputFile& operator << (InputFile& stream, int num)
423{
424 String val;
425 val = num;
426 stream << val;
427 return(stream);
428}
429
430
431InputFile& operator << (InputFile& stream, unsigned int num)
432{
433 String val;
434 val = num;
435 stream << val;
436 return(stream);
437}
Class for easily reading/writing files without having to worry about file type (uncompressed,...
Definition InputFile.h:37
void bufferReads(unsigned int bufferSize=DEFAULT_BUFFER_SIZE)
Set the buffer size for reading from files so that bufferSize bytes are read at a time and stored unt...
Definition InputFile.h:83
~InputFile()
Destructor.
int readLine(std::string &line)
Read, appending the characters into the specified string until new line or EOF is found,...
InputFile()
Default constructor.
Definition InputFile.h:52
int ifeof() const
Check to see if we have reached the EOF.
Definition InputFile.h:386
int ifgetc()
Get a character from the file.
Definition InputFile.h:324
int discardLine()
Read until the end of the line, discarding the characters, returning -1 for EOF and returning 0 if the end of the line was found.
Definition InputFile.cpp:95
ifileCompression
Compression to use when writing a file & decompression used when reading a file from stdin.
Definition InputFile.h:44
@ BGZF
bgzf file.
Definition InputFile.h:48
@ GZIP
gzip file.
Definition InputFile.h:47
@ DEFAULT
Check the extension, if it is ".gz", treat as gzip, otherwise treat it as UNCOMPRESSED.
Definition InputFile.h:45
@ UNCOMPRESSED
uncompressed file.
Definition InputFile.h:46
int readTilTab(std::string &field)
Read, appending the characters into the specified string until tab, new line, or EOF is found,...
int readTilChar(const std::string &stopChars, std::string &stringRef)
Read until the specified characters, returning which character was found causing the stop,...
Definition InputFile.cpp:44