Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /*
- * readfastq.h
- *
- * Created on: Sep 27, 2011
- * Author: michael
- * Edited by: scarysandwich
- * Edited on: April 25, 2013
- */
- #ifndef READFASTQ_H_
- #define READFASTQ_H_
#include <stdint.h> //uint32_t
#include<stdio.h>
#include<stdlib.h> //to use malloc
#include<string.h>
#include <sys/time.h> //gettimeofday, timeval

#include<fstream> //file stream
#include<iostream> //to open files
#include <istream>
#include <stdexcept> //std::runtime_error, std::invalid_argument, std::out_of_range
#include<string>
#include<vector>

#include <boost/iostreams/device/file.hpp>
#include <boost/iostreams/stream.hpp>
#ifdef GZIP
#include <boost/iostreams/device/file.hpp>
#include <boost/iostreams/stream.hpp>
#include <boost/iostreams/filtering_streambuf.hpp>
#include <boost/iostreams/filtering_stream.hpp>
#include <boost/iostreams/filter/gzip.hpp>
#endif

#include"shortread.h"
// When defined, fastqfile::readfastq reports on stderr how long the read took.
#define TIMEMEASURE
// Elapsed time from `start` to `end` in whole milliseconds. Both arguments
// must expose `tv_sec` and `tv_usec` members (e.g. a `timeval`). Every use of
// an argument is parenthesized so that expressions such as `*p` or `a[i]`
// expand correctly.
#define TIMECALC(start, end) ((((end).tv_sec - (start).tv_sec) * 1000000 + ((end).tv_usec - (start).tv_usec)) / 1000)
// NOTE(review): a using-directive in a header leaks into every file that
// includes it. It is kept because code outside this header may already rely
// on it; prefer explicit std:: qualification in new code.
using namespace std;
/**
 * In-memory store for the fixed-length short reads of a FASTQ file.
 *
 * A record (header line, sequence line, separator line, quality line) is
 * accepted only when both its sequence and its quality line are exactly
 * SRlength characters long. Accepted sequences are packed back to back
 * (no separators, no terminating NUL) in one buffer, their quality strings in
 * a second buffer with the same layout, and their header lines in a vector of
 * strings. Read i therefore starts at byte i * SRlength of either buffer.
 */
class fastqfile {
    std::vector<std::string> _names; // header line of every accepted record
    std::vector<char> _seqs;         // accepted sequences, _SRlength bytes each
    std::vector<char> _phred;        // quality strings, same layout as _seqs
    int _filelength;                 // number of accepted records
    int _SRlength;                   // required length of every read
    int _blocksize;                  // stored by the constructor; not used in this class

    // Drops one trailing carriage return so files with CRLF line endings
    // yield the same line lengths as files with LF line endings.
    static void stripCarriageReturn(std::string &line) {
        if (!line.empty() && line[line.size() - 1] == '\r') {
            line.erase(line.size() - 1);
        }
    }

    // Copies the _SRlength bytes of record i from `buffer` into `ret`.
    // Throws std::out_of_range when record i is not stored in `buffer`.
    void copyRecord(const std::vector<char> &buffer, uint32_t i, char *ret) const {
        const std::size_t length = static_cast<std::size_t>(_SRlength);
        if (length == 0) {
            return; // zero-length reads carry no bytes to copy
        }
        if (i >= buffer.size() / length) {
            throw std::out_of_range("fastqfile: read index out of range");
        }
        memcpy(ret, &buffer[static_cast<std::size_t>(i) * length], length);
    }

public:
    /**
     * Loads `filename` via readfastq().
     *
     * @param filename  path of the FASTQ file
     * @param SRlength  exact length every accepted read must have
     * @param blocksize stored in the object only; readfastq() does not
     *                  consult it. NOTE(review): the intended meaning is not
     *                  visible in this header - confirm with callers.
     * @throws whatever readfastq() throws
     */
    fastqfile(const char *filename, int SRlength, uint32_t blocksize)
        : _filelength(0), _SRlength(SRlength), _blocksize(static_cast<int>(blocksize)) {
        readfastq(filename, SRlength);
    }

    /** Pointer to the first stored header line, or NULL when nothing is stored. */
    std::string *getNames() { return _names.empty() ? NULL : &_names[0]; }

    /** Pointer to the packed sequence buffer, or NULL when nothing is stored. */
    char *getSeqs() { return _seqs.empty() ? NULL : &_seqs[0]; }

    /** Pointer to the packed quality buffer, or NULL when nothing is stored. */
    char *getPhred() { return _phred.empty() ? NULL : &_phred[0]; }

    /** Number of reads (as counted by readfastq() or set by setNumberOfSRs()). */
    uint32_t getNumberOfSR() const { return static_cast<uint32_t>(_filelength); }

    /** Length in characters of every stored read. */
    uint32_t getSRlegth() const { return static_cast<uint32_t>(_SRlength); }

    /** Header line of read i; throws std::out_of_range for an invalid index. */
    std::string getNameAt(uint32_t i) const { return _names.at(i); }

    /**
     * Copies the sequence of read i into `ret`, which must have room for
     * getSRlegth() bytes. No terminating NUL is written.
     * @throws std::out_of_range when read i is not stored
     */
    void getSRAt(uint32_t i, char *ret) const { copyRecord(_seqs, i, ret); }

    /**
     * Copies the quality string of read i into `ret`, which must have room
     * for getSRlegth() bytes. No terminating NUL is written.
     * @throws std::out_of_range when read i is not stored
     */
    void getPhredAt(uint32_t i, char *ret) const { copyRecord(_phred, i, ret); }

    /**
     * Overrides the read count reported by getNumberOfSR(). The stored
     * names, sequences and qualities are left untouched.
     */
    void setNumberOfSRs(uint32_t num) {
        _filelength = static_cast<int>(num);
    }

    /**
     * Replaces the stored reads with those found in `filename`.
     *
     * Records whose sequence or quality line is not exactly SRlength
     * characters long are skipped; a message is printed for each skipped
     * record with a non-empty sequence. A record cut short by the end of the
     * file is dropped. Special letters are stored as found.
     *
     * @param filename path of the FASTQ file
     * @param SRlength exact length every accepted read must have
     * @throws std::invalid_argument when filename is NULL or SRlength < 0
     * @throws std::runtime_error    when the file cannot be opened
     */
    void readfastq(const char *filename, int SRlength) {
#ifdef TIMEMEASURE
        timeval started, finished;
        gettimeofday(&started, 0);
#endif
        // Validate and open before touching the members, so a failed call
        // leaves the previously loaded data intact.
        if (filename == NULL) {
            throw std::invalid_argument("fastqfile: filename is NULL");
        }
        if (SRlength < 0) {
            throw std::invalid_argument("fastqfile: SRlength must not be negative");
        }
        std::ifstream file(filename);
        if (!file) {
            throw std::runtime_error(std::string("fastqfile: cannot open ") + filename);
        }

        _names.clear();
        _seqs.clear();
        _phred.clear();
        _filelength = 0;
        _SRlength = SRlength;

        const std::size_t readLength = static_cast<std::size_t>(SRlength);
        std::string name;
        std::string sequence;
        std::string separator;
        std::string quality;
        while (std::getline(file, name)) {
            // A failed getline leaves its string unchanged, so a truncated
            // record must be dropped rather than mixed with stale data.
            if (!std::getline(file, sequence) || !std::getline(file, separator) ||
                !std::getline(file, quality)) {
                break;
            }
            stripCarriageReturn(name);
            stripCarriageReturn(sequence);
            stripCarriageReturn(quality);

            if (sequence.size() != readLength || quality.size() != readLength) {
                if (!sequence.empty()) {
                    printf("Error on read in fastq: size is invalid\n");
                }
                continue;
            }
            _names.push_back(name);
            _seqs.insert(_seqs.end(), sequence.begin(), sequence.end());
            _phred.insert(_phred.end(), quality.begin(), quality.end());
            ++_filelength;
        }
#ifdef TIMEMEASURE
        gettimeofday(&finished, 0);
        std::cerr << "FastQ block read in " << TIMECALC(started, finished) << "ms" << std::endl;
#endif
    }

    /** Prints every byte of the sequence buffer as an integer followed by '-'. */
    void print() const {
        for (std::size_t i = 0; i < _seqs.size(); ++i) {
            printf("%i-", _seqs[i]);
        }
        printf("\n");
    }
};
- #endif /* READFASTQ_H_ */
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement