forked from freebayes/freebayes
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathBGZF.h
320 lines (289 loc) · 10.9 KB
/
BGZF.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
// ***************************************************************************
// BGZF.h (c) 2009 Derek Barnett, Michael Str�mberg
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
// Last modified: 20 October 2010 (DB)
// ---------------------------------------------------------------------------
// BGZF routines were adapted from the bgzf.c code developed at the Broad
// Institute.
// ---------------------------------------------------------------------------
// Provides the basic functionality for reading & writing BGZF files
// ***************************************************************************
#ifndef BGZF_H
#define BGZF_H
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>
#include "zlib.h"
// Platform-specific large-file support
#ifndef BAMTOOLS_LFS
#define BAMTOOLS_LFS
#ifdef WIN32
#define ftell64(a) _ftelli64(a)
#define fseek64(a,b,c) _fseeki64(a,b,c)
#else
#define ftell64(a) ftello(a)
#define fseek64(a,b,c) fseeko(a,b,c)
#endif
#endif // BAMTOOLS_LFS
// Platform-specific type definitions
#ifndef BAMTOOLS_TYPES
#define BAMTOOLS_TYPES
#ifdef _MSC_VER
typedef char int8_t;
typedef unsigned char uint8_t;
typedef short int16_t;
typedef unsigned short uint16_t;
typedef int int32_t;
typedef unsigned int uint32_t;
typedef long long int64_t;
typedef unsigned long long uint64_t;
#else
#include <stdint.h>
#endif
#endif // BAMTOOLS_TYPES
namespace BamTools {
// zlib constants
const int GZIP_ID1 = 31;
const int GZIP_ID2 = 139;
const int CM_DEFLATE = 8;
const int FLG_FEXTRA = 4;
const int OS_UNKNOWN = 255;
const int BGZF_XLEN = 6;
const int BGZF_ID1 = 66;
const int BGZF_ID2 = 67;
const int BGZF_LEN = 2;
const int GZIP_WINDOW_BITS = -15;
const int Z_DEFAULT_MEM_LEVEL = 8;
// BZGF constants
const int BLOCK_HEADER_LENGTH = 18;
const int BLOCK_FOOTER_LENGTH = 8;
const int MAX_BLOCK_SIZE = 65536;
const int DEFAULT_BLOCK_SIZE = 65536;
struct BgzfData {
// data members
public:
unsigned int UncompressedBlockSize;
unsigned int CompressedBlockSize;
unsigned int BlockLength;
unsigned int BlockOffset;
uint64_t BlockAddress;
bool IsOpen;
bool IsWriteOnly;
bool IsWriteUncompressed;
FILE* Stream;
char* UncompressedBlock;
char* CompressedBlock;
// constructor & destructor
public:
BgzfData(void);
~BgzfData(void);
// main interface methods
public:
// closes BGZF file
void Close(void);
// opens the BGZF file (mode is either "rb" for reading, or "wb" for writing)
bool Open(const std::string& filename, const char* mode, bool isWriteUncompressed = false);
// reads BGZF data into a byte buffer
int Read(char* data, const unsigned int dataLength);
// seek to position in BGZF file
bool Seek(int64_t position);
// get file position in BGZF file
int64_t Tell(void);
// writes the supplied data into the BGZF buffer
unsigned int Write(const char* data, const unsigned int dataLen);
// internal methods
private:
// compresses the current block
int DeflateBlock(void);
// flushes the data in the BGZF block
void FlushBlock(void);
// de-compresses the current block
int InflateBlock(const int& blockLength);
// reads a BGZF block
bool ReadBlock(void);
// static 'utility' methods
public:
// checks BGZF block header
static inline bool CheckBlockHeader(char* header);
// packs an unsigned integer into the specified buffer
static inline void PackUnsignedInt(char* buffer, unsigned int value);
// packs an unsigned short into the specified buffer
static inline void PackUnsignedShort(char* buffer, unsigned short value);
// unpacks a buffer into a double
static inline double UnpackDouble(char* buffer);
static inline double UnpackDouble(const char* buffer);
// unpacks a buffer into a float
static inline float UnpackFloat(char* buffer);
static inline float UnpackFloat(const char* buffer);
// unpacks a buffer into a signed int
static inline signed int UnpackSignedInt(char* buffer);
static inline signed int UnpackSignedInt(const char* buffer);
// unpacks a buffer into a signed short
static inline signed short UnpackSignedShort(char* buffer);
static inline signed short UnpackSignedShort(const char* buffer);
// unpacks a buffer into an unsigned int
static inline unsigned int UnpackUnsignedInt(char* buffer);
static inline unsigned int UnpackUnsignedInt(const char* buffer);
// unpacks a buffer into an unsigned short
static inline unsigned short UnpackUnsignedShort(char* buffer);
static inline unsigned short UnpackUnsignedShort(const char* buffer);
};
// -------------------------------------------------------------
// static 'utility' method implementations
// checks BGZF block header
inline
bool BgzfData::CheckBlockHeader(char* header) {
return (header[0] == GZIP_ID1 &&
header[1] == (char)GZIP_ID2 &&
header[2] == Z_DEFLATED &&
(header[3] & FLG_FEXTRA) != 0 &&
BgzfData::UnpackUnsignedShort(&header[10]) == BGZF_XLEN &&
header[12] == BGZF_ID1 &&
header[13] == BGZF_ID2 &&
BgzfData::UnpackUnsignedShort(&header[14]) == BGZF_LEN );
}
// 'packs' an unsigned integer into the specified buffer
inline
void BgzfData::PackUnsignedInt(char* buffer, unsigned int value) {
buffer[0] = (char)value;
buffer[1] = (char)(value >> 8);
buffer[2] = (char)(value >> 16);
buffer[3] = (char)(value >> 24);
}
// 'packs' an unsigned short into the specified buffer
inline
void BgzfData::PackUnsignedShort(char* buffer, unsigned short value) {
buffer[0] = (char)value;
buffer[1] = (char)(value >> 8);
}
// 'unpacks' a buffer into a double (includes both non-const & const char* flavors)
inline
double BgzfData::UnpackDouble(char* buffer) {
union { double value; unsigned char valueBuffer[sizeof(double)]; } un;
un.value = 0;
un.valueBuffer[0] = buffer[0];
un.valueBuffer[1] = buffer[1];
un.valueBuffer[2] = buffer[2];
un.valueBuffer[3] = buffer[3];
un.valueBuffer[4] = buffer[4];
un.valueBuffer[5] = buffer[5];
un.valueBuffer[6] = buffer[6];
un.valueBuffer[7] = buffer[7];
return un.value;
}
inline
double BgzfData::UnpackDouble(const char* buffer) {
union { double value; unsigned char valueBuffer[sizeof(double)]; } un;
un.value = 0;
un.valueBuffer[0] = buffer[0];
un.valueBuffer[1] = buffer[1];
un.valueBuffer[2] = buffer[2];
un.valueBuffer[3] = buffer[3];
un.valueBuffer[4] = buffer[4];
un.valueBuffer[5] = buffer[5];
un.valueBuffer[6] = buffer[6];
un.valueBuffer[7] = buffer[7];
return un.value;
}
// 'unpacks' a buffer into a float (includes both non-const & const char* flavors)
inline
float BgzfData::UnpackFloat(char* buffer) {
union { float value; unsigned char valueBuffer[sizeof(float)]; } un;
un.value = 0;
un.valueBuffer[0] = buffer[0];
un.valueBuffer[1] = buffer[1];
un.valueBuffer[2] = buffer[2];
un.valueBuffer[3] = buffer[3];
return un.value;
}
inline
float BgzfData::UnpackFloat(const char* buffer) {
union { float value; unsigned char valueBuffer[sizeof(float)]; } un;
un.value = 0;
un.valueBuffer[0] = buffer[0];
un.valueBuffer[1] = buffer[1];
un.valueBuffer[2] = buffer[2];
un.valueBuffer[3] = buffer[3];
return un.value;
}
// 'unpacks' a buffer into a signed int (includes both non-const & const char* flavors)
inline
signed int BgzfData::UnpackSignedInt(char* buffer) {
union { signed int value; unsigned char valueBuffer[sizeof(signed int)]; } un;
un.value = 0;
un.valueBuffer[0] = buffer[0];
un.valueBuffer[1] = buffer[1];
un.valueBuffer[2] = buffer[2];
un.valueBuffer[3] = buffer[3];
return un.value;
}
inline
signed int BgzfData::UnpackSignedInt(const char* buffer) {
union { signed int value; unsigned char valueBuffer[sizeof(signed int)]; } un;
un.value = 0;
un.valueBuffer[0] = buffer[0];
un.valueBuffer[1] = buffer[1];
un.valueBuffer[2] = buffer[2];
un.valueBuffer[3] = buffer[3];
return un.value;
}
// 'unpacks' a buffer into a signed short (includes both non-const & const char* flavors)
inline
signed short BgzfData::UnpackSignedShort(char* buffer) {
union { signed short value; unsigned char valueBuffer[sizeof(signed short)]; } un;
un.value = 0;
un.valueBuffer[0] = buffer[0];
un.valueBuffer[1] = buffer[1];
return un.value;
}
inline
signed short BgzfData::UnpackSignedShort(const char* buffer) {
union { signed short value; unsigned char valueBuffer[sizeof(signed short)]; } un;
un.value = 0;
un.valueBuffer[0] = buffer[0];
un.valueBuffer[1] = buffer[1];
return un.value;
}
// 'unpacks' a buffer into an unsigned int (includes both non-const & const char* flavors)
inline
unsigned int BgzfData::UnpackUnsignedInt(char* buffer) {
union { unsigned int value; unsigned char valueBuffer[sizeof(unsigned int)]; } un;
un.value = 0;
un.valueBuffer[0] = buffer[0];
un.valueBuffer[1] = buffer[1];
un.valueBuffer[2] = buffer[2];
un.valueBuffer[3] = buffer[3];
return un.value;
}
inline
unsigned int BgzfData::UnpackUnsignedInt(const char* buffer) {
union { unsigned int value; unsigned char valueBuffer[sizeof(unsigned int)]; } un;
un.value = 0;
un.valueBuffer[0] = buffer[0];
un.valueBuffer[1] = buffer[1];
un.valueBuffer[2] = buffer[2];
un.valueBuffer[3] = buffer[3];
return un.value;
}
// 'unpacks' a buffer into an unsigned short (includes both non-const & const char* flavors)
inline
unsigned short BgzfData::UnpackUnsignedShort(char* buffer) {
union { unsigned short value; unsigned char valueBuffer[sizeof(unsigned short)]; } un;
un.value = 0;
un.valueBuffer[0] = buffer[0];
un.valueBuffer[1] = buffer[1];
return un.value;
}
inline
unsigned short BgzfData::UnpackUnsignedShort(const char* buffer) {
union { unsigned short value; unsigned char valueBuffer[sizeof(unsigned short)]; } un;
un.value = 0;
un.valueBuffer[0] = buffer[0];
un.valueBuffer[1] = buffer[1];
return un.value;
}
} // namespace BamTools
#endif // BGZF_H