forked from euske/pdfminer
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlzw.py
106 lines (93 loc) · 2.78 KB
/
lzw.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
#!/usr/bin/env python2
import sys
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
## LZWDecoder
##
class LZWDecoder(object):
debug = 0
def __init__(self, fp):
self.fp = fp
self.buff = 0
self.bpos = 8
self.nbits = 9
self.table = None
self.prevbuf = None
return
def readbits(self, bits):
v = 0
while 1:
# the number of remaining bits we can get from the current buffer.
r = 8 - self.bpos
if bits <= r:
# |-----8-bits-----|
# |-bpos-|-bits-| |
# | |----r----|
v = (v << bits) | ((self.buff >> (r - bits)) & ((1 << bits) - 1))
self.bpos += bits
break
else:
# |-----8-bits-----|
# |-bpos-|---bits----...
# | |----r----|
v = (v << r) | (self.buff & ((1 << r) - 1))
bits -= r
x = self.fp.read(1)
if not x: raise EOFError
self.buff = ord(x)
self.bpos = 0
return v
# noinspection PyTypeChecker
def feed(self, code):
x = ''
if code == 256:
self.table = [chr(c) for c in xrange(256)] # 0-255
self.table.append(None) # 256
self.table.append(None) # 257
self.prevbuf = ''
self.nbits = 9
elif code == 257:
pass
elif not self.prevbuf:
x = self.prevbuf = self.table[code]
else:
if code < len(self.table):
x = self.table[code]
self.table.append(self.prevbuf + x[0])
else:
self.table.append(self.prevbuf + self.prevbuf[0])
x = self.table[code]
l = len(self.table)
if l == 511:
self.nbits = 10
elif l == 1023:
self.nbits = 11
elif l == 2047:
self.nbits = 12
self.prevbuf = x
return x
def run(self):
while 1:
try:
code = self.readbits(self.nbits)
except EOFError:
break
x = self.feed(code)
yield x
if self.debug:
print >> sys.stderr, ('nbits=%d, code=%d, output=%r, table=%r' %
(self.nbits, code, x, self.table[258:]))
return
# lzwdecode
def lzwdecode(data):
"""
>>> lzwdecode('\x80\x0b\x60\x50\x22\x0c\x0c\x85\x01')
'\x2d\x2d\x2d\x2d\x2d\x41\x2d\x2d\x2d\x42'
"""
fp = StringIO(data)
return ''.join(LZWDecoder(fp).run())
if __name__ == '__main__':
import doctest
doctest.testmod()