|
from collections import Counter

import bitstring
import huffman
from bitstring import BitArray
| 4 | + |
# Huffman round-trip demo: read a text file, build a Huffman code from its
# character frequencies, write the packed bit stream to a binary file, then
# read that file back, decode it and write the recovered text.

INPUT_PATH = "foo.txt"
COMPRESSED_PATH = "compressed_foo.bin"
OUTPUT_PATH = "foonew.txt"

# Characters that are listed first (and in this order) in the frequency
# report and in the weight list handed to the Huffman builder.
REPORT_CHARSET = (
    "\n"
    " abcdefghijklmnopqrstuvwxyz"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "0123456789!#$%&'()\"*+,-./:;<=>?@[\\]^_`{|}~"
    "àæçèéêôëü"
)


def count_characters(text, charset=REPORT_CHARSET):
    """Return ``(char, count)`` pairs for every character occurring in *text*.

    Characters from *charset* come first, in charset order; any other
    character found in *text* follows in order of first appearance, so that
    every character of the input ends up with a Huffman code.  Characters
    that never occur are omitted.
    """
    seen = Counter(text)
    pairs = []
    listed = set()
    for ch in charset:
        if ch in seen and ch not in listed:
            pairs.append((ch, seen[ch]))
            listed.add(ch)
    for ch in text:
        if ch not in listed:
            pairs.append((ch, seen[ch]))
            listed.add(ch)
    return pairs


def build_codebook(counts):
    """Return ``{char: bit string}`` built from ``(char, count)`` pairs.

    The weights passed to ``huffman.codebook`` are the relative frequencies
    ``count / total``.  An empty *counts* yields an empty codebook.
    """
    total = sum(n for _, n in counts)
    if total == 0:
        return {}
    weights = [(ch, n / total) for ch, n in counts]
    codes = huffman.codebook(weights)
    # NOTE(review): with a single distinct symbol the library may hand back
    # an empty code, which cannot be decoded; fall back to a one-bit code.
    return {ch: (code or "0") for ch, code in codes.items()}


def print_report(counts, codebook):
    """Print count, probability and Huffman code for each counted character."""
    total = sum(n for _, n in counts)
    for ch, n in counts:
        print(ch, " has ", "{0:04d}".format(n),
              " times appeared. Probability = ",
              "{:.10f}".format(n / total) + " Huffman: " + codebook[ch])


def encode_text(text, codebook):
    """Return the concatenated code bits ('0'/'1' string) for *text*."""
    return "".join(codebook[ch] for ch in text)


def bytes_to_bits(data):
    """Expand a bytes object into a '0'/'1' string, 8 bits per byte."""
    return "".join(format(byte, "08b") for byte in data)


def decode_bits(bits, codebook):
    """Decode a '0'/'1' string produced by ``encode_text`` back into text.

    Raises:
        ValueError: if the bits end in the middle of a code word.
    """
    symbol_for = {code: ch for ch, code in codebook.items()}
    decoded = []
    pending = ""
    for bit in bits:
        pending += bit
        # Huffman codes are prefix-free, so the first hit is the only hit.
        if pending in symbol_for:
            decoded.append(symbol_for[pending])
            pending = ""
    if pending:
        raise ValueError("bit stream ends inside a code word: " + pending)
    return "".join(decoded)


def main():
    """Compress INPUT_PATH to COMPRESSED_PATH and decode it to OUTPUT_PATH."""
    with open(INPUT_PATH, "r") as source:
        text = source.read()

    counts = count_characters(text)
    codebook = build_codebook(counts)
    print_report(counts, codebook)

    bits = encode_text(text, codebook)
    with open(COMPRESSED_PATH, "wb") as packed_file:
        if bits:
            # tofile() pads the final byte with zero bits.
            BitArray(bin=bits).tofile(packed_file)

    with open(COMPRESSED_PATH, "rb") as packed_file:
        packed = packed_file.read()

    # The file does not record how many bits are payload, so the padding is
    # removed using the bit count still held in memory.  A standalone decoder
    # would need that length stored alongside the data.
    restored_bits = bytes_to_bits(packed)[:len(bits)]
    restored = decode_bits(restored_bits, codebook)

    with open(OUTPUT_PATH, "w") as target:
        target.write(restored)


if __name__ == "__main__":
    main()
0 commit comments