-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcount.cpp
226 lines (199 loc) · 7.85 KB
/
count.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
#include "myHuff.h"
using namespace std;
struct huffman{
int n; //节点权重
unsigned char data; //节点中存放的数据
struct huffman *p; //指向父节点
struct huffman *l; //指向左节点
struct huffman *r; //指向右节点
struct huffman *next; //后驱指针
};
struct code{
int n;
int len; //编码长度
unsigned int code; //huffman编码本体
unsigned char data; //对应的数据
struct code *next;
}codeHe;
struct huffman *head = NULL; //初始化创建一个huffman树(第一次遍历文件时作为频率表)
struct code *codeHead = &codeHe; //初始化创建一个huffman编码链表
struct code *pcode = codeHead;
int maxLen = 0; //huffman编码最大的值
int code_link_len = 0; //huffman编码链表的长度(个数)
unsigned long int sum_code_len = 0; //正文区的长度(位)
unsigned long int old_file_len = 0; //原始文件的长度(位)
long int head_len = 0; //头部区长度(位)
void sort(struct huffman **h){ //传入head指针的地址
head = *h;
struct huffman *p = (struct huffman *)malloc(sizeof(struct huffman));
p->next = head;
for( ;p->next->next!=NULL;){
if(p->next->n > p->next->next->n) {
struct huffman *tmp1 = p->next;
struct huffman *tmp2 = p->next->next;
if(p->next == head){
head = tmp2;
tmp1->next = tmp2->next;
tmp2->next = tmp1;
p = head;
}
else {
p->next = tmp2;
tmp1->next = tmp2->next;
tmp2->next = tmp1;
p = p->next;
}
}
else{
p = p->next;
}
}
}
void count(unsigned char *buf)
{
if (head == NULL){ //当前队列还为空的情况
head = (struct huffman *)malloc(sizeof(struct huffman));
head->data = *buf;
head->n = 1;
head->next = NULL;
head->p = NULL;
head->l = NULL;
head->r = NULL;
return;
}
struct huffman *p = head;
while (p != NULL){ //遍历队列找到对应节点时
if (p->data == *buf)
{
p->n++;
sort(&head);
break;
return;
}
p = p->next;
}
if (p == NULL){ //经过前一次遍历没有找到对应节点时
p = head;
head = (struct huffman *)malloc(sizeof(struct huffman));
head->data = *buf;
head->n = 1;
head->next = p;
head->p = NULL;
head->l = NULL;
head->r = NULL;
}
}
void traversal(char *file)
{
FILE *fp;
unsigned char *buf = new unsigned char;
if ((fp = fopen(file, "rb")) == NULL)
cout << "Can not open " << file << endl;
fseek(fp, 0, SEEK_END);
long int end = ftell(fp);
fseek(fp, 0, SEEK_SET);
long int start = ftell(fp);
old_file_len = end*8; //上报原始文件长度(位)
while (end != start)
{
fread(buf, 1, 1, fp);
count(buf);
start = ftell(fp);
}
fclose(fp);
}
void tree(){
while( head->next != NULL ){
struct huffman *p = (struct huffman *)malloc(sizeof(struct huffman));
p->l = head;
p->r = head->next;
p->n = head->n + head->next->n;
p->p = NULL;
p->next = head->next->next;
head->p = p;
head->next->p = p;
head = p;
sort(&head);
}
}
void _code(struct huffman * root, int code, int len){
if( root->l == NULL && root->r == NULL ) {
pcode->n = root->n;
pcode->code = code;
pcode->data = root->data;
pcode->len = len;
pcode->next = (struct code *)malloc(sizeof(struct code));
pcode->next->next = NULL; //初始化后一个节点的后驱指针为空
if(pcode->len > maxLen) maxLen = pcode->len; //取得全链表最长的huffman编码长度
pcode = pcode->next; //code链表指针后移一位,准备下次接收
}
else{
_code(root->l, code*2, len+1);
_code(root->r, code*2+1, len+1);
}
}
void code(){
_code( head, 0, 0 );
pcode = codeHead;
while(pcode->next!=NULL){
sum_code_len = sum_code_len + (pcode->n)*(pcode->len); //上报正文区总长度(位)
code_link_len++; //上报编码链表长度(个数)
pcode = pcode->next;
}
head_len = (code_link_len*9 + 12) * 8; //上报压缩文件头部区的长度(位)
cout << "原始文件大小:" << old_file_len << endl;
cout << "压缩正文大小:" << sum_code_len << endl;
cout << "压缩文件大小:" << sum_code_len+head_len << endl;
cout << "理论压缩率:" << ((float)sum_code_len)/old_file_len << endl;
cout << "实际压缩率:" << ((float)(sum_code_len+head_len))/old_file_len << endl;
}
void huffwrite(char *new_file, char *old_file){
pcode = codeHead;
ofstream outfile;
unsigned int tmp = 0; //写入huffmancode的凑整暂存区
int tmp_len = 0; //tmp已填充的长度计数
outfile.open(new_file, ios::out | ios::trunc | ios::binary ); //新建文件或覆盖,可写入模式
outfile.write((char *)&code_link_len, 4); //写入编码链表的长度(个数)
outfile.write((char *)&sum_code_len, 8); //写入正文的长度(位)
int i = 0; //向文件中写入huffman编码表,每个部分占9字节长度。
for(i; i<code_link_len; i++) {
outfile.write((char *)&pcode->code,4); //huffman编码,unsigned int形式存放,长度4字节
outfile.write((char *)&pcode->len, 4); //huffman编码长度,int形式存放,长度4字节
outfile.write((char *)&pcode->data,1); //编码所代表的数据,unsigned形式存放,长度1字节
pcode = pcode->next;
}
FILE *fp; //遍历文件,将每个字节的内容翻译为huffman编码写入新文件
unsigned char *buf = new unsigned char;
if ((fp = fopen(old_file, "rb")) == NULL)
cout << "Can not open " << old_file << endl;
fseek(fp, 0, SEEK_END);
long int end = ftell(fp);
fseek(fp, 0, SEEK_SET);
long int start = ftell(fp);
while (end != start)
{
pcode = codeHead;
fread(buf, 1, 1, fp);
while(pcode->next!=NULL) { //遍历找到此字节对应的huffcode节点
if(pcode->data == *buf) break;
pcode = pcode->next;
}
tmp_len = tmp_len + pcode->len; //tmp的计数器工作一次
if(tmp_len<=32) { //tmp未满的情况
tmp = tmp + ( pcode->code << (32-tmp_len));
}
if(tmp_len > 32) { //tmp满了的情况
tmp = tmp + ( pcode->code >> (tmp_len-32)); //向tmp中填入部分数据将其32位凑齐
outfile.write((char *)&tmp,4); //将已经满了的tmp写入
tmp = 0; //重新初始化tmp
tmp = tmp + ( pcode->code << (32-(tmp_len-32))); //刚刚没装载完的部分
tmp_len = tmp_len - 32; //重新设置tmp的计数器
}
start = ftell(fp);
}
if(tmp_len!=0){ //文件读完了最后检查凑不够整还没有写入新文件的小尾巴
outfile.write((char *)&tmp,4);
}
fclose(fp);
outfile.close();
}