forked from edsomjr/TEP
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtxt_2md.py
80 lines (73 loc) · 2.89 KB
/
txt_2md.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
from pdf2image import convert_from_path
import sys
import os
import re
import itertools
from collections import defaultdict
from os import listdir
from os.path import isfile, join
from math import inf
_USAGE = (
    'usage:\n python txt_2md.py <txt> <diretorio do pdf> '
    '<pagina final documento> <numero de visualizaçoes> '
    '<pagina inicial vis 1> <pagina final vis1> ... '
    '<pagina inicial vis N> <pagina final vis N>'
)


def _parse_args(argv):
    """Validate argv and return (filename, pdf_dir, last_page, ini, fin).

    Returns None when the argument count is inconsistent with the declared
    number of visualizations or when a numeric argument is not an integer.
    """
    if len(argv) < 6:
        return None
    try:
        num_vis = int(argv[4])
        if len(argv) != num_vis * 2 + 5:
            return None
        last_page = int(argv[3])
        bounds = [int(value) for value in argv[5:]]
    except ValueError:
        return None
    # The trailing `inf` sentinel keeps ini[vis_it] / fin[vis_it] valid after
    # the last real visualization range has been consumed.
    ini = bounds[0::2] + [inf]
    fin = bounds[1::2] + [inf]
    return argv[1], argv[2], last_page, ini, fin


def main(argv):
    """Convert a text dump into Markdown written to stdout.

    Expected ``argv`` layout (``argv[0]`` is the program name)::

        <txt> <pdf dir> <last page> <N> <ini 1> <fin 1> ... <ini N> <fin N>

    The text file is scanned as consecutive line pairs. A line holding only
    an integer followed by a line containing at least one letter is treated
    as a page marker: the integer becomes the current page and the following
    line is emitted as a ``## `` heading. While the current page lies inside
    the active ``[ini, fin]`` range, output is suppressed; on the page just
    before ``fin`` the heading is printed together with a link to
    ``images/vis-<k>/`` and the next range becomes active. Output is also
    suppressed once the current page reaches ``<last page>``. Every distinct
    line is printed at most once. A line containing the brace marker is
    replaced by the next file (in sorted name order) of ``<pdf dir>/cpp``
    wrapped in a fenced code block.

    If ``argv`` is malformed, the usage text is printed and nothing else is
    done. An ``IndexError`` is raised if the text contains more brace
    markers than there are files in the ``cpp`` directory.

    NOTE(review): the original indentation of this function was not
    available; the block that prints the album link is reconstructed as
    nested under the "page is inside the range" branch. The alternative
    nesting differs only when a range has ini == fin -- confirm.
    """
    parsed = _parse_args(argv)
    if parsed is None:
        print(_USAGE)
        return
    filename, pdf_dir, last_page, ini, fin = parsed

    # join() works whether or not pdf_dir ends with a path separator.
    cpp_dir = join(pdf_dir, 'cpp')
    images_dir = 'images/'
    # At least one digit is required so blank lines are never parsed as
    # page numbers (int('') would raise ValueError).
    page_number = re.compile(r'^\s*(\d+)\s*$', re.U)
    has_letter = re.compile(r'[^\W\d_]', re.U)
    code_flag = re.compile(r'{{{{{{{{{{{{{{{{{{{{{}}}}}}}}}}}}}}}}}}}}}')
    # listdir() order is arbitrary; sort so snippets map to markers
    # deterministically.
    codes = sorted(
        path
        for path in (join(cpp_dir, name) for name in listdir(cpp_dir))
        if isfile(path)
    )

    with open(filename) as source:
        lines = source.readlines()

    emitting = False   # whether regular text lines are currently printed
    vis_it = 0         # index of the active visualization range
    code_it = 0        # index of the next code file to insert
    seen = set()       # raw lines already printed (used for de-duplication)

    for previous, current in zip(lines, lines[1:]):
        text = current.strip(' ').strip('\n')
        contains_letter = has_letter.search(current) is not None
        number = page_number.search(previous)
        starts_page = number is not None and contains_letter

        if starts_page:
            current_page = int(number.group(1))
            if current_page < ini[vis_it] or current_page > fin[vis_it]:
                emitting = True
            else:
                emitting = False
                if current_page + 1 == fin[vis_it]:
                    print('## ', text)
                    log = '\nPara acessar álbum de imagens [clique aqui]({0}).\n'.format(
                        images_dir + 'vis-' + str(vis_it) + '/')
                    print(log)
                    vis_it += 1
                    seen.add(current)
            if current_page >= last_page:
                emitting = False

        if emitting and contains_letter and current not in seen:
            line = text.replace('•', '*')
            if starts_page:
                line = "## " + line
            print(line)
            seen.add(current)

        if code_flag.search(current):
            with open(codes[code_it], 'r') as code:
                print('```C++')
                print(code.read())
                print('```')
            code_it += 1
    return
# Script entry point: when run directly, hand the full command line to main().
if __name__ == '__main__':
    main(sys.argv)