forked from arsenetar/dupeguru
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathexport.py
151 lines (132 loc) · 3.48 KB
/
export.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
# Created By: Virgil Dupras
# Created On: 2006/09/16
# Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
#
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
# http://www.gnu.org/licenses/gpl-3.0.html
import os.path as op
from tempfile import mkdtemp
import csv
# Yes, this is a very low-tech solution, but at least it doesn't have all these annoying dependency
# and resource problems.
MAIN_TEMPLATE = """
<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC '-//W3C//DTD XHTML 1.0 Strict//EN' 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
<title>dupeGuru Results</title>
<style type="text/css">
BODY
{
background-color:white;
}
BODY,A,P,UL,TABLE,TR,TD
{
font-family:Tahoma,Arial,sans-serif;
font-size:10pt;
color: #4477AA;
}
TABLE
{
background-color: #225588;
margin-left: auto;
margin-right: auto;
width: 90%;
}
TR
{
background-color: white;
}
TH
{
font-weight: bold;
color: black;
background-color: #C8D6E5;
}
TH TD
{
color:black;
}
TD
{
padding-left: 2pt;
}
TD.rightelem
{
text-align:right;
/*padding-left:0pt;*/
padding-right: 2pt;
width: 17%;
}
TD.indented
{
padding-left: 12pt;
}
H1
{
font-family:"Courier New",monospace;
color:#6699CC;
font-size:18pt;
color:#6da500;
border-color: #70A0CF;
border-width: 1pt;
border-style: solid;
margin-top: 16pt;
margin-left: 5%;
margin-right: 5%;
padding-top: 2pt;
padding-bottom:2pt;
text-align: center;
}
</style>
</head>
<body>
<h1>dupeGuru Results</h1>
<table>
<tr>$colheaders</tr>
$rows
</table>
</body>
</html>
"""
COLHEADERS_TEMPLATE = "<th>{name}</th>"
ROW_TEMPLATE = """
<tr>
<td class="{indented}">{filename}</td>{cells}
</tr>
"""
CELL_TEMPLATE = """<td>{value}</td>"""
def export_to_xhtml(colnames, rows):
# a row is a list of values with the first value being a flag indicating if the row should be indented
if rows:
assert len(rows[0]) == len(colnames) + 1 # + 1 is for the "indented" flag
colheaders = "".join(COLHEADERS_TEMPLATE.format(name=name) for name in colnames)
rendered_rows = []
previous_group_id = None
for row in rows:
# [2:] is to remove the indented flag + filename
if row[0] != previous_group_id:
# We've just changed dupe group, which means that this dupe is a ref. We don't indent it.
indented = ""
else:
indented = "indented"
filename = row[1]
cells = "".join(CELL_TEMPLATE.format(value=value) for value in row[2:])
rendered_rows.append(ROW_TEMPLATE.format(indented=indented, filename=filename, cells=cells))
previous_group_id = row[0]
rendered_rows = "".join(rendered_rows)
# The main template can't use format because the css code uses {}
content = MAIN_TEMPLATE.replace("$colheaders", colheaders).replace("$rows", rendered_rows)
folder = mkdtemp()
destpath = op.join(folder, "export.htm")
fp = open(destpath, "wt", encoding="utf-8")
fp.write(content)
fp.close()
return destpath
def export_to_csv(dest, colnames, rows):
writer = csv.writer(open(dest, "wt", encoding="utf-8"))
writer.writerow(["Group ID"] + colnames)
for row in rows:
writer.writerow(row)