forked from pbinkley/annotate
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathRakefile
237 lines (197 loc) · 8.83 KB
/
Rakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
require 'fileutils'
require 'json'
require 'csv'
require 'slugify'
require 'sanitize'
require 'net/http'
require 'byebug'
require 'jekyll'
require 'yaml'
# Running plain `rake` stores any pending annotations.
task :default => [:store_annotations]

# Files pending annotation dumps into the Jekyll site and rebuilds the clippings.
#
# For every directory under iiif/:
#   1. ensures annotations/<manifest>/ exists and registers manifests that have
#      a manifest.json but no clean-manifest.json yet;
#   2. moves each annotations/<manifest>/<canvas>.json dump into
#      annotations/<manifest>/<canvas>/ (list template + stored annotation array);
#   3. regenerates iiif/<manifest>/manifest.json when something was stored or
#      the manifest copy is missing.
# Finally the Jekyll site is built and clippings are produced per manifest.
task :store_annotations do
  manifests = Dir['iiif/*/'].map do |manifest_dir|
    # Use the directory name verbatim. Stripping a ".*" suffix (as is done for
    # the canvas .json files below) would turn "book.v2" into "book" and point
    # every later path at a folder that does not exist.
    manifest = File.basename(manifest_dir)
    mkdir_p "annotations/#{manifest}"
    has_clean_copy = File.exist?("iiif/#{manifest}/clean-manifest.json")
    add_new_manifest(manifest) if !has_clean_copy && File.exist?("iiif/#{manifest}/manifest.json")
    manifest
  end

  manifests.each do |manifest|
    puts 'Manifest: ' + manifest
    unstored_canvases = Dir["annotations/#{manifest}/*.json"].sort
    unstored_canvases.each do |canvas|
      name = File.basename(canvas, ".*")
      dir = "annotations/#{manifest}/#{name}"
      sum_annotations = JSON.parse(File.read(canvas))
      FileUtils.mkdir_p dir # make dir for canvas annotations
      make_anno_list(dir, name, manifest) # write canvas annotation list to file
      store_anno_array(dir, name, manifest, sum_annotations) # write array of canvas annotations to file
      File.delete(canvas) # remove unstored data file
    end
    # TODO also update if manifest is older than clean-manifest
    if !unstored_canvases.empty? || !File.exist?("iiif/#{manifest}/manifest.json")
      puts "Updating #{manifest}"
      update_manifest_copy(manifest)
    end
  end

  # build jekyll site to get annotations for clippings
  # Keep a handle on the Site object itself rather than on whatever #process
  # happens to return, so make_clippings receives the site it is named for.
  site = Jekyll::Site.new(Jekyll.configuration({
    "source" => ".",
    "destination" => "_site"
  }))
  site.process
  manifests.each { |manifest| make_clippings(manifest, site) }
end
# Smoke-test task: `rake test` just prints "test" to confirm the Rakefile loads.
task(:test) { puts "test" }
# Writes the Jekyll template for a canvas's IIIF annotation list, unless one
# already exists.
#
# dir      - directory that receives list.json
# name     - canvas name
# manifest - manifest name
#
# The template's front matter sets `canvas` to "<manifest>/<name>" so the label
# is unique across manifests; its Liquid body looks up the site page labelled
# "<manifest>/<name>-resources" and embeds that page's content as the list's
# resources.
def make_anno_list(dir,name,manifest)
  listpath = "#{dir}/list.json"
  return if File.exist?(listpath) # an existing list is left untouched

  puts "creating #{listpath}.\n"
  canvas_label = "#{manifest}/#{name}"
  front_matter = "---\nlayout: null\ncanvas: '#{canvas_label}'\n---\n"
  liquid_setup = '{% assign anno_name = page.canvas | append: "-resources" %}' +
                 '{% assign annotations = site.pages | where: "label", anno_name | first %}'
  list_body = '{"@context": "http://iiif.io/api/presentation/2/context.json",' +
              '"@id": "{{ site.url }}{{ site.baseurl }}/annotations/' + canvas_label + '/list.json",' +
              '"@type": "sc:AnnotationList","resources": {{ annotations.content }} }'
  File.open(listpath, 'w') { |f| f.write(front_matter + liquid_setup + list_body) }
end
# Stores a canvas's annotations as a Jekyll page at <dir>/<name>.json.
#
# dir             - directory holding the canvas's annotation files
# name            - canvas name (also the file's base name)
# manifest        - manifest name, used in the page label
# sum_annotations - Array of newly collected annotations (left unmodified)
#
# The page carries front matter with label "<manifest>/<name>-resources"
# followed by the annotations as a JSON array. When the file already exists,
# the new annotations are written first, followed by the ones already stored.
def store_anno_array(dir,name,manifest,sum_annotations)
  annopath = "#{dir}/#{name}.json"
  annotations = sum_annotations
  if File.exist?(annopath) # preexisting annotation file: merge with it
    puts "appending new annotations to " + annopath + ".\n"
    # Strip the front matter up to the closing "---" that starts a line, so a
    # "---" inside the label (manifest or canvas name) cannot end the match early.
    stored_json = File.read(annopath).sub(/\A---.*?^---[ \t]*\r?\n?/m, "")
    # Build a new array instead of concat-ing into the caller's array.
    annotations = sum_annotations + JSON.parse(stored_json)
  else
    puts "creating " + annopath + ".\n"
  end
  front_matter = "---\nlayout: null\nlabel: #{manifest}/#{name}-resources\n---\n"
  File.write(annopath, front_matter + annotations.to_json)
end
# Regenerates iiif/<manifest>/manifest.json from clean-manifest.json.
#
# manifest - manifest name (a directory under iiif/ and annotations/)
#
# Every canvas in the first sequence whose number (the digits between "$" and
# "/canvas" in its @id) matches a directory under annotations/<manifest>/ gets
# an otherContent entry pointing at that canvas's list.json. The result is
# written with Jekyll front matter so the Liquid placeholders are rendered.
def update_manifest_copy(manifest)
  canvas_number = ->(canvas) { canvas["@id"].gsub(/.+\$([0-9]+)\/canvas.*/, '\1') }

  stored_canvases = Dir['annotations/' + manifest + "/*/"].map { |canvas_dir| File.basename(canvas_dir, ".*") }
  puts "adding annotation references for canvases #{manifest}/#{stored_canvases} to manifest copy."

  clean_source = File.read("iiif/#{manifest}/clean-manifest.json")
  manifest_json = JSON.parse(clean_source.gsub(/\A---(.|\n)*?---/, "").to_s)

  manifest_json["sequences"][0]["canvases"].each do |canvas|
    this_id = canvas_number.call(canvas)
    next unless stored_canvases.include?(this_id)

    canvas["otherContent"] = [{
      "@id" => "{{ site.url }}{{ site.baseurl }}/annotations/#{manifest}/#{this_id}/list.json",
      "@type" => "sc:AnnotationList"
    }]
  end

  # embed jekyll url and baseurl in manifest id, so that it will match the uri passed to Mirador
  # this is necessary to make the preserveManifestOrder config option take effect
  manifest_json['@id'].gsub!('https://iiif.archivelab.org', '{{ site.url }}{{ site.baseurl }}')

  File.write("iiif/#{manifest}/manifest.json", "---\nlayout: null\n---\n" + JSON.pretty_generate(manifest_json))
end
# Downloads the image at url and stores it at path.
# Returns true when the image was saved, false when the server did not answer
# with a 2xx status (nothing is written in that case, so a later run retries).
def fetch_clipping_image(url, path)
  response = Net::HTTP.get_response(URI.parse(url))
  unless response.is_a?(Net::HTTPSuccess)
    warn "Could not fetch #{url}: HTTP #{response.code} #{response.message}"
    return false
  end
  File.binwrite(path, response.body) # image data: write bytes untranslated
  true
end

# Produces a cropped "clipping" image and a CSV row for every annotation found
# in the rendered manifest _site/iiif/<manifest>/manifest.json.
#
# manifest - manifest name
# site     - accepted for the caller's convenience; not used here
#
# For each canvas whose first otherContent entry is an sc:AnnotationList, the
# rendered list file is read and every resource targeting that canvas yields:
#   * an image clippings/<manifest>/<canvas number>/<label>.jpg, requested from
#     the canvas's image service for the annotation's xywh region (skipped when
#     the file already exists);
#   * a row in clippings/<manifest>/clippings.csv (written only when at least
#     one clipping was found).
def make_clippings(manifest, site)
  manifest_json = JSON.parse(File.read("_site/iiif/" + manifest + "/manifest.json"))
  # select canvases with annotations from manifest
  annotated_canvases = manifest_json['sequences'][0]['canvases'].select do |canvas|
    canvas['otherContent'] && canvas['otherContent'][0]['@type'] == 'sc:AnnotationList'
  end

  clippings = []
  annotated_canvases.each do |canvas|
    canvas_id = canvas['@id']
    # NOTE(review): the rendered list URL is mapped to a local path by replacing
    # this hard-coded host prefix; it presumably has to match the site's
    # configured url/baseurl - confirm before deploying elsewhere.
    listpath = canvas['otherContent'][0]['@id'].gsub('https://timothyarthur.github.io/', '_site')
    puts listpath
    list_json = JSON.parse(File.read(listpath))

    list_json['resources'].each do |resource|
      target = resource['on'][0]
      canvas_on = target['full']
      unless canvas_id == canvas_on
        # Report the skip: `next "message"` would silently discard the text.
        warn "Skipping annotation #{resource['@id']}: canvas ID doesn't match (#{canvas_on} != #{canvas_id})"
        next
      end

      # get clipping metadata: the tags and text that the user entered, and the selected xywh
      bodies = resource['resource']
      tags = bodies.select { |r| r['@type'] == 'oa:Tag' }.map { |r| r['chars'] }
      # strip html markup
      texts = bodies.select { |r| r['@type'] == 'dctypes:Text' }.map { |r| Sanitize.clean(r['chars']).strip }
      xywh = target['selector']['default']['value'].gsub('xywh=', '')
      canvas_num = canvas_id.gsub(/.+\$([0-9]+)\/canvas.*/, '\1')

      # label ends up like 1-photo-woman-with-film-camera-1235-134-1126-637
      # (each text is cut to 100 characters to keep file names usable in deep paths)
      label = ([canvas_num] + tags + texts.map { |text| text[0..99] } + [xywh]).join(' ').slugify

      image_root = canvas['images'][0]['resource']['service']['@id']
      clipping_url = image_root + '/' + xywh + '/full/0/default.jpg'
      clippings_path = 'clippings/' + manifest + '/' + canvas_num
      clipping_image = clippings_path + '/' + label + '.jpg'
      FileUtils.mkdir_p clippings_path

      # fetch clipping image, if not already fetched
      if File.exist?(clipping_image)
        puts "Not fetching #{clipping_image}"
      elsif fetch_clipping_image(clipping_url, clipping_image)
        puts "Fetched #{clipping_image} from #{clipping_url}"
      end

      clippings << {
        id: resource['@id'], item: manifest, canvas: canvas_id, canvasNum: canvas_num,
        tags: tags.join('|'), texts: texts.join('|'), xywh: xywh,
        clippingURL: clipping_url, clippingImage: clipping_image
      }
    end
  end

  # output clippings csv file
  return if clippings.empty?

  csv_text = CSV.generate do |csv|
    csv << clippings.first.keys
    clippings.each { |row| csv << row.values }
  end
  File.write('clippings/' + manifest + '/clippings.csv', csv_text)
end
# Registers a newly added manifest.
#
# manifest - manifest name or path; only its base name is used
#
# Copies iiif/<manifest>/manifest.json to clean-manifest.json, then adds the
# manifest to the `manifests` list in index.md's YAML front matter (creating the
# list if needed, and leaving it alone if the manifest is already listed).
# Problems reading index.md or locating its front matter are reported with
# `puts` and leave index.md untouched.
def add_new_manifest(manifest)
  manifest = File.basename(manifest)
  puts "adding new manifest " + manifest

  # create a copy of the new manifest called 'clean-manifest.json'
  manifest_dir = File.expand_path("iiif/#{manifest}")
  FileUtils.cp(File.join(manifest_dir, "manifest.json"), File.join(manifest_dir, "clean-manifest.json"))

  # parse index.md yaml front matter
  index_file = File.expand_path("index.md")
  begin
    file_contents = File.read(index_file)
  rescue StandardError => e
    puts "Error reading file: #{e.message}"
    return
  end

  front_matter_regex = /^---\s*\n(.*?\n?)^---\s*$\n?/m
  front_matter_match = front_matter_regex.match(file_contents)
  unless front_matter_match
    puts "Error parsing front matter"
    return
  end

  # An empty front matter block parses to nil/false; treat it as an empty hash.
  front_matter = YAML.safe_load(front_matter_match[1]) || {}

  # add new manifest to list (once)
  manifests = (front_matter["manifests"] ||= [])
  manifests << manifest unless manifests.include?(manifest)

  # replace index.md yaml front matter with new list
  # The block form of sub inserts the text verbatim; a replacement *string*
  # would have sequences such as \1 or \\ in the YAML treated as back-references.
  new_front_matter = YAML.dump(front_matter) + "---\n"
  File.write(index_file, file_contents.sub(front_matter_regex) { new_front_matter })
end