-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathwright-bl-for-mpenn.xq
114 lines (101 loc) · 3.94 KB
/
wright-bl-for-mpenn.xq
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
xquery version "3.0";
import module namespace functx="http://www.functx.com";
declare default element namespace "http://www.tei-c.org/ns/1.0";
declare variable $local:in-coll := collection("/home/arren/Documents/GitHub/britishLibrary-data/data/tei/");
(:
Builds one CSV row from a manuscript description.

Takes either an msDesc with no parts or a single msPart and returns a row
element whose children become the CSV columns, in this order:
- msUri (the $msUri argument, passed through unchanged)
- partUri
- shelfmark
- origDateLabel, origDateNotBefore, origDateNotAfter, origDateWhen
  (these four are produced by local:get-origDate)
- wrightGenre
- material
- extentFolios
- form
- physicalDescription
- archiveLink (target of the internet-archive-pdf ref)
- wrightVolume
- wrightPages

$part  : the msDesc or msPart to summarise
$msUri : URI of the manuscript, repeated on every row
:)
declare function local:get-data-from-part-or-desc($part as node(), $msUri as xs:string) as node()
{
  let $identifier := $part/msIdentifier
  let $partUri := $identifier/idno[@type="URI"]/text()
  let $shelfmark := $identifier/altIdentifier/idno[@type="BL-Shelfmark"]/text()
  let $origDate := local:get-origDate($part/history/origin/origDate)
  let $wrightGenre := $part/head/listRelation[@type="Wright-BL-Taxonomy"]/relation/desc/text()
  let $objectDesc := $part/physDesc/objectDesc
  let $material := $objectDesc/supportDesc/@material/string()
  let $extentFolios := $objectDesc/supportDesc/extent/measure[@type="composition"][@unit="leaf"]/text()
  let $form := $objectDesc/@form/string()
  (: flatten every text node below the physDesc paragraphs into one whitespace-normalised string :)
  let $physDesc := normalize-space(string-join($part/physDesc/p//text(), " "))
  let $wrightBibl := $part/additional/listBibl/bibl
  let $archiveLink := $wrightBibl/ref[@type="internet-archive-pdf"]/@target/string()
  (: the pp citedRange is split at its first colon: volume before, pages after;
     a value without a colon leaves both columns empty :)
  let $citedRange := $wrightBibl/citedRange[@unit="pp"]/text()
  let $wrightVolume := substring-before($citedRange, ":")
  let $wrightPages := normalize-space(substring-after($citedRange, ":"))
  return
    <row>
      <msUri>{$msUri}</msUri>
      <partUri>{$partUri}</partUri>
      <shelfmark>{$shelfmark}</shelfmark>
      {$origDate}
      <wrightGenre>{$wrightGenre}</wrightGenre>
      <material>{$material}</material>
      <extentFolios>{$extentFolios}</extentFolios>
      <form>{$form}</form>
      <physicalDescription>{$physDesc}</physicalDescription>
      <archiveLink>{$archiveLink}</archiveLink>
      <wrightVolume>{$wrightVolume}</wrightVolume>
      <wrightPages>{$wrightPages}</wrightPages>
    </row>
};
(:
Picks the origDate element(s) to report and turns them into four column elements.

Selection rules:
- no origDate: nothing is selected, so all four columns come out empty;
- exactly one origDate: it is used unless it carries a @datingMethod attribute
  (that attribute marks a non-Gregorian date);
- several origDates: only those with @calendar="Gregorian" are used.

TODO: the several-origDates branch may still need checking.
TODO: handle origDates that have only attributes and no text node?

Returns origDateLabel, origDateNotBefore, origDateNotAfter and origDateWhen, in that order.
:)
declare function local:get-origDate($origDate as node()*) as node()*
{
  let $selected :=
    switch (count($origDate))
      case 0 return ()
      case 1 return $origDate[not(@datingMethod)]
      default return $origDate[@calendar="Gregorian"]
  (: all descendant text nodes, space-joined and whitespace-normalised :)
  let $label := normalize-space(string-join($selected//text(), " "))
  let $notBefore := $selected/@notBefore ! string(.)
  let $notAfter := $selected/@notAfter ! string(.)
  let $when := $selected/@when ! string(.)
  return
    (
      <origDateLabel>{$label}</origDateLabel>,
      <origDateNotBefore>{$notBefore}</origDateNotBefore>,
      <origDateNotAfter>{$notAfter}</origDateNotAfter>,
      <origDateWhen>{$when}</origDateWhen>
    )
};
(: Main query: emit one row per msPart, or one row for the msDesc itself when
   the document has no msPart, then serialise all rows as CSV with a header line. :)
let $records :=
  for $teiDoc in $local:in-coll
  let $descriptions := $teiDoc//msDesc
  let $parts := $teiDoc//msPart
  (: the manuscript-level URI is repeated on every row of the document :)
  let $msUri := $descriptions/msIdentifier/idno[@type="URI"]/text()
  return
    if (empty($parts))
    then local:get-data-from-part-or-desc($descriptions, $msUri)
    else $parts ! local:get-data-from-part-or-desc(., $msUri)
return csv:serialize(<csv>{$records}</csv>, map {"header": "yes"})
(: return
try {
if($doc//msPart) then
for $part in $doc//msPart
return local:get-data-from-part-or-desc($part, $msUri)
else
local:get-data-from-part-or-desc($doc//msDesc, $msUri) }
catch*
{
let $failure :=
element {"failure"} {
element {"code"} {$err:code},
element {"description"} {$err:description},
element {"value"} {$err:value},
element {"module"} {$err:module},
element {"location"} {$err:line-number||": "||$err:column-number},
element {"additional"} {$err:additional},
element {"msUri"} {$msUri}
}
return $failure
} :)