-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathPublicationSearch.js
66 lines (61 loc) · 2.65 KB
/
PublicationSearch.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import _ from "lodash";
import { cachedFetch } from "./Cache.js";
import Publication from "./Publication.js";
/**
 * Turns a raw query string into a list of publications.
 *
 * If the query contains one or more DOIs (plain, as URL, or inside BibTeX),
 * those DOIs are used directly; otherwise the query is sent to the Crossref
 * works search.
 */
export default class PublicationSearch {
  /**
   * @param {string} query - Raw input: free text, DOIs, or BibTeX.
   */
  constructor(query) {
    this.query = query;
  }

  /**
   * Runs the search. Note that `this.query` is rewritten in place by the
   * DOI whitespace normalization below.
   *
   * @returns {Promise<{results: Publication[], type: string}>} `type` is
   *   "doi" when DOIs were found in the query, otherwise "search".
   */
  async execute() {
    const dois = new Set();
    const results = [];
    // removing whitespace (e.g., through line breaks) in DOIs
    this.query = this.query.replace(/(10\.\d+\/)\s?(\S{0,12})\s([^[])/g, "$1$2$3");
    // splitting query by characters that must (or partly: should) be encoded differently in DOIs or by typical prefixes
    // see: https://www.doi.org/doi_handbook/2_Numbering.html
    // "\{|\}" necessary to read DOIs from BibTeX
    const tokens = this.query.split(/ |"|%|#|\?|\{|\}|doi:|doi\.org\//);
    for (const token of tokens) {
      // cutting characters that might be included in DOI, but very unlikely at the end;
      // BibTeX escapes underscores as "\_", and a DOI may contain several of them
      const doi = _.trim(token, ".,;").replace(/\\_/g, "_");
      if (doi.startsWith("10.") && !dois.has(doi)) {
        dois.add(doi);
        const publication = new Publication(doi);
        // NOTE(review): not awaited here, as in the original; presumably the
        // publication loads its data in the background - confirm in Publication.js
        publication.fetchData();
        results.push(publication);
      }
    }
    if (dois.size > 0) {
      console.log(`Identified ${results.length} DOI(s) in input; do not perform search.`);
      return { results, type: "doi" };
    }
    // collapse every run of non-word characters into "+" so the query is URL-safe
    const simplifiedQuery = this.query.replace(/\W+/g, "+").toLowerCase();
    console.log(`Searching for publications matching '${this.query}'.`);
    // NOTE(review): the mailto part of this URL looks like an obfuscated
    // e-mail placeholder rather than a real parameter - confirm against the repository
    await cachedFetch(
      `https://api.crossref.org/works?query=${simplifiedQuery}&[email protected]&filter=has-references:true`,
      (data) => {
        // only items that carry a title are turned into publications
        data.message.items
          .filter((item) => item.title)
          .forEach((item) => {
            const publication = new Publication(item.DOI);
            publication.fetchData();
            results.push(publication);
          });
      }
    );
    return { results, type: "search" };
  }

  /**
   * Computes the share of relevant query words that occur as whole words in
   * the title. Words of length <= 2 and a few stopwords are ignored.
   *
   * @param {string} query - Lower-case query with words separated by "+".
   * @param {string} title - Publication title (any casing/punctuation).
   * @returns {number} Value in [0, 1]; 0 when the query contains no relevant
   *   word (previously this case produced NaN through a division by zero).
   */
  computeTitleSimilarity(query, title) {
    const stopwords = ["the", "for", "with", "and"]; // for words with length > 2
    // pad with spaces so that " word " only matches whole words
    const normalizedTitle = ` ${title.replace(/\W+/g, " ").toLowerCase()} `;
    let equivalentWordCounter = 0;
    let wordCounter = 0;
    for (const word of query.split("+")) {
      if (word.length > 2 && !stopwords.includes(word)) {
        wordCounter++;
        if (normalizedTitle.includes(` ${word} `)) {
          equivalentWordCounter++;
        }
      }
    }
    if (wordCounter === 0) {
      return 0;
    }
    return equivalentWordCounter / wordCounter;
  }
}