Skip to content

Commit

Permalink
Merge sift4 script into search.js script
Browse files Browse the repository at this point in the history
To save us from making an extra HTTP request
  • Loading branch information
zoffixznet committed Dec 23, 2016
1 parent 06930ae commit 79f541f
Show file tree
Hide file tree
Showing 4 changed files with 104 additions and 105 deletions.
1 change: 0 additions & 1 deletion html/js/main.js
Original file line number Diff line number Diff line change
Expand Up @@ -130,5 +130,4 @@ function setup_debug_mode(){
}

}

}
103 changes: 0 additions & 103 deletions html/js/sift4-common.js

This file was deleted.

1 change: 0 additions & 1 deletion template/footer.html
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,5 @@

<script type="text/javascript" src="/js/jquery-3.1.1.min.js?v=1"></script>
<script type="text/javascript" src="/js/jquery-ui.min.js?v=1"></script>
<script type="text/javascript" src="/js/sift4-common.js?v=1"></script>
<script type="text/javascript" src="/js/search.js?v=2"></script>
<script type="text/javascript" src="/js/main.js?v=2"></script>
104 changes: 104 additions & 0 deletions template/search_template.js
Original file line number Diff line number Diff line change
Expand Up @@ -112,3 +112,107 @@ $.extend( $.ui.autocomplete, {
} );
}
} );

/*
* Courtesy https://siderite.blogspot.com/2014/11/super-fast-and-accurate-string-distance.html
*/

// Sift4 - common version
// online algorithm to compute the distance between two strings in O(n)
/**
 * Compute the Sift4 distance between two strings.
 *
 * Both strings are walked with a pair of cursors. Runs of equal characters
 * are accumulated into a common-subsequence length, crossed matches are
 * counted as transpositions, and the result is
 * `max(length1, length2) - commonLength + transpositions`.
 *
 * @param {string} s1 - first string; a falsy or empty value yields the length
 *     of `s2` (or 0 when `s2` is falsy as well).
 * @param {string} s2 - second string; a falsy or empty value yields the
 *     length of `s1`.
 * @param {number} maxOffset - how many characters ahead to search for a
 *     matching character after a mismatch. When it is falsy (omitted or 0) no
 *     look-ahead is performed.
 * @param {number} [maxDistance] - when truthy, the computation stops as soon
 *     as the running distance estimate reaches this value and that estimate
 *     is returned (the strings are too different anyway).
 * @returns {number} the distance, or the running estimate on early exit.
 */
function sift4(s1, s2, maxOffset, maxDistance) {
    if (!s1 || !s1.length) {
        if (!s2) {
            return 0;
        }
        return s2.length;
    }

    if (!s2 || !s2.length) {
        return s1.length;
    }

    var l1 = s1.length;
    var l2 = s2.length;

    var c1 = 0; // cursor for string 1
    var c2 = 0; // cursor for string 2
    var lcss = 0; // largest common subsequence
    var local_cs = 0; // local common substring (current run of matches)
    var trans = 0; // number of transpositions ('ab' vs 'ba')
    var offset_arr = []; // offset pair array, for computing the transpositions
    var i;
    var ofs;
    var isTrans;
    var temporaryDistance;

    while ((c1 < l1) && (c2 < l2)) {
        if (s1.charAt(c1) === s2.charAt(c2)) {
            local_cs++;
            isTrans = false;
            // see if current match is a transposition
            i = 0;
            while (i < offset_arr.length) {
                ofs = offset_arr[i];
                if (c1 <= ofs.c1 || c2 <= ofs.c2) {
                    // when two matches cross, the one considered a transposition
                    // is the one with the largest difference in offsets
                    isTrans = Math.abs(c2 - c1) >= Math.abs(ofs.c2 - ofs.c1);
                    if (isTrans) {
                        trans++;
                    } else {
                        if (!ofs.trans) {
                            ofs.trans = true;
                            trans++;
                        }
                    }
                    break;
                } else {
                    if (c1 > ofs.c2 && c2 > ofs.c1) {
                        // this recorded match can no longer cross a later one
                        offset_arr.splice(i, 1);
                    } else {
                        i++;
                    }
                }
            }
            offset_arr.push({
                c1: c1,
                c2: c2,
                trans: isTrans
            });
        } else {
            // the run of matches ended: bank it
            lcss += local_cs;
            local_cs = 0;
            if (c1 !== c2) {
                c1 = c2 = Math.min(c1, c2); // using min allows the computation of transpositions
            }
            // if matching characters are found, remove 1 from both cursors
            // (they get incremented at the end of the loop)
            // so that we can have only one code block handling matches
            for (i = 0; i < maxOffset && (c1 + i < l1 || c2 + i < l2); i++) {
                if ((c1 + i < l1) && (s1.charAt(c1 + i) === s2.charAt(c2))) {
                    c1 += i - 1;
                    c2--;
                    break;
                }
                if ((c2 + i < l2) && (s1.charAt(c1) === s2.charAt(c2 + i))) {
                    c1--;
                    c2 += i - 1;
                    break;
                }
            }
        }
        c1++;
        c2++;
        if (maxDistance) {
            // The in-progress run (local_cs) has not been added to lcss yet, so
            // it must be subtracted too; otherwise a long run of matching
            // characters is counted as distance and even identical strings
            // would hit the cutoff.
            temporaryDistance = Math.max(c1, c2) - lcss - local_cs + trans;
            if (temporaryDistance >= maxDistance) {
                return Math.round(temporaryDistance);
            }
        }
        // this covers the case where the last match is on the last token in list,
        // so that it can compute transpositions correctly
        if ((c1 >= l1) || (c2 >= l2)) {
            lcss += local_cs;
            local_cs = 0;
            c1 = c2 = Math.min(c1, c2);
        }
    }
    lcss += local_cs;
    return Math.round(Math.max(l1, l2) - lcss + trans); // add the cost of transpositions to the final result
}

0 comments on commit 79f541f

Please sign in to comment.