From 3b861dc286349155fe4142bce46756bdae0aec2e Mon Sep 17 00:00:00 2001 From: Leon Sorokin Date: Sat, 9 Dec 2023 16:29:29 -0600 Subject: [PATCH] remove fuzziness from number segments in SingleError mode. close #50. --- demos/testdata.json | 9 ++- dist/uFuzzy.cjs.js | 143 ++++++++++++++++++++++------------------ dist/uFuzzy.esm.js | 143 ++++++++++++++++++++++------------------ dist/uFuzzy.iife.js | 143 ++++++++++++++++++++++------------------ dist/uFuzzy.iife.min.js | 2 +- package.json | 2 +- src/uFuzzy.js | 143 ++++++++++++++++++++++------------------ 7 files changed, 322 insertions(+), 263 deletions(-) diff --git a/demos/testdata.json b/demos/testdata.json index a87fe05..6e79722 100644 --- a/demos/testdata.json +++ b/demos/testdata.json @@ -161900,6 +161900,13 @@ "который", "Alle må holde i et tau og være sikret slik at de er trygge. Etter isbreen må de gå i en ganske stor steinrøys og så er de endelig fremme.", "interface-id-face-scan-2-identification-angle-secure-human-id-person-face-security-brackets", - "Sabine State Bank and Trust Company" + "Sabine State Bank and Trust Company", + "abc1234", + "abc2134", + "ab1c234", + "abc 1234", + "abc123acb", + "abc123acb supper", + "1234" ] } \ No newline at end of file diff --git a/dist/uFuzzy.cjs.js b/dist/uFuzzy.cjs.js index a25188a..241a849 100644 --- a/dist/uFuzzy.cjs.js +++ b/dist/uFuzzy.cjs.js @@ -165,27 +165,30 @@ function uFuzzy(opts) { _intraTrn = 0, _intraDel = 0; - let plen = p.length; - - // prevent junk matches by requiring stricter rules for short terms - if (plen <= 4) { - if (plen >= 3) { - // one swap in non-first char when 3-4 chars - _intraTrn = Math.min(intraTrn, 1); - - // or one insertion when 4 chars - if (plen == 4) - _intraIns = Math.min(intraIns, 1); + // only-digits strings should match exactly, else special rules for short strings + if (/[^\d]/.test(p)) { + let plen = p.length; + + // prevent junk matches by requiring stricter rules for short terms + if (plen <= 4) { + if (plen >= 3) { + // one swap in non-first char when 3-4 chars + _intraTrn = Math.min(intraTrn, 1); + + // or one insertion when 4 chars + if (plen == 4) + _intraIns = Math.min(intraIns, 1); + } + // else exact match when 1-2 chars + } + // use supplied opts + else { + _intraSlice = intraSlice; + _intraIns = intraIns, + _intraSub = intraSub, + _intraTrn = intraTrn, + _intraDel = intraDel; } - // else exact match when 1-2 chars - } - // use supplied opts - else { - _intraSlice = intraSlice; - _intraIns = intraIns, - _intraSub = intraSub, - _intraTrn = intraTrn, - _intraDel = intraDel; } return { @@ -223,6 +226,8 @@ function uFuzzy(opts) { return needle.split(interSplit).filter(t => t != '').map(v => v === EXACT_HERE ? exacts[j++] : v); }; + const NUM_OR_ALPHA_RE = /[^\d]+|\d+/g; + const prepQuery = (needle, capt = 0, interOR = false) => { // split on punct, whitespace, num-alpha, and upper-lower boundaries let parts = split(needle); @@ -243,64 +248,72 @@ function uFuzzy(opts) { // allows single mutations within each term if (intraMode == 1) { reTpl = parts.map((p, pi) => { - let { - intraSlice, - intraIns, - intraSub, - intraTrn, - intraDel, - } = intraRules(p); - - if (intraIns + intraSub + intraTrn + intraDel == 0) - return p + contrs[pi]; - if (p[0] === '"') return escapeRegExp(p.slice(1, -1)); - let [lftIdx, rgtIdx] = intraSlice; - let lftChar = p.slice(0, lftIdx); // prefix - let rgtChar = p.slice(rgtIdx); // suffix + let reTpl = ''; - let chars = p.slice(lftIdx, rgtIdx); + // split into numeric and alpha parts, so numbers are only matched as following punct or alpha boundaries, without swaps or insertions + for (let m of p.matchAll(NUM_OR_ALPHA_RE)) { + let p = m[0]; - // neg lookahead to prefer matching 'Test' instead of 'tTest' in ManifestTest or fittest - // but skip when search term contains leading repetition (aardvark, aaa) - if (intraIns == 1 && lftChar.length == 1 && lftChar != chars[0]) - lftChar += '(?!' + lftChar + ')'; + let { + intraSlice, + intraIns, + intraSub, + intraTrn, + intraDel, + } = intraRules(p); - let numChars = chars.length; + if (intraIns + intraSub + intraTrn + intraDel == 0) + reTpl += p + contrs[pi]; + else { + let [lftIdx, rgtIdx] = intraSlice; + let lftChar = p.slice(0, lftIdx); // prefix + let rgtChar = p.slice(rgtIdx); // suffix - let variants = [p]; + let chars = p.slice(lftIdx, rgtIdx); - // variants with single char substitutions - if (intraSub) { - for (let i = 0; i < numChars; i++) - variants.push(lftChar + chars.slice(0, i) + intraChars + chars.slice(i + 1) + rgtChar); - } + // neg lookahead to prefer matching 'Test' instead of 'tTest' in ManifestTest or fittest + // but skip when search term contains leading repetition (aardvark, aaa) + if (intraIns == 1 && lftChar.length == 1 && lftChar != chars[0]) + lftChar += '(?!' + lftChar + ')'; - // variants with single transpositions - if (intraTrn) { - for (let i = 0; i < numChars - 1; i++) { - if (chars[i] != chars[i+1]) - variants.push(lftChar + chars.slice(0, i) + chars[i+1] + chars[i] + chars.slice(i + 2) + rgtChar); - } - } + let numChars = chars.length; - // variants with single char omissions - if (intraDel) { - for (let i = 0; i < numChars; i++) - variants.push(lftChar + chars.slice(0, i + 1) + '?' + chars.slice(i + 1) + rgtChar); - } + let variants = [p]; - // variants with single char insertions - if (intraIns) { - let intraInsTpl = lazyRepeat(intraChars, 1); + // variants with single char substitutions + if (intraSub) { + for (let i = 0; i < numChars; i++) + variants.push(lftChar + chars.slice(0, i) + intraChars + chars.slice(i + 1) + rgtChar); + } - for (let i = 0; i < numChars; i++) - variants.push(lftChar + chars.slice(0, i) + intraInsTpl + chars.slice(i) + rgtChar); - } + // variants with single transpositions + if (intraTrn) { + for (let i = 0; i < numChars - 1; i++) { + if (chars[i] != chars[i+1]) + variants.push(lftChar + chars.slice(0, i) + chars[i+1] + chars[i] + chars.slice(i + 2) + rgtChar); + } + } - let reTpl = '(?:' + variants.join('|') + ')' + contrs[pi]; + // variants with single char omissions + if (intraDel) { + for (let i = 0; i < numChars; i++) + variants.push(lftChar + chars.slice(0, i + 1) + '?' + chars.slice(i + 1) + rgtChar); + } + + // variants with single char insertions + if (intraIns) { + let intraInsTpl = lazyRepeat(intraChars, 1); + + for (let i = 0; i < numChars; i++) + variants.push(lftChar + chars.slice(0, i) + intraInsTpl + chars.slice(i) + rgtChar); + } + + reTpl += '(?:' + variants.join('|') + ')' + contrs[pi]; + } + } // console.log(reTpl); diff --git a/dist/uFuzzy.esm.js b/dist/uFuzzy.esm.js index 806b751..619c7a1 100644 --- a/dist/uFuzzy.esm.js +++ b/dist/uFuzzy.esm.js @@ -163,27 +163,30 @@ function uFuzzy(opts) { _intraTrn = 0, _intraDel = 0; - let plen = p.length; - - // prevent junk matches by requiring stricter rules for short terms - if (plen <= 4) { - if (plen >= 3) { - // one swap in non-first char when 3-4 chars - _intraTrn = Math.min(intraTrn, 1); - - // or one insertion when 4 chars - if (plen == 4) - _intraIns = Math.min(intraIns, 1); + // only-digits strings should match exactly, else special rules for short strings + if (/[^\d]/.test(p)) { + let plen = p.length; + + // prevent junk matches by requiring stricter rules for short terms + if (plen <= 4) { + if (plen >= 3) { + // one swap in non-first char when 3-4 chars + _intraTrn = Math.min(intraTrn, 1); + + // or one insertion when 4 chars + if (plen == 4) + _intraIns = Math.min(intraIns, 1); + } + // else exact match when 1-2 chars + } + // use supplied opts + else { + _intraSlice = intraSlice; + _intraIns = intraIns, + _intraSub = intraSub, + _intraTrn = intraTrn, + _intraDel = intraDel; } - // else exact match when 1-2 chars - } - // use supplied opts - else { - _intraSlice = intraSlice; - _intraIns = intraIns, - _intraSub = intraSub, - _intraTrn = intraTrn, - _intraDel = intraDel; } return { @@ -221,6 +224,8 @@ function uFuzzy(opts) { return needle.split(interSplit).filter(t => t != '').map(v => v === EXACT_HERE ? exacts[j++] : v); }; + const NUM_OR_ALPHA_RE = /[^\d]+|\d+/g; + const prepQuery = (needle, capt = 0, interOR = false) => { // split on punct, whitespace, num-alpha, and upper-lower boundaries let parts = split(needle); @@ -241,64 +246,72 @@ function uFuzzy(opts) { // allows single mutations within each term if (intraMode == 1) { reTpl = parts.map((p, pi) => { - let { - intraSlice, - intraIns, - intraSub, - intraTrn, - intraDel, - } = intraRules(p); - - if (intraIns + intraSub + intraTrn + intraDel == 0) - return p + contrs[pi]; - if (p[0] === '"') return escapeRegExp(p.slice(1, -1)); - let [lftIdx, rgtIdx] = intraSlice; - let lftChar = p.slice(0, lftIdx); // prefix - let rgtChar = p.slice(rgtIdx); // suffix + let reTpl = ''; - let chars = p.slice(lftIdx, rgtIdx); + // split into numeric and alpha parts, so numbers are only matched as following punct or alpha boundaries, without swaps or insertions + for (let m of p.matchAll(NUM_OR_ALPHA_RE)) { + let p = m[0]; - // neg lookahead to prefer matching 'Test' instead of 'tTest' in ManifestTest or fittest - // but skip when search term contains leading repetition (aardvark, aaa) - if (intraIns == 1 && lftChar.length == 1 && lftChar != chars[0]) - lftChar += '(?!' + lftChar + ')'; + let { + intraSlice, + intraIns, + intraSub, + intraTrn, + intraDel, + } = intraRules(p); - let numChars = chars.length; + if (intraIns + intraSub + intraTrn + intraDel == 0) + reTpl += p + contrs[pi]; + else { + let [lftIdx, rgtIdx] = intraSlice; + let lftChar = p.slice(0, lftIdx); // prefix + let rgtChar = p.slice(rgtIdx); // suffix - let variants = [p]; + let chars = p.slice(lftIdx, rgtIdx); - // variants with single char substitutions - if (intraSub) { - for (let i = 0; i < numChars; i++) - variants.push(lftChar + chars.slice(0, i) + intraChars + chars.slice(i + 1) + rgtChar); - } + // neg lookahead to prefer matching 'Test' instead of 'tTest' in ManifestTest or fittest + // but skip when search term contains leading repetition (aardvark, aaa) + if (intraIns == 1 && lftChar.length == 1 && lftChar != chars[0]) + lftChar += '(?!' + lftChar + ')'; - // variants with single transpositions - if (intraTrn) { - for (let i = 0; i < numChars - 1; i++) { - if (chars[i] != chars[i+1]) - variants.push(lftChar + chars.slice(0, i) + chars[i+1] + chars[i] + chars.slice(i + 2) + rgtChar); - } - } + let numChars = chars.length; - // variants with single char omissions - if (intraDel) { - for (let i = 0; i < numChars; i++) - variants.push(lftChar + chars.slice(0, i + 1) + '?' + chars.slice(i + 1) + rgtChar); - } + let variants = [p]; - // variants with single char insertions - if (intraIns) { - let intraInsTpl = lazyRepeat(intraChars, 1); + // variants with single char substitutions + if (intraSub) { + for (let i = 0; i < numChars; i++) + variants.push(lftChar + chars.slice(0, i) + intraChars + chars.slice(i + 1) + rgtChar); + } - for (let i = 0; i < numChars; i++) - variants.push(lftChar + chars.slice(0, i) + intraInsTpl + chars.slice(i) + rgtChar); - } + // variants with single transpositions + if (intraTrn) { + for (let i = 0; i < numChars - 1; i++) { + if (chars[i] != chars[i+1]) + variants.push(lftChar + chars.slice(0, i) + chars[i+1] + chars[i] + chars.slice(i + 2) + rgtChar); + } + } - let reTpl = '(?:' + variants.join('|') + ')' + contrs[pi]; + // variants with single char omissions + if (intraDel) { + for (let i = 0; i < numChars; i++) + variants.push(lftChar + chars.slice(0, i + 1) + '?' + chars.slice(i + 1) + rgtChar); + } + + // variants with single char insertions + if (intraIns) { + let intraInsTpl = lazyRepeat(intraChars, 1); + + for (let i = 0; i < numChars; i++) + variants.push(lftChar + chars.slice(0, i) + intraInsTpl + chars.slice(i) + rgtChar); + } + + reTpl += '(?:' + variants.join('|') + ')' + contrs[pi]; + } + } // console.log(reTpl); diff --git a/dist/uFuzzy.iife.js b/dist/uFuzzy.iife.js index 244d2b8..08d1f4d 100644 --- a/dist/uFuzzy.iife.js +++ b/dist/uFuzzy.iife.js @@ -166,27 +166,30 @@ var uFuzzy = (function () { _intraTrn = 0, _intraDel = 0; - let plen = p.length; - - // prevent junk matches by requiring stricter rules for short terms - if (plen <= 4) { - if (plen >= 3) { - // one swap in non-first char when 3-4 chars - _intraTrn = Math.min(intraTrn, 1); - - // or one insertion when 4 chars - if (plen == 4) - _intraIns = Math.min(intraIns, 1); + // only-digits strings should match exactly, else special rules for short strings + if (/[^\d]/.test(p)) { + let plen = p.length; + + // prevent junk matches by requiring stricter rules for short terms + if (plen <= 4) { + if (plen >= 3) { + // one swap in non-first char when 3-4 chars + _intraTrn = Math.min(intraTrn, 1); + + // or one insertion when 4 chars + if (plen == 4) + _intraIns = Math.min(intraIns, 1); + } + // else exact match when 1-2 chars + } + // use supplied opts + else { + _intraSlice = intraSlice; + _intraIns = intraIns, + _intraSub = intraSub, + _intraTrn = intraTrn, + _intraDel = intraDel; } - // else exact match when 1-2 chars - } - // use supplied opts - else { - _intraSlice = intraSlice; - _intraIns = intraIns, - _intraSub = intraSub, - _intraTrn = intraTrn, - _intraDel = intraDel; } return { @@ -224,6 +227,8 @@ var uFuzzy = (function () { return needle.split(interSplit).filter(t => t != '').map(v => v === EXACT_HERE ? exacts[j++] : v); }; + const NUM_OR_ALPHA_RE = /[^\d]+|\d+/g; + const prepQuery = (needle, capt = 0, interOR = false) => { // split on punct, whitespace, num-alpha, and upper-lower boundaries let parts = split(needle); @@ -244,64 +249,72 @@ var uFuzzy = (function () { // allows single mutations within each term if (intraMode == 1) { reTpl = parts.map((p, pi) => { - let { - intraSlice, - intraIns, - intraSub, - intraTrn, - intraDel, - } = intraRules(p); - - if (intraIns + intraSub + intraTrn + intraDel == 0) - return p + contrs[pi]; - if (p[0] === '"') return escapeRegExp(p.slice(1, -1)); - let [lftIdx, rgtIdx] = intraSlice; - let lftChar = p.slice(0, lftIdx); // prefix - let rgtChar = p.slice(rgtIdx); // suffix + let reTpl = ''; - let chars = p.slice(lftIdx, rgtIdx); + // split into numeric and alpha parts, so numbers are only matched as following punct or alpha boundaries, without swaps or insertions + for (let m of p.matchAll(NUM_OR_ALPHA_RE)) { + let p = m[0]; - // neg lookahead to prefer matching 'Test' instead of 'tTest' in ManifestTest or fittest - // but skip when search term contains leading repetition (aardvark, aaa) - if (intraIns == 1 && lftChar.length == 1 && lftChar != chars[0]) - lftChar += '(?!' + lftChar + ')'; + let { + intraSlice, + intraIns, + intraSub, + intraTrn, + intraDel, + } = intraRules(p); - let numChars = chars.length; + if (intraIns + intraSub + intraTrn + intraDel == 0) + reTpl += p + contrs[pi]; + else { + let [lftIdx, rgtIdx] = intraSlice; + let lftChar = p.slice(0, lftIdx); // prefix + let rgtChar = p.slice(rgtIdx); // suffix - let variants = [p]; + let chars = p.slice(lftIdx, rgtIdx); - // variants with single char substitutions - if (intraSub) { - for (let i = 0; i < numChars; i++) - variants.push(lftChar + chars.slice(0, i) + intraChars + chars.slice(i + 1) + rgtChar); - } + // neg lookahead to prefer matching 'Test' instead of 'tTest' in ManifestTest or fittest + // but skip when search term contains leading repetition (aardvark, aaa) + if (intraIns == 1 && lftChar.length == 1 && lftChar != chars[0]) + lftChar += '(?!' + lftChar + ')'; - // variants with single transpositions - if (intraTrn) { - for (let i = 0; i < numChars - 1; i++) { - if (chars[i] != chars[i+1]) - variants.push(lftChar + chars.slice(0, i) + chars[i+1] + chars[i] + chars.slice(i + 2) + rgtChar); - } - } + let numChars = chars.length; - // variants with single char omissions - if (intraDel) { - for (let i = 0; i < numChars; i++) - variants.push(lftChar + chars.slice(0, i + 1) + '?' + chars.slice(i + 1) + rgtChar); - } + let variants = [p]; - // variants with single char insertions - if (intraIns) { - let intraInsTpl = lazyRepeat(intraChars, 1); + // variants with single char substitutions + if (intraSub) { + for (let i = 0; i < numChars; i++) + variants.push(lftChar + chars.slice(0, i) + intraChars + chars.slice(i + 1) + rgtChar); + } - for (let i = 0; i < numChars; i++) - variants.push(lftChar + chars.slice(0, i) + intraInsTpl + chars.slice(i) + rgtChar); - } + // variants with single transpositions + if (intraTrn) { + for (let i = 0; i < numChars - 1; i++) { + if (chars[i] != chars[i+1]) + variants.push(lftChar + chars.slice(0, i) + chars[i+1] + chars[i] + chars.slice(i + 2) + rgtChar); + } + } - let reTpl = '(?:' + variants.join('|') + ')' + contrs[pi]; + // variants with single char omissions + if (intraDel) { + for (let i = 0; i < numChars; i++) + variants.push(lftChar + chars.slice(0, i + 1) + '?' + chars.slice(i + 1) + rgtChar); + } + + // variants with single char insertions + if (intraIns) { + let intraInsTpl = lazyRepeat(intraChars, 1); + + for (let i = 0; i < numChars; i++) + variants.push(lftChar + chars.slice(0, i) + intraInsTpl + chars.slice(i) + rgtChar); + } + + reTpl += '(?:' + variants.join('|') + ')' + contrs[pi]; + } + } // console.log(reTpl); diff --git a/dist/uFuzzy.iife.min.js b/dist/uFuzzy.iife.min.js index b0cc281..fd90954 100644 --- a/dist/uFuzzy.iife.min.js +++ b/dist/uFuzzy.iife.min.js @@ -1,2 +1,2 @@ /*! https://github.com/leeoniya/uFuzzy (v1.0.11) */ -var uFuzzy=function(){"use strict";const e=new Intl.Collator("en").compare,t=1/0,l=e=>e.replace(/[.*+?^${}()|[\]\\]/g,"\\$&"),n="eexxaacctt",r=(e,t,l)=>e.replace("A-Z",t).replace("a-z",l),i={unicode:!1,alpha:null,interSplit:"[^A-Za-z\\d']+",intraSplit:"[a-z][A-Z]",intraBound:"[A-Za-z]\\d|\\d[A-Za-z]|[a-z][A-Z]",interLft:0,interRgt:0,interChars:".",interIns:t,intraChars:"[a-z\\d']",intraIns:0,intraContr:"'[a-z]{1,2}\\b",intraMode:0,intraSlice:[1,t],intraSub:0,intraTrn:0,intraDel:0,intraFilt:()=>!0,sort:(t,l)=>{let{idx:n,chars:r,terms:i,interLft2:s,interLft1:a,start:g,intraIns:h,interIns:f}=t;return n.map(((e,t)=>t)).sort(((t,u)=>r[u]-r[t]||h[t]-h[u]||i[u]+s[u]+.5*a[u]-(i[t]+s[t]+.5*a[t])||f[t]-f[u]||g[t]-g[u]||e(l[n[t]],l[n[u]])))}},s=(e,l)=>0==l?"":1==l?e+"??":l==t?e+"*?":e+`{0,${l}}?`,a="(?:\\b|_)";function g(e){e=Object.assign({},i,e);let{unicode:t,interLft:g,interRgt:h,intraMode:u,intraSlice:c,intraIns:o,intraSub:p,intraTrn:x,intraDel:m,intraContr:d,intraSplit:R,interSplit:b,intraBound:L,intraChars:S}=e,A=e.letters??e.alpha;if(null!=A){let e=A.toLocaleUpperCase(),t=A.toLocaleLowerCase();b=r(b,e,t),R=r(R,e,t),L=r(L,e,t),S=r(S,e,t),d=r(d,e,t)}let E=t?"u":"";const I='".+?"',z=RegExp(I,"gi"+E),C=RegExp(`(?:\\s+|^)-(?:${S}+|${I})`,"gi"+E);let{intraRules:k}=e;null==k&&(k=e=>{let t=i.intraSlice,l=0,n=0,r=0,s=0,a=e.length;return a>4?(t=c,l=o,n=p,r=x,s=m):3>a||(r=Math.min(x,1),4==a&&(l=Math.min(o,1))),{intraSlice:t,intraIns:l,intraSub:n,intraTrn:r,intraDel:s}});let y=!!R,j=RegExp(R,"g"+E),$=RegExp(b,"g"+E),w=RegExp("^"+b+"|"+b+"$","g"+E),Z=RegExp(d,"gi"+E);const M=e=>{let t=[];e=(e=e.replace(z,(e=>(t.push(e),n)))).replace(w,"").toLocaleLowerCase(),y&&(e=e.replace(j,(e=>e[0]+" "+e[1])));let l=0;return e.split($).filter((e=>""!=e)).map((e=>e===n?t[l++]:e))},D=(t,n=0,r=!1)=>{let i=M(t);if(0==i.length)return[];let f,c=Array(i.length).fill("");if(i=i.map(((e,t)=>e.replace(Z,(e=>(c[t]=e,""))))),1==u)f=i.map(((e,t)=>{let{intraSlice:n,intraIns:r,intraSub:i,intraTrn:a,intraDel:g}=k(e);if(r+i+a+g==0)return e+c[t];if('"'===e[0])return l(e.slice(1,-1));let[h,f]=n,u=e.slice(0,h),o=e.slice(f),p=e.slice(h,f);1==r&&1==u.length&&u!=p[0]&&(u+="(?!"+u+")");let x=p.length,m=[e];if(i)for(let e=0;x>e;e++)m.push(u+p.slice(0,e)+S+p.slice(e+1)+o);if(a)for(let e=0;x-1>e;e++)p[e]!=p[e+1]&&m.push(u+p.slice(0,e)+p[e+1]+p[e]+p.slice(e+2)+o);if(g)for(let e=0;x>e;e++)m.push(u+p.slice(0,e+1)+"?"+p.slice(e+1)+o);if(r){let e=s(S,1);for(let t=0;x>t;t++)m.push(u+p.slice(0,t)+e+p.slice(t)+o)}return"(?:"+m.join("|")+")"+c[t]}));else{let e=s(S,o);2==n&&o>0&&(e=")("+e+")("),f=i.map(((t,n)=>'"'===t[0]?l(t.slice(1,-1)):t.split("").map(((e,t,l)=>(1==o&&0==t&&l.length>1&&e!=l[t+1]&&(e+="(?!"+e+")"),e))).join(e)+c[n]))}let p=2==g?a:"",x=2==h?a:"",m=x+s(e.interChars,e.interIns)+p;return n>0?r?f=p+"("+f.join(")"+x+"|"+p+"(")+")"+x:(f="("+f.join(")("+m+")(")+")",f="(.??"+p+")"+f+"("+x+".*)"):(f=f.join(m),f=p+f+x),[RegExp(f,"i"+E),i,c]},T=(e,t,l)=>{let[n]=D(t);if(null==n)return null;let r=[];if(null!=l)for(let t=0;l.length>t;t++){let i=l[t];n.test(e[i])&&r.push(i)}else for(let t=0;e.length>t;t++)n.test(e[t])&&r.push(t);return r};let F=!!L,O=RegExp(b,E),B=RegExp(L,E);const U=(t,l,n)=>{let[r,i,s]=D(n,1),[a]=D(n,2),f=i.length,u=t.length,c=Array(u).fill(0),o={idx:Array(u),start:c.slice(),chars:c.slice(),terms:c.slice(),interIns:c.slice(),intraIns:c.slice(),interLft2:c.slice(),interRgt2:c.slice(),interLft1:c.slice(),interRgt1:c.slice(),ranges:Array(u)},p=1==g||1==h,x=0;for(let n=0;t.length>n;n++){let u=l[t[n]],c=u.match(r),m=c.index+c[1].length,d=m,R=!1,b=0,L=0,S=0,A=0,I=0,z=0,C=0,k=0,y=[];for(let t=0,l=2;f>t;t++,l+=2){let n=c[l].toLocaleLowerCase(),r=i[t],a='"'==r[0]?r.slice(1,-1):r+s[t],o=a.length,x=n.length,j=n==a;if(!j&&c[l+1].length>=o){let e=c[l+1].toLocaleLowerCase().indexOf(a);e>-1&&(y.push(d,x,e,o),d+=v(c,l,e,o),n=a,x=o,j=!0,0==t&&(m=d))}if(p||j){let e=d-1,r=d+x,i=!1,s=!1;if(-1==e||O.test(u[e]))j&&b++,i=!0;else{if(2==g){R=!0;break}if(F&&B.test(u[e]+u[e+1]))j&&L++,i=!0;else if(1==g){let e=c[l+1],r=d+x;if(e.length>=o){let s,g=0,h=!1,f=RegExp(a,"ig"+E);for(;s=f.exec(e);){g=s.index;let e=r+g,t=e-1;if(-1==t||O.test(u[t])){b++,h=!0;break}if(B.test(u[t]+u[e])){L++,h=!0;break}}h&&(i=!0,y.push(d,x,g,o),d+=v(c,l,g,o),n=a,x=o,j=!0,0==t&&(m=d))}if(!i){R=!0;break}}}if(r==u.length||O.test(u[r]))j&&S++,s=!0;else{if(2==h){R=!0;break}if(F&&B.test(u[r-1]+u[r]))j&&A++,s=!0;else if(1==h){R=!0;break}}j&&(I+=o,i&&s&&z++)}if(x>o&&(k+=x-o),t>0&&(C+=c[l-1].length),!e.intraFilt(a,n,d)){R=!0;break}f-1>t&&(d+=x+c[l+1].length)}if(!R){o.idx[x]=t[n],o.interLft2[x]=b,o.interLft1[x]=L,o.interRgt2[x]=S,o.interRgt1[x]=A,o.chars[x]=I,o.terms[x]=z,o.interIns[x]=C,o.intraIns[x]=k,o.start[x]=m;let e=u.match(a),l=e.index+e[1].length,r=y.length,i=r>0?0:1/0,s=r-4;for(let t=2;e.length>t;)if(i>s||y[i]!=l)l+=e[t].length,t++;else{let n=y[i+1],r=y[i+2],s=y[i+3],a=t,g="";for(let t=0;n>t;a++)g+=e[a],t+=e[a].length;e.splice(t,a-t,g),l+=v(e,t,r,s),i+=4}l=e.index+e[1].length;let g=o.ranges[x]=[],h=l,f=l;for(let t=2;e.length>t;t++){let n=e[t].length;l+=n,t%2==0?f=l:n>0&&(g.push(h,f),h=f=l)}f>h&&g.push(h,f),x++}}if(t.length>x)for(let e in o)o[e]=o[e].slice(0,x);return o},v=(e,t,l,n)=>{let r=e[t]+e[t+1].slice(0,l);return e[t-1]+=r,e[t]=e[t+1].slice(l,l+n),e[t+1]=e[t+1].slice(l+n),r.length};return{search:(...t)=>((t,n,r,i=1e3,s)=>{r=r?!0===r?5:r:0;let a=null,g=null,h=[];n=n.replace(C,(e=>{let t=e.trim().slice(1);return'"'===t[0]&&(t=l(t.slice(1,-1))),h.push(t),""}));let u,c=M(n);if(h.length>0){if(u=RegExp(h.join("|"),"i"+E),0==c.length){let e=[];for(let l=0;t.length>l;l++)u.test(t[l])||e.push(l);return[e,null,null]}}else if(0==c.length)return[null,null,null];if(r>0){let e=M(n);if(e.length>1){let l=e.slice().sort(((e,t)=>t.length-e.length));for(let e=0;l.length>e;e++){if(0==s?.length)return[[],null,null];s=T(t,l[e],s)}if(e.length>r)return[s,null,null];a=f(e).map((e=>e.join(" "))),g=[];let n=new Set;for(let e=0;a.length>e;e++)if(s.length>n.size){let l=s.filter((e=>!n.has(e))),r=T(t,a[e],l);for(let e=0;r.length>e;e++)n.add(r[e]);g.push(r)}else g.push([])}}null==a&&(a=[n],g=[s?.length>0?s:T(t,n)]);let o=null,p=null;if(h.length>0&&(g=g.map((e=>e.filter((e=>!u.test(t[e])))))),i>=g.reduce(((e,t)=>e+t.length),0)){o={},p=[];for(let l=0;g.length>l;l++){let n=g[l];if(null==n||0==n.length)continue;let r=a[l],i=U(n,t,r),s=e.sort(i,t,r);if(l>0)for(let e=0;s.length>e;e++)s[e]+=p.length;for(let e in i)o[e]=(o[e]??[]).concat(i[e]);p=p.concat(s)}}return[[].concat(...g),o,p]})(...t),split:M,filter:T,info:U,sort:e.sort}}const h=(()=>{let e={A:"ÁÀÃÂÄĄ",a:"áàãâäą",E:"ÉÈÊËĖ",e:"éèêëę",I:"ÍÌÎÏĮ",i:"íìîïį",O:"ÓÒÔÕÖ",o:"óòôõö",U:"ÚÙÛÜŪŲ",u:"úùûüūų",C:"ÇČĆ",c:"çčć",L:"Ł",l:"ł",N:"ÑŃ",n:"ñń",S:"ŠŚ",s:"šś",Z:"ŻŹ",z:"żź"},t=new Map,l="";for(let n in e)e[n].split("").forEach((e=>{l+=e,t.set(e,n)}));let n=RegExp(`[${l}]`,"g"),r=e=>t.get(e);return e=>{if("string"==typeof e)return e.replace(n,r);let t=Array(e.length);for(let l=0;e.length>l;l++)t[l]=e[l].replace(n,r);return t}})();function f(e){let t,l,n=(e=e.slice()).length,r=[e.slice()],i=Array(n).fill(0),s=1;for(;n>s;)s>i[s]?(t=s%2&&i[s],l=e[s],e[s]=e[t],e[t]=l,++i[s],s=1,r.push(e.slice())):(i[s]=0,++s);return r}const u=(e,t)=>t?`${e}`:e,c=(e,t)=>e+t;return g.latinize=h,g.permute=e=>f([...Array(e.length).keys()]).sort(((e,t)=>{for(let l=0;e.length>l;l++)if(e[l]!=t[l])return e[l]-t[l];return 0})).map((t=>t.map((t=>e[t])))),g.highlight=function(e,t,l=u,n="",r=c){n=r(n,l(e.substring(0,t[0]),!1))??n;for(let i=0;t.length>i;i+=2)n=r(n,l(e.substring(t[i],t[i+1]),!0))??n,t.length-3>i&&(n=r(n,l(e.substring(t[i+1],t[i+2]),!1))??n);return r(n,l(e.substring(t[t.length-1]),!1))??n},g}(); +var uFuzzy=function(){"use strict";const e=new Intl.Collator("en").compare,t=1/0,l=e=>e.replace(/[.*+?^${}()|[\]\\]/g,"\\$&"),n="eexxaacctt",r=(e,t,l)=>e.replace("A-Z",t).replace("a-z",l),i={unicode:!1,alpha:null,interSplit:"[^A-Za-z\\d']+",intraSplit:"[a-z][A-Z]",intraBound:"[A-Za-z]\\d|\\d[A-Za-z]|[a-z][A-Z]",interLft:0,interRgt:0,interChars:".",interIns:t,intraChars:"[a-z\\d']",intraIns:0,intraContr:"'[a-z]{1,2}\\b",intraMode:0,intraSlice:[1,t],intraSub:0,intraTrn:0,intraDel:0,intraFilt:()=>!0,sort:(t,l)=>{let{idx:n,chars:r,terms:i,interLft2:s,interLft1:a,start:g,intraIns:f,interIns:h}=t;return n.map(((e,t)=>t)).sort(((t,c)=>r[c]-r[t]||f[t]-f[c]||i[c]+s[c]+.5*a[c]-(i[t]+s[t]+.5*a[t])||h[t]-h[c]||g[t]-g[c]||e(l[n[t]],l[n[c]])))}},s=(e,l)=>0==l?"":1==l?e+"??":l==t?e+"*?":e+`{0,${l}}?`,a="(?:\\b|_)";function g(e){e=Object.assign({},i,e);let{unicode:t,interLft:g,interRgt:f,intraMode:c,intraSlice:u,intraIns:o,intraSub:p,intraTrn:d,intraDel:m,intraContr:x,intraSplit:R,interSplit:b,intraBound:L,intraChars:A}=e,S=e.letters??e.alpha;if(null!=S){let e=S.toLocaleUpperCase(),t=S.toLocaleLowerCase();b=r(b,e,t),R=r(R,e,t),L=r(L,e,t),A=r(A,e,t),x=r(x,e,t)}let E=t?"u":"";const I='".+?"',z=RegExp(I,"gi"+E),C=RegExp(`(?:\\s+|^)-(?:${A}+|${I})`,"gi"+E);let{intraRules:k}=e;null==k&&(k=e=>{let t=i.intraSlice,l=0,n=0,r=0,s=0;if(/[^\d]/.test(e)){let i=e.length;i>4?(t=u,l=o,n=p,r=d,s=m):3>i||(r=Math.min(d,1),4==i&&(l=Math.min(o,1)))}return{intraSlice:t,intraIns:l,intraSub:n,intraTrn:r,intraDel:s}});let y=!!R,j=RegExp(R,"g"+E),$=RegExp(b,"g"+E),w=RegExp("^"+b+"|"+b+"$","g"+E),Z=RegExp(x,"gi"+E);const M=e=>{let t=[];e=(e=e.replace(z,(e=>(t.push(e),n)))).replace(w,"").toLocaleLowerCase(),y&&(e=e.replace(j,(e=>e[0]+" "+e[1])));let l=0;return e.split($).filter((e=>""!=e)).map((e=>e===n?t[l++]:e))},D=/[^\d]+|\d+/g,T=(t,n=0,r=!1)=>{let i=M(t);if(0==i.length)return[];let h,u=Array(i.length).fill("");if(i=i.map(((e,t)=>e.replace(Z,(e=>(u[t]=e,""))))),1==c)h=i.map(((e,t)=>{if('"'===e[0])return l(e.slice(1,-1));let n="";for(let l of e.matchAll(D)){let e=l[0],{intraSlice:r,intraIns:i,intraSub:a,intraTrn:g,intraDel:f}=k(e);if(i+a+g+f==0)n+=e+u[t];else{let[l,h]=r,c=e.slice(0,l),o=e.slice(h),p=e.slice(l,h);1==i&&1==c.length&&c!=p[0]&&(c+="(?!"+c+")");let d=p.length,m=[e];if(a)for(let e=0;d>e;e++)m.push(c+p.slice(0,e)+A+p.slice(e+1)+o);if(g)for(let e=0;d-1>e;e++)p[e]!=p[e+1]&&m.push(c+p.slice(0,e)+p[e+1]+p[e]+p.slice(e+2)+o);if(f)for(let e=0;d>e;e++)m.push(c+p.slice(0,e+1)+"?"+p.slice(e+1)+o);if(i){let e=s(A,1);for(let t=0;d>t;t++)m.push(c+p.slice(0,t)+e+p.slice(t)+o)}n+="(?:"+m.join("|")+")"+u[t]}}return n}));else{let e=s(A,o);2==n&&o>0&&(e=")("+e+")("),h=i.map(((t,n)=>'"'===t[0]?l(t.slice(1,-1)):t.split("").map(((e,t,l)=>(1==o&&0==t&&l.length>1&&e!=l[t+1]&&(e+="(?!"+e+")"),e))).join(e)+u[n]))}let p=2==g?a:"",d=2==f?a:"",m=d+s(e.interChars,e.interIns)+p;return n>0?r?h=p+"("+h.join(")"+d+"|"+p+"(")+")"+d:(h="("+h.join(")("+m+")(")+")",h="(.??"+p+")"+h+"("+d+".*)"):(h=h.join(m),h=p+h+d),[RegExp(h,"i"+E),i,u]},F=(e,t,l)=>{let[n]=T(t);if(null==n)return null;let r=[];if(null!=l)for(let t=0;l.length>t;t++){let i=l[t];n.test(e[i])&&r.push(i)}else for(let t=0;e.length>t;t++)n.test(e[t])&&r.push(t);return r};let O=!!L,B=RegExp(b,E),U=RegExp(L,E);const v=(t,l,n)=>{let[r,i,s]=T(n,1),[a]=T(n,2),h=i.length,c=t.length,u=Array(c).fill(0),o={idx:Array(c),start:u.slice(),chars:u.slice(),terms:u.slice(),interIns:u.slice(),intraIns:u.slice(),interLft2:u.slice(),interRgt2:u.slice(),interLft1:u.slice(),interRgt1:u.slice(),ranges:Array(c)},p=1==g||1==f,d=0;for(let n=0;t.length>n;n++){let c=l[t[n]],u=c.match(r),m=u.index+u[1].length,x=m,R=!1,b=0,L=0,A=0,S=0,I=0,z=0,C=0,k=0,y=[];for(let t=0,l=2;h>t;t++,l+=2){let n=u[l].toLocaleLowerCase(),r=i[t],a='"'==r[0]?r.slice(1,-1):r+s[t],o=a.length,d=n.length,j=n==a;if(!j&&u[l+1].length>=o){let e=u[l+1].toLocaleLowerCase().indexOf(a);e>-1&&(y.push(x,d,e,o),x+=N(u,l,e,o),n=a,d=o,j=!0,0==t&&(m=x))}if(p||j){let e=x-1,r=x+d,i=!1,s=!1;if(-1==e||B.test(c[e]))j&&b++,i=!0;else{if(2==g){R=!0;break}if(O&&U.test(c[e]+c[e+1]))j&&L++,i=!0;else if(1==g){let e=u[l+1],r=x+d;if(e.length>=o){let s,g=0,f=!1,h=RegExp(a,"ig"+E);for(;s=h.exec(e);){g=s.index;let e=r+g,t=e-1;if(-1==t||B.test(c[t])){b++,f=!0;break}if(U.test(c[t]+c[e])){L++,f=!0;break}}f&&(i=!0,y.push(x,d,g,o),x+=N(u,l,g,o),n=a,d=o,j=!0,0==t&&(m=x))}if(!i){R=!0;break}}}if(r==c.length||B.test(c[r]))j&&A++,s=!0;else{if(2==f){R=!0;break}if(O&&U.test(c[r-1]+c[r]))j&&S++,s=!0;else if(1==f){R=!0;break}}j&&(I+=o,i&&s&&z++)}if(d>o&&(k+=d-o),t>0&&(C+=u[l-1].length),!e.intraFilt(a,n,x)){R=!0;break}h-1>t&&(x+=d+u[l+1].length)}if(!R){o.idx[d]=t[n],o.interLft2[d]=b,o.interLft1[d]=L,o.interRgt2[d]=A,o.interRgt1[d]=S,o.chars[d]=I,o.terms[d]=z,o.interIns[d]=C,o.intraIns[d]=k,o.start[d]=m;let e=c.match(a),l=e.index+e[1].length,r=y.length,i=r>0?0:1/0,s=r-4;for(let t=2;e.length>t;)if(i>s||y[i]!=l)l+=e[t].length,t++;else{let n=y[i+1],r=y[i+2],s=y[i+3],a=t,g="";for(let t=0;n>t;a++)g+=e[a],t+=e[a].length;e.splice(t,a-t,g),l+=N(e,t,r,s),i+=4}l=e.index+e[1].length;let g=o.ranges[d]=[],f=l,h=l;for(let t=2;e.length>t;t++){let n=e[t].length;l+=n,t%2==0?h=l:n>0&&(g.push(f,h),f=h=l)}h>f&&g.push(f,h),d++}}if(t.length>d)for(let e in o)o[e]=o[e].slice(0,d);return o},N=(e,t,l,n)=>{let r=e[t]+e[t+1].slice(0,l);return e[t-1]+=r,e[t]=e[t+1].slice(l,l+n),e[t+1]=e[t+1].slice(l+n),r.length};return{search:(...t)=>((t,n,r,i=1e3,s)=>{r=r?!0===r?5:r:0;let a=null,g=null,f=[];n=n.replace(C,(e=>{let t=e.trim().slice(1);return'"'===t[0]&&(t=l(t.slice(1,-1))),f.push(t),""}));let c,u=M(n);if(f.length>0){if(c=RegExp(f.join("|"),"i"+E),0==u.length){let e=[];for(let l=0;t.length>l;l++)c.test(t[l])||e.push(l);return[e,null,null]}}else if(0==u.length)return[null,null,null];if(r>0){let e=M(n);if(e.length>1){let l=e.slice().sort(((e,t)=>t.length-e.length));for(let e=0;l.length>e;e++){if(0==s?.length)return[[],null,null];s=F(t,l[e],s)}if(e.length>r)return[s,null,null];a=h(e).map((e=>e.join(" "))),g=[];let n=new Set;for(let e=0;a.length>e;e++)if(s.length>n.size){let l=s.filter((e=>!n.has(e))),r=F(t,a[e],l);for(let e=0;r.length>e;e++)n.add(r[e]);g.push(r)}else g.push([])}}null==a&&(a=[n],g=[s?.length>0?s:F(t,n)]);let o=null,p=null;if(f.length>0&&(g=g.map((e=>e.filter((e=>!c.test(t[e])))))),i>=g.reduce(((e,t)=>e+t.length),0)){o={},p=[];for(let l=0;g.length>l;l++){let n=g[l];if(null==n||0==n.length)continue;let r=a[l],i=v(n,t,r),s=e.sort(i,t,r);if(l>0)for(let e=0;s.length>e;e++)s[e]+=p.length;for(let e in i)o[e]=(o[e]??[]).concat(i[e]);p=p.concat(s)}}return[[].concat(...g),o,p]})(...t),split:M,filter:F,info:v,sort:e.sort}}const f=(()=>{let e={A:"ÁÀÃÂÄĄ",a:"áàãâäą",E:"ÉÈÊËĖ",e:"éèêëę",I:"ÍÌÎÏĮ",i:"íìîïį",O:"ÓÒÔÕÖ",o:"óòôõö",U:"ÚÙÛÜŪŲ",u:"úùûüūų",C:"ÇČĆ",c:"çčć",L:"Ł",l:"ł",N:"ÑŃ",n:"ñń",S:"ŠŚ",s:"šś",Z:"ŻŹ",z:"żź"},t=new Map,l="";for(let n in e)e[n].split("").forEach((e=>{l+=e,t.set(e,n)}));let n=RegExp(`[${l}]`,"g"),r=e=>t.get(e);return e=>{if("string"==typeof e)return e.replace(n,r);let t=Array(e.length);for(let l=0;e.length>l;l++)t[l]=e[l].replace(n,r);return t}})();function h(e){let t,l,n=(e=e.slice()).length,r=[e.slice()],i=Array(n).fill(0),s=1;for(;n>s;)s>i[s]?(t=s%2&&i[s],l=e[s],e[s]=e[t],e[t]=l,++i[s],s=1,r.push(e.slice())):(i[s]=0,++s);return r}const c=(e,t)=>t?`${e}`:e,u=(e,t)=>e+t;return g.latinize=f,g.permute=e=>h([...Array(e.length).keys()]).sort(((e,t)=>{for(let l=0;e.length>l;l++)if(e[l]!=t[l])return e[l]-t[l];return 0})).map((t=>t.map((t=>e[t])))),g.highlight=function(e,t,l=c,n="",r=u){n=r(n,l(e.substring(0,t[0]),!1))??n;for(let i=0;t.length>i;i+=2)n=r(n,l(e.substring(t[i],t[i+1]),!0))??n,t.length-3>i&&(n=r(n,l(e.substring(t[i+1],t[i+2]),!1))??n);return r(n,l(e.substring(t[t.length-1]),!1))??n},g}(); diff --git a/package.json b/package.json index 957fd2e..7c7b104 100644 --- a/package.json +++ b/package.json @@ -39,6 +39,6 @@ "homepage": "https://github.com/leeoniya/uFuzzy#readme", "devDependencies": { "@rollup/plugin-terser": "^0.4.4", - "rollup": "^4.6.1" + "rollup": "^4.7.0" } } diff --git a/src/uFuzzy.js b/src/uFuzzy.js index a887f6f..11993ca 100644 --- a/src/uFuzzy.js +++ b/src/uFuzzy.js @@ -156,27 +156,30 @@ export default function uFuzzy(opts) { _intraTrn = 0, _intraDel = 0; - let plen = p.length; - - // prevent junk matches by requiring stricter rules for short terms - if (plen <= 4) { - if (plen >= 3) { - // one swap in non-first char when 3-4 chars - _intraTrn = Math.min(intraTrn, 1); - - // or one insertion when 4 chars - if (plen == 4) - _intraIns = Math.min(intraIns, 1); + // only-digits strings should match exactly, else special rules for short strings + if (/[^\d]/.test(p)) { + let plen = p.length; + + // prevent junk matches by requiring stricter rules for short terms + if (plen <= 4) { + if (plen >= 3) { + // one swap in non-first char when 3-4 chars + _intraTrn = Math.min(intraTrn, 1); + + // or one insertion when 4 chars + if (plen == 4) + _intraIns = Math.min(intraIns, 1); + } + // else exact match when 1-2 chars + } + // use supplied opts + else { + _intraSlice = intraSlice; + _intraIns = intraIns, + _intraSub = intraSub, + _intraTrn = intraTrn, + _intraDel = intraDel; } - // else exact match when 1-2 chars - } - // use supplied opts - else { - _intraSlice = intraSlice; - _intraIns = intraIns, - _intraSub = intraSub, - _intraTrn = intraTrn, - _intraDel = intraDel; } return { @@ -214,6 +217,8 @@ export default function uFuzzy(opts) { return needle.split(interSplit).filter(t => t != '').map(v => v === EXACT_HERE ? exacts[j++] : v); }; + const NUM_OR_ALPHA_RE = /[^\d]+|\d+/g; + const prepQuery = (needle, capt = 0, interOR = false) => { // split on punct, whitespace, num-alpha, and upper-lower boundaries let parts = split(needle); @@ -234,64 +239,72 @@ export default function uFuzzy(opts) { // allows single mutations within each term if (intraMode == 1) { reTpl = parts.map((p, pi) => { - let { - intraSlice, - intraIns, - intraSub, - intraTrn, - intraDel, - } = intraRules(p); - - if (intraIns + intraSub + intraTrn + intraDel == 0) - return p + contrs[pi]; - if (p[0] === '"') return escapeRegExp(p.slice(1, -1)); - let [lftIdx, rgtIdx] = intraSlice; - let lftChar = p.slice(0, lftIdx); // prefix - let rgtChar = p.slice(rgtIdx); // suffix + let reTpl = ''; - let chars = p.slice(lftIdx, rgtIdx); + // split into numeric and alpha parts, so numbers are only matched as following punct or alpha boundaries, without swaps or insertions + for (let m of p.matchAll(NUM_OR_ALPHA_RE)) { + let p = m[0]; - // neg lookahead to prefer matching 'Test' instead of 'tTest' in ManifestTest or fittest - // but skip when search term contains leading repetition (aardvark, aaa) - if (intraIns == 1 && lftChar.length == 1 && lftChar != chars[0]) - lftChar += '(?!' + lftChar + ')'; + let { + intraSlice, + intraIns, + intraSub, + intraTrn, + intraDel, + } = intraRules(p); - let numChars = chars.length; + if (intraIns + intraSub + intraTrn + intraDel == 0) + reTpl += p + contrs[pi]; + else { + let [lftIdx, rgtIdx] = intraSlice; + let lftChar = p.slice(0, lftIdx); // prefix + let rgtChar = p.slice(rgtIdx); // suffix - let variants = [p]; + let chars = p.slice(lftIdx, rgtIdx); - // variants with single char substitutions - if (intraSub) { - for (let i = 0; i < numChars; i++) - variants.push(lftChar + chars.slice(0, i) + intraChars + chars.slice(i + 1) + rgtChar); - } + // neg lookahead to prefer matching 'Test' instead of 'tTest' in ManifestTest or fittest + // but skip when search term contains leading repetition (aardvark, aaa) + if (intraIns == 1 && lftChar.length == 1 && lftChar != chars[0]) + lftChar += '(?!' + lftChar + ')'; - // variants with single transpositions - if (intraTrn) { - for (let i = 0; i < numChars - 1; i++) { - if (chars[i] != chars[i+1]) - variants.push(lftChar + chars.slice(0, i) + chars[i+1] + chars[i] + chars.slice(i + 2) + rgtChar); - } - } + let numChars = chars.length; - // variants with single char omissions - if (intraDel) { - for (let i = 0; i < numChars; i++) - variants.push(lftChar + chars.slice(0, i + 1) + '?' + chars.slice(i + 1) + rgtChar); - } + let variants = [p]; - // variants with single char insertions - if (intraIns) { - let intraInsTpl = lazyRepeat(intraChars, 1); + // variants with single char substitutions + if (intraSub) { + for (let i = 0; i < numChars; i++) + variants.push(lftChar + chars.slice(0, i) + intraChars + chars.slice(i + 1) + rgtChar); + } - for (let i = 0; i < numChars; i++) - variants.push(lftChar + chars.slice(0, i) + intraInsTpl + chars.slice(i) + rgtChar); - } + // variants with single transpositions + if (intraTrn) { + for (let i = 0; i < numChars - 1; i++) { + if (chars[i] != chars[i+1]) + variants.push(lftChar + chars.slice(0, i) + chars[i+1] + chars[i] + chars.slice(i + 2) + rgtChar); + } + } - let reTpl = '(?:' + variants.join('|') + ')' + contrs[pi]; + // variants with single char omissions + if (intraDel) { + for (let i = 0; i < numChars; i++) + variants.push(lftChar + chars.slice(0, i + 1) + '?' + chars.slice(i + 1) + rgtChar); + } + + // variants with single char insertions + if (intraIns) { + let intraInsTpl = lazyRepeat(intraChars, 1); + + for (let i = 0; i < numChars; i++) + variants.push(lftChar + chars.slice(0, i) + intraInsTpl + chars.slice(i) + rgtChar); + } + + reTpl += '(?:' + variants.join('|') + ')' + contrs[pi]; + } + } // console.log(reTpl);