Skip to content

Commit

Permalink
Bug 1676038 - Change partial tab-to-search matching to avoid autofill…
Browse files Browse the repository at this point in the history
…ing unexpected subdomains. r=adw

The previous approach was autofilling partial subdomains from search engines.
Having a search engine pointing to "developer.mozilla.org, when "moz" was typed
we ended up autofilling "moz[illa.org]" but pointing to "developer.mozilla.org".
Unfortunately that made impossible to actually go to "mozilla.org" by typing it.

The new approach instead allows tab-to-search results to appear even if there's
no autofill, because the provider checks autonomously the autofill threshold.
There's a downside though, with the old approach we could return more partial
matches, and the Muxer would pick one, depending on the autofilled host. For
example we could return "rover.ebay.com" and match it to "eb[ay.it]".
With the new approach we don't have an autofilled host, and fetching it would
be expensive, we can only compare the search string with the engine's host.
For this reason the patch also tries to partially match against the searchForm
host, that often points to a page the user visits more often.

Differential Revision: https://phabricator.services.mozilla.com/D96942
  • Loading branch information
mak77 committed Nov 18, 2020
1 parent 8654f83 commit 9ea4ff7
Show file tree
Hide file tree
Showing 8 changed files with 280 additions and 250 deletions.
64 changes: 38 additions & 26 deletions browser/components/urlbar/UrlbarMuxerUnifiedComplete.jsm
Original file line number Diff line number Diff line change
Expand Up @@ -303,36 +303,48 @@ class MuxerUnifiedComplete extends UrlbarMuxer {
}

if (result.providerName == "TabToSearch") {
// Discard tab-to-search results if we're not autofilling a URL or
// a tab-to-search result was added already.
if (
state.context.heuristicResult.type != UrlbarUtils.RESULT_TYPE.URL ||
!state.context.heuristicResult.autofill ||
!state.canAddTabToSearch
) {
// Discard the result if a tab-to-search result was added already.
if (!state.canAddTabToSearch) {
return false;
}

let autofillHostname = new URL(state.context.heuristicResult.payload.url)
.hostname;
let [autofillDomain] = UrlbarUtils.stripPrefixAndTrim(autofillHostname, {
stripWww: true,
});
// Strip the public suffix because we want to allow matching "domain.it"
// with "domain.com".
autofillDomain = UrlbarUtils.stripPublicSuffixFromHost(autofillDomain);
if (!autofillDomain) {
return false;
}
if (!result.payload.satisfiesAutofillThreshold) {
// Discard the result if the heuristic result is not autofill.
if (
state.context.heuristicResult.type != UrlbarUtils.RESULT_TYPE.URL ||
!state.context.heuristicResult.autofill
) {
return false;
}

// For tab-to-search results, result.payload.url is the engine's domain
// with the public suffix already stripped, for example "www.mozilla.".
let [engineDomain] = UrlbarUtils.stripPrefixAndTrim(result.payload.url, {
stripWww: true,
});
// Discard if the engine domain does not end with the autofilled one.
if (!engineDomain.endsWith(autofillDomain)) {
return false;
let autofillHostname = new URL(
state.context.heuristicResult.payload.url
).hostname;
let [autofillDomain] = UrlbarUtils.stripPrefixAndTrim(
autofillHostname,
{
stripWww: true,
}
);
// Strip the public suffix because we want to allow matching "domain.it"
// with "domain.com".
autofillDomain = UrlbarUtils.stripPublicSuffixFromHost(autofillDomain);
if (!autofillDomain) {
return false;
}

// For tab-to-search results, result.payload.url is the engine's domain
// with the public suffix already stripped, for example "www.mozilla.".
let [engineDomain] = UrlbarUtils.stripPrefixAndTrim(
result.payload.url,
{
stripWww: true,
}
);
// Discard if the engine domain does not end with the autofilled one.
if (!engineDomain.endsWith(autofillDomain)) {
return false;
}
}
}

Expand Down
208 changes: 62 additions & 146 deletions browser/components/urlbar/UrlbarProviderAutofill.jsm
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,68 @@ class ProviderAutofill extends UrlbarProvider {
}
}

/**
* Filters hosts by retaining only the ones over the autofill threshold, then
* sorts them by their frecency, and extracts the one with the highest value.
* @param {UrlbarQueryContext} queryContext The current queryContext.
* @param {Array} hosts Array of host names to examine.
* @returns {Promise} Resolved when the filtering is complete.
* @resolves {string} The top matching host, or null if not found.
*/
async getTopHostOverThreshold(queryContext, hosts) {
let db = await PlacesUtils.promiseLargeCacheDBConnection();
let conditions = [];
// Pay attention to the order of params, since they are not named.
let params = [UrlbarPrefs.get("autoFill.stddevMultiplier"), ...hosts];
let sources = queryContext.sources;
if (
sources.includes(UrlbarUtils.RESULT_SOURCE.HISTORY) &&
sources.includes(UrlbarUtils.RESULT_SOURCE.BOOKMARKS)
) {
conditions.push(`(bookmarked OR ${SQL_AUTOFILL_FRECENCY_THRESHOLD})`);
} else if (sources.includes(UrlbarUtils.RESULT_SOURCE.HISTORY)) {
conditions.push(`visited AND ${SQL_AUTOFILL_FRECENCY_THRESHOLD}`);
} else if (sources.includes(UrlbarUtils.RESULT_SOURCE.BOOKMARKS)) {
conditions.push("bookmarked");
}

let rows = await db.executeCached(
`
${SQL_AUTOFILL_WITH},
origins(id, prefix, host_prefix, host, fixed, host_frecency, frecency, bookmarked, visited) AS (
SELECT
id,
prefix,
first_value(prefix) OVER (
PARTITION BY host ORDER BY frecency DESC, prefix = "https://" DESC, id DESC
),
host,
fixup_url(host),
TOTAL(frecency) OVER (PARTITION BY fixup_url(host)),
frecency,
MAX(EXISTS(
SELECT 1 FROM moz_places WHERE origin_id = o.id AND foreign_count > 0
)) OVER (PARTITION BY fixup_url(host)),
MAX(EXISTS(
SELECT 1 FROM moz_places WHERE origin_id = o.id AND visit_count > 0
)) OVER (PARTITION BY fixup_url(host))
FROM moz_origins o
WHERE o.host IN (${new Array(hosts.length).fill("?").join(",")})
)
SELECT host
FROM origins
${conditions.length ? "WHERE " + conditions.join(" AND ") : ""}
ORDER BY frecency DESC, prefix = "https://" DESC, id DESC
LIMIT 1
`,
params
);
if (!rows.length) {
return null;
}
return rows[0].getResultByName("host");
}

/**
* Obtains the query to search for autofill origin results.
*
Expand Down Expand Up @@ -575,15 +637,6 @@ class ProviderAutofill extends UrlbarProvider {
return result;
}

// This searches whether the search string matches part of a search engine
// domain, in those cases where it wouldn't match the full domain. For
// example "wiki" would not match "en.wikipedia.org", but this allows to
// do it.
result = await this._matchSearchEnginePartialDomain(queryContext);
if (result) {
return result;
}

// Or we may want to fill a search engine domain regardless of the threshold.
result = await this._matchSearchEngineDomain(queryContext);
if (result) {
Expand Down Expand Up @@ -721,143 +774,6 @@ class ProviderAutofill extends UrlbarProvider {
};
return result;
}

async _matchSearchEnginePartialDomain(queryContext) {
// Differently from other autofill cases, here we don't strip the scheme
// or a trailing slash from the search string, because we want this to act
// as a shortcut to search engines. It's unlikely the user would type
// a url like string to search.
// We also don't strip www, since it may look strange that the user types
// "www.wiki" and we suggest "en.wikipedia.org".
// In practice, we only match when typing a partial top-level domain.
if (
!UrlbarTokenizer.looksLikeOrigin(queryContext.searchString, {
ignoreKnownDomains: true,
})
) {
return null;
}

let trimmedString = queryContext.searchString.startsWith("www.")
? queryContext.searchString.substring(4)
: queryContext.searchString;
if (trimmedString.includes(".")) {
trimmedString = UrlbarUtils.stripPublicSuffixFromHost(trimmedString);
}
let engines = await UrlbarSearchUtils.enginesForDomainPrefix(
trimmedString,
{
matchAllDomainLevels: true,
}
);
let autofillByHost = new Map();
for (let engine of engines) {
let host = engine.getResultDomain();
if (host.startsWith(queryContext.searchString)) {
// This should have been handled by origin autofill already.
continue;
}
let indexOfString = host.indexOf(queryContext.searchString);
if (indexOfString == -1) {
// We only want perfect matches here, if the user types www.engine and
// the engine doesn't have www, we can't trust it.
continue;
}
autofillByHost.set(
host,
queryContext.searchString +
host.substring(indexOfString + queryContext.searchString.length) +
"/"
);
}
if (!autofillByHost.size) {
return null;
}

// Now we could have multiple matching engines, we must sort them by the
// frecency of their origin, then extract the one with the highest value
// that satisfies the autofill threshold.
let db = await PlacesUtils.promiseLargeCacheDBConnection();
let hosts = Array.from(autofillByHost.keys());

let conditions = [];
// Pay attention to the order of params, since they are not named.
let params = [UrlbarPrefs.get("autoFill.stddevMultiplier"), ...hosts];
let sources = queryContext.sources;
if (
sources.includes(UrlbarUtils.RESULT_SOURCE.HISTORY) &&
sources.includes(UrlbarUtils.RESULT_SOURCE.BOOKMARKS)
) {
conditions.push(`(bookmarked OR ${SQL_AUTOFILL_FRECENCY_THRESHOLD})`);
} else if (sources.includes(UrlbarUtils.RESULT_SOURCE.HISTORY)) {
conditions.push(`visited AND ${SQL_AUTOFILL_FRECENCY_THRESHOLD}`);
} else if (sources.includes(UrlbarUtils.RESULT_SOURCE.BOOKMARKS)) {
conditions.push("bookmarked");
}

let rows = await db.executeCached(
`
${SQL_AUTOFILL_WITH},
origins(id, prefix, host_prefix, host, fixed, host_frecency, frecency, bookmarked, visited) AS (
SELECT
id,
prefix,
first_value(prefix) OVER (
PARTITION BY host ORDER BY frecency DESC, prefix = "https://" DESC, id DESC
),
host,
fixup_url(host),
TOTAL(frecency) OVER (PARTITION BY fixup_url(host)),
frecency,
MAX(EXISTS(
SELECT 1 FROM moz_places WHERE origin_id = o.id AND foreign_count > 0
)) OVER (PARTITION BY fixup_url(host)),
MAX(EXISTS(
SELECT 1 FROM moz_places WHERE origin_id = o.id AND visit_count > 0
)) OVER (PARTITION BY fixup_url(host))
FROM moz_origins o
WHERE o.host IN (${new Array(hosts.length).fill("?").join(",")})
)
SELECT host_prefix, host
FROM origins
${conditions.length ? "WHERE " + conditions.join(" AND ") : ""}
ORDER BY frecency DESC, prefix = "https://" DESC, id DESC
LIMIT 1
`,
params
);
if (!rows.length) {
return null;
}
let host = rows[0].getResultByName("host");
let host_prefix = rows[0].getResultByName("host_prefix");

// The value autofilled in the input field is the user typed string, plus
// the portion of the found engine domain and a trailing slash.
let autofill = autofillByHost.get(host);
let url = host_prefix + host + "/";
let [title] = UrlbarUtils.stripPrefixAndTrim(url, {
stripHttp: true,
trimEmptyQuery: true,
trimSlash: true,
});
let result = new UrlbarResult(
UrlbarUtils.RESULT_TYPE.URL,
UrlbarUtils.RESULT_SOURCE.HISTORY,
...UrlbarResult.payloadAndSimpleHighlights(queryContext.tokens, {
title: [title, UrlbarUtils.HIGHLIGHT.TYPED],
url: [url, UrlbarUtils.HIGHLIGHT.TYPED],
icon: iconHelper(url),
})
);
result.autofill = {
value: autofill,
selectionStart: queryContext.searchString.length,
selectionEnd: autofill.length,
};

return result;
}
}

var UrlbarProviderAutofill = new ProviderAutofill();
Loading

0 comments on commit 9ea4ff7

Please sign in to comment.