Skip to content

Commit

Permalink
NOJIRA remove suggest()
Browse files Browse the repository at this point in the history
  • Loading branch information
collectiveaccess committed Dec 21, 2020
1 parent 3fdf55c commit d27a84d
Show file tree
Hide file tree
Showing 6 changed files with 4 additions and 263 deletions.
201 changes: 0 additions & 201 deletions app/lib/Plugins/SearchEngine/SqlSearch.php
Original file line number Diff line number Diff line change
Expand Up @@ -2183,207 +2183,6 @@ public function quickSearch($pn_table_num, $ps_search, $pa_options=null) {
return $va_hits;
}
# --------------------------------------------------
# Spell correction/"Did you mean?"
# --------------------------------------------------
/**
 * Return list of suggested searches that will find something, based upon the specified search expression
 *
 * Each non-numeric token of the expression is matched against indexed words by shared ngrams
 * and the candidates are scored (exact match, metaphone match, substring/Levenshtein distance,
 * shared leading characters). Lower scores rank higher. When only one token produced candidates
 * the three best-scoring words are returned; otherwise phrases are looked up in the word index
 * using temporary tables that chain word positions together.
 *
 * @param string $ps_text The search expression
 * @param array $pa_options Options are:
 *		returnAsLink = return suggestions as links to full-text searches. [Default is no]
 *		request = the current request; required if links are to be generated using returnAsLink. [Default is null]
 *		table = the name or number of the table to restrict searches to. If you pass, for example, "ca_objects" search expressions specifically for object searches will be returned. Only applied when looking up multi-word phrases. [Default is null]
 * @return array List of suggested searches
 */
public function suggest($ps_text, $pa_options=null) {
	$va_tokens = $this->_tokenize($ps_text);
	if (!is_array($va_tokens) || !sizeof($va_tokens)) { return array(); }

	$pm_table = caGetOption('table', $pa_options, null);
	$vn_table_num = $pm_table ? Datamodel::getTableNum($pm_table) : null;

	// Does not depend on the token, so compute it once rather than on every loop iteration
	$vs_table_sql = $vn_table_num ? 'AND swi.table_num = ?' : '';

	$va_word_ids = array();
	foreach($va_tokens as $vn_i => $vs_token) {
		if(preg_match("![\d]+!", $vs_token)) { continue; }	// don't try to match if there are numbers

		// set ngram length based upon length of word
		// shorter words require shorter ngrams to detect similarity
		$vn_token_len = strlen($vs_token);
		if ($vn_token_len <= 8) {
			$vn_ngram_len = 2;
		} elseif($vn_token_len <= 11) {
			$vn_ngram_len = 3;
		} else {
			$vn_ngram_len = 4;
		}

		$va_ngrams = caNgrams($vs_token, $vn_ngram_len);
		if (!is_array($va_ngrams) || !sizeof($va_ngrams)) { continue; }
		$vn_num_ngrams = sizeof($va_ngrams);

		// Look for items with the most shared ngrams
		$qr_res = $this->opo_db->query("
			SELECT ng.word_id, sw.word, count(*) sc
			FROM ca_sql_search_ngrams ng
			INNER JOIN ca_sql_search_words AS sw ON sw.word_id = ng.word_id
			WHERE
				ng.ngram IN (?)
			GROUP BY ng.word_id, sw.word
			ORDER BY (length(sw.word) - (count(*) * {$vn_ngram_len})), (".($vn_ngram_len * $vn_num_ngrams).") - ((count(*) * {$vn_ngram_len}))
			LIMIT 250
		", array($va_ngrams));
		$va_word_ids[$vn_i] = array();

		// Check ngram results using various techniques to find most relevant hits
		$vs_token_metaphone = metaphone($vs_token);
		while($qr_res->nextRow()) {
			$vs_word = $qr_res->get('word');
			if(preg_match("![^A-Za-z ]+!", $vs_word)) { continue; }	// skip anything that is not entirely letters and space
			$vn_word_id = $qr_res->get('word_id');

			// Is it an exact match?
			if ($vs_word == $vs_token) {
				$va_word_ids[$vn_i][$vn_word_id] = -250;
				continue;
			}

			// Does it sound like the word we're looking for (in English at least)
			if (metaphone($vs_word) == $vs_token_metaphone) {
				$va_word_ids[$vn_i][$vn_word_id] = -150;
				continue;
			}

			// Is it close to what we're looking for distance-wise?
			// The score is always assigned here; previously the substring branch decremented
			// a variable that was either undefined or left over from the previous row.
			if (strpos($vs_word, $vs_token) === false) {
				$vn_score = levenshtein($vs_word, $vs_token);
				if ($vn_score > 3) { continue; }
			} else {
				// candidate contains the token verbatim
				$vn_score = -150;
			}

			// reward every leading character the candidate shares with the token
			$vn_word_len = mb_strlen($vs_word);
			for($i=1; $i <= $vn_word_len; $i++) {
				if (mb_substr($vs_word, 0, $i) !== mb_substr($vs_token, 0, $i)) { break; }
				$vn_score -= 25;
			}
			$va_word_ids[$vn_i][$vn_word_id] = $vn_score;
		}
	}

	if (!sizeof($va_word_ids)) {
		return array();
	}

	if (sizeof($va_word_ids) > 1) {
		// Look for phrases that use any sequence of matched words in proper order
		//
		$va_temp_tables = array();
		foreach($va_word_ids as $vn_i => $va_word_list) {
			if (!sizeof($va_word_list)) { continue; }
			asort($va_word_list, SORT_NUMERIC);
			$va_word_list = array_keys(array_slice($va_word_list, 0, 30, true));

			$vs_temp_table = 'ca_sql_search_suggest_'.md5("/".$vn_i."/".print_r($va_word_list, true));
			$this->_createTempTable($vs_temp_table);

			// Each table stores index_id + 1 for hits on this token's candidate words; joining
			// against the previous token's table keeps only hits at the next index position.
			$vs_sql = "
				INSERT INTO {$vs_temp_table}
				SELECT swi.index_id + 1, 1
				FROM ca_sql_search_word_index swi
				".(sizeof($va_temp_tables) ? " INNER JOIN ".$va_temp_tables[sizeof($va_temp_tables) - 1]." AS tt ON swi.index_id = tt.row_id" : "")."
				WHERE
					swi.word_id IN (?) {$vs_table_sql}
					".($this->getOption('omitPrivateIndexing') ? " AND swi.access = 0" : '')."
			";

			$va_params = array($va_word_list);
			if ($vn_table_num) { $va_params[] = $vn_table_num; }

			$this->opo_db->query($vs_sql, $va_params);

			$va_temp_tables[] = $vs_temp_table;
		}

		if (!sizeof($va_temp_tables)) { return array(); }

		// Get most relevant phrases from index
		//
		$vs_results_table = array_pop($va_temp_tables);
		$qr_result = $this->opo_db->query("SELECT * FROM {$vs_results_table} LIMIT 50");

		// NOTE(review): the walk-back length is the total token count, including tokens skipped
		// above (numeric, or without ngrams) - confirm this is the intended phrase length.
		$vn_num_tokens = sizeof($va_tokens);

		$va_phrases = array();
		while($qr_result->nextRow()) {
			// row_id holds index_id + 1 of the last matched word; walk backwards from there
			$va_indices = array();
			$vn_index_id = $qr_result->get('row_id') - 1;

			for($i=0; $i < $vn_num_tokens; $i++) {
				$va_indices[] = $vn_index_id;
				$vn_index_id--;
			}

			// Explicit ordering so the words are joined in index order; without ORDER BY
			// the database is free to return them in any order.
			$qr_phrases = $this->opo_db->query("
				SELECT sw.word, swi.index_id
				FROM ca_sql_search_words sw
				INNER JOIN ca_sql_search_word_index AS swi ON sw.word_id = swi.word_id
				WHERE
					(swi.index_id IN (?))
				ORDER BY swi.index_id
			", array($va_indices));

			$va_acc = array();
			while($qr_phrases->nextRow()) {
				$va_acc[] = $qr_phrases->get('word');
			}
			$va_phrases[] = join(" ", $va_acc);
		}

		foreach($va_temp_tables as $vs_temp_table) {
			$this->_dropTempTable($vs_temp_table);
		}
		$this->_dropTempTable($vs_results_table);

		$va_phrases = array_unique($va_phrases);
	} else {
		// handle single word
		// The only entry is not necessarily keyed 0: earlier tokens may have been skipped,
		// so take whatever the first (and only) entry is.
		$va_word_list = reset($va_word_ids);
		if (!is_array($va_word_list) || !sizeof($va_word_list)) { return array(); }
		asort($va_word_list, SORT_NUMERIC);
		$va_word_list = array_slice($va_word_list, 0, 3, true);

		$qr_phrases = $this->opo_db->query("
			SELECT sw.word
			FROM ca_sql_search_words sw
			WHERE
				(sw.word_id IN (?))
		", array(array_keys($va_word_list)));

		$va_phrases = array();
		while($qr_phrases->nextRow()) {
			$va_phrases[] = $qr_phrases->get('word');
		}
	}

	// Optionally convert each suggestion into a link that runs the suggested search
	if (caGetOption('returnAsLink', $pa_options, false) && ($po_request = caGetOption('request', $pa_options, null))) {
		foreach($va_phrases as $vn_i => $vs_phrase) {
			$va_phrases[$vn_i] = caNavLink($po_request, $vs_phrase, '', '*', '*', 'Index', array('search' => $vs_phrase));
		}
	}

	return $va_phrases;
}
# --------------------------------------------------
/**
*
*/
Expand Down
18 changes: 0 additions & 18 deletions app/lib/Search/SearchEngine.php
Original file line number Diff line number Diff line change
Expand Up @@ -1132,24 +1132,6 @@ static function quickSearch($ps_search, $ps_tablename, $pn_tablenum, $pa_options
return $va_hits;
}
# ------------------------------------------------------------------
/**
 * Return list of suggested searches that will find something, based upon the specified search expression
 *
 * Delegates to the configured search engine plugin when it implements suggest(). The 'table'
 * option is always overwritten with this instance's table number before delegating.
 *
 * @param string $ps_text The search expression
 * @param array $pa_options Options are:
 *		returnAsLink = return suggestions as links to full-text searches. [Default is no]
 *		request = the current request; required if links are to be generated using returnAsLink. [Default is null]
 *		table = the name or number of the table to restrict searches to. Any value passed here is replaced with the table this search instance operates on.
 * @return array List of suggested searches; empty if the engine cannot provide suggestions
 */
public function suggest($ps_text, $pa_options=null) {
	if (!is_array($pa_options)) { $pa_options = array(); }

	if ($this->opo_engine && method_exists($this->opo_engine, "suggest")) {
		$pa_options['table'] = $this->opn_tablenum;
		$va_suggestions = $this->opo_engine->suggest($ps_text, $pa_options);

		// Callers pass the result straight to sizeof(), so always hand back an array
		return is_array($va_suggestions) ? $va_suggestions : array();
	}

	// Engine has no suggestion support: return an empty list rather than null, as documented
	return array();
}
# ------------------------------------------------------------------
/**
* Returns search expression as string for display with field qualifiers translated into display labels
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,5 @@
?>
<div id="resultBox">
<div class="subTitle"><?php print $this->getVar('search') ? _t("Your search found no %1", $this->getVar('mode_type_plural')) : _t("Please enter a search"); ?>
<?php
	// Offer "Did you mean?" alternatives for the failed search.
	$o_search = caGetSearchInstance($t_subject->tableNum());
	$va_suggestions = $o_search->suggest($vs_search, array('returnAsLink' => true, 'request' => $this->request));

	// suggest() may return null when the engine has no suggestion support; sizeof() on a
	// non-countable value raises a warning (PHP 7.2+) or a TypeError (PHP 8), so check first.
	if (is_array($va_suggestions) && sizeof($va_suggestions)) {
		if (sizeof($va_suggestions) > 1) {
			print "<div class='searchSuggestion'>"._t("Did you mean one of these: %1 ?", join(', ', $va_suggestions))."</div>";
		} else {
			print "<div class='searchSuggestion'>"._t("Did you mean %1 ?", join(', ', $va_suggestions))."</div>";
		}
	}
?>
</div>
</div><!-- end resultbox -->
</div><!-- end resultbox -->
12 changes: 1 addition & 11 deletions themes/default/views/find/Results/no_results_html.php
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,5 @@
?>
<div id="resultBox">
<div class="subTitle"><?php print $this->getVar('search') ? _t("Your search found no %1", $this->getVar('mode_type_plural')) : _t("Please enter a search"); ?>
<?php
	// Offer "Did you mean?" alternatives for the failed search.
	$o_search = caGetSearchInstance($t_subject->tableNum());
	$va_suggestions = $o_search->suggest($vs_search, array('returnAsLink' => true, 'request' => $this->request));

	// suggest() may return null when the engine has no suggestion support; sizeof() on a
	// non-countable value raises a warning (PHP 7.2+) or a TypeError (PHP 8), so check first.
	if (is_array($va_suggestions) && sizeof($va_suggestions)) {
		if (sizeof($va_suggestions) > 1) {
			print "<div class='searchSuggestion'>"._t("Did you mean one of these: %1 ?", join(', ', $va_suggestions))."</div>";
		} else {
			print "<div class='searchSuggestion'>"._t("Did you mean %1 ?", join(', ', $va_suggestions))."</div>";
		}
	}
?>
</div>
</div><!-- end resultbox -->
</div><!-- end resultbox -->
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,5 @@
?>
<div id="resultBox">
<div class="subTitle"><?php print $this->getVar('search') ? _t("Your search found no %1", $this->getVar('mode_type_plural')) : _t("Please enter a search"); ?>
<?php
	// Offer "Did you mean?" alternatives for the failed search.
	$o_search = caGetSearchInstance($t_subject->tableNum());
	$va_suggestions = $o_search->suggest($vs_search, array('returnAsLink' => true, 'request' => $this->request));

	// suggest() may return null when the engine has no suggestion support; sizeof() on a
	// non-countable value raises a warning (PHP 7.2+) or a TypeError (PHP 8), so check first.
	if (is_array($va_suggestions) && sizeof($va_suggestions)) {
		if (sizeof($va_suggestions) > 1) {
			print "<div class='searchSuggestion'>"._t("Did you mean one of these: %1 ?", join(', ', $va_suggestions))."</div>";
		} else {
			print "<div class='searchSuggestion'>"._t("Did you mean %1 ?", join(', ', $va_suggestions))."</div>";
		}
	}
?>
</div>
</div><!-- end resultbox -->
</div><!-- end resultbox -->
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,5 @@
?>
<div id="resultBox">
<div class="subTitle"><?php print $this->getVar('search') ? _t("Your search found no %1", $this->getVar('mode_type_plural')) : _t("Please enter a search"); ?>
<?php
	// Offer "Did you mean?" alternatives for the failed search.
	$o_search = caGetSearchInstance($t_subject->tableNum());
	$va_suggestions = $o_search->suggest($vs_search, array('returnAsLink' => true, 'request' => $this->request));

	// suggest() may return null when the engine has no suggestion support; sizeof() on a
	// non-countable value raises a warning (PHP 7.2+) or a TypeError (PHP 8), so check first.
	if (is_array($va_suggestions) && sizeof($va_suggestions)) {
		if (sizeof($va_suggestions) > 1) {
			print "<div class='searchSuggestion'>"._t("Did you mean one of these: %1 ?", join(', ', $va_suggestions))."</div>";
		} else {
			print "<div class='searchSuggestion'>"._t("Did you mean %1 ?", join(', ', $va_suggestions))."</div>";
		}
	}
?>
</div>
</div><!-- end resultbox -->
</div><!-- end resultbox -->

0 comments on commit d27a84d

Please sign in to comment.