Skip to content

Commit

Permalink
feat(search): add support for search in different languages, fixes Go…
Browse files Browse the repository at this point in the history
…ogleChrome#4644, fixes GoogleChrome#4643 (GoogleChrome#4922)

* feat(search): add support for search in different languages, fixes GoogleChrome#4644

* Update src/site/_collections/algolia.js

Co-authored-by: Rob Dodson <[email protected]>

* Update src/site/_collections/algolia.js

Co-authored-by: Rob Dodson <[email protected]>

* Update src/site/_filters/urls.js

Co-authored-by: Rob Dodson <[email protected]>

* refactor: make changes

Co-authored-by: Rob Dodson <[email protected]>
  • Loading branch information
MichaelSolati and robdodson authored Mar 22, 2021
1 parent d1cbdaa commit 7f37382
Show file tree
Hide file tree
Showing 21 changed files with 619 additions and 324 deletions.
15 changes: 9 additions & 6 deletions .eleventy.js
Original file line number Diff line number Diff line change
Expand Up @@ -56,15 +56,17 @@ const Tooltip = require(`./${componentsDir}/Tooltip`);
const {Video} = require(`./${componentsDir}/Video`);
const YouTube = require(`./${componentsDir}/YouTube`);

const collectionsDir = 'src/site/_collections';
const authors = require(`./${collectionsDir}/authors`);
const blogPostsDescending = require(`./${collectionsDir}/blog-posts-descending`);
const newsletters = require(`./${collectionsDir}/newsletters`);
// Collections
const algolia = require('./src/site/_collections/algolia');
const authors = require(`./src/site/_collections/authors`);
const blogPostsDescending = require(`./src/site/_collections/blog-posts-descending`);
const newsletters = require(`./src/site/_collections/newsletters`);
const {
postsWithLighthouse,
} = require(`./${collectionsDir}/posts-with-lighthouse`);
const tags = require(`./${collectionsDir}/tags`);
} = require(`./src/site/_collections/posts-with-lighthouse`);
const tags = require(`./src/site/_collections/tags`);

// Filters
const filtersDir = 'src/site/_filters';
const consoleDump = require(`./${filtersDir}/console-dump`);
const {i18n} = require(`./${filtersDir}/i18n`);
Expand Down Expand Up @@ -168,6 +170,7 @@ module.exports = function (config) {
// ----------------------------------------------------------------------------
// COLLECTIONS
// ----------------------------------------------------------------------------
config.addCollection('algolia', algolia);
config.addCollection('authors', authors);
config.addCollection('blogPosts', blogPostsDescending);
config.addCollection('newsletters', newsletters);
Expand Down
97 changes: 67 additions & 30 deletions algolia.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,57 +13,94 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

require('dotenv').config();

const algoliasearch = require('algoliasearch');
const fs = require('fs');
const log = require('fancy-log');
const {sizeof} = require('sizeof');

const raw = fs.readFileSync('dist/algolia.json', 'utf-8');
const indexed = JSON.parse(raw);
const maxChunkSizeInBytes = 10000000; // 10,000,000

// Revision will look like "YYYYMMDDHHMM".
const revision = new Date().toISOString().substring(0, 16).replace(/\D/g, '');
const primaryIndexName = 'webdev';
const deployIndexName = `webdev_deploy_${revision}`;
/**
 * Splits Algolia records into batches whose combined measured size stays
 * below a byte limit, so each batch can be uploaded in its own request.
 *
 * Records keep their original order. A record that reaches or exceeds the
 * limit on its own is still emitted, alone in its own batch. Empty batches are
 * never emitted, so an empty input yields an empty array.
 *
 * @param {AlgoliaCollectionItem[]} arr Records to split into batches.
 * @param {number} [maxBytes] Exclusive upper bound for the summed size of one
 *     batch. Defaults to `maxChunkSizeInBytes`.
 * @param {function(AlgoliaCollectionItem): number} [measure] Returns the size
 *     of a single record in bytes. Defaults to `sizeof`.
 * @return {AlgoliaCollectionItem[][]}
 */
const chunkAlgolia = (
  arr,
  maxBytes = maxChunkSizeInBytes,
  measure = sizeof,
) => {
  const chunked = [];
  let temp = [];
  let tempSizeInBytes = 0;

  for (const current of arr) {
    const currentSizeInBytes = measure(current);

    // Close the running batch only when it already holds something; otherwise
    // an oversized first record would produce a useless empty batch.
    if (temp.length > 0 && tempSizeInBytes + currentSizeInBytes >= maxBytes) {
      chunked.push(temp);
      temp = [];
      tempSizeInBytes = 0;
    }

    temp.push(current);
    tempSizeInBytes += currentSizeInBytes;
  }

  // Flush the trailing batch, skipping it when the input was empty.
  if (temp.length > 0) {
    chunked.push(temp);
  }

  return chunked;
};

async function index() {
if (!process.env.ALGOLIA_APP || !process.env.ALGOLIA_KEY) {
const indexedOn = new Date();

if (!process.env.ALGOLIA_APP_ID || !process.env.ALGOLIA_API_KEY) {
console.warn('Missing Algolia environment variables, skipping indexing.');
return;
}

const raw = fs.readFileSync('dist/algolia.json', 'utf-8');
/** @type {AlgoliaCollection} */
const algoliaData = JSON.parse(raw);

// Stamp every object with the time at which it is being added to Algolia.
algoliaData.map((e) => {
e.indexedOn = indexedOn.getTime();
return e;
});

const chunkedAlgoliaData = chunkAlgolia(algoliaData);
const postsCount = algoliaData.length;

// @ts-ignore
const client = algoliasearch(
process.env.ALGOLIA_APP,
process.env.ALGOLIA_KEY,
process.env.ALGOLIA_APP_ID,
process.env.ALGOLIA_API_KEY,
);
const index = client.initIndex('prod_web_dev');

const primaryIndex = client.initIndex(primaryIndexName); // nb. not actually used, just forces init
const deployIndex = client.initIndex(deployIndexName);

log(
`Indexing ${indexed.length} articles with temporary index ${deployIndex.indexName}...`,
console.log(
`Indexing ${postsCount} articles amongst ${chunkedAlgoliaData.length} chunk(s).`,
);

// TODO(samthor): This is from https://www.algolia.com/doc/api-reference/api-methods/replace-all-objects/#examples,
// are there more things that should be copied?
const scope = ['settings', 'synonyms', 'rules'];
await client.copyIndex(primaryIndex.indexName, deployIndex.indexName, {
scope,
// When indexing data, mark these two fields as facets so that searches can filter on them.
await index.setSettings({
attributesForFaceting: ['locales', 'tags'],
});

// TODO(samthor): Batch uploads so that we don't send more than 10mb.
// As of September 2019, the JSON itself is only ~70k. \shrug/
await deployIndex.saveObjects(indexed);
log(`Indexed, replacing existing index ${primaryIndex.indexName}`);
// Update algolia index with new data
for (let i = 0; i < chunkedAlgoliaData.length; i++) {
await index.saveObjects(chunkedAlgoliaData[i], {
autoGenerateObjectIDIfNotExist: true,
});
}

console.log('Updated algolia data.');

console.log('Deleting old data no longer in algolia.json.');
await index.deleteBy({
filters: `indexedOn < ${indexedOn.getTime()}`,
});
console.log('Deleted old data.');

// Move our temporary deploy index on-top of the primary index, atomically.
await client.moveIndex(deployIndex.indexName, primaryIndex.indexName);
log('Done!');
console.log('Done!');
}

index().catch((err) => {
log.error(err);
console.error(err);
throw err;
});
1 change: 1 addition & 0 deletions gulpfile.js
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ const compressImagesTransform = (pngQuality, jpegQuality) => {
return through2.obj();
}
return imagemin([
// @ts-ignore
pngquant({quality: [pngQuality, pngQuality]}),
mozjpeg({quality: jpegQuality * 100}),
]);
Expand Down
Loading

0 comments on commit 7f37382

Please sign in to comment.