Skip to content

Commit

Permalink
Merge pull request #3 from xdmorgan/feature/create-list-of-assets
Browse files Browse the repository at this point in the history
Create list of assets from folder of post data
  • Loading branch information
xdmorgan authored Mar 10, 2019
2 parents ba9bab6 + 2cafda4 commit 9e99aab
Show file tree
Hide file tree
Showing 8 changed files with 202 additions and 109 deletions.
23 changes: 18 additions & 5 deletions src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,18 @@ const Listr = require("listr");

const testConfig = require("./setup/test-config");
const cleanDist = require("./setup/clean-dist");
const downloadUsers = require("./wordpress/download-users");
const downloadPosts = require("./wordpress/download-posts");
const transformPosts = require("./wordpress/transform-posts");
const downloadUsers = require("./wordpress/user-download");
const downloadPosts = require("./wordpress/post-download");
const transformPosts = require("./wordpress/post-transform");
const createAssetList = require("./wordpress/create-asset-list");

const tasks = new Listr([
{
title: "Test Environment Config",
title: "Test environment config",
task: () => testConfig()
},
{
title: "Clean Destination Folder",
title: "Clean destination folder",
task: () => cleanDist()
},
// {
Expand Down Expand Up @@ -43,7 +44,19 @@ const tasks = new Listr([
}
]);
}
},
{
title: "Create list of assets",
task: () => {
return new Listr([
{
title: "Request featured image data and condense post assets",
task: () => createAssetList()
}
]);
}
}
// list of links (for redirects)
]);

tasks.run().catch(err => console.error(err));
9 changes: 2 additions & 7 deletions src/setup/clean-dist.js
Original file line number Diff line number Diff line change
@@ -1,11 +1,6 @@
const fs = require("fs-extra");
const path = require("path");
const { BUILD_DIR } = require("../util");

const OUT_DIR = "dist";

const clean = async () => {
const out = path.join(process.cwd(), OUT_DIR);
return fs.emptyDir(out);
};
const clean = async () => fs.emptyDir(BUILD_DIR);

module.exports = clean;
29 changes: 29 additions & 0 deletions src/util.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
const path = require("path");
const glob = require("glob");

// Fallback observer used when a task is run as a standalone node process
// instead of being driven by Listr. Node's console has no `success` method,
// so completion is reported through console.log; `error` is provided so the
// object covers the full next/error/complete observer shape.
const MOCK_OBSERVER = {
  next: console.log,
  error: console.error,
  complete: (message = "Done") => console.log(message)
};

// Output directories referenced by several tasks. Everything lives under
// <current working directory>/dist.
const BUILD_DIR = path.join(process.cwd(), "dist");
const [
  POST_DIR_ORIGINALS,
  POST_DIR_TRANSFORMED,
  USER_DIR_ORIGINALS,
  ASSET_DIR_LIST
] = [
  "posts-original-by-page",
  "posts-transformed",
  "users-original",
  "list-of-assets"
].map(folder => path.join(BUILD_DIR, folder));

// Promise wrapper around the callback-style glob(): resolves with the list of
// matched files, rejects with the error glob reports.
const findByGlob = (pattern = "", opts = {}) =>
  new Promise((resolve, reject) => {
    const settle = (failure, matches) => {
      if (failure) {
        reject(failure);
        return;
      }
      resolve(matches);
    };
    glob(pattern, opts, settle);
  });

// Public surface of this module: the fallback observer for standalone runs,
// the shared output directory paths, and the promise-based glob helper.
module.exports = {
MOCK_OBSERVER,
BUILD_DIR,
POST_DIR_ORIGINALS,
POST_DIR_TRANSFORMED,
USER_DIR_ORIGINALS,
ASSET_DIR_LIST,
findByGlob
};
61 changes: 61 additions & 0 deletions src/wordpress/create-asset-list.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
const fetch = require("node-fetch");
const fs = require("fs-extra");
const path = require("path");
const { Observable } = require("rxjs");
const {
ASSET_DIR_LIST,
POST_DIR_TRANSFORMED,
MOCK_OBSERVER,
findByGlob
} = require("../util");

// Builds the endpoint for a single media item: <url>/media/<id>.
const urlById = (url, id) => {
  const endpoint = `${url}/media/${id}`;
  return endpoint;
};

/**
 * Lists the images that belong to one post: the featured image (looked up via
 * the media endpoint) followed by any images already stored on
 * `post.bodyImages`.
 *
 * @param {Object} post - Reads `id`, `featured_media` and `bodyImages`.
 * @param {string} url - Base API url handed to urlById().
 * @returns {Promise<Array>} Featured image entry (if any) plus body images.
 */
const listOfImagesByPost = async (post, url) => {
  const { id: postId, featured_media: mediaNumber } = post;
  const featured = [];

  // Only posts with a featured image trigger a request.
  const response = mediaNumber
    ? await fetch(urlById(url, mediaNumber))
    : null;

  // Any status other than 200 leaves the featured image out of the list.
  if (response && response.status === 200) {
    const media = await response.json();
    featured.push({
      mediaNumber,
      link: media.guid.rendered,
      title: media.title.rendered || "",
      description: media.alt_text || "",
      postId
    });
  }

  return featured.concat(post.bodyImages || []);
};

/**
 * Walks every transformed post, gathers its images through
 * listOfImagesByPost() and writes the combined list to
 * <ASSET_DIR_LIST>/assets.json.
 *
 * @param {string} url - Base API url forwarded to listOfImagesByPost().
 * @param {Object} observer - Receives progress via next() and a final complete().
 */
const assets = async (url, observer = MOCK_OBSERVER) => {
  await fs.ensureDir(ASSET_DIR_LIST);
  const files = await findByGlob("*.json", { cwd: POST_DIR_TRANSFORMED });
  const total = files.length;
  observer.next(`Processing ${total} posts`);

  const list = [];
  let done = 0;
  // Posts are handled one at a time, in the order glob returned them.
  for (const file of [...files]) {
    const post = await fs.readJson(path.join(POST_DIR_TRANSFORMED, file));
    const images = await listOfImagesByPost(post, url);
    list.push(...images);
    done += 1;
    observer.next(`Processed ${list.length} images. (${done} / ${total} posts)`);
  }

  const destination = path.join(ASSET_DIR_LIST, "assets.json");
  await fs.writeJson(destination, list, { spaces: 2 });
  observer.complete();
};

// Exposes the task as an Observable that reports progress through the
// subscriber. A rejection from assets() is forwarded to observer.error so a
// failed request or file operation fails the subscription instead of being
// left as an unhandled promise rejection.
module.exports = () =>
  new Observable(observer => {
    assets(process.env.API_URL, observer).catch(err => observer.error(err));
  });
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,21 @@ const fetch = require("node-fetch");
const fs = require("fs-extra");
const path = require("path");
const { Observable } = require("rxjs");
const { POST_DIR_ORIGINALS, MOCK_OBSERVER } = require("../util");

const MOCK_OBSERVER = { next: console.log, complete: console.success };
const DEST_DIR = path.join(process.cwd(), "dist", "posts-raw-by-page");
const urlByPage = (url, page) => `${url}/posts?page=${page}`;
const urlForPage = (url, page) => `${url}/posts?page=${page}`;

const posts = async (url, observer = MOCK_OBSERVER) => {
await fs.ensureDir(DEST_DIR);
await fs.ensureDir(POST_DIR_ORIGINALS);

const postsByPage = async (page = 1) => {
observer.next(`Getting posts by page (${page})`);
const response = await fetch(urlByPage(url, page));
const response = await fetch(urlForPage(url, page));
const { status } = response;
// Save data and move on to the next page
if (status === 200) {
const json = await response.json();
const dest = path.join(DEST_DIR, `posts-${page}.json`);
const dest = path.join(POST_DIR_ORIGINALS, `posts-${page}.json`);
await fs.writeJson(dest, json);
return postsByPage(page + 1);
}
Expand Down
83 changes: 83 additions & 0 deletions src/wordpress/post-transform.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
const fs = require("fs-extra");
const path = require("path");
const { Observable } = require("rxjs");
const {
POST_DIR_ORIGINALS,
POST_DIR_TRANSFORMED,
MOCK_OBSERVER,
findByGlob
} = require("../util");

/**
 * Collects every <img> in the rendered post body onto `post.bodyImages`.
 *
 * Each tag is inspected on its own, so `src` and `alt` always come from the
 * same image regardless of attribute order, and an image without an alt
 * attribute is kept with an empty description rather than borrowing the alt
 * of a later image. Tags without a `src` attribute are skipped.
 *
 * @param {Object} post - Reads `post.body` and `post.id`.
 * @returns {Object} The same post object, with `bodyImages` set.
 */
const extractBodyImages = post => {
  const tagRegex = /<img\b[^>]*>/gi;
  // the leading \s keeps attributes such as data-src from matching
  const srcRegex = /\ssrc="([^"]*)"/i;
  const altRegex = /\salt="([^"]*)"/i;

  post.bodyImages = [];
  // declared locally so the loop variable is not an implicit global
  let tag;
  while ((tag = tagRegex.exec(post.body)) !== null) {
    const src = srcRegex.exec(tag[0]);
    if (!src) {
      continue;
    }
    const alt = altRegex.exec(tag[0]);
    // underscores in alt text are turned into spaces
    const text = alt && alt[1] ? alt[1].replace(/_/g, " ") : "";
    post.bodyImages.push({
      link: src[1],
      description: text,
      title: text,
      postId: post.id
    });
  }
  return post;
};

/**
 * Reshapes a raw WordPress post into the trimmed format used by later tasks.
 * The post object is modified in place: unused fields are deleted, and
 * `publishDate`, `body`, `title` and `category` are derived from the raw
 * fields before those are removed.
 *
 * @param {Object} post - Raw post; reads `date_gmt`, `content.rendered`,
 *   `title.rendered`, `categories` and `slug`.
 * @returns {Array} `[slug, post]`, where post has also been passed through
 *   extractBodyImages().
 */
const transform = post => {
  const discard = (...keys) => keys.forEach(key => delete post[key]);

  discard(
    "_links",
    "guid",
    "excerpt",
    "author", // TODO: Get authors, pull name, look for match in Contentful — Else fallback.
    "comment_status",
    "ping_status",
    "template",
    "format",
    "meta",
    "status",
    "type"
  );

  // date_gmt carries no offset, so one is appended explicitly
  post.publishDate = `${post.date_gmt}+00:00`;
  discard("date_gmt", "date", "modified", "modified_gmt", "tags", "sticky");

  post.body = post.content.rendered;
  discard("content");

  post.title = post.title.rendered;

  // only the first category is kept
  post.category = post.categories[0];
  discard("categories");

  return [post.slug, extractBodyImages(post)];
};

// Saves one transformed post as <POST_DIR_TRANSFORMED>/<name>.json using
// two-space indentation; returns whatever fs.writeJson returns.
const write = (name, data) => {
  const target = path.join(POST_DIR_TRANSFORMED, `${name}.json`);
  return fs.writeJson(target, data, { spaces: 2 });
};

/**
 * Reads every page of raw posts from POST_DIR_ORIGINALS, transforms each post
 * and writes it under its slug via write().
 *
 * The final summary is emitted through next(): complete() is called without
 * arguments, so a message passed to it would never be reported.
 *
 * @param {Object} observer - Receives progress via next() and a final complete().
 */
const transformByPage = async (observer = MOCK_OBSERVER) => {
  // get paginated raw posts from the directory created in the previous step
  await fs.ensureDir(POST_DIR_TRANSFORMED);
  const files = await findByGlob("*.json", { cwd: POST_DIR_ORIGINALS });
  observer.next(`Found ${files.length} pages of posts`);

  let count = 0;
  for (const file of files) {
    const page = await fs.readJson(path.join(POST_DIR_ORIGINALS, file));
    // a page file that does not hold an array contributes no posts
    const posts = Array.isArray(page) ? page : [];
    for (const post of posts) {
      // transform the wordpress post into the expected format
      const [name, data] = transform(post);
      observer.next(`Processing: ${name}`);
      // save processed post by slug for later
      await write(name, data);
      count += 1;
    }
  }

  observer.next(`Successfully transformed ${count} posts`);
  observer.complete();
};

// Exposes the task as an Observable that reports progress through the
// subscriber. A rejection from transformByPage() is forwarded to
// observer.error so a failed read or write fails the subscription instead of
// being left as an unhandled promise rejection.
module.exports = () =>
  new Observable(observer => {
    transformByPage(observer).catch(err => observer.error(err));
  });
85 changes: 0 additions & 85 deletions src/wordpress/transform-posts.js

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,17 @@ const fetch = require("node-fetch");
const fs = require("fs-extra");
const path = require("path");
const { Observable } = require("rxjs");

const MOCK_OBSERVER = { next: console.log, complete: console.success };
const DEST_DIR = path.join(process.cwd(), "dist", "users-raw-by-page");
const { USER_DIR_ORIGINALS, MOCK_OBSERVER } = require("../util");

const users = async (url, observer = MOCK_OBSERVER) => {
await fs.ensureDir(DEST_DIR);
await fs.ensureDir(USER_DIR_ORIGINALS);
const response = await fetch(`${url}/users`);
const { status } = response;
// Save data and move on to the next page
if (status === 200) {
const json = await response.json();
const dest = path.join(DEST_DIR, `users.json`);
await fs.writeJson(dest, json);
const dest = path.join(USER_DIR_ORIGINALS, `users.json`);
await fs.writeJson(dest, json, { spaces: 2 });
return observer.complete();
}
throw new Error(response);
Expand Down

0 comments on commit 9e99aab

Please sign in to comment.