Skip to content

Commit

Permalink
url scrape logic + write url to file
Browse files Browse the repository at this point in the history
  • Loading branch information
algo7 committed Mar 3, 2022
1 parent 4a8af71 commit 410f56a
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 14 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -102,3 +102,5 @@ dist

# TernJS port file
.tern-port
reviewUrl.json
cookies.json
35 changes: 23 additions & 12 deletions app.js
Original file line number Diff line number Diff line change
Expand Up @@ -218,11 +218,21 @@ const extractAllReviewPageUrls = async () => {
// Replace the url page count till the last page
while (counter < totalReviewCount) {
counter++
url = url.replace(/or[0-9]*/g, `or${counter * 5}`)
url = url.replace(/-or[0-9]*/g, `-or${counter * 5}`)
allUrls.push(url)

}


// JSON structure
const data = {
count: allUrls.length * 5,
pageCount: count * 5,
urls: allUrls
};

// Write the data to a json file
writeFileSync('reviewUrl.json', JSON.stringify(data));

return allUrls

} catch (err) {
Expand All @@ -231,15 +241,16 @@ const extractAllReviewPageUrls = async () => {
}


// const start = async () => {
// try {
// const allReviewsUrl = await extractAllReviewPageUrls();
// const results = await scrap(allReviewsUrl);
/**
 * Entry point of the script: gathers the review page URLs, logs them,
 * hands them to `scrap`, and logs whatever `scrap` resolves to.
 *
 * Any error raised by either awaited call is rethrown unchanged, so the
 * promise returned by `start` rejects with the original error.
 *
 * NOTE(review): `scrap` is defined outside this view — its input/return
 * shape is not confirmed here.
 */
const start = async () => {
try {
// Await the list produced by extractAllReviewPageUrls and echo it for inspection.
const allReviewsUrl = await extractAllReviewPageUrls();
console.log(allReviewsUrl)
const results = await scrap(allReviewsUrl);

// NOTE(review): the six commented lines below look like the removed side of
// this diff hunk (the earlier, commented-out version of `start`), not live code.
// console.log(results);
// } catch (err) {
// throw err;
// }
// }
// start().catch(err => console.log(err));
console.log(results);
} catch (err) {
// Rethrows as-is; the `.catch` on the call below is what actually reports it.
throw err;
}
}
// Kick off the run; a rejection is logged to the console rather than left unhandled.
start().catch(err => console.log(err));

1 change: 0 additions & 1 deletion cookies.json

This file was deleted.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"description": "Scrape TripAdvisor reviews",
"main": "app.js",
"scripts": {
"start": "nodemon app.js --ignore cookies.json",
"start": "nodemon app.js --ignore reviewUrl.json --ignore cookies.json",
"test": "echo \"Error: no test specified\" && exit 1"
},
"repository": {
Expand Down

0 comments on commit 410f56a

Please sign in to comment.