From 410f56a0a6087f0a596f6f0aff9ca6fd5a69c9e8 Mon Sep 17 00:00:00 2001
From: algo7 <11154774+algo7@users.noreply.github.com>
Date: Thu, 3 Mar 2022 22:40:54 +0100
Subject: [PATCH] url scrape logic + write url to file

---
 .gitignore   |  2 ++
 app.js       | 35 +++++++++++++++++++++++------------
 cookies.json |  1 -
 package.json |  2 +-
 4 files changed, 26 insertions(+), 14 deletions(-)
 delete mode 100644 cookies.json

diff --git a/.gitignore b/.gitignore
index 6704566..3e83092 100644
--- a/.gitignore
+++ b/.gitignore
@@ -102,3 +102,5 @@ dist
 
 # TernJS port file
 .tern-port
+reviewUrl.json
+cookies.json
diff --git a/app.js b/app.js
index ccee90c..1b2fda3 100644
--- a/app.js
+++ b/app.js
@@ -218,11 +218,21 @@ const extractAllReviewPageUrls = async () => {
         // Replace the url page count till the last page
         while (counter < totalReviewCount) {
             counter++
-            url = url.replace(/or[0-9]*/g, `or${counter * 5}`)
+            url = url.replace(/-or[0-9]*/g, `-or${counter * 5}`)
             allUrls.push(url)
-
         }
 
+
+        // JSON structure
+        const data = {
+            count: allUrls.length * 5,
+            pageCount: count * 5,
+            urls: allUrls
+        };
+
+        // Write the data to a json file
+        writeFileSync('reviewUrl.json', JSON.stringify(data));
+
         return allUrls
 
     } catch (err) {
@@ -231,15 +241,16 @@ const extractAllReviewPageUrls = async () => {
 }
 
 
-// const start = async () => {
-//     try {
-//         const allReviewsUrl = await extractAllReviewPageUrls();
-//         const results = await scrap(allReviewsUrl);
+/**
+ * Entry point: gather the review page URLs, pass them to scrap(), and log both.
+ * Rejections propagate to the .catch() on the call below.
+ */
+const start = async () => {
+    const allReviewsUrl = await extractAllReviewPageUrls();
+    console.log(allReviewsUrl);
+    const results = await scrap(allReviewsUrl);
 
-//         console.log(results);
-//     } catch (err) {
-//         throw err;
-//     }
-// }
-// start().catch(err => console.log(err));
+    console.log(results);
+};
+start().catch(err => console.log(err));
 
diff --git a/cookies.json b/cookies.json
deleted file mode 100644
index 8b6f63f..0000000
--- a/cookies.json
+++ /dev/null
@@ -1 +0,0 @@
-[{"name":"bm_sv","value":"2F3C65D03AD7A1131B551A170C289FE7~kK56fv7C7z2IrVNwdTgmTEq68x+LZx/wJ+7JFkuj1GDMsWGLc410Ca/zaGmIQbXUArFGsEaDEETllrcWStC3Q862fmy9OM3NS1wiblgORPkNQee1VtYmOzDDYf6cJc4M2OnCO3P6ok7IhP1qX1Z1V/Ucz2BRaA85WbAE74YQjbQ=","domain":".tripadvisor.com","path":"/","expires":1646342482.810172,"size":210,"httpOnly":true,"secure":false,"session":false,"sameParty":false,"sourceScheme":"Secure","sourcePort":443},{"name":"ak_bmsc","value":"C12FD9EAC7AE40A6453D730D9A9B7B24~000000000000000000000000000000~YAAQXxYQAlj1Fkl/AQAA3c06UQ4aub9a1lbyaBkxCXELhL5irMyZnxCGFpsQsE4bFc5knE+E0yH0PrWpcoUotbng6tTLZvZMUYz6C9jToD6eikNxGxaG+wUHhy2amOMPv0PwlkhjKJ2BsN9fGG9/vH1T3YWKwPwrxqeC/rDAHGurdFHAk3Y9XMiR+zns8e+hSHNn2l3sYAX2AVYB4ePe19pOFVJAyWpjuYrxpMBp++K2aUnlGWCVkRCu3o+UYBUIjSGHDWarF/Rp/OzMvkt/oFKNTXsKok6Wjlfw83PLkblx3dntOBVTn0e+sIInhDurHpkuUZzwV67h40CwNdQA4D8+4XSk/0zJdrk82oC9cQzuTwL4SttTofj4UEQ95fnOqomlnbjgF+qJzuHErXyMMQ==","domain":".tripadvisor.com","path":"/","expires":1646342482.670134,"size":479,"httpOnly":true,"secure":false,"session":false,"sameParty":false,"sourceScheme":"Secure","sourcePort":443},{"name":"TASID","value":"C6C8A2BC2E5C4298B5D258DB6D1F8E61","domain":".www.tripadvisor.com","path":"/","expires":1646337084.810127,"size":37,"httpOnly":false,"secure":true,"session":false,"sameParty":false,"sourceScheme":"Secure","sourcePort":443},{"name":"TAUD","value":"LG-1646335282081-2.1.F.*RDD-2-2022_03_04*HDD-3-2022_03_13.2022_03_14.1","domain":".tripadvisor.com","path":"/","expires":1647544882.670108,"size":74,"httpOnly":false,"secure":false,"session":false,"sameParty":false,"sourceScheme":"Secure","sourcePort":443},{"name":"PMC","value":"V2*MS.39*MD.20220303*LD.20220303","domain":".www.tripadvisor.com","path":"/","expires":1709407282.670065,"size":35,"httpOnly":true,"secure":true,"session":false,"sameParty":false,"sourceScheme":"Secure","sourcePort":443},{"name":"TART","value":"%1%enc%3As98S0idXkVBUG0y7HHiez%2FgZUhRG9yHrKZfihF1llaixqXfFoW1KazR%2FCtnG6cls7D689Ml6Ofc%3D","domai
n":".www.tripadvisor.com","path":"/","expires":1646767282.670076,"size":95,"httpOnly":true,"secure":false,"session":false,"sameParty":false,"sourceScheme":"Secure","sourcePort":443},{"name":"PAC","value":"AMrkROy6uPqcgAj06p7Ls3Y9SQ_vFUoIuxX9FdOcuT6Nk_tZO3ZBSDw-_XFFyHXWSeFEy7DRF5SEMAfLSB_A22LqGjp0dZaO1uJFgS94foTqXfKWWJM-AyBXqrwrqEDuUSE5KLDeen8AitOVb2051tQ9zWIlnMaTLbzwfNPty0OTdc2eVBbXQeVjMEcpnqLm_DmbfTyprN-FfIngRTzDzV4%3D","domain":".www.tripadvisor.com","path":"/","expires":1709407282.670042,"size":221,"httpOnly":true,"secure":true,"session":false,"sameParty":false,"sourceScheme":"Secure","sourcePort":443},{"name":"TATravelInfo","value":"V2*AY.2022*AM.3*AD.13*DY.2022*DM.3*DD.14*A.2*MG.-1*HP.2*FL.3*DSM.1646335282174*RS.1","domain":".tripadvisor.com","path":"/","expires":1647544882.670086,"size":95,"httpOnly":false,"secure":false,"session":false,"sameParty":false,"sourceScheme":"Secure","sourcePort":443},{"name":"TASession","value":"V2ID.C6C8A2BC2E5C4298B5D258DB6D1F8E61*SQ.1*GR.95*TCPAR.87*TBR.60*EXEX.62*ABTR.31*PHTB.27*FS.36*CPU.14*HS.recommended*ES.popularity*DS.5*SAS.popularity*FPS.oldFirst*LF.en*FA.1*DF.0*TRA.false*LD.231860*EAU._","domain":".tripadvisor.com","path":"/","expires":-1,"size":214,"httpOnly":false,"secure":false,"session":true,"sameParty":false,"sourceScheme":"Secure","sourcePort":443},{"name":"CM","value":"%1%PremiumMobSess%2C%2C-1%7Ct4b-pc%2C%2C-1%7CRestAds%2FRPers%2C%2C-1%7CRCPers%2C%2C-1%7CWShadeSeen%2C%2C-1%7CTheForkMCCPers%2C%2C-1%7CHomeASess%2C%2C-1%7CPremiumMCSess%2C%2C-1%7CCrisisSess%2C%2C-1%7CUVOwnersSess%2C%2C-1%7CRestPremRSess%2C%2C-1%7CRepTarMCSess%2C%2C-1%7CCCSess%2C%2C-1%7CCYLSess%2C%2C-1%7CPremRetPers%2C%2C-1%7CViatorMCPers%2C%2C-1%7Csesssticker%2C%2C-1%7C%24%2C%2C-1%7CPremiumORSess%2C%2C-1%7Ct4b-sc%2C%2C-1%7CRestAdsPers%2C%2C-1%7CMC_IB_UPSELL_IB_LOGOS2%2C%2C-1%7CTSMCPers%2C%2C-1%7Cb2bmcpers%2C%2C-1%7CPremMCBtmSess%2C%2C-1%7CMC_IB_UPSELL_IB_LOGOS%2C%2C-1%7CLaFourchette+Banners%2C%2C-1%7Csess_rev%2C%2C-1%7Csessamex%2C%2C-1%7CPremiumRRSes
s%2C%2C-1%7CTADORSess%2C%2C-1%7CAdsRetPers%2C%2C-1%7CCOVIDMCSess%2C%2C-1%7CListMCSess%2C%2C-1%7CTARSWBPers%2C%2C-1%7CSPMCSess%2C%2C-1%7CTheForkORSess%2C%2C-1%7CTheForkRRSess%2C%2C-1%7Cpers_rev%2C%2C-1%7CSPACMCSess%2C%2C-1%7CRBAPers%2C%2C-1%7CRestAds%2FRSess%2C%2C-1%7CHomeAPers%2C%2C-1%7CPremiumMobPers%2C%2C-1%7CRCSess%2C%2C-1%7CLaFourchette+MC+Banners%2C%2C-1%7CRestAdsCCSess%2C%2C-1%7CRestPremRPers%2C%2C-1%7CRevHubRMPers%2C%2C-1%7CUVOwnersPers%2C%2C-1%7Cpssamex%2C%2C-1%7CTheForkMCCSess%2C%2C-1%7CCrisisPers%2C%2C-1%7CCYLPers%2C%2C-1%7CCCPers%2C%2C-1%7CRepTarMCPers%2C%2C-1%7Cb2bmcsess%2C%2C-1%7CTSMCSess%2C%2C-1%7CSPMCPers%2C%2C-1%7CRevHubRMSess%2C%2C-1%7CPremRetSess%2C%2C-1%7CViatorMCSess%2C%2C-1%7CPremiumMCPers%2C%2C-1%7CAdsRetSess%2C%2C-1%7CPremiumRRPers%2C%2C-1%7CCOVIDMCPers%2C%2C-1%7CRestAdsCCPers%2C%2C-1%7CTADORPers%2C%2C-1%7CSPACMCPers%2C%2C-1%7CTheForkORPers%2C%2C-1%7CPremMCBtmPers%2C%2C-1%7CTheForkRRPers%2C%2C-1%7CTARSWBSess%2C%2C-1%7CPremiumORPers%2C%2C-1%7CRestAdsSess%2C%2C-1%7CRBASess%2C%2C-1%7CSPORPers%2C%2C-1%7Cperssticker%2C%2C-1%7CListMCPers%2C%2C-1%7C","domain":".tripadvisor.com","path":"/","expires":1961695282.670094,"size":1740,"httpOnly":false,"secure":false,"session":false,"sameParty":false,"sourceScheme":"Secure","sourcePort":443},{"name":"TASSK","value":"enc%3AAImfHSkps5RbyS2ZYBNWvqQox%2FHvxkF%2FyJuc%2BYzv5%2BWTsDDVdPOHHoPaDXU5ky5bs9pbIKzLZd2UD7ZLTmYXz5QdzmNmK2v7rLCJIVHU5wBU2cq20xs3VEBkHBLl6f4h2g%3D%3D","domain":".www.tripadvisor.com","path":"/","expires":1661887282.670021,"size":155,"httpOnly":true,"secure":false,"session":false,"sameParty":false,"sourceScheme":"Secure","sourcePort":443},{"name":"TAReturnTo","value":"%1%%2FHotel_Review-g188107-d231860-Reviews-Beau_Rivage_Palace-Lausanne_Canton_of_Vaud.html","domain":".tripadvisor.com","path":"/","expires":-1,"size":100,"httpOnly":false,"secure":false,"session":true,"sameParty":false,"sourceScheme":"Secure","sourcePort":443},{"name":"__vt","value":"K4J7nFjmbBaN2Pp7ABQCIf6-ytF7QiW7ovfhqc-AvRvGtog-
MtL0-iTcRV0mQYmzzhPpWT8OuNcRg40v3p-HU58JEEsK35LHMthnk3mgffVVWZ1IDjvOdUCnHjm32PkfTATZzVg_ONuDwOZ_mD388QIcx90","domain":".www.tripadvisor.com","path":"/","expires":1646338882.670003,"size":159,"httpOnly":true,"secure":true,"session":false,"sameParty":false,"sourceScheme":"Secure","sourcePort":443},{"name":"ServerPool","value":"A","domain":".tripadvisor.com","path":"/","expires":-1,"size":11,"httpOnly":false,"secure":false,"session":true,"sameParty":false,"sourceScheme":"Secure","sourcePort":443},{"name":"roybatty","value":"TNI1625!AFjTtGgBqylllZvPA1OFq%2FVLfrS57w8ivpcZElRopF1baXdkrYJAfIDNu3%2Bo85iKrBo7qlhme%2BCo7dFYgWRtgW0wQZs7LMNxTtiXzQFcwD%2BWKTSh9gKcObA39DUpuyXT4cb3fgIr8XLCjiM%2BcO9yId1gbYIFK1T4HSK%2F%2BQryiR0z%2C1","domain":".tripadvisor.com","path":"/","expires":-1,"size":206,"httpOnly":false,"secure":false,"session":true,"sameParty":false,"sourceScheme":"Secure","sourcePort":443},{"name":"TAUnique","value":"%1%enc%3AeHVMXz6hVrez3xLSJ1eRULI3lsjTlHxVvZnGDcpfC4Usm8AFpoSOyQ%3D%3D","domain":".tripadvisor.com","path":"/","expires":1709407282.669985,"size":77,"httpOnly":true,"secure":false,"session":false,"sameParty":false,"sourceScheme":"Secure","sourcePort":443},{"name":"TADCID","value":"w6u6xJ4QJBfrzQ2eABQCFdpBzzOuRA-9xvCxaMyI12vpFuGFx-nURlm1WF-eeUlTEc0kABt75x_A7d35pR4wOXKar8mQt-7BUrg","domain":".www.tripadvisor.com","path":"/","expires":1961695282.669902,"size":105,"httpOnly":true,"secure":true,"session":false,"sameParty":false,"sourceScheme":"Secure","sourcePort":443}]
\ No newline at end of file
diff --git a/package.json b/package.json
index f5906f3..4547751 100644
--- a/package.json
+++ b/package.json
@@ -4,7 +4,7 @@
   "description": "Scrape TripAdvisor reviews",
   "main": "app.js",
   "scripts": {
-    "start": "nodemon app.js --ignore cookies.json",
+    "start": "nodemon app.js --ignore reviewUrl.json --ignore cookies.json",
     "test": "echo \"Error: no test specified\" && exit 1"
   },
   "repository": {