Skip to content

Commit

Permalink
Category and profit mapping errors are sourced to separate files, fix…
Browse files Browse the repository at this point in the history
…ed cases where upcs node is not present in macys response, added a setup script before starting crawler to take care of things
  • Loading branch information
Viveckh committed Apr 21, 2019
1 parent d9d0fc0 commit e9d11ec
Show file tree
Hide file tree
Showing 10 changed files with 2,757 additions and 331 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ Development rates are on an hourly basis or project basis depending on your needs
* Make sure you have Python, Scrapy and Pandas installed in your system
* Add the URLs of the products you want to crawl to `/product_scraper/product_scraper/input/urls.txt`. Check the supported e-commerce stores above.
* Navigate back to the base of the scraper where the scrapy.cfg file is. `cd /product_scraper`
* `python setup.py`
* `scrapy crawl products-scraper`
* `python csv-cleaner.py`
* The output file to upload to WooCommerce is ready at `/product_scraper/product_scraper/output/Product-formatted-for-wp.csv`
Expand Down
13 changes: 13 additions & 0 deletions product_scraper/csv-cleaner.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,4 +96,17 @@

products_df.to_csv('./product_scraper/output/Product-formatted-for-wp.csv', index=False)

## Reformatting error files
def dedupe_error_file(path, label):
    """Rewrite the header-less CSV at *path* with duplicate rows removed.

    The file is read without a header row, exact duplicate rows are dropped
    (the first occurrence is kept), and the result is written back to the
    same path without a header or index column.

    This is a best-effort cleanup: a missing, empty or malformed file must
    not abort the rest of the script, so failures are reported on stdout
    instead of being raised.

    Args:
        path: Location of the error CSV to rewrite in place.
        label: Short name of the mapping kind (e.g. 'category'), used only
            in the failure message.

    Returns:
        True if the file was rewritten, False if any step failed.
    """
    try:
        errors_df = pd.read_csv(path, header=None, index_col=False)
        errors_df.drop_duplicates(keep='first', inplace=True)
        errors_df.to_csv(path, header=False, index=False)
    except Exception as err:
        # `Exception` rather than a bare `except:` so Ctrl-C / SystemExit
        # still propagate; the cause is printed so the failure is diagnosable.
        print("We got some errors while reformatting %s mapping error file: %s" % (label, err))
        return False
    return True


dedupe_error_file('./product_scraper/output/category_mapping_errors.csv', 'category')
dedupe_error_file('./product_scraper/output/profit_mapping_errors.csv', 'profit')
125 changes: 120 additions & 5 deletions product_scraper/product_scraper/input/urls.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,120 @@
https://www.macys.com/shop/product/haggar-mens-active-series-herringbone-slim-fit-suit-separate-jacket?ID=8037396&CategoryID=3763&swatchColor=Black#fn=sp%3D1%26spc%3D1045%26ruleId%3D105%7CBOOST%20SAVED%20SET%26searchPass%3DmatchNone%26slotId%3D5
https://www.macys.com/shop/product/clarks-mens-bushacre-2-chukka-boots?ID=5470539&CategoryID=59851&swatchColor=Beeswax#fn=sp%3D1%26spc%3D952%26ruleId%3D105%7CBOOST%20SAVED%20SET%26searchPass%3DmatchNone%26slotId%3D13
https://www.macys.com/shop/product/tommy-hilfiger-mens-ref-low-top-sneakers?ID=6729602&CategoryID=55642&swatchColor=Dark%20Brown#fn=sp%3D1%26spc%3D712%26ruleId%3D105%7CBOOST%20SAVED%20SET%26searchPass%3DmatchNone%26slotId%3D1
https://www.macys.com/shop/product/origins-ginzing-refreshing-scrub-cleanser-5-fl.-oz.?ID=1385165&CategoryID=30582#fn=sp%3D1%26spc%3D426%26ruleId%3D78%7CBOOST%20SAVED%20SET%7CBOOST%20ATTRIBUTE%26searchPass%3DmatchNone%26slotId%3D4
https://www.macys.com/shop/product/michael-michael-kors-lillie-moccasin-flats?ID=6537373&CategoryID=50295&swatchColor=Soft%20Pink#fn=sp%3D1%26spc%3D514%26ruleId%3D105%7CBOOST%20ATTRIBUTE%7CBOOST%20SAVED%20SET%26searchPass%3DmatchNone%26slotId%3D30
https://www.macys.com/shop/product/lauren-ralph-lauren-ruffled-overlay-dress?ID=7526526&CategoryID=188851
https://www.macys.com/shop/product/lauren-ralph-lauren-plaid-ruched-dress-created-for-macys?ID=6711184&CategoryID=188851
https://www.macys.com/shop/product/lauren-ralph-lauren-plaid-cotton-shirt?ID=8667767&CategoryID=188851
https://www.macys.com/shop/product/lauren-ralph-lauren-petite-regal-straight-crop-jeans?ID=8849639&CategoryID=188851
https://www.macys.com/shop/product/calvin-klein-floral-print-fit-flare-dress?ID=7456659&CategoryID=188851
https://www.macys.com/shop/product/calvin-klein-belted-wrap-coat?ID=6437135&CategoryID=188851
https://www.macys.com/shop/product/tommy-hilfiger-cropped-denim-jacket?ID=8447435&CategoryID=188851
https://www.macys.com/shop/product/calvin-klein-faux-fur-trim-hooded-puffer-coat?ID=7883505&CategoryID=188851
https://www.macys.com/shop/product/calvin-klein-plus-size-ruffled-high-low-midi-dress?ID=7651646&CategoryID=188851
https://www.macys.com/shop/product/under-armour-coldgear-fleece-lined-balaclava-long-sleeve-top?ID=6759145&CategoryID=188851
https://www.macys.com/shop/product/calvin-klein-hooded-quilted-coat?ID=7482580&CategoryID=188851
https://www.macys.com/shop/product/calvin-klein-plus-size-bell-sleeve-sheath-dress?ID=5865993&CategoryID=188851
https://www.macys.com/shop/product/calvin-klein-belted-a-line-dress?ID=7456578&CategoryID=188851
https://www.macys.com/shop/product/lauren-ralph-lauren-striped-cotton-dress?ID=8954385&CategoryID=188851
https://www.macys.com/shop/product/calvin-klein-notch-collar-wrap-coat?ID=7923336&CategoryID=188851
https://www.macys.com/shop/product/lauren-ralph-lauren-premier-straight-ankle-jeans?ID=8631273&CategoryID=188851
https://www.macys.com/shop/product/under-armour-misty-copeland-leggings?ID=5780511&CategoryID=188851
https://www.macys.com/shop/product/tommy-hilfiger-textured-one-button-elbow-patches-jacket?ID=8044269&CategoryID=188851
https://www.macys.com/shop/product/lauren-ralph-lauren-petite-floral-print-dress?ID=8042479&CategoryID=188851
https://www.macys.com/shop/product/calvin-klein-wool-cashmere-double-breasted-peacoat-created-for-macys?ID=2789477&CategoryID=188851
https://www.macys.com/shop/product/lauren-ralph-lauren-plus-size-linen-shirt?ID=8569169&CategoryID=188851
https://www.macys.com/shop/product/calvin-klein-plus-size-tiered-bell-sleeve-dress?ID=7656483&CategoryID=188851
https://www.macys.com/shop/product/calvin-klein-floral-print-sheath-dress?ID=7203856&CategoryID=188851
https://www.macys.com/shop/product/tommy-hilfiger-one-button-blazer?ID=4922559&CategoryID=188851
https://www.macys.com/shop/product/lauren-ralph-lauren-full-zip-mock-neck-jacket?ID=8908421&CategoryID=188851
https://www.macys.com/shop/product/michael-michael-kors-mkgo-logo-tape-hoodie?ID=7902429&CategoryID=188851
https://www.macys.com/shop/product/lauren-ralph-lauren-ultimate-slimming-premier-straight-jeans?ID=8908286&CategoryID=188851
https://www.macys.com/shop/product/lauren-ralph-lauren-petite-gingham-georgette-top?ID=8562431&CategoryID=188851
https://www.macys.com/shop/product/calvin-klein-floral-print-sheath-dress?ID=8669383&CategoryID=188851
https://www.macys.com/shop/product/calvin-klein-sleeveless-ruffled-wide-leg-jumpsuit?ID=7654766&CategoryID=188851
https://www.macys.com/shop/product/calvin-klein-pastel-floral-flutter-sleeve-dress?ID=7952187&CategoryID=188851
https://www.macys.com/shop/product/calvin-klein-gingham-print-shift-dress?ID=7654782&CategoryID=188851
https://www.macys.com/shop/product/lauren-ralph-lauren-petite-premier-straight-jeans?ID=8907906&CategoryID=188851
https://www.macys.com/shop/product/calvin-klein-floral-printed-eyelet-a-line-dress?ID=7952194&CategoryID=188851
https://www.macys.com/shop/product/lauren-ralph-lauren-tummy-control-ruched-halter-one-piece-swimsuit?ID=757922&CategoryID=188851
https://www.macys.com/shop/product/lauren-ralph-lauren-petite-graphic-shirt?ID=8923207&CategoryID=188851
https://www.macys.com/shop/product/lauren-ralph-lauren-petite-premier-straight-curvy-jeans?ID=8907908&CategoryID=188851
https://www.macys.com/shop/product/calvin-klein-faux-fur-trim-hooded-puffer-coat?ID=6367944&CategoryID=188851
https://www.macys.com/shop/product/michael-michael-kors-plus-size-moto-jacket-animal-print-top-miranda-pants?ID=7934649&CategoryID=188851
https://www.macys.com/shop/product/tommy-hilfiger-zip-front-peplum-jacket?ID=6198698&CategoryID=188851
https://www.macys.com/shop/product/calvin-klein-plus-size-illusion-trim-fit-flare-dress?ID=4853130&CategoryID=188851
https://www.macys.com/shop/product/nike-plus-size-air-hooded-running-jacket?ID=7166730&CategoryID=188851
https://www.macys.com/shop/product/calvin-klein-plus-size-bell-sleeve-sheath-dress?ID=7651656&CategoryID=188851
https://www.macys.com/shop/product/lauren-ralph-lauren-plus-size-plaid-cotton-shirt?ID=8569202&CategoryID=188851
https://www.macys.com/shop/product/calvin-klein-faux-fur-lined-puffer-coat?ID=6367943&CategoryID=188851
https://www.macys.com/shop/product/michael-michael-kors-plus-side-zip-trench-coat?ID=5359613&CategoryID=188851
https://www.macys.com/shop/product/nike-sportswear-windrunner-hooded-jacket?ID=4900051&CategoryID=188851
https://www.macys.com/shop/product/lauren-ralph-lauren-puff-sleeve-denim-shirt?ID=8631166&CategoryID=188851
https://www.macys.com/shop/product/lauren-ralph-lauren-puff-sleeve-shirt?ID=8667780&CategoryID=188851
https://www.macys.com/shop/product/calvin-klein-plus-size-bell-sleeve-sheath-dress?ID=7656696&CategoryID=188851
https://www.macys.com/shop/product/tommy-hilfiger-cotton-patchwork-print-belted-shirtdress-created-for-macys?ID=8448053&CategoryID=188851
https://www.macys.com/shop/product/lauren-ralph-lauren-plus-size-linen-shirt?ID=8569170&CategoryID=188851
https://www.macys.com/shop/product/michael-michael-kors-lace-up-ribbed-knit-sweater-dress?ID=8151853&CategoryID=188851
https://www.macys.com/shop/product/lauren-ralph-lauren-striped-button-sleeve-sweater?ID=8631286&CategoryID=188851
https://www.macys.com/shop/product/lauren-ralph-lauren-regal-straight-crop-jeans?ID=8689743&CategoryID=188851
https://www.macys.com/shop/product/calvin-klein-ruffle-trim-sheath-dress?ID=7952177&CategoryID=188851
https://www.macys.com/shop/product/lauren-ralph-lauren-georgette-top?ID=8631278&CategoryID=188851
https://www.macys.com/shop/product/lauren-ralph-lauren-roll-tab-striped-cardigan?ID=8631152&CategoryID=188851
https://www.macys.com/shop/product/calvin-klein-plus-size-midi-fit-flare-dress?ID=7417958&CategoryID=188851
https://www.macys.com/shop/product/tommy-hilfiger-sleeveless-cotton-button-front-shirtdress?ID=8447433&CategoryID=188851
https://www.macys.com/shop/product/lauren-ralph-lauren-petite-denim-shirt?ID=8562412&CategoryID=188851
https://www.macys.com/shop/product/lauren-ralph-lauren-lace-dress?ID=7058382&CategoryID=188851
https://www.macys.com/shop/product/lauren-ralph-lauren-metallic-ponte-knit-shift-dress?ID=7018496&CategoryID=188851
https://www.macys.com/shop/product/lauren-ralph-lauren-striped-dolman-sleeve-sweater?ID=8954458&CategoryID=188851
https://www.macys.com/shop/product/tommy-hilfiger-printed-drawstring-romper-created-for-macys?ID=8447467&CategoryID=188851
https://www.macys.com/shop/product/calvin-klein-plus-size-ruffled-sleeve-sheath-dress?ID=7204641&CategoryID=188851
https://www.macys.com/shop/product/guess-adjustable-waist-hooded-anorak?ID=5224945&CategoryID=188851
https://www.macys.com/shop/product/tommy-hilfiger-patchwork-print-skirt?ID=8447462&CategoryID=188851
https://www.macys.com/shop/product/lauren-ralph-lauren-french-terry-sweatpants?ID=8954426&CategoryID=188851
https://www.macys.com/shop/product/lauren-ralph-lauren-faux-wrap-midi-dress?ID=7342816&CategoryID=188851
https://www.macys.com/shop/product/lauren-ralph-lauren-petite-floral-print-crepe-a-line-dress?ID=7877838&CategoryID=188851
https://www.macys.com/shop/product/lauren-ralph-lauren-short-sleeve-sweater?ID=8631054&CategoryID=188851
https://www.macys.com/shop/product/calvin-klein-plus-size-pointelle-striped-open-front-cardigan?ID=8774295&CategoryID=188851
https://www.macys.com/shop/product/lauren-ralph-lauren-petite-linen-pants?ID=8562395&CategoryID=188851
https://www.macys.com/shop/product/tommy-hilfiger-plaid-handkerchief-hem-shirtdress-created-for-macys?ID=8447434&CategoryID=188851
https://www.macys.com/shop/product/lauren-ralph-lauren-ultimate-slimming-premier-curvy-straight-jeans?ID=8520778&CategoryID=188851
https://www.macys.com/shop/product/lauren-ralph-lauren-slim-fit-v-neck-dress?ID=6077633&CategoryID=188851
https://www.macys.com/shop/product/lauren-ralph-lauren-plus-size-striped-shirt-tattersall-skinny-pants?ID=9019290&CategoryID=188851
https://www.macys.com/shop/product/lauren-ralph-lauren-plus-size-linen-shirt?ID=8569183&CategoryID=188851
https://www.macys.com/shop/product/adidas-warp-knit-ankle-leggings?ID=6757227&CategoryID=188851
https://www.macys.com/shop/product/calvin-klein-hooded-puffer-coat?ID=7883502&CategoryID=188851
https://www.macys.com/shop/product/lauren-ralph-lauren-fit-flare-striped-skirt?ID=8954462&CategoryID=188851
https://www.macys.com/shop/product/lauren-ralph-lauren-petite-stretch-striped-skinny-pants?ID=8562438&CategoryID=188851
https://www.macys.com/shop/product/michael-michael-kors-faux-fur-trim-puffer-vest?ID=6417850&CategoryID=188851
https://www.macys.com/shop/product/lauren-ralph-lauren-ruffle-trim-jumpsuit?ID=8330369&CategoryID=188851
https://www.macys.com/shop/product/lauren-ralph-lauren-plus-size-floral-print-cotton-shirt?ID=8569185&CategoryID=188851
https://www.macys.com/shop/product/lauren-ralph-lauren-petite-textured-sweater?ID=8562406&CategoryID=188851
https://www.macys.com/shop/product/lauren-ralph-lauren-pinstripe-fit-flare-dress?ID=7018616&CategoryID=188851
https://www.macys.com/shop/product/lauren-ralph-lauren-two-tone-jersey-dress?ID=7877681&CategoryID=188851
https://www.macys.com/shop/product/lauren-ralph-lauren-lace-dress?ID=7018495&CategoryID=188851
https://www.macys.com/shop/product/calvin-klein-wool-cashmere-single-breasted-peacoat-created-for-macys?ID=2789498&CategoryID=188851
https://www.macys.com/shop/product/tommy-hilfiger-hooded-zip-raincoat?ID=7475879&CategoryID=188851
https://www.macys.com/shop/product/calvin-klein-eyelet-a-line-dress?ID=7952192&CategoryID=188851
https://www.macys.com/shop/product/nike-61-shine-stripe-cross-back-one-piece-swimsuit?ID=6885349&CategoryID=188851
https://www.macys.com/shop/product/calvin-klein-floral-print-faux-wrap-dress?ID=8669384&CategoryID=188851
https://www.macys.com/shop/product/lauren-ralph-lauren-colorblocked-full-zip-jacket?ID=8908108&CategoryID=188851
https://www.macys.com/shop/product/calvin-klein-plus-size-ruffled-faux-wrap-skirt?ID=8808721&CategoryID=188851
https://www.macys.com/shop/product/calvin-klein-plus-size-illusion-inset-fit-flare-dress?ID=6282213&CategoryID=188851
https://www.macys.com/shop/product/lauren-ralph-lauren-sweater-blazer?ID=8908247&CategoryID=188851
https://www.macys.com/shop/product/tommy-hilfiger-patchwork-print-ruffled-dress?ID=8447452&CategoryID=188851
https://www.macys.com/shop/product/tommy-hilfiger-chiffon-v-neck-jersey-jumpsuit-with-flutter-sleeves?ID=8062165&CategoryID=188851
https://www.macys.com/shop/product/lauren-ralph-lauren-plus-size-plaid-cotton-shirt?ID=8569203&CategoryID=188851
https://www.macys.com/shop/product/lauren-ralph-lauren-sequin-panel-jersey-dress?ID=7342828&CategoryID=188851
https://www.macys.com/shop/product/michael-michael-kors-plus-size-logo-tape-top-leggings?ID=7936030&CategoryID=188851
https://www.macys.com/shop/product/lauren-ralph-lauren-floral-print-georgette-top?ID=8631435&CategoryID=188851
https://www.macys.com/shop/product/lauren-ralph-lauren-striped-cotton-shirt?ID=8631085&CategoryID=188851
https://www.macys.com/shop/product/tommy-hilfiger-printed-pleated-cotton-skirt-created-for-macys?ID=8447448&CategoryID=188851
https://www.macys.com/shop/product/calvin-klein-shadow-stripe-maxi-dress?ID=7654791&CategoryID=188851
https://www.macys.com/shop/product/nike-plus-size-sportswear-windrunner-jacket?ID=4920961&CategoryID=188851
https://www.macys.com/shop/product/nine-west-ruffled-bell-sleeve-jacket?ID=8038479&CategoryID=188851
https://www.macys.com/shop/product/calvin-klein-plus-size-capelet-sheath-dress?ID=7651650&CategoryID=188851
https://www.macys.com/shop/product/calvin-klein-logo-crochet-sheath-dress?ID=7952183&CategoryID=188851
https://www.macys.com/shop/product/calvin-klein-walker-coat?ID=6437136&CategoryID=188851
https://www.macys.com/shop/product/lauren-ralph-lauren-petite-shirt-skinny-pants?ID=9019340&CategoryID=188851
https://www.macys.com/shop/product/lauren-ralph-lauren-petite-premier-straight-ankle-jeans?ID=8562420&CategoryID=188851
https://www.macys.com/shop/product/lauren-ralph-lauren-pleated-jersey-dress?ID=7018593&CategoryID=188851
https://www.macys.com/shop/product/lauren-ralph-lauren-petite-striped-cotton-sweater?ID=8923178&CategoryID=188851
https://www.macys.com/shop/product/lauren-ralph-lauren-petite-striped-cardigan?ID=8562411&CategoryID=188851
https://www.macys.com/shop/product/lauren-ralph-lauren-floral-print-bell-sleeve-dress?ID=7877654&CategoryID=188851
https://www.macys.com/shop/product/calvin-klein-hooded-belted-coat?ID=6437137&CategoryID=188851
19 changes: 15 additions & 4 deletions product_scraper/product_scraper/itemloaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ class ProductItemLoader(ItemLoader):
category_mapping_df.fillna(value='', inplace=True)
profit_margin_mapping_df.fillna(value='', inplace=True)

f_category_mapping_errors = open(os.path.dirname(__file__) + "/output/category_mapping_errors.csv", "a")
f_profit_mapping_errors = open(os.path.dirname(__file__) + "/output/profit_mapping_errors.csv", "a")

def parse_kylie_cosmetics(self, product_url, html_dump):
loader = ProductItemLoader(item=Product(), selector=html_dump)

Expand Down Expand Up @@ -78,7 +81,10 @@ def parse_macys(self, product_url, html_dump):
shared_product_details['wp_featured'] = 0
shared_product_details['wp_visibility'] = 'visible'
shared_product_details['base_image_urls'] = product['urlTemplate']
upcs_list = dict(product['relationships']['upcs']).keys()
if 'upcs' in product['relationships']:
upcs_list = dict(product['relationships']['upcs']).keys()
else:
upcs_list = dict(product['relationships']['memberProductMap']).keys()
if len(upcs_list) > 1:
is_variable_product = True
shared_product_details['attributes'] = {}
Expand All @@ -103,7 +109,10 @@ def parse_macys(self, product_url, html_dump):

if is_variable_product:
for upcs in upcs_list:
child_sku_obj = product['relationships']['upcs'][upcs]
if 'upcs' in product['relationships']:
child_sku_obj = product['relationships']['upcs'][upcs]
else:
child_sku_obj = product['relationships']['memberProductMap'][upcs]
loaders.append(self.gather_macys_variation(sku_obj=child_sku_obj, parent_details=shared_product_details, category_list=macys_category_list, is_parent=False, is_variable=True))

return loaders
Expand Down Expand Up @@ -361,6 +370,7 @@ def map_and_calculate(self, store, category_list, original_price_in_usd):
(self.category_mapping_df['s_subcategory2'].str.strip().str.lower() == str(padded_category_list[3]).strip().lower())]

if cat_match_df.empty:
self.f_category_mapping_errors.write(store + "," + ",".join(padded_category_list) + "\n")
raise Exception("Could not find a qarece mapping in category mapping file for %s store's category hierarchy %s" % (store, padded_category_list))

# Only get the first record if there are multiple matches
Expand All @@ -379,13 +389,14 @@ def map_and_calculate(self, store, category_list, original_price_in_usd):
(self.profit_margin_mapping_df['q_subcategory2'].str.strip().str.lower() == str(cat_match_df['q_subcategory2']).strip().lower())]

if bizrules_match_df.empty:
self.f_profit_mapping_errors.write(",".join(qarece_category_list) + "\n")
raise Exception("Could not find qarece category mapping in profit margin file for hierarchy of %s" % (qarece_category_list))

# There should be only one match, if multiple, then there is an error within the file
# Calculate final values to return
item_weight = float(bizrules_match_df['avg_weight'])
item_profit_margin_rate = .50
item_estimated_shipping_cost_in_usd = item_weight * 5
item_profit_margin_rate = .38
item_estimated_shipping_cost_in_usd = max(item_weight * 5, 5)
item_estimated_profit_in_usd = original_price_in_usd * item_profit_margin_rate
item_final_price_in_usd = original_price_in_usd + item_estimated_shipping_cost_in_usd + item_estimated_profit_in_usd
item_final_price_in_npr = self.usd_to_npr(item_final_price_in_usd)
Expand Down
Loading

0 comments on commit e9d11ec

Please sign in to comment.