-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathingredient_exporter.py
36 lines (31 loc) · 1.48 KB
/
ingredient_exporter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import json
import os
import re
import sys
# Unit-of-measure words recognised right after the quantity. Each entry is
# matched in its singular or plural form ("cup"/"cups", "pinch"/"pinches").
QUANTITIES_TYPES = ['teaspoon', 'tsp', 'tablespoon', 'tbsp', 'cup', 'oz', 'pound', 'gram', 'kilogram', 'kg', 'ml',
                    'millilitre', 'milliliter', 'liter', 'pint', 'pnt', 'quart', 'gallon', 'dash', 'pinch', 'sprig',
                    'bag', 'can', 'package', 'bottle', 'slice', 'ounce']
# Descriptive words that are split off from the front of the ingredient name.
# NOTE: the identifier is misspelled ("ADJACTIVES") but is kept as-is so that
# any existing reference to it keeps working.
ADJACTIVES = ['fresh', 'freshly', 'torn', 'chopped', 'condensed', 'small', 'diced', 'petite', 'large']


def _ingredient_pattern():
    """Compile the ingredient regex from the current lookup tables.

    The pattern has five capture groups, all but the last optional:
    quantity, parenthetical note, unit, adjective, remaining text.
    """
    units = "|".join(re.escape(q) + r"(?:es|s)?" for q in QUANTITIES_TYPES)
    adjectives = "|".join(re.escape(a) for a in ADJACTIVES)
    parts = [
        # 1: quantity -- mixed/simple fraction ("1 1/2", "3/4"), decimal, or integer.
        r"(\d+\s?\d*/\d+|\d+\.\d+|\d+)?",
        # 2: one parenthetical such as " (14 oz)". [^)]* stops at the first
        #    closing paren so a later "(...)" in the name is not swallowed.
        r"( \([^)]*\))?",
        # 3: unit. The trailing \b prevents matching a prefix of a longer word
        #    ("can" inside "candy").
        r"\s*(?:(" + units + r")\b)?",
        # 4: adjective. \b also lets "freshly" win over its prefix "fresh".
        r"\s*(?:(" + adjectives + r")\b)?",
        # 5: whatever is left is the ingredient name.
        r"\s*(.*)",
    ]
    return re.compile("".join(parts))


def parse_ingredients(ingredients):
    """Split ingredient strings into their components.

    Only the text before the first comma of each ingredient is parsed.

    Args:
        ingredients: iterable of ingredient strings, or None.

    Returns:
        A list with one 5-tuple per ingredient:
        ``(quantity, parenthetical, unit, adjective, rest)``. Components that
        are not present are ``None``. An empty list is returned when
        ``ingredients`` is None or empty.
    """
    if not ingredients:
        return []

    # Built once per call instead of once per ingredient.
    pattern = _ingredient_pattern()

    parsed_ingredients = []
    for ing in ingredients:
        parsed_ing = ing.split(",")[0]
        # Every group except the last is optional and the last one accepts the
        # empty string, so the pattern always matches at position 0.
        parsed_ingredients.append(pattern.match(parsed_ing).groups())
    return parsed_ingredients
def main(argv):
    """Annotate every recipe in a JSON-lines file with parsed ingredients.

    Each non-blank line of the input file is decoded as a JSON object, its
    "ingredients" value is run through ``parse_ingredients`` and the result is
    stored under "parsed_ingredients". The annotated recipes are written, one
    JSON document per line, to a file named ``output_<name>`` placed next to
    the input file.

    Args:
        argv: command-line argument list; ``argv[1]`` is the input file path.
    """
    if len(argv) < 2:
        sys.exit("usage: %s RECIPES_FILE" % argv[0])
    file_name = argv[1]

    parsed_recipes = []
    with open(file_name, encoding="utf-8") as f:
        for recipe in f:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            if not recipe.strip():
                continue
            rec_json = json.loads(recipe)
            # A recipe without an "ingredients" key is treated as having none.
            ingredients = rec_json.get("ingredients") or []
            rec_json["parsed_ingredients"] = parse_ingredients(ingredients)
            parsed_recipes.append(json.dumps(rec_json) + "\n")

    # Prefix only the file name, not the whole path, so that an input such as
    # "data/recipes.json" is written to "data/output_recipes.json".
    directory, base_name = os.path.split(file_name)
    output_name = os.path.join(directory, "output_" + base_name)
    # Written only after the whole input was processed, so a malformed line
    # never leaves a partial output file behind.
    with open(output_name, "w", encoding="utf-8") as f:
        f.writelines(parsed_recipes)


if __name__ == '__main__':
    main(sys.argv)