Add python recipe scraper

2026-01-10 20:01:47 +00:00 · 2023-07-08 15:24:28 +02:00 · 2023-07-08 15:24:28 +02:00 · 91d2bc338a
parent 9d6d7c2bd4
commit 91d2bc338a
1 changed files with 69 additions and 0 deletions
--- a/recipes.py
+++ b/recipes.py
@ -0,0 +1,69 @@
+from recipe_scrapers import scrape_me
+import urllib.request
+import unicodedata
+import re
+import sys
+import ssl
+ssl._create_default_https_context = ssl._create_unverified_context
+
+recipe_url = sys.argv[1]
+
+scraper = scrape_me(recipe_url, wild_mode=True)
+
+recipe_json = scraper.to_json()
+
+title = recipe_json["title"]
+total_time = recipe_json["total_time"]
+servings = re.sub("[^0-9]", "", recipe_json["yields"])
+host = recipe_json["host"]
+site_name = recipe_json["site_name"]
+
+ingredients_list = recipe_json["ingredients"]
+ingredients = ""
+for i, v in enumerate(ingredients_list):
+    ingredients += f"   - {v}\n"
+
+instructions_list = recipe_json["instructions_list"]
+instructions = ""
+for i, v in enumerate(instructions_list):
+    instructions += f"{i + 1}. {v}\n"
+
+def slugify(value, allow_unicode=False):
+    value = str(value)
+    if allow_unicode:
+        value = unicodedata.normalize('NFKC', value)
+    else:
+        value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore').decode('ascii')
+    value = re.sub(r'[^\w\s-]', '', value.lower())
+    return re.sub(r'[-\s]+', '-', value).strip('-_')
+
+image_url = recipe_json["image"]
+
+image_path = "/img/recipes/"
+image_name = slugify(title) + "." + image_url.split('.')[-1]
+
+def download_jpg(url, file_path, file_name):
+    full_path = file_path + file_name + "." + url.split('.')[-1]
+    urllib.request.urlretrieve(url, full_path)
+
+def recipesConcat():
+    return (
+        f'---\n'
+        f'title: {title}\n'
+        f'image: {image_path}{image_name}\n'
+        f'tags: \n'
+        f'time: {total_time} min\n'
+        f'servings: {servings}\n'
+        f'sourceLabel: {site_name}\n'
+        f'sourceURL: {recipe_url}\n'
+        f'ingredients: \n'
+        f'{ingredients}'
+        f'---\n'
+        f'{instructions}'
+    )
+
+f = open(f"src/recipes/{slugify(title)}.md", "w")
+f.write(recipesConcat())
+f.close()
+
+download_jpg(image_url, r"C:\Users\Thomas\Downloads\recipes-scrapers\src\img\recipes/", slugify(title) )