mirror of
https://github.com/TheThomaas/my-online-cookbook.git
synced 2026-01-10 20:01:47 +00:00
Add python recipe scraper
This commit is contained in:
parent
9d6d7c2bd4
commit
91d2bc338a
69
recipes.py
Normal file
69
recipes.py
Normal file
|
|
@ -0,0 +1,69 @@
|
|||
import json
import posixpath
import re
import ssl
import sys
import unicodedata
import urllib.parse
import urllib.request

from recipe_scrapers import scrape_me
|
||||
# SECURITY NOTE(review): this swaps the ssl module's default HTTPS context
# factory for the unverified one, so certificate checks are skipped for
# code in this process that relies on that default. Presumably added to work
# around a certificate error on some recipe site -- confirm it is still
# needed, since it exposes downloads to man-in-the-middle tampering.
ssl._create_default_https_context = ssl._create_unverified_context

# Fail with a readable message instead of an IndexError traceback when the
# script is started without a URL.
if len(sys.argv) < 2:
    sys.exit("usage: python recipes.py <recipe-url>")
recipe_url = sys.argv[1]

# Scrape the page and flatten the scraper's result into a plain dict.
scraper = scrape_me(recipe_url, wild_mode=True)
recipe_json = scraper.to_json()

# Scalar fields used by the front matter.
title = recipe_json["title"]
total_time = recipe_json["total_time"]
# Keep only the ASCII digits of the yield text (e.g. "4 servings" -> "4").
servings = re.sub(r"[^0-9]", "", recipe_json["yields"])
host = recipe_json["host"]
site_name = recipe_json["site_name"]

# One " - item" line per ingredient.
ingredients_list = recipe_json["ingredients"]
ingredients = "".join(f" - {item}\n" for item in ingredients_list)

# Numbered steps, starting at 1.
instructions_list = recipe_json["instructions_list"]
instructions = "".join(
    f"{number}. {step}\n"
    for number, step in enumerate(instructions_list, start=1)
)
|
||||
|
||||
def slugify(value, allow_unicode=False):
    """Return a lowercase, hyphen-separated slug built from ``value``.

    ``value`` is converted with ``str()`` first. Unless ``allow_unicode`` is
    true, characters are decomposed (NFKD) and anything that cannot be
    encoded as ASCII is dropped; otherwise the text is only NFKC-normalized.
    Characters other than word characters, whitespace and hyphens are
    removed, runs of whitespace/hyphens collapse to a single hyphen, and
    leading/trailing hyphens and underscores are stripped.
    """
    text = str(value)
    if not allow_unicode:
        decomposed = unicodedata.normalize('NFKD', text)
        text = decomposed.encode('ascii', 'ignore').decode('ascii')
    else:
        text = unicodedata.normalize('NFKC', text)
    kept = re.sub(r'[^\w\s-]', '', text.lower())
    hyphenated = re.sub(r'[-\s]+', '-', kept)
    return hyphenated.strip('-_')
|
||||
|
||||
image_url = recipe_json["image"]

# Site-relative folder the generated page uses to reference the picture.
image_path = "/img/recipes/"

# Take the extension from the URL *path* only, so a query string or fragment
# ("photo.jpg?w=800") does not end up in the file name; default to "jpg"
# when the path carries no extension at all.
image_extension = (
    posixpath.splitext(urllib.parse.urlsplit(image_url).path)[1].lstrip(".")
    or "jpg"
)
image_name = f"{slugify(title)}.{image_extension}"
|
||||
|
||||
def download_jpg(url, file_path, file_name):
    """Download the image at ``url`` and save it under ``file_path``.

    The destination is ``file_path + file_name + "." + extension``. The
    extension is read from the path component of ``url`` only, so query
    strings and fragments never leak into the file name; when the path has
    no extension, ``jpg`` is used.

    Args:
        url: Address of the image to fetch.
        file_path: Destination directory prefix. It is concatenated as-is,
            so it must already end with a path separator.
        file_name: Base name of the saved file, without extension.

    Returns:
        The full path the image was written to.

    Errors raised by ``urllib.request.urlretrieve`` propagate unchanged.
    """
    url_path = urllib.parse.urlsplit(url).path
    extension = posixpath.splitext(url_path)[1].lstrip(".") or "jpg"
    full_path = f"{file_path}{file_name}.{extension}"
    urllib.request.urlretrieve(url, full_path)
    return full_path
|
||||
|
||||
def recipesConcat():
    """Assemble the recipe page: YAML front matter followed by the steps.

    Reads the module-level values ``title``, ``image_path``, ``image_name``,
    ``total_time``, ``servings``, ``site_name``, ``recipe_url``,
    ``ingredients`` and ``instructions``.

    The title and source label are emitted as double-quoted scalars so that
    text containing ``": "`` or ``#`` cannot break the front matter, and a
    missing total time yields an empty ``time:`` field instead of the text
    ``None min``.

    Returns:
        The complete document as a single string.
    """
    def _quoted(value):
        # JSON string syntax is also a valid YAML double-quoted scalar.
        if value is None:
            return ""
        return json.dumps(str(value), ensure_ascii=False)

    time_field = "" if total_time is None else f"{total_time} min"
    header_lines = [
        "---",
        f"title: {_quoted(title)}",
        f"image: {image_path}{image_name}",
        "tags: ",
        f"time: {time_field}",
        f"servings: {servings}",
        f"sourceLabel: {_quoted(site_name)}",
        f"sourceURL: {recipe_url}",
        "ingredients: ",
    ]
    # ``ingredients`` and ``instructions`` already end with a newline each.
    return "\n".join(header_lines) + "\n" + f"{ingredients}---\n{instructions}"
|
||||
|
||||
recipe_slug = slugify(title)

# Write the recipe page. The encoding is pinned to UTF-8 so accented or
# fraction characters in the scraped text do not depend on the platform's
# default encoding, and the context manager closes the file even if the
# write fails.
with open(f"src/recipes/{recipe_slug}.md", "w", encoding="utf-8") as recipe_file:
    recipe_file.write(recipesConcat())

# Store the picture relative to the working directory, like the page above,
# instead of an absolute path that only exists on one machine.
download_jpg(image_url, "src/img/recipes/", recipe_slug)
|
||||
Loading…
Reference in a new issue