Feature/import keywords as tags (#1170)

* feat: import original keywords as tags

* remove cached env

* Update frontend api types

* fix: Issues with previous tag scraping implementation

* Update category handling in backend

* Update backend tests to include group_id

* Correct type check

* Update create-url interface

* Improve tag cleaner list support

* remove builtin name shadowing

* update type annotations

* test include tags scraper

* implement scraper context for optional data

* re-add cached venv

* use continue instead of break

* remove test callback

Co-authored-by: Miroito <alban.vachette@gmail.com>
Hayden 2022-04-23 12:23:12 -08:00 committed by GitHub
parent 75c535fb2e
commit c696dee320
27 changed files with 281 additions and 61 deletions

View file

@@ -77,8 +77,8 @@ export class RecipeAPI extends BaseCRUDAPI<Recipe, CreateRecipe> {
     return await this.requests.post<Recipe | null>(routes.recipesTestScrapeUrl, { url });
   }

-  async createOneByUrl(url: string) {
-    return await this.requests.post<string>(routes.recipesCreateUrl, { url });
+  async createOneByUrl(url: string, includeTags: boolean) {
+    return await this.requests.post<string>(routes.recipesCreateUrl, { url, includeTags });
   }

   async createManyByUrl(payload: BulkCreatePayload) {

View file

@@ -18,7 +18,7 @@
   <v-tabs-items v-model="tab" class="mt-2">
     <!-- Create From URL -->
     <v-tab-item value="url" eager>
-      <v-form ref="domUrlForm" @submit.prevent="createByUrl(recipeUrl)">
+      <v-form ref="domUrlForm" @submit.prevent="createByUrl(recipeUrl, importKeywordsAsTags)">
         <v-card flat>
           <v-card-title class="headline"> Scrape Recipe </v-card-title>
           <v-card-text>
@@ -38,6 +38,8 @@
             :hint="$t('new-recipe.url-form-hint')"
             persistent-hint
           ></v-text-field>
+          <v-checkbox v-model="importKeywordsAsTags" label="Import original keywords as tags">
+          </v-checkbox>
         </v-card-text>
         <v-card-actions class="justify-center">
           <div style="width: 250px">
@@ -409,20 +411,36 @@ export default defineComponent({
        }
      },
      get() {
-        return route.value.query.recipe_import_url as string;
+        return route.value.query.recipe_import_url as string | null;
      },
    });

+    const importKeywordsAsTags = computed({
+      get() {
+        return route.value.query.import_keywords_as_tags === "1";
+      },
+      set(keywordsAsTags: boolean) {
+        let import_keywords_as_tags = "0"
+        if (keywordsAsTags) {
+          import_keywords_as_tags = "1"
+        }
+        router.replace({query: {...route.value.query, import_keywords_as_tags}})
+      }
+    });
+
    onMounted(() => {
      if (!recipeUrl.value) {
        return;
      }
      if (recipeUrl.value.includes("https")) {
-        createByUrl(recipeUrl.value);
+        createByUrl(recipeUrl.value, importKeywordsAsTags.value);
      }
    });

    // ===================================================
    // Recipe Debug URL Scraper
@@ -447,16 +465,18 @@ export default defineComponent({
    // Recipe URL Import
    const domUrlForm = ref<VForm | null>(null);

-    async function createByUrl(url: string | null) {
+    async function createByUrl(url: string, importKeywordsAsTags: boolean) {
      if (url === null) {
        return;
      }
      if (!domUrlForm.value?.validate() || url === "") {
        console.log("Invalid URL", url);
        return;
      }
      state.loading = true;
-      const { response } = await api.recipes.createOneByUrl(url);
+      const { response } = await api.recipes.createOneByUrl(url, importKeywordsAsTags);

      handleResponse(response);
    }
@@ -523,6 +543,7 @@ export default defineComponent({
      allCategories,
      tab,
      recipeUrl,
+      importKeywordsAsTags,
      bulkCreate,
      bulkUrls,
      lockBulkImport,
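The checkbox state is mirrored in the page's import_keywords_as_tags query parameter ("1" or "0"), so a pre-filled import link can opt into keyword import. A hypothetical link-builder sketch; the page path is an assumption, only the two query-parameter names come from the diff above.

from urllib.parse import urlencode

def build_import_link(base_url: str, recipe_url: str, import_keywords_as_tags: bool) -> str:
    # Mirrors the computed properties above: recipe_import_url carries the URL to scrape,
    # import_keywords_as_tags toggles the checkbox via "1"/"0".
    query = {
        "recipe_import_url": recipe_url,
        "import_keywords_as_tags": "1" if import_keywords_as_tags else "0",
    }
    return f"{base_url}/recipe/create?{urlencode(query)}"  # page path is an assumption

print(build_import_link("https://mealie.example.com", "https://example.com/best-soup", True))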

View file

@@ -103,11 +103,13 @@ export interface RecipeSummary {
 }
 export interface RecipeCategory {
   id?: string;
+  groupId: string;
   name: string;
   slug: string;
 }
 export interface RecipeTag {
   id?: string;
+  groupId: string;
   name: string;
   slug: string;
 }

View file

@@ -89,11 +89,13 @@ export interface RecipeSummary {
 }
 export interface RecipeCategory {
   id?: string;
+  groupId: string;
   name: string;
   slug: string;
 }
 export interface RecipeTag {
   id?: string;
+  groupId: string;
   name: string;
   slug: string;
 }

View file

@@ -258,11 +258,13 @@ export interface RecipeSummary {
 }
 export interface RecipeCategory {
   id?: string;
+  groupId: string;
   name: string;
   slug: string;
 }
 export interface RecipeTag {
   id?: string;
+  groupId: string;
   name: string;
   slug: string;
 }

View file

@@ -119,11 +119,13 @@ export interface RecipeSummary {
 }
 export interface RecipeCategory {
   id?: string;
+  groupId: string;
   name: string;
   slug: string;
 }
 export interface RecipeTag {
   id?: string;
+  groupId: string;
   name: string;
   slug: string;
 }

View file

@@ -68,11 +68,13 @@ export interface CreateRecipeBulk {
 }
 export interface RecipeCategory {
   id?: string;
+  groupId: string;
   name: string;
   slug: string;
 }
 export interface RecipeTag {
   id?: string;
+  groupId: string;
   name: string;
   slug: string;
 }

View file

@@ -132,11 +132,13 @@ export interface RecipeSummary {
 }
 export interface RecipeCategory {
   id?: string;
+  groupId: string;
   name: string;
   slug: string;
 }
 export interface RecipeTag {
   id?: string;
+  groupId: string;
   name: string;
   slug: string;
 }

View file

@@ -57,8 +57,6 @@ async def start_scheduler():
         tasks.purge_group_data_exports,
     )

-    SchedulerRegistry.register_minutely(lambda: logger.info("Scheduler tick"))
-
     SchedulerRegistry.print_jobs()

     await SchedulerService.start()

View file

@@ -34,10 +34,10 @@ class RecipeCategoryController(BaseUserController):
     def mixins(self):
         return HttpRepo(self.repo, self.deps.logger)

-    @router.get("", response_model=list[CategorySummary])
+    @router.get("", response_model=list[RecipeCategory])
     def get_all(self):
        """Returns a list of available categories in the database"""
-        return self.repo.get_all(override_schema=CategorySummary)
+        return self.repo.get_all(override_schema=RecipeCategory)

     @router.post("", status_code=201)
     def create_one(self, category: CategoryIn):

View file

@@ -23,14 +23,16 @@ from mealie.routes._base import BaseUserController, controller
 from mealie.routes._base.mixins import HttpRepo
 from mealie.routes._base.routers import UserAPIRouter
 from mealie.schema.query import GetAll
-from mealie.schema.recipe import CreateRecipeByUrl, Recipe, RecipeImageTypes
+from mealie.schema.recipe import Recipe, RecipeImageTypes, ScrapeRecipe
 from mealie.schema.recipe.recipe import CreateRecipe, CreateRecipeByUrlBulk, RecipeSummary
 from mealie.schema.recipe.recipe_asset import RecipeAsset
+from mealie.schema.recipe.recipe_scraper import ScrapeRecipeTest
 from mealie.schema.response.responses import ErrorResponse
 from mealie.schema.server.tasks import ServerTaskNames
 from mealie.services.recipe.recipe_data_service import RecipeDataService
 from mealie.services.recipe.recipe_service import RecipeService
 from mealie.services.recipe.template_service import TemplateService
+from mealie.services.scraper.scraped_extras import ScraperContext
 from mealie.services.scraper.scraper import create_from_url
 from mealie.services.scraper.scraper_strategies import RecipeScraperPackage
 from mealie.services.server_tasks.background_executory import BackgroundExecutor
@@ -141,9 +143,15 @@ class RecipeController(BaseRecipeController):
     # URL Scraping Operations

     @router.post("/create-url", status_code=201, response_model=str)
-    def parse_recipe_url(self, url: CreateRecipeByUrl):
+    def parse_recipe_url(self, req: ScrapeRecipe):
        """Takes in a URL and attempts to scrape data and load it into the database"""
-        recipe = create_from_url(url.url)
+        recipe, extras = create_from_url(req.url)
+
+        if req.include_tags:
+            ctx = ScraperContext(self.user.id, self.group_id, self.repos)
+            recipe.tags = extras.use_tags(ctx)  # type: ignore
+
         return self.service.create_one(recipe).slug

     @router.post("/create-url/bulk", status_code=202)
@@ -159,7 +167,7 @@ class RecipeController(BaseRecipeController):
             for b in bulk.imports:
                 try:
-                    recipe = create_from_url(b.url)
+                    recipe, _ = create_from_url(b.url)

                     if b.tags:
                         recipe.tags = b.tags
@@ -184,7 +192,7 @@ class RecipeController(BaseRecipeController):
         return {"details": "task has been started"}

     @router.post("/test-scrape-url")
-    def test_parse_recipe_url(self, url: CreateRecipeByUrl):
+    def test_parse_recipe_url(self, url: ScrapeRecipeTest):
         # Debugger should produce the same result as the scraper sees before cleaning
         if scraped_data := RecipeScraperPackage(url.url).scrape_url():
             return scraped_data.schema.data
@@ -264,7 +272,7 @@ class RecipeController(BaseRecipeController):
     # Image and Assets

     @router.post("/{slug}/image", tags=["Recipe: Images and Assets"])
-    def scrape_image_url(self, slug: str, url: CreateRecipeByUrl):
+    def scrape_image_url(self, slug: str, url: ScrapeRecipe):
         recipe = self.mixins.get_one(slug)
         data_service = RecipeDataService(recipe.id)
         data_service.scrape_image(url.url)
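With this change the create-url endpoint also accepts the tag-import flag. A hedged client sketch, not part of this commit; the base URL, token handling, route prefix, and field casing are assumptions (the backend tests in this commit send include_tags).

import requests

def create_recipe_from_url(base_url: str, token: str, url: str, include_tags: bool = False) -> str:
    resp = requests.post(
        f"{base_url}/api/recipes/create-url",  # route assembled from the router above; the /api prefix is an assumption
        json={"url": url, "include_tags": include_tags},
        headers={"Authorization": f"Bearer {token}"},
        timeout=30,
    )
    resp.raise_for_status()
    return resp.json()  # the endpoint responds with the new recipe's slug as a string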

View file

@@ -8,6 +8,7 @@ from .recipe_image_types import *
 from .recipe_ingredient import *
 from .recipe_notes import *
 from .recipe_nutrition import *
+from .recipe_scraper import *
 from .recipe_settings import *
 from .recipe_share_token import *  # type: ignore
 from .recipe_step import *

View file

@@ -41,13 +41,6 @@ class RecipeTool(RecipeTag):
     on_hand: bool = False

-class CreateRecipeByUrl(BaseModel):
-    url: str
-
-    class Config:
-        schema_extra = {"example": {"url": "https://myfavoriterecipes.com/recipes"}}
-
 class CreateRecipeBulk(BaseModel):
     url: str
     categories: list[RecipeCategory] = None
@@ -140,21 +133,21 @@ class Recipe(RecipeSummary):
         if not self.id:
             raise ValueError("Recipe has no ID")

-        dir = app_dirs.RECIPE_DATA_DIR.joinpath(str(self.id))
-        dir.mkdir(exist_ok=True, parents=True)
-        return dir
+        folder = app_dirs.RECIPE_DATA_DIR.joinpath(str(self.id))
+        folder.mkdir(exist_ok=True, parents=True)
+        return folder

     @property
     def asset_dir(self) -> Path:
-        dir = self.directory.joinpath("assets")
-        dir.mkdir(exist_ok=True, parents=True)
-        return dir
+        folder = self.directory.joinpath("assets")
+        folder.mkdir(exist_ok=True, parents=True)
+        return folder

     @property
     def image_dir(self) -> Path:
-        dir = self.directory.joinpath("images")
-        dir.mkdir(exist_ok=True, parents=True)
-        return dir
+        folder = self.directory.joinpath("images")
+        folder.mkdir(exist_ok=True, parents=True)
+        return folder

     class Config:
         orm_mode = True

View file

@@ -28,6 +28,7 @@ class CategoryBase(CategoryIn):

 class CategoryOut(CategoryBase):
     slug: str
+    group_id: UUID4

     class Config:
         orm_mode = True

View file

@@ -0,0 +1,18 @@
+from mealie.schema._mealie.mealie_model import MealieModel
+
+
+class ScrapeRecipeTest(MealieModel):
+    url: str
+
+
+class ScrapeRecipe(MealieModel):
+    url: str
+    include_tags: bool = False
+
+    class Config:
+        schema_extra = {
+            "example": {
+                "url": "https://myfavoriterecipes.com/recipes",
+                "includeTags": True,
+            },
+        }
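ScrapeRecipe replaces the old CreateRecipeByUrl body and keeps tag import opt-in through its default. A minimal sketch, assuming MealieModel behaves like a standard pydantic model:

from mealie.schema.recipe.recipe_scraper import ScrapeRecipe

req = ScrapeRecipe(url="https://myfavoriterecipes.com/recipes")
assert req.include_tags is False  # clients that omit the flag keep the old behaviour

req_with_tags = ScrapeRecipe(url="https://myfavoriterecipes.com/recipes", include_tags=True)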

View file

@@ -91,6 +91,10 @@ class RecipeService(BaseService):
             additional_attrs["user_id"] = user.id
             additional_attrs["group_id"] = user.group_id

+            if additional_attrs.get("tags"):
+                for i in range(len(additional_attrs.get("tags"))):
+                    additional_attrs["tags"][i]["group_id"] = user.group_id
+
             if not additional_attrs.get("recipe_ingredient"):
                 additional_attrs["recipe_ingredient"] = [RecipeIngredient(note=ingredient_note)]
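The loop above stamps the creating user's group onto any incoming tags so they satisfy the new group_id requirement. An illustration with assumed dict shapes; the values are placeholders, only the keys tags and group_id come from the diff:

# Illustration only; the exact tag dict shape is an assumption based on the loop above.
additional_attrs = {"tags": [{"name": "Dinner", "slug": "dinner"}, {"name": "Easy", "slug": "easy"}]}
group_id = "00000000-0000-0000-0000-000000000000"  # placeholder UUID

if additional_attrs.get("tags"):
    for tag in additional_attrs["tags"]:
        tag["group_id"] = group_id

assert all(t["group_id"] == group_id for t in additional_attrs["tags"])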

View file

@@ -297,3 +297,32 @@ def pretty_print_timedelta(t: timedelta, max_components=None, max_decimal_places=2):
     if out_list == []:
         return "none"
     return " ".join(out_list)
+
+
+def clean_tags(data: str | list[str]) -> list[str]:
+    """
+    Takes keywords as a list of strings or as a comma-separated string and returns a list of individual tag strings
+    """
+    if data is None:
+        return []
+
+    if isinstance(data, list):
+        all_str = True
+        i = 0
+        while all_str and i < len(data):
+            all_str = isinstance(data[i], str)
+            i = i + 1
+        if all_str:
+            return data
+        return []
+
+    if isinstance(data, str):
+        tag_list = data.split(",")
+        for i in range(len(tag_list)):
+            tag_list[i] = tag_list[i].strip().capitalize()
+        return tag_list
+
+    return []
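For reference, a few worked examples of how clean_tags behaves on typical schema.org keyword values. Illustrative only; the import path is an assumption.

from mealie.services.scraper.cleaner import clean_tags  # assumed module path for the helper above

assert clean_tags("dinner, easy, weeknight meals") == ["Dinner", "Easy", "Weeknight meals"]
assert clean_tags(["dinner", "easy"]) == ["dinner", "easy"]   # a list of strings passes through untouched
assert clean_tags(["dinner", {"@type": "Thing"}]) == []       # mixed lists are rejected rather than guessed at
assert clean_tags(None) == []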

View file

@@ -1,4 +1,5 @@
 from mealie.schema.recipe.recipe import Recipe
+from mealie.services.scraper.scraped_extras import ScrapedExtras

 from .scraper_strategies import ABCScraperStrategy, RecipeScraperOpenGraph, RecipeScraperPackage
@@ -20,16 +21,16 @@ class RecipeScraper:
         self.scrapers = scrapers

-    def scrape(self, url: str) -> Recipe | None:
+    def scrape(self, url: str) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
        """
        Scrapes a recipe from the web.
        """

         for scraper_type in self.scrapers:
             scraper = scraper_type(url)
-            recipe = scraper.parse()
+            recipe, extras = scraper.parse()

             if recipe is not None:
-                return recipe
+                return recipe, extras

-        return None
+        return None, None
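Callers now unpack a pair and can ignore the extras when they only need the recipe. A hedged consumption sketch; the URL is a placeholder:

scraper = RecipeScraper()
recipe, extras = scraper.scrape("https://example.com/some-recipe")  # placeholder URL

if recipe is None:
    # No strategy could parse the page; extras is None in this branch as well.
    raise ValueError("unable to scrape recipe")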

View file

@@ -0,0 +1,49 @@
+from dataclasses import dataclass
+
+from pydantic import UUID4
+from slugify import slugify
+
+from mealie.repos.repository_factory import AllRepositories
+from mealie.schema.recipe import TagOut
+from mealie.schema.recipe.recipe_category import TagSave
+
+
+class NoContextException(Exception):
+    pass
+
+
+@dataclass(slots=True)
+class ScraperContext:
+    user_id: UUID4
+    group_id: UUID4
+    repos: AllRepositories
+
+
+class ScrapedExtras:
+    def __init__(self) -> None:
+        self._tags: list[str] = []
+
+    def set_tags(self, tags: list[str]) -> None:
+        self._tags = tags
+
+    def use_tags(self, ctx: ScraperContext) -> list[TagOut]:
+        if not self._tags:
+            return []
+
+        repo = ctx.repos.tags.by_group(ctx.group_id)
+
+        tags = []
+        for tag in self._tags:
+            slugify_tag = slugify(tag)
+
+            # Check if tag exists
+            if db_tag := repo.get_one(slugify_tag, "slug"):
+                tags.append(db_tag)
+                continue
+
+            save_data = TagSave(name=tag, group_id=ctx.group_id)
+            db_tag = repo.create(save_data)
+            tags.append(db_tag)
+
+        return tags
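ScrapedExtras defers all database work until the caller supplies a ScraperContext, at which point tags are matched by slug or created in the caller's group. A hedged usage sketch; the module path and the source of ctx are assumptions (the controller above builds it from the request):

from mealie.services.scraper.scraped_extras import ScrapedExtras, ScraperContext  # assumed module path

def save_scraped_keywords(ctx: ScraperContext, keywords: list[str]):
    # Hand the scraped keywords to the extras container, then resolve them against ctx's group:
    # existing tags are reused by slug, missing ones are created via TagSave.
    extras = ScrapedExtras()
    extras.set_tags(keywords)
    return extras.use_tags(ctx)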

View file

@@ -8,6 +8,7 @@ from mealie.core.root_logger import get_logger
 from mealie.pkgs import cache
 from mealie.schema.recipe import Recipe
 from mealie.services.recipe.recipe_data_service import RecipeDataService
+from mealie.services.scraper.scraped_extras import ScrapedExtras

 from .recipe_scraper import RecipeScraper
@@ -18,7 +19,7 @@ class ParserErrors(str, Enum):
     CONNECTION_ERROR = "CONNECTION_ERROR"

-def create_from_url(url: str) -> Recipe:
+def create_from_url(url: str) -> tuple[Recipe, ScrapedExtras]:
    """Main entry point for generating a recipe from a URL. Pass in a URL and
    a Recipe object will be returned if successful.
@@ -29,12 +30,12 @@ def create_from_url(url: str) -> Recipe:
        Recipe: Recipe Object
    """
     scraper = RecipeScraper()
-    new_recipe = scraper.scrape(url)
-    new_recipe.id = uuid4()
+    new_recipe, extras = scraper.scrape(url)

     if not new_recipe:
         raise HTTPException(status.HTTP_400_BAD_REQUEST, {"details": ParserErrors.BAD_RECIPE_DATA.value})

+    new_recipe.id = uuid4()
+
     logger = get_logger()
     logger.info(f"Image {new_recipe.image}")
@@ -52,4 +53,4 @@ def create_from_url(url: str) -> Recipe:
         new_recipe.name = f"No Recipe Name Found - {str(uuid4())}"
         new_recipe.slug = slugify(new_recipe.name)

-    return new_recipe
+    return new_recipe, extras

View file

@@ -10,6 +10,7 @@ from w3lib.html import get_base_url
 from mealie.core.root_logger import get_logger
 from mealie.schema.recipe.recipe import Recipe, RecipeStep
+from mealie.services.scraper.scraped_extras import ScrapedExtras

 from . import cleaner
@@ -26,7 +27,7 @@ class ABCScraperStrategy(ABC):
         self.url = url

     @abstractmethod
-    def parse(self) -> Recipe | None:
+    def parse(self) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
        """Parse a recipe from a web URL.

        Args:
@@ -39,13 +40,15 @@
 class RecipeScraperPackage(ABCScraperStrategy):
-    def clean_scraper(self, scraped_data: SchemaScraperFactory.SchemaScraper, url: str) -> Recipe:
+    def clean_scraper(self, scraped_data: SchemaScraperFactory.SchemaScraper, url: str) -> tuple[Recipe, ScrapedExtras]:
         def try_get_default(func_call: Callable | None, get_attr: str, default: Any, clean_func=None):
             value = default
-            try:
-                value = func_call()
-            except Exception:
-                self.logger.error(f"Error parsing recipe func_call for '{get_attr}'")
+
+            if func_call:
+                try:
+                    value = func_call()
+                except Exception:
+                    self.logger.error(f"Error parsing recipe func_call for '{get_attr}'")

             if value == default:
                 try:
@@ -58,7 +61,7 @@ class RecipeScraperPackage(ABCScraperStrategy):
             return value

-        def get_instructions() -> list[dict]:
+        def get_instructions() -> list[RecipeStep]:
             instruction_as_text = try_get_default(
                 scraped_data.instructions, "recipeInstructions", ["No Instructions Found"]
             )
@@ -78,7 +81,11 @@ class RecipeScraperPackage(ABCScraperStrategy):
             None, "cookTime", None, cleaner.clean_time
         )

-        return Recipe(
+        extras = ScrapedExtras()
+        extras.set_tags(try_get_default(None, "keywords", "", cleaner.clean_tags))
+
+        recipe = Recipe(
             name=try_get_default(scraped_data.title, "name", "No Name Found", cleaner.clean_string),
             slug="",
             image=try_get_default(None, "image", None),
@@ -93,6 +100,8 @@ class RecipeScraperPackage(ABCScraperStrategy):
             org_url=url,
         )

+        return recipe, extras
+
     def scrape_url(self) -> SchemaScraperFactory.SchemaScraper | Any | None:
         try:
             scraped_schema = scrape_me(self.url)
@@ -103,8 +112,8 @@
             self.logger.error("Recipe Scraper was unable to extract a recipe.")
             return None

-        except ConnectionError:
-            raise HTTPException(status.HTTP_400_BAD_REQUEST, {"details": "CONNECTION_ERROR"})
+        except ConnectionError as e:
+            raise HTTPException(status.HTTP_400_BAD_REQUEST, {"details": "CONNECTION_ERROR"}) from e

         # Check to see if the recipe is valid
         try:
@@ -123,7 +132,7 @@ class RecipeScraperPackage(ABCScraperStrategy):
             self.logger.debug(f"Recipe Scraper [Package] was unable to extract a recipe from {self.url}")
             return None

-    def parse(self) -> Recipe | None:
+    def parse(self):
        """
        Parse a recipe from a given url.
        """
@@ -177,7 +186,7 @@ class RecipeScraperOpenGraph(ABCScraperStrategy):
             "extras": [],
         }

-    def parse(self) -> Recipe | None:
+    def parse(self):
        """
        Parse a recipe from a given url.
        """
@@ -188,4 +197,4 @@
         if og_data is None:
             return None

-        return Recipe(**og_data)
+        return Recipe(**og_data), ScrapedExtras()
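The reworked try_get_default only calls an accessor when one was supplied and otherwise falls back to the raw schema.org payload, which is how the new "keywords" read works since it is requested with func_call=None. A simplified, standalone sketch of that pattern (not the author's exact helper; the data dict stands in for scraped_data.schema.data):

from typing import Any, Callable

def try_get_default_sketch(func_call: Callable | None, get_attr: str, default: Any, data: dict, clean_func=None):
    value = default
    if func_call:  # only call the accessor when one was supplied
        try:
            value = func_call()
        except Exception:
            pass  # the real helper logs the failure instead
    if value == default:
        value = data.get(get_attr, default)  # fall back to the raw schema.org field
    return clean_func(value) if clean_func else value

# Keywords have no accessor, so they come straight from the scraped JSON-LD and through the cleaner:
print(try_get_default_sketch(None, "keywords", "", {"keywords": "dinner, easy"}, lambda s: [t.strip() for t in s.split(",")]))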

View file

@@ -22,6 +22,10 @@ html_sous_vide_shrimp = CWD / "html/sous-vide-shrimp.html"
 html_jam_roly_poly_with_custard = CWD / "html/jam-roly-poly-with-custard.html"

+html_nutty_umami_noodles_with_scallion_brown_butter_and_snow_peas_recipe = (
+    CWD / "html/nutty-umami-noodles-with-scallion-brown-butter-and-snow-peas-recipe.html"
+)
+
 html_taiwanese_three_cup_chicken_san_bei_gi_recipe = CWD / "html/taiwanese-three-cup-chicken-san-bei-gi-recipe.html"
 html_detroit_style_pepperoni_pizza = CWD / "html/detroit-style-pepperoni-pizza.html"

File diff suppressed because one or more lines are too long

View file

@@ -130,7 +130,9 @@ def test_organizer_association(
     # Get Recipe Data
     response = api_client.get(routes.RoutesRecipe.item(slug), headers=unique_user.token)
     as_json = response.json()
-    as_json[recipe_key] = [{"id": item["id"], "name": item["name"], "slug": item["slug"]}]
+    as_json[recipe_key] = [
+        {"id": item["id"], "group_id": unique_user.group_id, "name": item["name"], "slug": item["slug"]}
+    ]

     # Update Recipe
     response = api_client.put(routes.RoutesRecipe.item(slug), json=as_json, headers=unique_user.token)
@@ -177,7 +179,9 @@ def test_organizer_get_by_slug(
     for slug in recipe_slugs:
         response = api_client.get(routes.RoutesRecipe.item(slug), headers=unique_user.token)
         as_json = response.json()
-        as_json[recipe_key] = [{"id": item["id"], "name": item["name"], "slug": item["slug"]}]
+        as_json[recipe_key] = [
+            {"id": item["id"], "group_id": unique_user.group_id, "name": item["name"], "slug": item["slug"]}
+        ]

         response = api_client.put(routes.RoutesRecipe.item(slug), json=as_json, headers=unique_user.token)
         assert response.status_code == 200

View file

@@ -13,7 +13,7 @@ from slugify import slugify
 from mealie.schema.recipe.recipe import RecipeCategory
 from mealie.services.recipe.recipe_data_service import RecipeDataService
 from mealie.services.scraper.scraper_strategies import RecipeScraperOpenGraph
-from tests import utils
+from tests import data, utils
 from tests.utils.app_routes import AppRoutes
 from tests.utils.factories import random_string
 from tests.utils.fixture_schemas import TestUser
@@ -83,12 +83,77 @@ def test_create_by_url(
     api_client.delete(api_routes.recipes_recipe_slug(recipe_data.expected_slug), headers=unique_user.token)

-    response = api_client.post(api_routes.recipes_create_url, json={"url": recipe_data.url}, headers=unique_user.token)
+    response = api_client.post(
+        api_routes.recipes_create_url, json={"url": recipe_data.url, "include_tags": False}, headers=unique_user.token
+    )

     assert response.status_code == 201
     assert json.loads(response.text) == recipe_data.expected_slug
+def test_create_by_url_with_tags(
+    api_client: TestClient,
+    api_routes: AppRoutes,
+    unique_user: TestUser,
+    monkeypatch: MonkeyPatch,
+):
+    html_file = data.html_nutty_umami_noodles_with_scallion_brown_butter_and_snow_peas_recipe
+
+    # Override init function for AbstractScraper to use the test html instead of calling the url
+    monkeypatch.setattr(
+        AbstractScraper,
+        "__init__",
+        get_init(html_file),
+    )
+    # Override the get_html method of the RecipeScraperOpenGraph to return the test html
+    monkeypatch.setattr(
+        RecipeScraperOpenGraph,
+        "get_html",
+        open_graph_override(html_file.read_text()),
+    )
+
+    # Skip image downloader
+    monkeypatch.setattr(
+        RecipeDataService,
+        "scrape_image",
+        lambda *_: "TEST_IMAGE",
+    )
+
+    response = api_client.post(
+        api_routes.recipes_create_url,
+        json={"url": "https://google.com", "include_tags": True},  # URL Doesn't matter
+        headers=unique_user.token,
+    )
+    assert response.status_code == 201
+    slug = "nutty-umami-noodles-with-scallion-brown-butter-and-snow-peas"
+
+    # Get the recipe
+    response = api_client.get(api_routes.recipes_recipe_slug(slug), headers=unique_user.token)
+    assert response.status_code == 200
+
+    # Verify the tags are present
+    expected_tags = {
+        "sauté",
+        "pea",
+        "noodle",
+        "udon noodle",
+        "ramen noodle",
+        "dinner",
+        "main",
+        "vegetarian",
+        "easy",
+        "quick",
+        "weeknight meals",
+        "web",
+    }
+
+    recipe = json.loads(response.text)
+
+    assert len(recipe["tags"]) == len(expected_tags)
+
+    for tag in recipe["tags"]:
+        assert tag["name"] in expected_tags
@pytest.mark.parametrize("recipe_data", recipe_test_data) @pytest.mark.parametrize("recipe_data", recipe_test_data)
def test_read_update( def test_read_update(
api_client: TestClient, api_client: TestClient,

View file

@@ -17,7 +17,7 @@ and then use this test case by removing the `@pytest.mark.skip` and than testing
 @pytest.mark.skipif(True, reason="Long Running API Test - manually run when updating the parser")
 @pytest.mark.parametrize("recipe_test_data", test_cases)
 def test_recipe_parser(recipe_test_data: RecipeSiteTestCase):
-    recipe = scraper.create_from_url(recipe_test_data.url)
+    recipe, _ = scraper.create_from_url(recipe_test_data.url)

     assert recipe.slug == recipe_test_data.expected_slug
     assert len(recipe.recipe_instructions) == recipe_test_data.num_steps

View file

@@ -1,16 +1,17 @@
 from dataclasses import dataclass
+from pathlib import Path

 from tests import data as test_data


-@dataclass
+@dataclass(slots=True)
 class RecipeSiteTestCase:
     url: str
     html: str
     expected_slug: str
     num_ingredients: int
     num_steps: int
-    html_file: str
+    html_file: Path


 def get_recipe_test_cases():