Feature/import keywords as tags (#1170)
* feat: import original keywords as tags
* remove cached env
* Update frontend api types
* fix: Issues with previous tag scraping implementation
* Update category handling in backend
* Update backend tests to include group_id
* Correct type check
* Update create-url interface
* Improve tag cleaner list support
* remove builtin name shadowing
* update type annotations
* test include tags scraper
* implement scraper context for optional data
* readd cache venv
* use continue instead of break
* remove test callback

Co-authored-by: Miroito <alban.vachette@gmail.com>

This commit is contained in:
parent 75c535fb2e
commit c696dee320

27 changed files with 281 additions and 61 deletions
@@ -77,8 +77,8 @@ export class RecipeAPI extends BaseCRUDAPI<Recipe, CreateRecipe> {
     return await this.requests.post<Recipe | null>(routes.recipesTestScrapeUrl, { url });
   }

-  async createOneByUrl(url: string) {
-    return await this.requests.post<string>(routes.recipesCreateUrl, { url });
+  async createOneByUrl(url: string, includeTags: boolean) {
+    return await this.requests.post<string>(routes.recipesCreateUrl, { url, includeTags });
   }

   async createManyByUrl(payload: BulkCreatePayload) {
@@ -18,7 +18,7 @@
     <v-tabs-items v-model="tab" class="mt-2">
       <!-- Create From URL -->
       <v-tab-item value="url" eager>
-        <v-form ref="domUrlForm" @submit.prevent="createByUrl(recipeUrl)">
+        <v-form ref="domUrlForm" @submit.prevent="createByUrl(recipeUrl, importKeywordsAsTags)">
           <v-card flat>
             <v-card-title class="headline"> Scrape Recipe </v-card-title>
             <v-card-text>
@@ -38,6 +38,8 @@
               :hint="$t('new-recipe.url-form-hint')"
               persistent-hint
             ></v-text-field>
+            <v-checkbox v-model="importKeywordsAsTags" label="Import original keywords as tags">
+            </v-checkbox>
           </v-card-text>
           <v-card-actions class="justify-center">
             <div style="width: 250px">
@@ -409,20 +411,36 @@ export default defineComponent({
       }
     },
     get() {
-      return route.value.query.recipe_import_url as string;
+      return route.value.query.recipe_import_url as string | null;
     },
   });

+  const importKeywordsAsTags = computed({
+    get() {
+      return route.value.query.import_keywords_as_tags === "1";
+    },
+    set(keywordsAsTags: boolean) {
+      let import_keywords_as_tags = "0"
+      if (keywordsAsTags) {
+        import_keywords_as_tags = "1"
+      }
+      router.replace({query: {...route.value.query, import_keywords_as_tags}})
+    }
+  });
+
   onMounted(() => {
     if (!recipeUrl.value) {
       return;
     }

     if (recipeUrl.value.includes("https")) {
-      createByUrl(recipeUrl.value);
+      createByUrl(recipeUrl.value, importKeywordsAsTags.value);
     }
   });



   // ===================================================
   // Recipe Debug URL Scraper

@@ -447,16 +465,18 @@ export default defineComponent({
   // Recipe URL Import
   const domUrlForm = ref<VForm | null>(null);

-  async function createByUrl(url: string | null) {
+  async function createByUrl(url: string, importKeywordsAsTags: boolean) {

     if (url === null) {
       return;
     }

     if (!domUrlForm.value?.validate() || url === "") {
       console.log("Invalid URL", url);
       return;
     }
     state.loading = true;
-    const { response } = await api.recipes.createOneByUrl(url);
+    const { response } = await api.recipes.createOneByUrl(url, importKeywordsAsTags);
     handleResponse(response);
   }

@@ -523,6 +543,7 @@ export default defineComponent({
     allCategories,
     tab,
     recipeUrl,
+    importKeywordsAsTags,
     bulkCreate,
     bulkUrls,
     lockBulkImport,
@@ -103,11 +103,13 @@ export interface RecipeSummary {
 }
 export interface RecipeCategory {
   id?: string;
+  groupId: string;
   name: string;
   slug: string;
 }
 export interface RecipeTag {
   id?: string;
+  groupId: string;
   name: string;
   slug: string;
 }

@@ -89,11 +89,13 @@ export interface RecipeSummary {
 }
 export interface RecipeCategory {
   id?: string;
+  groupId: string;
   name: string;
   slug: string;
 }
 export interface RecipeTag {
   id?: string;
+  groupId: string;
   name: string;
   slug: string;
 }

@@ -258,11 +258,13 @@ export interface RecipeSummary {
 }
 export interface RecipeCategory {
   id?: string;
+  groupId: string;
   name: string;
   slug: string;
 }
 export interface RecipeTag {
   id?: string;
+  groupId: string;
   name: string;
   slug: string;
 }

@@ -119,11 +119,13 @@ export interface RecipeSummary {
 }
 export interface RecipeCategory {
   id?: string;
+  groupId: string;
   name: string;
   slug: string;
 }
 export interface RecipeTag {
   id?: string;
+  groupId: string;
   name: string;
   slug: string;
 }

@@ -68,11 +68,13 @@ export interface CreateRecipeBulk {
 }
 export interface RecipeCategory {
   id?: string;
+  groupId: string;
   name: string;
   slug: string;
 }
 export interface RecipeTag {
   id?: string;
+  groupId: string;
   name: string;
   slug: string;
 }

@@ -132,11 +132,13 @@ export interface RecipeSummary {
 }
 export interface RecipeCategory {
   id?: string;
+  groupId: string;
   name: string;
   slug: string;
 }
 export interface RecipeTag {
   id?: string;
+  groupId: string;
   name: string;
   slug: string;
 }
@@ -57,8 +57,6 @@ async def start_scheduler():
         tasks.purge_group_data_exports,
     )

-    SchedulerRegistry.register_minutely(lambda: logger.info("Scheduler tick"))
-
     SchedulerRegistry.print_jobs()

     await SchedulerService.start()
@@ -34,10 +34,10 @@ class RecipeCategoryController(BaseUserController):
     def mixins(self):
         return HttpRepo(self.repo, self.deps.logger)

-    @router.get("", response_model=list[CategorySummary])
+    @router.get("", response_model=list[RecipeCategory])
     def get_all(self):
         """Returns a list of available categories in the database"""
-        return self.repo.get_all(override_schema=CategorySummary)
+        return self.repo.get_all(override_schema=RecipeCategory)

     @router.post("", status_code=201)
     def create_one(self, category: CategoryIn):
@@ -23,14 +23,16 @@ from mealie.routes._base import BaseUserController, controller
 from mealie.routes._base.mixins import HttpRepo
 from mealie.routes._base.routers import UserAPIRouter
 from mealie.schema.query import GetAll
-from mealie.schema.recipe import CreateRecipeByUrl, Recipe, RecipeImageTypes
+from mealie.schema.recipe import Recipe, RecipeImageTypes, ScrapeRecipe
 from mealie.schema.recipe.recipe import CreateRecipe, CreateRecipeByUrlBulk, RecipeSummary
 from mealie.schema.recipe.recipe_asset import RecipeAsset
+from mealie.schema.recipe.recipe_scraper import ScrapeRecipeTest
 from mealie.schema.response.responses import ErrorResponse
 from mealie.schema.server.tasks import ServerTaskNames
 from mealie.services.recipe.recipe_data_service import RecipeDataService
 from mealie.services.recipe.recipe_service import RecipeService
 from mealie.services.recipe.template_service import TemplateService
+from mealie.services.scraper.scraped_extras import ScraperContext
 from mealie.services.scraper.scraper import create_from_url
 from mealie.services.scraper.scraper_strategies import RecipeScraperPackage
 from mealie.services.server_tasks.background_executory import BackgroundExecutor

@@ -141,9 +143,15 @@ class RecipeController(BaseRecipeController):
     # URL Scraping Operations

     @router.post("/create-url", status_code=201, response_model=str)
-    def parse_recipe_url(self, url: CreateRecipeByUrl):
+    def parse_recipe_url(self, req: ScrapeRecipe):
         """Takes in a URL and attempts to scrape data and load it into the database"""
-        recipe = create_from_url(url.url)
+        recipe, extras = create_from_url(req.url)
+
+        if req.include_tags:
+            ctx = ScraperContext(self.user.id, self.group_id, self.repos)
+
+            recipe.tags = extras.use_tags(ctx)  # type: ignore
+
         return self.service.create_one(recipe).slug

     @router.post("/create-url/bulk", status_code=202)
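Not part of the commit, but for orientation: the create-url endpoint now accepts an include_tags flag alongside the url. A minimal sketch of calling it directly, assuming the recipes router is mounted under /api/recipes on the default port and that a valid API token is at hand (both are assumptions about the deployment, not shown in this diff):

    import requests

    resp = requests.post(
        "http://localhost:9000/api/recipes/create-url",  # host and port are assumptions
        json={"url": "https://myfavoriterecipes.com/recipes", "include_tags": True},
        headers={"Authorization": "Bearer <token>"},  # placeholder token
    )
    print(resp.status_code, resp.text)  # 201 and the new recipe slug on success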
@@ -159,7 +167,7 @@ class RecipeController(BaseRecipeController):

         for b in bulk.imports:
             try:
-                recipe = create_from_url(b.url)
+                recipe, _ = create_from_url(b.url)

                 if b.tags:
                     recipe.tags = b.tags

@@ -184,7 +192,7 @@ class RecipeController(BaseRecipeController):
         return {"details": "task has been started"}

     @router.post("/test-scrape-url")
-    def test_parse_recipe_url(self, url: CreateRecipeByUrl):
+    def test_parse_recipe_url(self, url: ScrapeRecipeTest):
         # Debugger should produce the same result as the scraper sees before cleaning
         if scraped_data := RecipeScraperPackage(url.url).scrape_url():
             return scraped_data.schema.data

@@ -264,7 +272,7 @@ class RecipeController(BaseRecipeController):
     # Image and Assets

     @router.post("/{slug}/image", tags=["Recipe: Images and Assets"])
-    def scrape_image_url(self, slug: str, url: CreateRecipeByUrl):
+    def scrape_image_url(self, slug: str, url: ScrapeRecipe):
         recipe = self.mixins.get_one(slug)
         data_service = RecipeDataService(recipe.id)
         data_service.scrape_image(url.url)
@@ -8,6 +8,7 @@ from .recipe_image_types import *
 from .recipe_ingredient import *
 from .recipe_notes import *
 from .recipe_nutrition import *
+from .recipe_scraper import *
 from .recipe_settings import *
 from .recipe_share_token import *  # type: ignore
 from .recipe_step import *
@@ -41,13 +41,6 @@ class RecipeTool(RecipeTag):
     on_hand: bool = False


-class CreateRecipeByUrl(BaseModel):
-    url: str
-
-    class Config:
-        schema_extra = {"example": {"url": "https://myfavoriterecipes.com/recipes"}}
-
-
 class CreateRecipeBulk(BaseModel):
     url: str
     categories: list[RecipeCategory] = None

@@ -140,21 +133,21 @@ class Recipe(RecipeSummary):
         if not self.id:
             raise ValueError("Recipe has no ID")

-        dir = app_dirs.RECIPE_DATA_DIR.joinpath(str(self.id))
-        dir.mkdir(exist_ok=True, parents=True)
-        return dir
+        folder = app_dirs.RECIPE_DATA_DIR.joinpath(str(self.id))
+        folder.mkdir(exist_ok=True, parents=True)
+        return folder

     @property
     def asset_dir(self) -> Path:
-        dir = self.directory.joinpath("assets")
-        dir.mkdir(exist_ok=True, parents=True)
-        return dir
+        folder = self.directory.joinpath("assets")
+        folder.mkdir(exist_ok=True, parents=True)
+        return folder

     @property
     def image_dir(self) -> Path:
-        dir = self.directory.joinpath("images")
-        dir.mkdir(exist_ok=True, parents=True)
-        return dir
+        folder = self.directory.joinpath("images")
+        folder.mkdir(exist_ok=True, parents=True)
+        return folder

     class Config:
         orm_mode = True
@@ -28,6 +28,7 @@ class CategoryBase(CategoryIn):

 class CategoryOut(CategoryBase):
     slug: str
+    group_id: UUID4

     class Config:
         orm_mode = True
mealie/schema/recipe/recipe_scraper.py (new file, 18 lines)
@@ -0,0 +1,18 @@
+from mealie.schema._mealie.mealie_model import MealieModel
+
+
+class ScrapeRecipeTest(MealieModel):
+    url: str
+
+
+class ScrapeRecipe(MealieModel):
+    url: str
+    include_tags: bool = False
+
+    class Config:
+        schema_extra = {
+            "example": {
+                "url": "https://myfavoriterecipes.com/recipes",
+                "includeTags": True,
+            },
+        }
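The camelCase "includeTags" key in schema_extra mirrors what the frontend sends ({ url, includeTags }); on the Python side the field itself is include_tags. A minimal sketch of constructing the new request model, using the import path from this diff:

    from mealie.schema.recipe.recipe_scraper import ScrapeRecipe

    req = ScrapeRecipe(url="https://myfavoriterecipes.com/recipes", include_tags=True)
    assert req.include_tags is True  # defaults to False when the flag is omitted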
@@ -91,6 +91,10 @@ class RecipeService(BaseService):
         additional_attrs["user_id"] = user.id
         additional_attrs["group_id"] = user.group_id

+        if additional_attrs.get("tags"):
+            for i in range(len(additional_attrs.get("tags"))):
+                additional_attrs["tags"][i]["group_id"] = user.group_id
+
         if not additional_attrs.get("recipe_ingredient"):
             additional_attrs["recipe_ingredient"] = [RecipeIngredient(note=ingredient_note)]
@@ -297,3 +297,32 @@ def pretty_print_timedelta(t: timedelta, max_components=None, max_decimal_places
     if out_list == []:
         return "none"
     return " ".join(out_list)
+
+
+def clean_tags(data: str | list[str]) -> list[str]:
+    """
+    Gets keywords as a list or natural language list and returns them into a list of strings of individual tags
+    """
+    if data is None:
+        return []
+
+    if isinstance(data, list):
+        all_str = True
+        i = 0
+        while all_str and i < len(data):
+            all_str = isinstance(data[i], str)
+            i = i + 1
+
+        if all_str:
+            return data
+        return []
+
+    if isinstance(data, str):
+        tag_list = data.split(",")
+
+        for i in range(len(tag_list)):
+            tag_list[i] = tag_list[i].strip().capitalize()
+
+        return tag_list
+
+    return []
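A few illustrative calls for clean_tags, inferred from the branches above and not part of the commit; the import path is an assumption based on the surrounding cleaner hunks:

    from mealie.services.scraper.cleaner import clean_tags  # assumed module path

    assert clean_tags("dinner, easy, weeknight meals") == ["Dinner", "Easy", "Weeknight meals"]
    assert clean_tags(["Dessert", "Baking"]) == ["Dessert", "Baking"]  # lists of strings pass through
    assert clean_tags(None) == []
    assert clean_tags({"keywords": "x"}) == []  # anything else collapses to an empty list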
@@ -1,4 +1,5 @@
 from mealie.schema.recipe.recipe import Recipe
+from mealie.services.scraper.scraped_extras import ScrapedExtras

 from .scraper_strategies import ABCScraperStrategy, RecipeScraperOpenGraph, RecipeScraperPackage

@@ -20,16 +21,16 @@ class RecipeScraper:

         self.scrapers = scrapers

-    def scrape(self, url: str) -> Recipe | None:
+    def scrape(self, url: str) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
         """
         Scrapes a recipe from the web.
         """

         for scraper_type in self.scrapers:
             scraper = scraper_type(url)
-            recipe = scraper.parse()
+            recipe, extras = scraper.parse()

             if recipe is not None:
-                return recipe
+                return recipe, extras

-        return None
+        return None, None
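Callers now unpack a pair instead of a single value. A short sketch of the new contract (not part of the commit; the URL is hypothetical and the module path is taken from the relative import above):

    from mealie.services.scraper.recipe_scraper import RecipeScraper  # assumed module path

    recipe, extras = RecipeScraper().scrape("https://example.com/some-recipe")  # hypothetical URL
    if recipe is None:
        print("no strategy could parse the page")  # extras is None in this case as well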
mealie/services/scraper/scraped_extras.py (new file, 49 lines)
@@ -0,0 +1,49 @@
+from dataclasses import dataclass
+
+from pydantic import UUID4
+from slugify import slugify
+
+from mealie.repos.repository_factory import AllRepositories
+from mealie.schema.recipe import TagOut
+from mealie.schema.recipe.recipe_category import TagSave
+
+
+class NoContextException(Exception):
+    pass
+
+
+@dataclass(slots=True)
+class ScraperContext:
+    user_id: UUID4
+    group_id: UUID4
+    repos: AllRepositories
+
+
+class ScrapedExtras:
+    def __init__(self) -> None:
+        self._tags: list[str] = []
+
+    def set_tags(self, tags: list[str]) -> None:
+        self._tags = tags
+
+    def use_tags(self, ctx: ScraperContext) -> list[TagOut]:
+        if not self._tags:
+            return []
+
+        repo = ctx.repos.tags.by_group(ctx.group_id)
+
+        tags = []
+        for tag in self._tags:
+            slugify_tag = slugify(tag)
+
+            # Check if tag exists
+            if db_tag := repo.get_one(slugify_tag, "slug"):
+                tags.append(db_tag)
+                continue
+
+            save_data = TagSave(name=tag, group_id=ctx.group_id)
+            db_tag = repo.create(save_data)
+
+            tags.append(db_tag)
+
+        return tags
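ScrapedExtras only records the cleaned keyword strings while parsing; turning them into real tags is deferred until a ScraperContext is available. A wiring sketch mirroring what the create-url route above does (user, group_id, and repos are placeholders for objects the controller already holds, so this is illustrative rather than runnable on its own):

    from mealie.services.scraper.scraped_extras import ScrapedExtras, ScraperContext

    extras = ScrapedExtras()
    extras.set_tags(["Dinner", "Easy"])  # normally populated by the scraper strategy

    ctx = ScraperContext(user_id=user.id, group_id=group_id, repos=repos)  # placeholder objects
    recipe_tags = extras.use_tags(ctx)  # reuses existing tags by slug, creates the missing ones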
@@ -8,6 +8,7 @@ from mealie.core.root_logger import get_logger
 from mealie.pkgs import cache
 from mealie.schema.recipe import Recipe
 from mealie.services.recipe.recipe_data_service import RecipeDataService
+from mealie.services.scraper.scraped_extras import ScrapedExtras

 from .recipe_scraper import RecipeScraper

@@ -18,7 +19,7 @@ class ParserErrors(str, Enum):
     CONNECTION_ERROR = "CONNECTION_ERROR"


-def create_from_url(url: str) -> Recipe:
+def create_from_url(url: str) -> tuple[Recipe, ScrapedExtras]:
     """Main entry point for generating a recipe from a URL. Pass in a URL and
     a Recipe object will be returned if successful.

@@ -29,12 +30,12 @@ def create_from_url(url: str) -> Recipe:
         Recipe: Recipe Object
     """
     scraper = RecipeScraper()
-    new_recipe = scraper.scrape(url)
-    new_recipe.id = uuid4()
+    new_recipe, extras = scraper.scrape(url)

     if not new_recipe:
         raise HTTPException(status.HTTP_400_BAD_REQUEST, {"details": ParserErrors.BAD_RECIPE_DATA.value})

+    new_recipe.id = uuid4()
     logger = get_logger()
     logger.info(f"Image {new_recipe.image}")

@@ -52,4 +53,4 @@ def create_from_url(url: str) -> Recipe:
     new_recipe.name = f"No Recipe Name Found - {str(uuid4())}"
     new_recipe.slug = slugify(new_recipe.name)

-    return new_recipe
+    return new_recipe, extras
@@ -10,6 +10,7 @@ from w3lib.html import get_base_url

 from mealie.core.root_logger import get_logger
 from mealie.schema.recipe.recipe import Recipe, RecipeStep
+from mealie.services.scraper.scraped_extras import ScrapedExtras

 from . import cleaner

@@ -26,7 +27,7 @@ class ABCScraperStrategy(ABC):
         self.url = url

     @abstractmethod
-    def parse(self) -> Recipe | None:
+    def parse(self) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
         """Parse a recipe from a web URL.

         Args:

@@ -39,9 +40,11 @@


 class RecipeScraperPackage(ABCScraperStrategy):
-    def clean_scraper(self, scraped_data: SchemaScraperFactory.SchemaScraper, url: str) -> Recipe:
+    def clean_scraper(self, scraped_data: SchemaScraperFactory.SchemaScraper, url: str) -> tuple[Recipe, ScrapedExtras]:
         def try_get_default(func_call: Callable | None, get_attr: str, default: Any, clean_func=None):
             value = default

+            if func_call:
                 try:
                     value = func_call()
                 except Exception:

@@ -58,7 +61,7 @@ class RecipeScraperPackage(ABCScraperStrategy):

             return value

-        def get_instructions() -> list[dict]:
+        def get_instructions() -> list[RecipeStep]:
             instruction_as_text = try_get_default(
                 scraped_data.instructions, "recipeInstructions", ["No Instructions Found"]
             )

@@ -78,7 +81,11 @@ class RecipeScraperPackage(ABCScraperStrategy):
             None, "cookTime", None, cleaner.clean_time
         )

-        return Recipe(
+        extras = ScrapedExtras()
+
+        extras.set_tags(try_get_default(None, "keywords", "", cleaner.clean_tags))
+
+        recipe = Recipe(
             name=try_get_default(scraped_data.title, "name", "No Name Found", cleaner.clean_string),
             slug="",
             image=try_get_default(None, "image", None),

@@ -93,6 +100,8 @@ class RecipeScraperPackage(ABCScraperStrategy):
             org_url=url,
         )

+        return recipe, extras
+
     def scrape_url(self) -> SchemaScraperFactory.SchemaScraper | Any | None:
         try:
             scraped_schema = scrape_me(self.url)

@@ -103,8 +112,8 @@ class RecipeScraperPackage(ABCScraperStrategy):
             self.logger.error("Recipe Scraper was unable to extract a recipe.")
             return None

-        except ConnectionError:
-            raise HTTPException(status.HTTP_400_BAD_REQUEST, {"details": "CONNECTION_ERROR"})
+        except ConnectionError as e:
+            raise HTTPException(status.HTTP_400_BAD_REQUEST, {"details": "CONNECTION_ERROR"}) from e

         # Check to see if the recipe is valid
         try:

@@ -123,7 +132,7 @@ class RecipeScraperPackage(ABCScraperStrategy):
         self.logger.debug(f"Recipe Scraper [Package] was unable to extract a recipe from {self.url}")
         return None

-    def parse(self) -> Recipe | None:
+    def parse(self):
         """
         Parse a recipe from a given url.
         """

@@ -177,7 +186,7 @@ class RecipeScraperOpenGraph(ABCScraperStrategy):
             "extras": [],
         }

-    def parse(self) -> Recipe | None:
+    def parse(self):
         """
         Parse a recipe from a given url.
         """

@@ -188,4 +197,4 @@ class RecipeScraperOpenGraph(ABCScraperStrategy):
         if og_data is None:
             return None

-        return Recipe(**og_data)
+        return Recipe(**og_data), ScrapedExtras()
@@ -22,6 +22,10 @@ html_sous_vide_shrimp = CWD / "html/sous-vide-shrimp.html"

 html_jam_roly_poly_with_custard = CWD / "html/jam-roly-poly-with-custard.html"

+html_nutty_umami_noodles_with_scallion_brown_butter_and_snow_peas_recipe = (
+    CWD / "html/nutty-umami-noodles-with-scallion-brown-butter-and-snow-peas-recipe.html"
+)
+
 html_taiwanese_three_cup_chicken_san_bei_gi_recipe = CWD / "html/taiwanese-three-cup-chicken-san-bei-gi-recipe.html"

 html_detroit_style_pepperoni_pizza = CWD / "html/detroit-style-pepperoni-pizza.html"

File diff suppressed because one or more lines are too long
@@ -130,7 +130,9 @@ def test_organizer_association(
     # Get Recipe Data
     response = api_client.get(routes.RoutesRecipe.item(slug), headers=unique_user.token)
     as_json = response.json()
-    as_json[recipe_key] = [{"id": item["id"], "name": item["name"], "slug": item["slug"]}]
+    as_json[recipe_key] = [
+        {"id": item["id"], "group_id": unique_user.group_id, "name": item["name"], "slug": item["slug"]}
+    ]

     # Update Recipe
     response = api_client.put(routes.RoutesRecipe.item(slug), json=as_json, headers=unique_user.token)

@@ -177,7 +179,9 @@ def test_organizer_get_by_slug(
     for slug in recipe_slugs:
         response = api_client.get(routes.RoutesRecipe.item(slug), headers=unique_user.token)
         as_json = response.json()
-        as_json[recipe_key] = [{"id": item["id"], "name": item["name"], "slug": item["slug"]}]
+        as_json[recipe_key] = [
+            {"id": item["id"], "group_id": unique_user.group_id, "name": item["name"], "slug": item["slug"]}
+        ]

         response = api_client.put(routes.RoutesRecipe.item(slug), json=as_json, headers=unique_user.token)
         assert response.status_code == 200
@@ -13,7 +13,7 @@ from slugify import slugify
 from mealie.schema.recipe.recipe import RecipeCategory
 from mealie.services.recipe.recipe_data_service import RecipeDataService
 from mealie.services.scraper.scraper_strategies import RecipeScraperOpenGraph
-from tests import utils
+from tests import data, utils
 from tests.utils.app_routes import AppRoutes
 from tests.utils.factories import random_string
 from tests.utils.fixture_schemas import TestUser

@@ -83,12 +83,77 @@ def test_create_by_url(

     api_client.delete(api_routes.recipes_recipe_slug(recipe_data.expected_slug), headers=unique_user.token)

-    response = api_client.post(api_routes.recipes_create_url, json={"url": recipe_data.url}, headers=unique_user.token)
+    response = api_client.post(
+        api_routes.recipes_create_url, json={"url": recipe_data.url, "include_tags": False}, headers=unique_user.token
+    )

     assert response.status_code == 201
     assert json.loads(response.text) == recipe_data.expected_slug


+def test_create_by_url_with_tags(
+    api_client: TestClient,
+    api_routes: AppRoutes,
+    unique_user: TestUser,
+    monkeypatch: MonkeyPatch,
+):
+    html_file = data.html_nutty_umami_noodles_with_scallion_brown_butter_and_snow_peas_recipe
+
+    # Override init function for AbstractScraper to use the test html instead of calling the url
+    monkeypatch.setattr(
+        AbstractScraper,
+        "__init__",
+        get_init(html_file),
+    )
+    # Override the get_html method of the RecipeScraperOpenGraph to return the test html
+    monkeypatch.setattr(
+        RecipeScraperOpenGraph,
+        "get_html",
+        open_graph_override(html_file.read_text()),
+    )
+    # Skip image downloader
+    monkeypatch.setattr(
+        RecipeDataService,
+        "scrape_image",
+        lambda *_: "TEST_IMAGE",
+    )
+
+    response = api_client.post(
+        api_routes.recipes_create_url,
+        json={"url": "https://google.com", "include_tags": True},  # URL Doesn't matter
+        headers=unique_user.token,
+    )
+    assert response.status_code == 201
+    slug = "nutty-umami-noodles-with-scallion-brown-butter-and-snow-peas"
+
+    # Get the recipe
+    response = api_client.get(api_routes.recipes_recipe_slug(slug), headers=unique_user.token)
+    assert response.status_code == 200
+
+    # Verify the tags are present
+    expected_tags = {
+        "sauté",
+        "pea",
+        "noodle",
+        "udon noodle",
+        "ramen noodle",
+        "dinner",
+        "main",
+        "vegetarian",
+        "easy",
+        "quick",
+        "weeknight meals",
+        "web",
+    }
+
+    recipe = json.loads(response.text)
+
+    assert len(recipe["tags"]) == len(expected_tags)
+
+    for tag in recipe["tags"]:
+        assert tag["name"] in expected_tags
+
+
 @pytest.mark.parametrize("recipe_data", recipe_test_data)
 def test_read_update(
     api_client: TestClient,
@@ -17,7 +17,7 @@ and then use this test case by removing the `@pytest.mark.skip` and than testing
 @pytest.mark.skipif(True, reason="Long Running API Test - manually run when updating the parser")
 @pytest.mark.parametrize("recipe_test_data", test_cases)
 def test_recipe_parser(recipe_test_data: RecipeSiteTestCase):
-    recipe = scraper.create_from_url(recipe_test_data.url)
+    recipe, _ = scraper.create_from_url(recipe_test_data.url)

     assert recipe.slug == recipe_test_data.expected_slug
     assert len(recipe.recipe_instructions) == recipe_test_data.num_steps
@@ -1,16 +1,17 @@
 from dataclasses import dataclass
+from pathlib import Path

 from tests import data as test_data


-@dataclass
+@dataclass(slots=True)
 class RecipeSiteTestCase:
     url: str
     html: str
     expected_slug: str
     num_ingredients: int
     num_steps: int
-    html_file: str
+    html_file: Path


 def get_recipe_test_cases():