Feature/import keywords as tags (#1170)
* feat: import original keywords as tags
* remove cached env
* Update frontend api types
* fix: Issues with previous tag scraping implementation
* Update category handling in backend
* Update backend tests to include group_id
* Correct type check
* Update create-url interface
* Improve tag cleaner list support
* remove builtin name shadowing
* update type annotations
* test include tags scraper
* implement scraper context for optional data
* re-add cache venv
* use continue instead of break
* remove test callback

Co-authored-by: Miroito <alban.vachette@gmail.com>
This commit is contained in:
parent
75c535fb2e
commit
c696dee320
27 changed files with 281 additions and 61 deletions
@@ -77,8 +77,8 @@ export class RecipeAPI extends BaseCRUDAPI<Recipe, CreateRecipe> {
     return await this.requests.post<Recipe | null>(routes.recipesTestScrapeUrl, { url });
   }

-  async createOneByUrl(url: string) {
-    return await this.requests.post<string>(routes.recipesCreateUrl, { url });
+  async createOneByUrl(url: string, includeTags: boolean) {
+    return await this.requests.post<string>(routes.recipesCreateUrl, { url, includeTags });
   }

   async createManyByUrl(payload: BulkCreatePayload) {
@@ -18,7 +18,7 @@
       <v-tabs-items v-model="tab" class="mt-2">
         <!-- Create From URL -->
         <v-tab-item value="url" eager>
-          <v-form ref="domUrlForm" @submit.prevent="createByUrl(recipeUrl)">
+          <v-form ref="domUrlForm" @submit.prevent="createByUrl(recipeUrl, importKeywordsAsTags)">
             <v-card flat>
               <v-card-title class="headline"> Scrape Recipe </v-card-title>
               <v-card-text>
@@ -38,6 +38,8 @@
                   :hint="$t('new-recipe.url-form-hint')"
                   persistent-hint
                 ></v-text-field>
+                <v-checkbox v-model="importKeywordsAsTags" label="Import original keywords as tags">
+                </v-checkbox>
               </v-card-text>
               <v-card-actions class="justify-center">
                 <div style="width: 250px">
@@ -409,20 +411,36 @@ export default defineComponent({
        }
      },
      get() {
-        return route.value.query.recipe_import_url as string;
+        return route.value.query.recipe_import_url as string | null;
      },
    });

+    const importKeywordsAsTags = computed({
+      get() {
+        return route.value.query.import_keywords_as_tags === "1";
+      },
+      set(keywordsAsTags: boolean) {
+        let import_keywords_as_tags = "0"
+        if (keywordsAsTags) {
+          import_keywords_as_tags = "1"
+        }
+        router.replace({query: {...route.value.query, import_keywords_as_tags}})
+      }
+    });
+
    onMounted(() => {
      if (!recipeUrl.value) {
        return;
      }

      if (recipeUrl.value.includes("https")) {
-        createByUrl(recipeUrl.value);
+        createByUrl(recipeUrl.value, importKeywordsAsTags.value);
      }
    });

    // ===================================================
    // Recipe Debug URL Scraper
@@ -447,16 +465,18 @@ export default defineComponent({
    // Recipe URL Import
    const domUrlForm = ref<VForm | null>(null);

-    async function createByUrl(url: string | null) {
+    async function createByUrl(url: string, importKeywordsAsTags: boolean) {
      if (url === null) {
        return;
      }

      if (!domUrlForm.value?.validate() || url === "") {
        console.log("Invalid URL", url);
        return;
      }
      state.loading = true;
-      const { response } = await api.recipes.createOneByUrl(url);
+      const { response } = await api.recipes.createOneByUrl(url, importKeywordsAsTags);
      handleResponse(response);
    }
@@ -523,6 +543,7 @@ export default defineComponent({
      allCategories,
      tab,
      recipeUrl,
+      importKeywordsAsTags,
      bulkCreate,
      bulkUrls,
      lockBulkImport,
@@ -103,11 +103,13 @@ export interface RecipeSummary {
 }
 export interface RecipeCategory {
   id?: string;
+  groupId: string;
   name: string;
   slug: string;
 }
 export interface RecipeTag {
   id?: string;
+  groupId: string;
   name: string;
   slug: string;
 }
@@ -89,11 +89,13 @@ export interface RecipeSummary {
 }
 export interface RecipeCategory {
   id?: string;
+  groupId: string;
   name: string;
   slug: string;
 }
 export interface RecipeTag {
   id?: string;
+  groupId: string;
   name: string;
   slug: string;
 }
@@ -258,11 +258,13 @@ export interface RecipeSummary {
 }
 export interface RecipeCategory {
   id?: string;
+  groupId: string;
   name: string;
   slug: string;
 }
 export interface RecipeTag {
   id?: string;
+  groupId: string;
   name: string;
   slug: string;
 }
@@ -119,11 +119,13 @@ export interface RecipeSummary {
 }
 export interface RecipeCategory {
   id?: string;
+  groupId: string;
   name: string;
   slug: string;
 }
 export interface RecipeTag {
   id?: string;
+  groupId: string;
   name: string;
   slug: string;
 }
@@ -68,11 +68,13 @@ export interface CreateRecipeBulk {
 }
 export interface RecipeCategory {
   id?: string;
+  groupId: string;
   name: string;
   slug: string;
 }
 export interface RecipeTag {
   id?: string;
+  groupId: string;
   name: string;
   slug: string;
 }
@@ -132,11 +132,13 @@ export interface RecipeSummary {
 }
 export interface RecipeCategory {
   id?: string;
+  groupId: string;
   name: string;
   slug: string;
 }
 export interface RecipeTag {
   id?: string;
+  groupId: string;
   name: string;
   slug: string;
 }
@@ -57,8 +57,6 @@ async def start_scheduler():
        tasks.purge_group_data_exports,
    )

-    SchedulerRegistry.register_minutely(lambda: logger.info("Scheduler tick"))
-
    SchedulerRegistry.print_jobs()

    await SchedulerService.start()
@@ -34,10 +34,10 @@ class RecipeCategoryController(BaseUserController):
    def mixins(self):
        return HttpRepo(self.repo, self.deps.logger)

-    @router.get("", response_model=list[CategorySummary])
+    @router.get("", response_model=list[RecipeCategory])
    def get_all(self):
        """Returns a list of available categories in the database"""
-        return self.repo.get_all(override_schema=CategorySummary)
+        return self.repo.get_all(override_schema=RecipeCategory)

    @router.post("", status_code=201)
    def create_one(self, category: CategoryIn):
@@ -23,14 +23,16 @@ from mealie.routes._base import BaseUserController, controller
 from mealie.routes._base.mixins import HttpRepo
 from mealie.routes._base.routers import UserAPIRouter
 from mealie.schema.query import GetAll
-from mealie.schema.recipe import CreateRecipeByUrl, Recipe, RecipeImageTypes
+from mealie.schema.recipe import Recipe, RecipeImageTypes, ScrapeRecipe
 from mealie.schema.recipe.recipe import CreateRecipe, CreateRecipeByUrlBulk, RecipeSummary
 from mealie.schema.recipe.recipe_asset import RecipeAsset
+from mealie.schema.recipe.recipe_scraper import ScrapeRecipeTest
 from mealie.schema.response.responses import ErrorResponse
 from mealie.schema.server.tasks import ServerTaskNames
 from mealie.services.recipe.recipe_data_service import RecipeDataService
 from mealie.services.recipe.recipe_service import RecipeService
 from mealie.services.recipe.template_service import TemplateService
+from mealie.services.scraper.scraped_extras import ScraperContext
 from mealie.services.scraper.scraper import create_from_url
 from mealie.services.scraper.scraper_strategies import RecipeScraperPackage
 from mealie.services.server_tasks.background_executory import BackgroundExecutor
@@ -141,9 +143,15 @@ class RecipeController(BaseRecipeController):
    # URL Scraping Operations

    @router.post("/create-url", status_code=201, response_model=str)
-    def parse_recipe_url(self, url: CreateRecipeByUrl):
+    def parse_recipe_url(self, req: ScrapeRecipe):
        """Takes in a URL and attempts to scrape data and load it into the database"""
-        recipe = create_from_url(url.url)
+        recipe, extras = create_from_url(req.url)
+
+        if req.include_tags:
+            ctx = ScraperContext(self.user.id, self.group_id, self.repos)
+
+            recipe.tags = extras.use_tags(ctx)  # type: ignore
+
        return self.service.create_one(recipe).slug

    @router.post("/create-url/bulk", status_code=202)
@@ -159,7 +167,7 @@ class RecipeController(BaseRecipeController):

        for b in bulk.imports:
            try:
-                recipe = create_from_url(b.url)
+                recipe, _ = create_from_url(b.url)

                if b.tags:
                    recipe.tags = b.tags
@@ -184,7 +192,7 @@ class RecipeController(BaseRecipeController):
        return {"details": "task has been started"}

    @router.post("/test-scrape-url")
-    def test_parse_recipe_url(self, url: CreateRecipeByUrl):
+    def test_parse_recipe_url(self, url: ScrapeRecipeTest):
        # Debugger should produce the same result as the scraper sees before cleaning
        if scraped_data := RecipeScraperPackage(url.url).scrape_url():
            return scraped_data.schema.data
@@ -264,7 +272,7 @@ class RecipeController(BaseRecipeController):
    # Image and Assets

    @router.post("/{slug}/image", tags=["Recipe: Images and Assets"])
-    def scrape_image_url(self, slug: str, url: CreateRecipeByUrl):
+    def scrape_image_url(self, slug: str, url: ScrapeRecipe):
        recipe = self.mixins.get_one(slug)
        data_service = RecipeDataService(recipe.id)
        data_service.scrape_image(url.url)
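For context, a minimal client-side sketch of the new create-url contract (the host, token, and the /api/recipes prefix are assumptions about a typical deployment; the camelCase includeTags key follows the ScrapeRecipe schema example further down):

    import requests

    BASE_URL = "http://localhost:9000"   # assumed local Mealie instance
    TOKEN = "<api-token>"                # placeholder bearer token

    # When includeTags is true, the scraped keywords are created (or reused)
    # as tags in the caller's group and attached to the new recipe.
    resp = requests.post(
        f"{BASE_URL}/api/recipes/create-url",
        json={"url": "https://example.com/some-recipe", "includeTags": True},
        headers={"Authorization": f"Bearer {TOKEN}"},
        timeout=30,
    )
    resp.raise_for_status()
    print(resp.json())  # slug of the created recipe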
@@ -8,6 +8,7 @@ from .recipe_image_types import *
 from .recipe_ingredient import *
 from .recipe_notes import *
 from .recipe_nutrition import *
+from .recipe_scraper import *
 from .recipe_settings import *
 from .recipe_share_token import *  # type: ignore
 from .recipe_step import *
@@ -41,13 +41,6 @@ class RecipeTool(RecipeTag):
    on_hand: bool = False


-class CreateRecipeByUrl(BaseModel):
-    url: str
-
-    class Config:
-        schema_extra = {"example": {"url": "https://myfavoriterecipes.com/recipes"}}
-
-
 class CreateRecipeBulk(BaseModel):
    url: str
    categories: list[RecipeCategory] = None
@@ -140,21 +133,21 @@ class Recipe(RecipeSummary):
        if not self.id:
            raise ValueError("Recipe has no ID")

-        dir = app_dirs.RECIPE_DATA_DIR.joinpath(str(self.id))
-        dir.mkdir(exist_ok=True, parents=True)
-        return dir
+        folder = app_dirs.RECIPE_DATA_DIR.joinpath(str(self.id))
+        folder.mkdir(exist_ok=True, parents=True)
+        return folder

    @property
    def asset_dir(self) -> Path:
-        dir = self.directory.joinpath("assets")
-        dir.mkdir(exist_ok=True, parents=True)
-        return dir
+        folder = self.directory.joinpath("assets")
+        folder.mkdir(exist_ok=True, parents=True)
+        return folder

    @property
    def image_dir(self) -> Path:
-        dir = self.directory.joinpath("images")
-        dir.mkdir(exist_ok=True, parents=True)
-        return dir
+        folder = self.directory.joinpath("images")
+        folder.mkdir(exist_ok=True, parents=True)
+        return folder

    class Config:
        orm_mode = True
@@ -28,6 +28,7 @@ class CategoryBase(CategoryIn):

 class CategoryOut(CategoryBase):
    slug: str
+    group_id: UUID4

    class Config:
        orm_mode = True
mealie/schema/recipe/recipe_scraper.py (new file, 18 lines)
@@ -0,0 +1,18 @@
+from mealie.schema._mealie.mealie_model import MealieModel
+
+
+class ScrapeRecipeTest(MealieModel):
+    url: str
+
+
+class ScrapeRecipe(MealieModel):
+    url: str
+    include_tags: bool = False
+
+    class Config:
+        schema_extra = {
+            "example": {
+                "url": "https://myfavoriterecipes.com/recipes",
+                "includeTags": True,
+            },
+        }
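Roughly, the new request model behaves like this (a sketch; the camelCase "includeTags" key in the request body is implied by the schema_extra example above, while the Python field itself is snake_case):

    from mealie.schema.recipe.recipe_scraper import ScrapeRecipe

    # include_tags defaults to False, so existing clients keep their behavior
    req = ScrapeRecipe(url="https://myfavoriterecipes.com/recipes")
    assert req.include_tags is False

    # Opting in to keyword import
    req = ScrapeRecipe(url="https://myfavoriterecipes.com/recipes", include_tags=True)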
@@ -91,6 +91,10 @@ class RecipeService(BaseService):
            additional_attrs["user_id"] = user.id
            additional_attrs["group_id"] = user.group_id

+            if additional_attrs.get("tags"):
+                for i in range(len(additional_attrs.get("tags"))):
+                    additional_attrs["tags"][i]["group_id"] = user.group_id
+
            if not additional_attrs.get("recipe_ingredient"):
                additional_attrs["recipe_ingredient"] = [RecipeIngredient(note=ingredient_note)]
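In plain terms, the loop above stamps the owning group onto every tag dict supplied through additional_attrs; a toy illustration (the UUID is a placeholder):

    group_id = "11111111-2222-3333-4444-555555555555"  # placeholder group UUID

    additional_attrs = {"tags": [{"name": "Dinner"}, {"name": "Quick"}]}

    if additional_attrs.get("tags"):
        for i in range(len(additional_attrs["tags"])):
            additional_attrs["tags"][i]["group_id"] = group_id

    # additional_attrs["tags"] is now:
    # [{"name": "Dinner", "group_id": group_id}, {"name": "Quick", "group_id": group_id}]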
@@ -297,3 +297,32 @@ def pretty_print_timedelta(t: timedelta, max_components=None, max_decimal_places
    if out_list == []:
        return "none"
    return " ".join(out_list)
+
+
+def clean_tags(data: str | list[str]) -> list[str]:
+    """
+    Gets keywords as a list or natural language list and returns them into a list of strings of individual tags
+    """
+    if data is None:
+        return []
+
+    if isinstance(data, list):
+        all_str = True
+        i = 0
+        while all_str and i < len(data):
+            all_str = isinstance(data[i], str)
+            i = i + 1
+
+        if all_str:
+            return data
+        return []
+
+    if isinstance(data, str):
+        tag_list = data.split(",")
+
+        for i in range(len(tag_list)):
+            tag_list[i] = tag_list[i].strip().capitalize()
+
+        return tag_list
+
+    return []
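The new cleaner accepts either a comma-separated keyword string or a list of strings; anything else falls back to an empty list. Illustrative calls (the module path is assumed from the hunk header, which points at the cleaner's pretty_print_timedelta helper):

    from mealie.services.scraper import cleaner  # path assumed

    # Natural-language keyword strings are split on commas, stripped, and capitalized
    assert cleaner.clean_tags("dinner, quick,  weeknight meals") == ["Dinner", "Quick", "Weeknight meals"]

    # A list of strings passes through unchanged
    assert cleaner.clean_tags(["dinner", "quick"]) == ["dinner", "quick"]

    # None or a mixed-type list yields an empty list instead of raising
    assert cleaner.clean_tags(None) == []
    assert cleaner.clean_tags(["dinner", 42]) == []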
@@ -1,4 +1,5 @@
 from mealie.schema.recipe.recipe import Recipe
+from mealie.services.scraper.scraped_extras import ScrapedExtras

 from .scraper_strategies import ABCScraperStrategy, RecipeScraperOpenGraph, RecipeScraperPackage

@@ -20,16 +21,16 @@ class RecipeScraper:

        self.scrapers = scrapers

-    def scrape(self, url: str) -> Recipe | None:
+    def scrape(self, url: str) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
        """
        Scrapes a recipe from the web.
        """

        for scraper_type in self.scrapers:
            scraper = scraper_type(url)
-            recipe = scraper.parse()
+            recipe, extras = scraper.parse()

            if recipe is not None:
-                return recipe
+                return recipe, extras

-        return None
+        return None, None
mealie/services/scraper/scraped_extras.py (new file, 49 lines)
@@ -0,0 +1,49 @@
+from dataclasses import dataclass
+
+from pydantic import UUID4
+from slugify import slugify
+
+from mealie.repos.repository_factory import AllRepositories
+from mealie.schema.recipe import TagOut
+from mealie.schema.recipe.recipe_category import TagSave
+
+
+class NoContextException(Exception):
+    pass
+
+
+@dataclass(slots=True)
+class ScraperContext:
+    user_id: UUID4
+    group_id: UUID4
+    repos: AllRepositories
+
+
+class ScrapedExtras:
+    def __init__(self) -> None:
+        self._tags: list[str] = []
+
+    def set_tags(self, tags: list[str]) -> None:
+        self._tags = tags
+
+    def use_tags(self, ctx: ScraperContext) -> list[TagOut]:
+        if not self._tags:
+            return []
+
+        repo = ctx.repos.tags.by_group(ctx.group_id)
+
+        tags = []
+        for tag in self._tags:
+            slugify_tag = slugify(tag)
+
+            # Check if tag exists
+            if db_tag := repo.get_one(slugify_tag, "slug"):
+                tags.append(db_tag)
+                continue
+
+            save_data = TagSave(name=tag, group_id=ctx.group_id)
+            db_tag = repo.create(save_data)
+
+            tags.append(db_tag)
+
+        return tags
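A rough usage sketch of the new helpers: a scraper strategy records raw keyword strings with set_tags, and the route layer later resolves them to tag records inside the caller's group via use_tags (the context construction below is illustrative, since it needs live repositories):

    from mealie.services.scraper.scraped_extras import ScrapedExtras, ScraperContext

    extras = ScrapedExtras()
    extras.set_tags(["Dinner", "Quick", "Weeknight meals"])

    # Later, where database repositories are available (e.g. in a route handler):
    # ctx = ScraperContext(user_id=current_user.id, group_id=current_user.group_id, repos=repos)
    # tags = extras.use_tags(ctx)  # reuses existing tags by slug, creates the rest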
@@ -8,6 +8,7 @@ from mealie.core.root_logger import get_logger
 from mealie.pkgs import cache
 from mealie.schema.recipe import Recipe
 from mealie.services.recipe.recipe_data_service import RecipeDataService
+from mealie.services.scraper.scraped_extras import ScrapedExtras

 from .recipe_scraper import RecipeScraper

@@ -18,7 +19,7 @@ class ParserErrors(str, Enum):
    CONNECTION_ERROR = "CONNECTION_ERROR"


-def create_from_url(url: str) -> Recipe:
+def create_from_url(url: str) -> tuple[Recipe, ScrapedExtras]:
    """Main entry point for generating a recipe from a URL. Pass in a URL and
    a Recipe object will be returned if successful.

@@ -29,12 +30,12 @@ def create_from_url(url: str) -> Recipe:
        Recipe: Recipe Object
    """
    scraper = RecipeScraper()
-    new_recipe = scraper.scrape(url)
-    new_recipe.id = uuid4()
+    new_recipe, extras = scraper.scrape(url)

    if not new_recipe:
        raise HTTPException(status.HTTP_400_BAD_REQUEST, {"details": ParserErrors.BAD_RECIPE_DATA.value})

+    new_recipe.id = uuid4()
    logger = get_logger()
    logger.info(f"Image {new_recipe.image}")

@@ -52,4 +53,4 @@ def create_from_url(url: str) -> Recipe:
        new_recipe.name = f"No Recipe Name Found - {str(uuid4())}"
        new_recipe.slug = slugify(new_recipe.name)

-    return new_recipe
+    return new_recipe, extras
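Because create_from_url now returns a pair, callers that do not care about the scraped keywords unpack and discard the second element, as the bulk importer and the parser test do; a minimal sketch (the URL is illustrative):

    from mealie.services.scraper import scraper

    # New contract: (Recipe, ScrapedExtras) instead of a bare Recipe
    recipe, extras = scraper.create_from_url("https://example.com/some-recipe")

    # Callers that ignore keywords simply discard the extras
    recipe, _ = scraper.create_from_url("https://example.com/some-recipe")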
@@ -10,6 +10,7 @@ from w3lib.html import get_base_url

 from mealie.core.root_logger import get_logger
 from mealie.schema.recipe.recipe import Recipe, RecipeStep
+from mealie.services.scraper.scraped_extras import ScrapedExtras

 from . import cleaner

@@ -26,7 +27,7 @@ class ABCScraperStrategy(ABC):
        self.url = url

    @abstractmethod
-    def parse(self) -> Recipe | None:
+    def parse(self) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
        """Parse a recipe from a web URL.

        Args:
@@ -39,13 +40,15 @@ class ABCScraperStrategy(ABC):


 class RecipeScraperPackage(ABCScraperStrategy):
-    def clean_scraper(self, scraped_data: SchemaScraperFactory.SchemaScraper, url: str) -> Recipe:
+    def clean_scraper(self, scraped_data: SchemaScraperFactory.SchemaScraper, url: str) -> tuple[Recipe, ScrapedExtras]:
        def try_get_default(func_call: Callable | None, get_attr: str, default: Any, clean_func=None):
            value = default
-            try:
-                value = func_call()
-            except Exception:
-                self.logger.error(f"Error parsing recipe func_call for '{get_attr}'")
+
+            if func_call:
+                try:
+                    value = func_call()
+                except Exception:
+                    self.logger.error(f"Error parsing recipe func_call for '{get_attr}'")

            if value == default:
                try:
@@ -58,7 +61,7 @@ class RecipeScraperPackage(ABCScraperStrategy):

            return value

-        def get_instructions() -> list[dict]:
+        def get_instructions() -> list[RecipeStep]:
            instruction_as_text = try_get_default(
                scraped_data.instructions, "recipeInstructions", ["No Instructions Found"]
            )
@@ -78,7 +81,11 @@ class RecipeScraperPackage(ABCScraperStrategy):
            None, "cookTime", None, cleaner.clean_time
        )

-        return Recipe(
+        extras = ScrapedExtras()
+
+        extras.set_tags(try_get_default(None, "keywords", "", cleaner.clean_tags))
+
+        recipe = Recipe(
            name=try_get_default(scraped_data.title, "name", "No Name Found", cleaner.clean_string),
            slug="",
            image=try_get_default(None, "image", None),
@@ -93,6 +100,8 @@ class RecipeScraperPackage(ABCScraperStrategy):
            org_url=url,
        )

+        return recipe, extras
+
    def scrape_url(self) -> SchemaScraperFactory.SchemaScraper | Any | None:
        try:
            scraped_schema = scrape_me(self.url)
@@ -103,8 +112,8 @@ class RecipeScraperPackage(ABCScraperStrategy):
                self.logger.error("Recipe Scraper was unable to extract a recipe.")
                return None

-        except ConnectionError:
-            raise HTTPException(status.HTTP_400_BAD_REQUEST, {"details": "CONNECTION_ERROR"})
+        except ConnectionError as e:
+            raise HTTPException(status.HTTP_400_BAD_REQUEST, {"details": "CONNECTION_ERROR"}) from e

        # Check to see if the recipe is valid
        try:
@@ -123,7 +132,7 @@ class RecipeScraperPackage(ABCScraperStrategy):
            self.logger.debug(f"Recipe Scraper [Package] was unable to extract a recipe from {self.url}")
            return None

-    def parse(self) -> Recipe | None:
+    def parse(self):
        """
        Parse a recipe from a given url.
        """
@@ -177,7 +186,7 @@ class RecipeScraperOpenGraph(ABCScraperStrategy):
            "extras": [],
        }

-    def parse(self) -> Recipe | None:
+    def parse(self):
        """
        Parse a recipe from a given url.
        """
@@ -188,4 +197,4 @@ class RecipeScraperOpenGraph(ABCScraperStrategy):
        if og_data is None:
            return None

-        return Recipe(**og_data)
+        return Recipe(**og_data), ScrapedExtras()
@@ -22,6 +22,10 @@ html_sous_vide_shrimp = CWD / "html/sous-vide-shrimp.html"

 html_jam_roly_poly_with_custard = CWD / "html/jam-roly-poly-with-custard.html"

+html_nutty_umami_noodles_with_scallion_brown_butter_and_snow_peas_recipe = (
+    CWD / "html/nutty-umami-noodles-with-scallion-brown-butter-and-snow-peas-recipe.html"
+)
+
 html_taiwanese_three_cup_chicken_san_bei_gi_recipe = CWD / "html/taiwanese-three-cup-chicken-san-bei-gi-recipe.html"

 html_detroit_style_pepperoni_pizza = CWD / "html/detroit-style-pepperoni-pizza.html"
File diff suppressed because one or more lines are too long
@@ -130,7 +130,9 @@ def test_organizer_association(
    # Get Recipe Data
    response = api_client.get(routes.RoutesRecipe.item(slug), headers=unique_user.token)
    as_json = response.json()
-    as_json[recipe_key] = [{"id": item["id"], "name": item["name"], "slug": item["slug"]}]
+    as_json[recipe_key] = [
+        {"id": item["id"], "group_id": unique_user.group_id, "name": item["name"], "slug": item["slug"]}
+    ]

    # Update Recipe
    response = api_client.put(routes.RoutesRecipe.item(slug), json=as_json, headers=unique_user.token)
@@ -177,7 +179,9 @@ def test_organizer_get_by_slug(
    for slug in recipe_slugs:
        response = api_client.get(routes.RoutesRecipe.item(slug), headers=unique_user.token)
        as_json = response.json()
-        as_json[recipe_key] = [{"id": item["id"], "name": item["name"], "slug": item["slug"]}]
+        as_json[recipe_key] = [
+            {"id": item["id"], "group_id": unique_user.group_id, "name": item["name"], "slug": item["slug"]}
+        ]

        response = api_client.put(routes.RoutesRecipe.item(slug), json=as_json, headers=unique_user.token)
        assert response.status_code == 200
@@ -13,7 +13,7 @@ from slugify import slugify
 from mealie.schema.recipe.recipe import RecipeCategory
 from mealie.services.recipe.recipe_data_service import RecipeDataService
 from mealie.services.scraper.scraper_strategies import RecipeScraperOpenGraph
-from tests import utils
+from tests import data, utils
 from tests.utils.app_routes import AppRoutes
 from tests.utils.factories import random_string
 from tests.utils.fixture_schemas import TestUser
@@ -83,12 +83,77 @@ def test_create_by_url(

    api_client.delete(api_routes.recipes_recipe_slug(recipe_data.expected_slug), headers=unique_user.token)

-    response = api_client.post(api_routes.recipes_create_url, json={"url": recipe_data.url}, headers=unique_user.token)
+    response = api_client.post(
+        api_routes.recipes_create_url, json={"url": recipe_data.url, "include_tags": False}, headers=unique_user.token
+    )

    assert response.status_code == 201
    assert json.loads(response.text) == recipe_data.expected_slug


+def test_create_by_url_with_tags(
+    api_client: TestClient,
+    api_routes: AppRoutes,
+    unique_user: TestUser,
+    monkeypatch: MonkeyPatch,
+):
+    html_file = data.html_nutty_umami_noodles_with_scallion_brown_butter_and_snow_peas_recipe
+
+    # Override init function for AbstractScraper to use the test html instead of calling the url
+    monkeypatch.setattr(
+        AbstractScraper,
+        "__init__",
+        get_init(html_file),
+    )
+    # Override the get_html method of the RecipeScraperOpenGraph to return the test html
+    monkeypatch.setattr(
+        RecipeScraperOpenGraph,
+        "get_html",
+        open_graph_override(html_file.read_text()),
+    )
+    # Skip image downloader
+    monkeypatch.setattr(
+        RecipeDataService,
+        "scrape_image",
+        lambda *_: "TEST_IMAGE",
+    )
+
+    response = api_client.post(
+        api_routes.recipes_create_url,
+        json={"url": "https://google.com", "include_tags": True},  # URL Doesn't matter
+        headers=unique_user.token,
+    )
+    assert response.status_code == 201
+    slug = "nutty-umami-noodles-with-scallion-brown-butter-and-snow-peas"
+
+    # Get the recipe
+    response = api_client.get(api_routes.recipes_recipe_slug(slug), headers=unique_user.token)
+    assert response.status_code == 200
+
+    # Verifiy the tags are present
+    expected_tags = {
+        "sauté",
+        "pea",
+        "noodle",
+        "udon noodle",
+        "ramen noodle",
+        "dinner",
+        "main",
+        "vegetarian",
+        "easy",
+        "quick",
+        "weeknight meals",
+        "web",
+    }
+
+    recipe = json.loads(response.text)
+
+    assert len(recipe["tags"]) == len(expected_tags)
+
+    for tag in recipe["tags"]:
+        assert tag["name"] in expected_tags
+
+
 @pytest.mark.parametrize("recipe_data", recipe_test_data)
 def test_read_update(
    api_client: TestClient,
@@ -17,7 +17,7 @@ and then use this test case by removing the `@pytest.mark.skip` and than testing
 @pytest.mark.skipif(True, reason="Long Running API Test - manually run when updating the parser")
 @pytest.mark.parametrize("recipe_test_data", test_cases)
 def test_recipe_parser(recipe_test_data: RecipeSiteTestCase):
-    recipe = scraper.create_from_url(recipe_test_data.url)
+    recipe, _ = scraper.create_from_url(recipe_test_data.url)

    assert recipe.slug == recipe_test_data.expected_slug
    assert len(recipe.recipe_instructions) == recipe_test_data.num_steps
@@ -1,16 +1,17 @@
 from dataclasses import dataclass
+from pathlib import Path

 from tests import data as test_data


-@dataclass
+@dataclass(slots=True)
 class RecipeSiteTestCase:
    url: str
    html: str
    expected_slug: str
    num_ingredients: int
    num_steps: int
-    html_file: str
+    html_file: Path


 def get_recipe_test_cases():