diff --git a/.dockerignore b/.dockerignore index e601eb50..0677d0b8 100644 --- a/.dockerignore +++ b/.dockerignore @@ -2,7 +2,6 @@ .github .dockerignore .gitignore - .idea .vscode @@ -26,4 +25,6 @@ venv */dist */data/db */mealie/test -*/mealie/.temp \ No newline at end of file +*/mealie/.temp + +model.crfmodel diff --git a/.gitignore b/.gitignore index 6c207578..89c39468 100644 --- a/.gitignore +++ b/.gitignore @@ -154,3 +154,4 @@ dev/scripts/output/javascriptAPI/* mealie/services/scraper/ingredient_nlp/model.crfmodel dev/code-generation/generated/openapi.json dev/code-generation/generated/test_routes.py +mealie/services/parser_services/crfpp/model.crfmodel diff --git a/Dockerfile b/Dockerfile index 8a238858..e45ad49f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,13 +1,3 @@ -############################################### -# # Frontend Builder Image -# ############################################### -# FROM node:lts-alpine as frontend-build -# WORKDIR /app -# COPY ./frontend/package*.json ./ -# RUN npm install -# COPY ./frontend/ . 
-# RUN npm run build - ############################################### # Base Image ############################################### @@ -91,6 +81,13 @@ WORKDIR / RUN chmod +x $MEALIE_HOME/mealie/run.sh ENTRYPOINT $MEALIE_HOME/mealie/run.sh "reload" +############################################### +# CRFPP Image +############################################### +FROM hkotel/crfpp as crfpp + +RUN echo "crfpp-container" + ############################################### # Production Image ############################################### @@ -108,6 +105,16 @@ RUN apt-get update \ COPY --from=builder-base $POETRY_HOME $POETRY_HOME COPY --from=builder-base $PYSETUP_PATH $PYSETUP_PATH +# copy CRF++ Binary from crfpp +ENV CRF_MODEL_URL=https://github.com/hay-kot/mealie-nlp-model/releases/download/v1.0.0/model.crfmodel + +ENV LD_LIBRARY_PATH=/usr/local/lib +COPY --from=crfpp /usr/local/lib/ /usr/local/lib +COPY --from=crfpp /usr/local/bin/crf_learn /usr/local/bin/crf_learn +COPY --from=crfpp /usr/local/bin/crf_test /usr/local/bin/crf_test + + + # copying caddy into image COPY --from=builder-base /usr/bin/caddy /usr/bin/caddy @@ -129,6 +136,9 @@ WORKDIR / COPY ./dev/data/templates $MEALIE_HOME/data/templates COPY ./Caddyfile $MEALIE_HOME +# Grab CRF++ Model Release +RUN curl -L0 $CRF_MODEL_URL --output $MEALIE_HOME/mealie/services/parser_services/crfpp/model.crfmodel + VOLUME [ "$MEALIE_HOME/data/" ] ENV APP_PORT=80 diff --git a/docs/docs/contributors/guides/ingredient-parser.md b/docs/docs/contributors/guides/ingredient-parser.md new file mode 100644 index 00000000..a7f1532a --- /dev/null +++ b/docs/docs/contributors/guides/ingredient-parser.md @@ -0,0 +1,20 @@ +# Improving the Ingredient Parser + +Mealie uses Conditional Random Fields (CRFs) for parsing and processing ingredients. The model used for ingredients is based off a data set of over 100,000 ingredients from a dataset compiled by the New York Times. 
I believe that the model used is sufficient to handle most ingredients; therefore, more data to train the model won't necessarily help improve it. + +## Improving The CRF Parser + +To improve results with the model, you'll likely need to focus on improving the tokenization and parsing of the original string to help the model determine what the ingredient is. Data science is not my forte, but I have done some tokenization to improve the model. You can find that code under `/mealie/services/parser_services/crfpp` along with some other utility functions to aid in the tokenization and processing of ingredient strings. + +The best way to test improvements to the parser is to register additional test cases in `/tests/unit_tests/test_crfpp_parser.py` and run the test after making changes to the tokenizer. Note that the test cases DO NOT run in the CI environment; therefore, you will need to have CRF++ installed on your machine. If you're using a Mac, the easiest way to do this is through Homebrew. + +When submitting a PR to improve the parser, it is important to provide your test cases, the problem you were trying to solve, and the results of the changes you made. As the tests don't run in CI, not providing these details may delay your PR from being merged. + +## Alternative Parsers +Alternatively, you can register a new parser by fulfilling the `ABCIngredientParser` interface. Satisfying this single-method interface allows us to register additional parsing strategies at runtime and gives the user several options when trying to parse a recipe. + + +## Links +- [Pretrained Model](https://github.com/hay-kot/mealie-nlp-model) +- [CRF++ (Forked)](https://github.com/hay-kot/crfpp) + diff --git a/docs/docs/overrides/api.html b/docs/docs/overrides/api.html index 861af07e..c971cd76 100644 --- a/docs/docs/overrides/api.html +++ b/docs/docs/overrides/api.html @@ -14,7 +14,7 @@
diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index ffb236d6..aa06c1d1 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -97,6 +97,8 @@ nav: - Dev Getting Started: "contributors/developers-guide/starting-dev-server.md" - Guidelines: "contributors/developers-guide/general-guidelines.md" - Style Guide: "contributors/developers-guide/style-guide.md" + - Guides: + - Improving Ingredient Parser: "contributors/guides/ingredient-parser.md" - Development Road Map: "roadmap.md" - Change Log: - v1.0.0 A Whole New App: "changelog/v1.0.0.md" diff --git a/frontend/api/class-interfaces/recipes.ts b/frontend/api/class-interfaces/recipes.ts index ca271dc0..9e2926b6 100644 --- a/frontend/api/class-interfaces/recipes.ts +++ b/frontend/api/class-interfaces/recipes.ts @@ -10,7 +10,8 @@ const routes = { recipesCreateUrl: `${prefix}/recipes/create-url`, recipesCreateFromZip: `${prefix}/recipes/create-from-zip`, recipesCategory: `${prefix}/recipes/category`, - recipesParseIngredients: `${prefix}/parse/ingredient`, + recipesParseIngredient: `${prefix}/parser/ingredient`, + recipesParseIngredients: `${prefix}/parser/ingredients`, recipesRecipeSlug: (recipe_slug: string) => `${prefix}/recipes/${recipe_slug}`, recipesRecipeSlugZip: (recipe_slug: string) => `${prefix}/recipes/${recipe_slug}/zip`, @@ -86,4 +87,8 @@ export class RecipeAPI extends BaseCRUDAPI { async parseIngredients(ingredients: Array) { return await this.requests.post(routes.recipesParseIngredients, { ingredients }); } + + async parseIngredient(ingredient: string) { + return await this.requests.post(routes.recipesParseIngredient, { ingredient }); + } } diff --git a/frontend/components/Domain/Recipe/RecipeIngredientParserMenu.vue b/frontend/components/Domain/Recipe/RecipeIngredientParserMenu.vue index e8f9b736..22d55409 100644 --- a/frontend/components/Domain/Recipe/RecipeIngredientParserMenu.vue +++ b/frontend/components/Domain/Recipe/RecipeIngredientParserMenu.vue @@ -21,7 +21,7 @@ - + @@ -89,6 +89,10 @@ export default 
defineComponent({ type: Array, required: true, }, + slug: { + type: String, + required: true, + }, }, setup(props) { const ingredients = props.ingredients; diff --git a/frontend/components/global/BaseCardSectionTitle.vue b/frontend/components/global/BaseCardSectionTitle.vue index e0979465..c36000a0 100644 --- a/frontend/components/global/BaseCardSectionTitle.vue +++ b/frontend/components/global/BaseCardSectionTitle.vue @@ -6,9 +6,11 @@ {{ title }} -

- -

+ +

+ +

+
diff --git a/frontend/layouts/admin.vue b/frontend/layouts/admin.vue index 9e13970f..ebafff21 100644 --- a/frontend/layouts/admin.vue +++ b/frontend/layouts/admin.vue @@ -120,6 +120,11 @@ export default defineComponent({ to: "/admin/backups", title: i18n.t("sidebar.backups"), }, + { + icon: $globals.icons.slotMachine, + to: "/admin/parser", + title: "Parser", + }, ]; const bottomLinks = [ diff --git a/frontend/nuxt.config.js b/frontend/nuxt.config.js index b75bf57c..53d8f7bc 100644 --- a/frontend/nuxt.config.js +++ b/frontend/nuxt.config.js @@ -30,7 +30,7 @@ export default { css: [{ src: "~/assets/main.css" }, { src: "~/assets/style-overrides.scss" }], // Plugins to run before rendering page: https://go.nuxtjs.dev/config-plugins - plugins: ["~/plugins/globals.ts", "~/plugins/theme.ts"], + plugins: ["~/plugins/globals.ts", "~/plugins/theme.ts", "~/plugins/toast.client.ts"], // Auto import components: https://go.nuxtjs.dev/config-components components: true, diff --git a/frontend/pages/admin/parser.vue b/frontend/pages/admin/parser.vue new file mode 100644 index 00000000..f2fd4c1d --- /dev/null +++ b/frontend/pages/admin/parser.vue @@ -0,0 +1,138 @@ + + + + + \ No newline at end of file diff --git a/frontend/pages/recipe/_slug.vue b/frontend/pages/recipe/_slug/index.vue similarity index 99% rename from frontend/pages/recipe/_slug.vue rename to frontend/pages/recipe/_slug/index.vue index f3644b82..50ec930f 100644 --- a/frontend/pages/recipe/_slug.vue +++ b/frontend/pages/recipe/_slug/index.vue @@ -110,7 +110,7 @@ />
- + {{ $t("general.new") }}
diff --git a/frontend/pages/recipe/_slug/ingredient-parser.vue b/frontend/pages/recipe/_slug/ingredient-parser.vue new file mode 100644 index 00000000..3e8178c5 --- /dev/null +++ b/frontend/pages/recipe/_slug/ingredient-parser.vue @@ -0,0 +1,73 @@ + + + + + \ No newline at end of file diff --git a/frontend/plugins/toast.client.ts b/frontend/plugins/toast.client.ts new file mode 100644 index 00000000..c26627ec --- /dev/null +++ b/frontend/plugins/toast.client.ts @@ -0,0 +1,15 @@ +import { NuxtAxiosInstance } from "@nuxtjs/axios"; +import { alert } from "~/composables/use-toast"; + +export default function ({ $axios }: { $axios: NuxtAxiosInstance }) { + $axios.onResponse((response) => { + if (response.data.message) { + alert.info(response.data.message); + } + }); + $axios.onError((error) => { + if (error.response?.data?.detail?.message) { + alert.error(error.response.data.detail.message); + } + }); +} diff --git a/frontend/utils/icons/icons.ts b/frontend/utils/icons/icons.ts index d753a930..e2a4d98f 100644 --- a/frontend/utils/icons/icons.ts +++ b/frontend/utils/icons/icons.ts @@ -12,6 +12,7 @@ import { mdiBookOutline, mdiAccountCog, mdiAccountGroup, + mdiSlotMachine, mdiHome, mdiMagnify, mdiTranslate, @@ -208,4 +209,5 @@ export const icons = { forward: mdiArrowRightBoldOutline, back: mdiArrowLeftBoldOutline, + slotMachine: mdiSlotMachine, }; diff --git a/mealie/lang/__init__.py b/mealie/lang/__init__.py new file mode 100644 index 00000000..58d29823 --- /dev/null +++ b/mealie/lang/__init__.py @@ -0,0 +1 @@ +from .providers import * diff --git a/mealie/lang/messages/en-US.json b/mealie/lang/messages/en-US.json new file mode 100644 index 00000000..eb731ee2 --- /dev/null +++ b/mealie/lang/messages/en-US.json @@ -0,0 +1,8 @@ +{ + "generic": { + "server-error": "Something went wrong" + }, + "recipe": { + "unique-name-error": "Recipe names must be unique" + } +} \ No newline at end of file diff --git a/mealie/lang/providers.py b/mealie/lang/providers.py new file mode 
100644 index 00000000..288a2009 --- /dev/null +++ b/mealie/lang/providers.py @@ -0,0 +1,35 @@ +from abc import ABC, abstractmethod +from functools import lru_cache +from pathlib import Path + +import i18n +from bcrypt import os + +CWD = Path(__file__).parent +TRANSLATIONS = CWD / "messages" + + +class AbstractLocaleProvider(ABC): + @abstractmethod + def t(self, key): + pass + + +class i18nProvider(AbstractLocaleProvider): + def __init__(self, locale): + i18n.set("file_format", "json") + i18n.set("filename_format", "{locale}.{format}") + i18n.set("skip_locale_root_data", True) + i18n.load_path.append(TRANSLATIONS) + i18n.set("locale", locale) + i18n.set("fallback", "en-US") + self._t = i18n.t + + def t(self, key): + return self._t(key) + + +@lru_cache() +def get_locale_provider(): + lang = os.environ.get("LANG", "en-US") + return i18nProvider(lang) diff --git a/mealie/routes/__init__.py b/mealie/routes/__init__.py index c31394e8..48b9ec74 100644 --- a/mealie/routes/__init__.py +++ b/mealie/routes/__init__.py @@ -1,6 +1,6 @@ from fastapi import APIRouter -from . import admin, app, auth, categories, groups, recipe, shopping_lists, tags, unit_and_foods, users +from . import admin, app, auth, categories, groups, parser, recipe, shopping_lists, tags, unit_and_foods, users router = APIRouter(prefix="/api") @@ -9,6 +9,7 @@ router.include_router(auth.router) router.include_router(users.router) router.include_router(groups.router) router.include_router(recipe.router) +router.include_router(parser.router) router.include_router(unit_and_foods.router) router.include_router(categories.router) router.include_router(tags.router) diff --git a/mealie/routes/parser/__init__.py b/mealie/routes/parser/__init__.py new file mode 100644 index 00000000..784f6a75 --- /dev/null +++ b/mealie/routes/parser/__init__.py @@ -0,0 +1,6 @@ +from fastapi import APIRouter + +from . 
import ingredient_parser + +router = APIRouter() +router.include_router(ingredient_parser.public_router, tags=["Recipe: Ingredient Parser"]) diff --git a/mealie/routes/parser/ingredient_parser.py b/mealie/routes/parser/ingredient_parser.py new file mode 100644 index 00000000..bda6ed8c --- /dev/null +++ b/mealie/routes/parser/ingredient_parser.py @@ -0,0 +1,31 @@ +from fastapi import APIRouter, Depends +from pydantic import BaseModel + +from mealie.schema.recipe import RecipeIngredient +from mealie.services.parser_services import IngredientParserService + +public_router = APIRouter(prefix="/parser") + + +class IngredientsRequest(BaseModel): + ingredients: list[str] + + +class IngredientRequest(BaseModel): + ingredient: str + + +@public_router.post("/ingredients", response_model=list[RecipeIngredient]) +def parse_ingredients( + ingredients: IngredientsRequest, + p_service: IngredientParserService = Depends(IngredientParserService.private), +): + return {"ingredients": p_service.parse_ingredients(ingredients.ingredients)} + + +@public_router.post("/ingredient") +def parse_ingredient( + ingredient: IngredientRequest, + p_service: IngredientParserService = Depends(IngredientParserService.private), +): + return {"ingredient": p_service.parse_ingredient(ingredient.ingredient)} diff --git a/mealie/routes/recipe/__init__.py b/mealie/routes/recipe/__init__.py index 754dc7d0..8a73ce57 100644 --- a/mealie/routes/recipe/__init__.py +++ b/mealie/routes/recipe/__init__.py @@ -1,13 +1,6 @@ from fastapi import APIRouter -from mealie.routes.recipe import ( - all_recipe_routes, - comments, - image_and_assets, - ingredient_parser, - recipe_crud_routes, - recipe_export, -) +from . 
import all_recipe_routes, comments, image_and_assets, recipe_crud_routes, recipe_export prefix = "/recipes" @@ -18,4 +11,3 @@ router.include_router(recipe_export.user_router, prefix=prefix, tags=["Recipe: E router.include_router(recipe_crud_routes.user_router, prefix=prefix, tags=["Recipe: CRUD"]) router.include_router(image_and_assets.user_router, prefix=prefix, tags=["Recipe: Images and Assets"]) router.include_router(comments.router, prefix=prefix, tags=["Recipe: Comments"]) -router.include_router(ingredient_parser.public_router, tags=["Recipe: Ingredient Parser"]) diff --git a/mealie/routes/recipe/ingredient_parser.py b/mealie/routes/recipe/ingredient_parser.py deleted file mode 100644 index b621c2c9..00000000 --- a/mealie/routes/recipe/ingredient_parser.py +++ /dev/null @@ -1,25 +0,0 @@ -from fastapi import APIRouter -from pydantic import BaseModel - -from mealie.services.scraper.ingredient_nlp.processor import ( - convert_crf_models_to_ingredients, - convert_list_to_crf_model, -) - -public_router = APIRouter() - - -class IngredientRequest(BaseModel): - ingredients: list[str] - - -@public_router.post("/parse/ingredient") -def parse_ingredients(ingredients: IngredientRequest): - """ - Parse an ingredient string. 
- """ - - crf_models = convert_list_to_crf_model(ingredients.ingredients) - ingredients = convert_crf_models_to_ingredients(crf_models) - - return {"ingredient": ingredients} diff --git a/mealie/schema/response/__init__.py b/mealie/schema/response/__init__.py new file mode 100644 index 00000000..7bd5cc2c --- /dev/null +++ b/mealie/schema/response/__init__.py @@ -0,0 +1,7 @@ +from pydantic import BaseModel + + +class ErrorResponse(BaseModel): + message: str + error: bool = True + exception: str = None diff --git a/mealie/services/_base_http_service/base_http_service.py b/mealie/services/_base_http_service/base_http_service.py index f90a0f9c..6a7e7514 100644 --- a/mealie/services/_base_http_service/base_http_service.py +++ b/mealie/services/_base_http_service/base_http_service.py @@ -9,6 +9,7 @@ from mealie.core.config import get_app_dirs, get_app_settings from mealie.core.root_logger import get_logger from mealie.db.database import get_database from mealie.db.db_setup import SessionLocal +from mealie.lang import get_locale_provider from mealie.schema.user.user import PrivateUser logger = get_logger() @@ -64,10 +65,11 @@ class BaseHttpService(Generic[T, D], ABC): self.db = get_database(session) self.app_dirs = get_app_dirs() self.settings = get_app_settings() + self.t = get_locale_provider().t def _existing_factory(dependency: Type[CLS_DEP]) -> classmethod: def cls_method(cls, item_id: T, deps: CLS_DEP = Depends(dependency)): - new_class = cls(deps.session, deps.user, deps.bg_task) + new_class = cls(session=deps.session, user=deps.user, background_tasks=deps.bg_task) new_class.assert_existing(item_id) return new_class @@ -75,7 +77,7 @@ class BaseHttpService(Generic[T, D], ABC): def _class_method_factory(dependency: Type[CLS_DEP]) -> classmethod: def cls_method(cls, deps: CLS_DEP = Depends(dependency)): - return cls(deps.session, deps.user, deps.bg_task) + return cls(session=deps.session, user=deps.user, background_tasks=deps.bg_task) return classmethod(cls_method) 
diff --git a/mealie/services/_base_http_service/crud_http_mixins.py b/mealie/services/_base_http_service/crud_http_mixins.py index 406f1324..45198f81 100644 --- a/mealie/services/_base_http_service/crud_http_mixins.py +++ b/mealie/services/_base_http_service/crud_http_mixins.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from abc import ABC, abstractmethod from typing import Generic, TypeVar @@ -8,6 +10,7 @@ from sqlalchemy.orm import Session from mealie.core.root_logger import get_logger from mealie.db.data_access_layer._access_model import AccessModel +from mealie.schema.response import ErrorResponse C = TypeVar("C", bound=BaseModel) R = TypeVar("R", bound=BaseModel) @@ -29,12 +32,23 @@ class CrudHttpMixins(Generic[C, R, U], ABC): self.item = self.dal.get_one(id) return self.item - def _create_one(self, data: C, exception_msg="generic-create-error") -> R: + def _create_one(self, data: C, default_msg="generic-create-error", exception_msgs: dict | None = None) -> R: try: self.item = self.dal.create(data) except Exception as ex: logger.exception(ex) - raise HTTPException(status.HTTP_400_BAD_REQUEST, detail={"message": exception_msg, "exception": str(ex)}) + + msg = default_msg + if exception_msgs: + msg = exception_msgs.get(type(ex), default_msg) + + raise HTTPException( + status.HTTP_400_BAD_REQUEST, + detail=ErrorResponse( + message=msg, + exception=str(ex), + ).dict(), + ) return self.item diff --git a/mealie/services/_base_service/__init__.py b/mealie/services/_base_service/__init__.py index 334e8597..ce0a857e 100644 --- a/mealie/services/_base_service/__init__.py +++ b/mealie/services/_base_service/__init__.py @@ -1,7 +1,9 @@ from mealie.core.config import get_app_dirs, get_app_settings +from mealie.lang import get_locale_provider class BaseService: def __init__(self) -> None: self.app_dirs = get_app_dirs() self.settings = get_app_settings() + self.t = get_locale_provider() diff --git a/mealie/services/parser_services/__init__.py 
b/mealie/services/parser_services/__init__.py new file mode 100644 index 00000000..ff10630c --- /dev/null +++ b/mealie/services/parser_services/__init__.py @@ -0,0 +1 @@ +from .ingredient_parser_service import * diff --git a/mealie/services/scraper/ingredient_nlp/__init__.py b/mealie/services/parser_services/crfpp/__init__.py similarity index 100% rename from mealie/services/scraper/ingredient_nlp/__init__.py rename to mealie/services/parser_services/crfpp/__init__.py diff --git a/mealie/services/scraper/ingredient_nlp/pre_processor.py b/mealie/services/parser_services/crfpp/pre_processor.py similarity index 97% rename from mealie/services/scraper/ingredient_nlp/pre_processor.py rename to mealie/services/parser_services/crfpp/pre_processor.py index 514fb187..a389dc95 100644 --- a/mealie/services/scraper/ingredient_nlp/pre_processor.py +++ b/mealie/services/parser_services/crfpp/pre_processor.py @@ -38,7 +38,7 @@ def replace_fraction_unicode(string: str): continue if name.startswith("VULGAR FRACTION"): normalized = unicodedata.normalize("NFKC", c) - numerator, _slash, denominator = normalized.partition("⁄") + numerator, _, denominator = normalized.partition("⁄") # _ = slash text = f" {numerator}/{denominator}" return string.replace(c, text).replace(" ", " ") diff --git a/mealie/services/parser_services/crfpp/processor.py b/mealie/services/parser_services/crfpp/processor.py new file mode 100644 index 00000000..6b05e244 --- /dev/null +++ b/mealie/services/parser_services/crfpp/processor.py @@ -0,0 +1,46 @@ +import subprocess +import tempfile +from fractions import Fraction +from pathlib import Path + +from pydantic import BaseModel, validator + +from . 
import utils +from .pre_processor import pre_process_string + +CWD = Path(__file__).parent +MODEL_PATH = CWD / "model.crfmodel" + + +class CRFIngredient(BaseModel): + input: str = "" + name: str = "" + other: str = "" + qty: str = "" + comment: str = "" + unit: str = "" + + @validator("qty", always=True, pre=True) + def validate_qty(qty, values): # sourcery skip: merge-nested-ifs + if qty is None or qty == "": + # Check if other contains a fraction + if values["other"] is not None and values["other"].find("/") != -1: + return float(Fraction(values["other"])).__round__(1) + else: + return 1 + + return qty + + +def _exec_crf_test(input_text): + with tempfile.NamedTemporaryFile(mode="w") as input_file: + input_file.write(utils.export_data(input_text)) + input_file.flush() + return subprocess.check_output(["crf_test", "--verbose=1", "--model", MODEL_PATH, input_file.name]).decode( + "utf-8" + ) + + +def convert_list_to_crf_model(list_of_ingrdeint_text: list[str]): + crf_output = _exec_crf_test([pre_process_string(x) for x in list_of_ingrdeint_text]) + return [CRFIngredient(**ingredient) for ingredient in utils.import_data(crf_output.split("\n"))] diff --git a/mealie/services/scraper/ingredient_nlp/tokenizer.py b/mealie/services/parser_services/crfpp/tokenizer.py similarity index 100% rename from mealie/services/scraper/ingredient_nlp/tokenizer.py rename to mealie/services/parser_services/crfpp/tokenizer.py diff --git a/mealie/services/scraper/ingredient_nlp/utils.py b/mealie/services/parser_services/crfpp/utils.py similarity index 100% rename from mealie/services/scraper/ingredient_nlp/utils.py rename to mealie/services/parser_services/crfpp/utils.py diff --git a/mealie/services/parser_services/ingredient_parser.py b/mealie/services/parser_services/ingredient_parser.py new file mode 100644 index 00000000..d1a084b1 --- /dev/null +++ b/mealie/services/parser_services/ingredient_parser.py @@ -0,0 +1,55 @@ +from abc import ABC, abstractmethod +from fractions import 
Fraction + +from mealie.core.root_logger import get_logger +from mealie.schema.recipe import RecipeIngredient +from mealie.schema.recipe.recipe_ingredient import CreateIngredientFood, CreateIngredientUnit + +from .crfpp.processor import CRFIngredient, convert_list_to_crf_model + +logger = get_logger(__name__) + + +class ABCIngredientParser(ABC): + """ + Abstract class for ingredient parsers. + """ + + @abstractmethod + def parse(self, ingredients: list[str]) -> list[RecipeIngredient]: + ... + + +class CRFPPIngredientParser(ABCIngredientParser): + """ + Class for CRFPP ingredient parsers. + """ + + def __init__(self) -> None: + pass + + def _crf_to_ingredient(self, crf_model: CRFIngredient) -> RecipeIngredient: + ingredient = None + + try: + ingredient = RecipeIngredient( + title="", + note=crf_model.comment, + unit=CreateIngredientUnit(name=crf_model.unit), + food=CreateIngredientFood(name=crf_model.name), + disable_amount=False, + quantity=float(sum(Fraction(s) for s in crf_model.qty.split())), + ) + except Exception as e: + # TODO: Capture some sort of state for the user to see that an exception occured + logger.exception(e) + ingredient = RecipeIngredient( + title="", + note=crf_model.input, + ) + + return ingredient + + def parse(self, ingredients: list[str]) -> list[RecipeIngredient]: + crf_models = convert_list_to_crf_model(ingredients) + return [self._crf_to_ingredient(crf_model) for crf_model in crf_models] diff --git a/mealie/services/parser_services/ingredient_parser_service.py b/mealie/services/parser_services/ingredient_parser_service.py new file mode 100644 index 00000000..ffeae155 --- /dev/null +++ b/mealie/services/parser_services/ingredient_parser_service.py @@ -0,0 +1,28 @@ +from mealie.schema.recipe import RecipeIngredient +from mealie.services._base_http_service.http_services import UserHttpService + +from .ingredient_parser import ABCIngredientParser, CRFPPIngredientParser + + +class IngredientParserService(UserHttpService): + def __init__(self, 
parser: ABCIngredientParser = None, *args, **kwargs) -> None: + self.parser: ABCIngredientParser = parser() if parser else CRFPPIngredientParser() + super().__init__(*args, **kwargs) + + def populate_item(self) -> None: + """Satisfy abstract method""" + pass + + def parse_ingredient(self, ingredient: str) -> RecipeIngredient: + parsed = self.parser.parse([ingredient]) + + if parsed: + return parsed[0] + # TODO: Raise Exception + + def parse_ingredients(self, ingredients: list[str]) -> list[RecipeIngredient]: + parsed = self.parser.parse(ingredients) + + if parsed: + return parsed + # TODO: Raise Exception diff --git a/mealie/services/recipe/recipe_service.py b/mealie/services/recipe/recipe_service.py index a3ee9595..7bf080c1 100644 --- a/mealie/services/recipe/recipe_service.py +++ b/mealie/services/recipe/recipe_service.py @@ -7,6 +7,7 @@ from typing import Union from zipfile import ZipFile from fastapi import Depends, HTTPException, UploadFile, status +from sqlalchemy import exc from mealie.core.dependencies.grouped import PublicDeps, UserDeps from mealie.core.root_logger import get_logger @@ -33,6 +34,10 @@ class RecipeService(CrudHttpMixins[CreateRecipe, Recipe, Recipe], UserHttpServic event_func = create_recipe_event + @cached_property + def exception_key(self) -> dict: + return {exc.IntegrityError: self.t("recipe.unique-name-error")} + @cached_property def dal(self) -> RecipeDataAccessModel: return self.db.recipes @@ -53,14 +58,13 @@ class RecipeService(CrudHttpMixins[CreateRecipe, Recipe, Recipe], UserHttpServic if not self.item.settings.public and not self.user: raise HTTPException(status.HTTP_403_FORBIDDEN) - # CRUD METHODS def get_all(self, start=0, limit=None): items = self.db.recipes.summary(self.user.group_id, start=start, limit=limit) return [RecipeSummary.construct(**x.__dict__) for x in items] def create_one(self, create_data: Union[Recipe, CreateRecipe]) -> Recipe: create_data = recipe_creation_factory(self.user, name=create_data.name, 
additional_attrs=create_data.dict()) - self._create_one(create_data, "RECIPE_ALREAD_EXISTS") + self._create_one(create_data, self.t("generic.server-error"), self.exception_key) self._create_event( "Recipe Created", f"'{self.item.name}' by {self.user.username} \n {self.settings.BASE_URL}/recipe/{self.item.slug}", diff --git a/mealie/services/scraper/ingredient_nlp/processor.py b/mealie/services/scraper/ingredient_nlp/processor.py deleted file mode 100644 index c879f3a9..00000000 --- a/mealie/services/scraper/ingredient_nlp/processor.py +++ /dev/null @@ -1,85 +0,0 @@ -import subprocess -import tempfile -from fractions import Fraction -from pathlib import Path -from typing import Optional - -from pydantic import BaseModel, validator - -from mealie.core.config import get_app_settings -from mealie.schema.recipe import RecipeIngredient -from mealie.schema.recipe.recipe_ingredient import CreateIngredientFood, CreateIngredientUnit - -from . import utils -from .pre_processor import pre_process_string - -CWD = Path(__file__).parent -MODEL_PATH = CWD / "model.crfmodel" -settings = get_app_settings() - - -INGREDIENT_TEXT = [ - "2 tablespoons honey", - "1/2 cup flour", - "Black pepper, to taste", - "2 cups of garlic finely chopped", - "2 liters whole milk", -] - - -class CRFIngredient(BaseModel): - input: Optional[str] = "" - name: Optional[str] = "" - other: Optional[str] = "" - qty: Optional[str] = "" - comment: Optional[str] = "" - unit: Optional[str] = "" - - @validator("qty", always=True, pre=True) - def validate_qty(qty, values): # sourcery skip: merge-nested-ifs - if qty is None or qty == "": - # Check if other contains a fraction - if values["other"] is not None and values["other"].find("/") != -1: - return float(Fraction(values["other"])).__round__(1) - else: - return 1 - - return qty - - -def _exec_crf_test(input_text): - with tempfile.NamedTemporaryFile(mode="w") as input_file: - input_file.write(utils.export_data(input_text)) - input_file.flush() - return 
subprocess.check_output(["crf_test", "--verbose=1", "--model", MODEL_PATH, input_file.name]).decode( - "utf-8" - ) - - -def convert_list_to_crf_model(list_of_ingrdeint_text: list[str]): - crf_output = _exec_crf_test([pre_process_string(x) for x in list_of_ingrdeint_text]) - crf_models = [CRFIngredient(**ingredient) for ingredient in utils.import_data(crf_output.split("\n"))] - - for model in crf_models: - print(model) - - return crf_models - - -def convert_crf_models_to_ingredients(crf_models: list[CRFIngredient]): - return [ - RecipeIngredient( - title="", - note=crf_model.comment, - unit=CreateIngredientUnit(name=crf_model.unit), - food=CreateIngredientFood(name=crf_model.name), - disable_amount=settings.RECIPE_DISABLE_AMOUNT, - quantity=float(sum(Fraction(s) for s in crf_model.qty.split())), - ) - for crf_model in crf_models - ] - - -if __name__ == "__main__": - crf_models = convert_list_to_crf_model(INGREDIENT_TEXT) - ingredients = convert_crf_models_to_ingredients(crf_models) diff --git a/poetry.lock b/poetry.lock index bb54912e..f48d5888 100644 --- a/poetry.lock +++ b/poetry.lock @@ -976,6 +976,17 @@ python-versions = "*" [package.extras] cli = ["click (>=5.0)"] +[[package]] +name = "python-i18n" +version = "0.3.9" +description = "Translation library for Python" +category = "main" +optional = false +python-versions = "*" + +[package.extras] +yaml = ["pyyaml (>=3.10)"] + [[package]] name = "python-jose" version = "3.3.0" @@ -1405,7 +1416,7 @@ pgsql = ["psycopg2-binary"] [metadata] lock-version = "1.1" python-versions = "^3.9" -content-hash = "c030cae2012cedbcad514df8f63a79288d0390d211cfdf4f5a6489a11c96d923" +content-hash = "b976d7a2b1eeebfc7bd1b641e9a9e0e3f723d427dbbe688d20108747dfa9fa19" [metadata.files] aiofiles = [ @@ -2102,6 +2113,10 @@ python-dotenv = [ {file = "python-dotenv-0.15.0.tar.gz", hash = "sha256:587825ed60b1711daea4832cf37524dfd404325b7db5e25ebe88c495c9f807a0"}, {file = "python_dotenv-0.15.0-py2.py3-none-any.whl", hash = 
"sha256:0c8d1b80d1a1e91717ea7d526178e3882732420b03f08afea0406db6402e220e"}, ] +python-i18n = [ + {file = "python-i18n-0.3.9.tar.gz", hash = "sha256:df97f3d2364bf3a7ebfbd6cbefe8e45483468e52a9e30b909c6078f5f471e4e8"}, + {file = "python_i18n-0.3.9-py3-none-any.whl", hash = "sha256:bda5b8d889ebd51973e22e53746417bd32783c9bd6780fd27cadbb733915651d"}, +] python-jose = [ {file = "python-jose-3.3.0.tar.gz", hash = "sha256:55779b5e6ad599c6336191246e95eb2293a9ddebd555f796a65f838f07e5d78a"}, {file = "python_jose-3.3.0-py2.py3-none-any.whl", hash = "sha256:9b1376b023f8b298536eedd47ae1089bcdb848f1535ab30555cd92002d78923a"}, diff --git a/pyproject.toml b/pyproject.toml index 3eed9876..78f692df 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,6 +37,7 @@ recipe-scrapers = "^13.2.7" psycopg2-binary = {version = "^2.9.1", optional = true} gunicorn = "^20.1.0" emails = "^0.6" +python-i18n = "^0.3.9" [tool.poetry.dev-dependencies] pylint = "^2.6.0" diff --git a/template.env b/template.env index e2e66e85..63e3ea7c 100644 --- a/template.env +++ b/template.env @@ -23,6 +23,8 @@ POSTGRES_PORT=5432 POSTGRES_DB=mealie TOKEN_TIME=24 +LANG=en-US + # NOT USED # SMTP_HOST="" # SMTP_PORT="" diff --git a/tests/unit_tests/test_crfpp_parser.py b/tests/unit_tests/test_crfpp_parser.py new file mode 100644 index 00000000..d09525d3 --- /dev/null +++ b/tests/unit_tests/test_crfpp_parser.py @@ -0,0 +1,43 @@ +from dataclasses import dataclass +from fractions import Fraction + +import pytest + +from mealie.services.parser_services.crfpp.processor import CRFIngredient, convert_list_to_crf_model + + +@dataclass +class TestIngredient: + input: str + quantity: float + unit: str + food: str + comments: str + + +# TODO - add more robust test cases +test_ingredients = [ + TestIngredient("½ cup all-purpose flour", 0.5, "cup", "all-purpose flour", ""), + TestIngredient("1 ½ teaspoons ground black pepper", 1.5, "teaspoon", "black pepper", "ground"), + TestIngredient("⅔ cup unsweetened flaked coconut", 0.7, 
"cup", "coconut", "unsweetened flaked"), + TestIngredient("⅓ cup panko bread crumbs", 0.3, "cup", "panko bread crumbs", ""), +] + + +def crf_exists() -> bool: + import shutil + + return shutil.which("crf_test") is not None + + +@pytest.mark.skipif(not crf_exists(), reason="CRF++ not installed") +def test_nlp_parser(): + models: list[CRFIngredient] = convert_list_to_crf_model([x.input for x in test_ingredients]) + + # Itterate over mdoels and test_ingreidnets to gether + for model, test_ingredient in zip(models, test_ingredients): + assert float(sum(Fraction(s) for s in model.qty.split())) == test_ingredient.quantity + + assert model.comment == test_ingredient.comments + assert model.name == test_ingredient.food + assert model.unit == test_ingredient.unit diff --git a/tests/unit_tests/test_nlp_parser.py b/tests/unit_tests/test_nlp_parser.py deleted file mode 100644 index 0748225e..00000000 --- a/tests/unit_tests/test_nlp_parser.py +++ /dev/null @@ -1,38 +0,0 @@ -from dataclasses import dataclass -from fractions import Fraction - -import pytest - -from mealie.services.scraper.ingredient_nlp.processor import CRFIngredient, convert_list_to_crf_model - - -@dataclass -class TestIngredient: - input: str - quantity: float - - -test_ingredients = [ - TestIngredient("½ cup all-purpose flour", 0.5), - TestIngredient("1 ½ teaspoons ground black pepper", 1.5), - TestIngredient("⅔ cup unsweetened flaked coconut", 0.7), - TestIngredient("⅓ cup panko bread crumbs", 0.3), -] - - -@pytest.mark.skip -def test_nlp_parser(): - models: list[CRFIngredient] = convert_list_to_crf_model([x.input for x in test_ingredients]) - - # Itterate over mdoels and test_ingreidnets to gether - print() - for model, test_ingredient in zip(models, test_ingredients): - print("Testing:", test_ingredient.input, end="") - - assert float(sum(Fraction(s) for s in model.qty.split())) == test_ingredient.quantity - - print(" ✅ Passed") - - -if __name__ == "__main__": - test_nlp_parser()