feat: improve automatic ingredient linking (#1836)
* Filter special characters when automatically linking ingredients to instructions, using a Unicode property group that covers all Unicode punctuation marks. * Allow an ingredient to be linked when it appears at the beginning of a new line in the instruction. * Extract ingredient matching into a composable and add tests. Ignore words of 2 letters or fewer to avoid false matches; during testing, 2-letter matches were a large source of false positives.
This commit is contained in:
parent
19ae89a195
commit
83b8ce659e
3 changed files with 135 additions and 44 deletions
|
@ -233,6 +233,7 @@ import { parseIngredientText } from "~/composables/recipes";
|
|||
import { uuid4, detectServerBaseUrl } from "~/composables/use-utils";
|
||||
import { useUserApi, useStaticRoutes } from "~/composables/api";
|
||||
import { usePageState } from "~/composables/recipe-page/shared-state";
|
||||
import { useExtractIngredientReferences } from "~/composables/recipe-page/use-extract-ingredient-references";
|
||||
import { NoUndefinedField } from "~/lib/api/types/non-generated";
|
||||
import DropZone from "~/components/global/DropZone.vue";
|
||||
|
||||
|
@ -427,50 +428,12 @@ export default defineComponent({
|
|||
}
|
||||
|
||||
/**
 * Adds to `activeRefs` the reference id of every recipe ingredient that
 * `useExtractIngredientReferences` detects in `activeText`.
 *
 * All matching rules (tokenising, punctuation stripping, short-word and
 * blacklist filtering) live in the composable; this function only forwards
 * the current state to it and appends the ids it returns. References already
 * present in `activeRefs` are passed along so the composable can skip them.
 */
function autoSetReferences() {
  const detectedRefs = useExtractIngredientReferences(
    props.recipe.recipeIngredient,
    activeRefs.value,
    activeText.value,
    props.recipe.settings.disableAmount
  );

  // The composable returns a Set, so each id is appended at most once.
  detectedRefs.forEach((referenceId: string) => activeRefs.value.push(referenceId));
}
|
||||
|
||||
const ingredientLookup = computed(() => {
|
||||
|
|
|
@ -0,0 +1,68 @@
|
|||
import { describe, expect, test } from "vitest";
|
||||
import { useExtractIngredientReferences } from "./use-extract-ingredient-references";
|
||||
|
||||
// Punctuation placed directly before or after the ingredient word in the parameterised tests below.
const punctuationMarks = ["*", "?", "/", "!", "**", "&", "."];

describe("test use extract ingredient references", () => {
  test("when text empty return empty", () => {
    const result = useExtractIngredientReferences([{ note: "Onions", referenceId: "123" }], [], "", true);
    expect(result).toStrictEqual(new Set());
  });

  // The text contains "Onion" while the ingredient note is "Onions": a partial match.
  test("when the ingredient matches partially and has a reference id, return the referenceId", () => {
    const result = useExtractIngredientReferences([{ note: "Onions", referenceId: "123" }], [], "A sentence containing Onion", true);

    expect(result).toEqual(new Set(["123"]));
  });

  // %s puts the punctuation mark into the test title so each case is distinguishable in the report.
  test.each(punctuationMarks)("when ingredient is suffixed by punctuation %s, return the referenceId", (suffix) => {
    const result = useExtractIngredientReferences([{ note: "Onions", referenceId: "123" }], [], "A sentence containing Onion" + suffix, true);

    expect(result).toEqual(new Set(["123"]));
  });

  test.each(punctuationMarks)("when ingredient is prefixed by punctuation %s, return the referenceId", (prefix) => {
    const result = useExtractIngredientReferences([{ note: "Onions", referenceId: "123" }], [], "A sentence containing " + prefix + "Onion", true);

    expect(result).toEqual(new Set(["123"]));
  });

  test("when ingredient is first on a new line of a multiline text, return the referenceId", () => {
    const multilineString = "lksjdlk\nOnion";
    const result = useExtractIngredientReferences([{ note: "Onions", referenceId: "123" }], [], multilineString, true);

    expect(result).toEqual(new Set(["123"]));
  });

  // The text contains "Onions" and the ingredient note is "Onions": an exact match.
  test("when the ingredient matches exactly and has a reference id, return the referenceId", () => {
    const result = useExtractIngredientReferences([{ note: "Onions", referenceId: "123" }], [], "A sentence containing Onions", true);

    expect(result).toEqual(new Set(["123"]));
  });

  test("when the ingredient matches with different casing and has a reference id, return the referenceId", () => {
    const result = useExtractIngredientReferences([{ note: "Onions", referenceId: "123" }], [], "A sentence containing oNions", true);

    expect(result).toEqual(new Set(["123"]));
  });

  test("when no ingredients, return empty", () => {
    const result = useExtractIngredientReferences([], [], "A sentence containing oNions", true);

    expect(result).toEqual(new Set());
  });

  test("when an ingredient matches but is already in the existing referenceIds, do not return the referenceId", () => {
    const result = useExtractIngredientReferences([{ note: "Onion", referenceId: "123" }], ["123"], "A sentence containing Onion", true);

    expect(result).toEqual(new Set());
  });

  test("when a word is 2 letters or shorter, it is ignored", () => {
    const result = useExtractIngredientReferences([{ note: "Onion", referenceId: "123" }], [], "A sentence containing On", true);

    expect(result).toEqual(new Set());
  });
});
|
|
@ -0,0 +1,60 @@
|
|||
import { RecipeIngredient } from "~/lib/api/types/recipe";
|
||||
import { parseIngredientText } from "~/composables/recipes";
|
||||
|
||||
|
||||
/**
 * Cleans a single token for matching by stripping punctuation from both ends.
 *
 * Trailing punctuation is removed first, then leading punctuation; characters
 * in the middle of the word are left untouched.
 *
 * @param word - the raw token
 * @returns the token without leading or trailing punctuation
 */
function normalize(word: string): string {
  return removeStartingPunctuation(removeTrailingPunctuation(word));
}
|
||||
|
||||
/**
 * Drops the run of Unicode punctuation characters (`\p{P}`) at the end of
 * `word`, if there is one.
 *
 * @param word - the token to trim
 * @returns `word` without its trailing punctuation; unchanged when it does not end in punctuation
 */
function removeTrailingPunctuation(word: string): string {
  return word.replace(/\p{P}+$/u, "");
}
|
||||
|
||||
/**
 * Drops the run of Unicode punctuation characters (`\p{P}`) at the start of
 * `word`, if there is one.
 *
 * @param word - the token to trim
 * @returns `word` without its leading punctuation; unchanged when it does not start with punctuation
 */
function removeStartingPunctuation(word: string): string {
  return word.replace(/^\p{P}+/u, "");
}
|
||||
|
||||
/**
 * Tells whether `word` occurs anywhere in the text that `parseIngredientText`
 * produces for `ingredient`. The comparison is case-insensitive and is a plain
 * substring test, so a word also matches inside a longer ingredient word.
 *
 * @param ingredient - the ingredient whose text is searched
 * @param word - the word to look for
 * @param recipeIngredientAmountsDisabled - forwarded unchanged to `parseIngredientText`
 * @returns `true` when the lower-cased ingredient text contains the lower-cased word
 */
function ingredientMatchesWord(ingredient: RecipeIngredient, word: string, recipeIngredientAmountsDisabled: boolean) {
  const haystack = parseIngredientText(ingredient, recipeIngredientAmountsDisabled).toLowerCase();
  const needle = word.toLowerCase();
  return haystack.includes(needle);
}
|
||||
|
||||
// Common words that must never trigger an automatic ingredient link.
// This is a deliberately simple heuristic: other common phrases can still cause
// false positives. Matching directly against the ingredient's food value may be
// a better approach in the future, but recipes that only fill in the "notes"
// field still have to be supported.
const BLACKLISTED_WORDS: ReadonlySet<string> = new Set(["and", "the", "for", "with", "without"]);

// Matches any word containing at least one digit. It has no `g` flag, so
// `test()` keeps no `lastIndex` state between calls.
const CONTAINS_DIGIT = /\d/;

/**
 * Decides whether a word must be ignored when auto-linking ingredients.
 *
 * A word is blacklisted when it is one of the common words listed in
 * `BLACKLISTED_WORDS` (exact, case-sensitive comparison) or when it contains
 * a digit.
 *
 * @param word - the word to check
 * @returns `true` when the word must be skipped, `false` otherwise
 */
function isBlackListedWord(word: string): boolean {
  return BLACKLISTED_WORDS.has(word) || CONTAINS_DIGIT.test(word);
}
|
||||
|
||||
/**
 * Finds the ingredients mentioned in a piece of text and returns their
 * reference ids.
 *
 * The text is lower-cased and split on whitespace. Each token has its
 * leading/trailing punctuation stripped, and tokens that are 2 characters or
 * shorter or that are blacklisted are ignored. Every remaining token is
 * compared against each candidate ingredient.
 *
 * @param recipeIngredients - all ingredients of the recipe
 * @param activeRefs - reference ids that are already linked; ingredients with these ids are never returned
 * @param text - the text to scan
 * @param recipeIngredientAmountsDisabled - forwarded to the ingredient text matching
 * @returns the de-duplicated reference ids of all newly matched ingredients
 */
export function useExtractIngredientReferences(recipeIngredients: RecipeIngredient[], activeRefs: string[], text: string, recipeIngredientAmountsDisabled: boolean): Set<string> {
  // Only ingredients that have a reference id and are not linked yet can be matched.
  const candidates: RecipeIngredient[] = [];
  for (const ingredient of recipeIngredients) {
    if (ingredient.referenceId === undefined) {
      continue;
    }
    if (activeRefs.includes(ingredient.referenceId as string)) {
      continue;
    }
    candidates.push(ingredient);
  }

  // A Set de-duplicates ids matched by more than one word.
  const matchedIds = new Set<string>();
  for (const rawToken of text.toLowerCase().split(/\s/)) {
    const word = normalize(rawToken);
    if (word.length <= 2 || isBlackListedWord(word)) {
      continue;
    }
    for (const candidate of candidates) {
      if (ingredientMatchesWord(candidate, word, recipeIngredientAmountsDisabled)) {
        matchedIds.add(candidate.referenceId as string);
      }
    }
  }

  return matchedIds;
}
|
Loading…
Reference in a new issue