feat: improve automatic ingredient linking (#1836)

* Filtering special characters during automatic linking of ingredients to instructions

Used a unicode group to have a set of all unicode punctuation marks

* Allow linking of ingredients that appear at the beginning of a new line in the instruction text

* Extracted ingredient matching into a composable and added tests. Ignoring 2 letter words to avoid false matches.

While testing the code, matches on two-letter words were a large source of false positives.
This commit is contained in:
jenscalaerts 2022-12-29 23:00:31 +01:00 committed by GitHub
parent 19ae89a195
commit 83b8ce659e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 135 additions and 44 deletions

View file

@ -233,6 +233,7 @@ import { parseIngredientText } from "~/composables/recipes";
import { uuid4, detectServerBaseUrl } from "~/composables/use-utils";
import { useUserApi, useStaticRoutes } from "~/composables/api";
import { usePageState } from "~/composables/recipe-page/shared-state";
import { useExtractIngredientReferences } from "~/composables/recipe-page/use-extract-ingredient-references";
import { NoUndefinedField } from "~/lib/api/types/non-generated";
import DropZone from "~/components/global/DropZone.vue";
@ -427,50 +428,12 @@ export default defineComponent({
}
// Auto-links ingredients to the active instruction step: every ingredient whose text matches a word of
// `activeText` gets its `referenceId` appended to `activeRefs`.
// NOTE(review): this span contains both an inline word-by-word matcher and a later call to
// `useExtractIngredientReferences` - it looks like removed and added diff lines shown together.
// Confirm which of the two is the live implementation before relying on either.
function autoSetReferences() {
// Skip blacklisted words when auto-linking. This is a kludgy approach: common words are blacklisted, but
// other common phrases still trigger false positives. A possible future improvement is to check the food
// field directly against the instructions, while still supporting users who leave it empty
// and only use the "notes" feature.
const blackListedText = [
"and",
"or",
"the",
"a",
"an",
"of",
"in",
"on",
"to",
"for",
"by",
"with",
"without",
"",
" ",
];
const blackListedRegexMatch = /\d/gm; // Matches any word that contains a digit
// Check if any of the words in the active text match the ingredient text.
// The text is split on single space characters only, so other whitespace (e.g. newlines) does not separate words.
const instructionsByWord = activeText.value.toLowerCase().split(" ");
instructionsByWord.forEach((word) => {
if (blackListedText.includes(word) || word.match(blackListedRegexMatch)) {
return;
}
props.recipe.recipeIngredient.forEach((ingredient) => {
const searchText = parseIngredientText(ingredient, props.recipe.settings.disableAmount);
// Ingredients without a reference id cannot be linked.
if (ingredient.referenceId === undefined) {
return;
}
// The word only matches where it directly follows a space in the ingredient text; ids already linked are skipped.
if (searchText.toLowerCase().includes(" " + word) && !activeRefs.value.includes(ingredient.referenceId)) {
console.info("Word Matched", `'${word}'`, ingredient.note);
activeRefs.value.push(ingredient.referenceId);
}
});
});
// Hand the ingredients, the currently linked ids and the step text to the composable,
// then append every id it returns to the linked ids.
useExtractIngredientReferences(
props.recipe.recipeIngredient,
activeRefs.value,
activeText.value,
props.recipe.settings.disableAmount
).forEach((ingredient: string) => activeRefs.value.push(ingredient));
}
const ingredientLookup = computed(() => {

View file

@ -0,0 +1,68 @@
import { describe, expect, test } from "vitest";
import { useExtractIngredientReferences } from "./use-extract-ingredient-references";
// Punctuation samples that may directly precede or follow an ingredient word in an instruction.
const punctuationMarks = ["*", "?", "/", "!", "**", "&", "."];

// Spec for useExtractIngredientReferences: every case passes a single ingredient (or none), the list of
// already linked reference ids, the instruction text, and `true` for the "amounts disabled" flag.
describe("test use extract ingredient references", () => {
  test("when text empty return empty", () => {
    const result = useExtractIngredientReferences([{ note: "Onions", referenceId: "123" }], [], "", true);
    expect(result).toStrictEqual(new Set());
  });

  // The text says "Onion" while the note says "Onions": this is the partial (substring) match case.
  test("when the ingredient matches partially and has a reference id, return the referenceId", () => {
    const result = useExtractIngredientReferences([{ note: "Onions", referenceId: "123" }], [], "A sentence containing Onion", true);
    expect(result).toEqual(new Set(["123"]));
  });

  // `%s` injects the punctuation mark into the title so each generated case is identifiable in reports.
  test.each(punctuationMarks)("when ingredient is suffixed by punctuation %s, return the referenceId", (suffix) => {
    const result = useExtractIngredientReferences([{ note: "Onions", referenceId: "123" }], [], "A sentence containing Onion" + suffix, true);
    expect(result).toEqual(new Set(["123"]));
  });

  test.each(punctuationMarks)("when ingredient is prefixed by punctuation %s, return the referenceId", (prefix) => {
    const result = useExtractIngredientReferences([{ note: "Onions", referenceId: "123" }], [], "A sentence containing " + prefix + "Onion", true);
    expect(result).toEqual(new Set(["123"]));
  });

  test("when ingredient is first on a new line, return the referenceId", () => {
    const multilineString = "lksjdlk\nOnion";
    const result = useExtractIngredientReferences([{ note: "Onions", referenceId: "123" }], [], multilineString, true);
    expect(result).toEqual(new Set(["123"]));
  });

  // The text and the note are both "Onions": this is the exact match case.
  test("when the ingredient matches exactly and has a reference id, return the referenceId", () => {
    const result = useExtractIngredientReferences([{ note: "Onions", referenceId: "123" }], [], "A sentence containing Onions", true);
    expect(result).toEqual(new Set(["123"]));
  });

  test("when the ingredient matches with different casing and has a reference id, return the referenceId", () => {
    const result = useExtractIngredientReferences([{ note: "Onions", referenceId: "123" }], [], "A sentence containing oNions", true);
    expect(result).toEqual(new Set(["123"]));
  });

  test("when no ingredients, return empty", () => {
    const result = useExtractIngredientReferences([], [], "A sentence containing oNions", true);
    expect(result).toEqual(new Set());
  });

  test("when an ingredient matches but is already in the existing referenceIds, do not return the referenceId", () => {
    const result = useExtractIngredientReferences([{ note: "Onion", referenceId: "123" }], ["123"], "A sentence containing Onion", true);
    expect(result).toEqual(new Set());
  });

  test("when a word is 2 letters or shorter, it is ignored", () => {
    const result = useExtractIngredientReferences([{ note: "Onion", referenceId: "123" }], [], "A sentence containing On", true);
    expect(result).toEqual(new Set());
  });
});

View file

@ -0,0 +1,60 @@
import { RecipeIngredient } from "~/lib/api/types/recipe";
import { parseIngredientText } from "~/composables/recipes";
/**
 * Cleans a single word for matching by stripping punctuation from both ends:
 * trailing punctuation is removed first, then leading punctuation.
 */
function normalize(word: string): string {
  return removeStartingPunctuation(removeTrailingPunctuation(word));
}
/**
 * Drops the run of punctuation characters (Unicode general category P) at the end of `word`.
 * Punctuation anywhere else in the word is left untouched.
 */
function removeTrailingPunctuation(word: string): string {
  return word.replace(/\p{P}+$/u, "");
}
/**
 * Drops the run of punctuation characters (Unicode general category P) at the start of `word`.
 * Punctuation anywhere else in the word is left untouched.
 */
function removeStartingPunctuation(word: string): string {
  return word.replace(/^\p{P}+/u, "");
}
/**
 * Tells whether `word` occurs, ignoring case, as a substring of the ingredient's text
 * as produced by `parseIngredientText`.
 *
 * @param ingredient the ingredient whose text is searched
 * @param word the word to look for
 * @param recipeIngredientAmountsDisabled forwarded unchanged to `parseIngredientText`
 */
function ingredientMatchesWord(ingredient: RecipeIngredient, word: string, recipeIngredientAmountsDisabled: boolean) {
  const haystack = parseIngredientText(ingredient, recipeIngredientAmountsDisabled).toLowerCase();
  const needle = word.toLowerCase();
  return haystack.includes(needle);
}
// Common filler words that must never link an ingredient on their own. This is a kludgy approach: other
// common phrases can still trigger false positives. A possible future improvement is to match the food
// field directly against the instructions, while still supporting users who only fill in the "notes" field.
const blackListedWords: ReadonlySet<string> = new Set(["and", "the", "for", "with", "without"]);

// Matches any word containing a digit. No `g` flag, so `test` keeps no state between calls.
const containsDigit = /\d/;

/**
 * Tells whether a word must be ignored during automatic ingredient linking.
 *
 * A word is ignored when it is one of the blacklisted filler words (compared case-insensitively)
 * or when it contains at least one digit.
 *
 * @param word a single word taken from the instruction text
 * @returns `true` when the word must not be used for matching, `false` otherwise
 */
function isBlackListedWord(word: string): boolean {
  return blackListedWords.has(word.toLowerCase()) || containsDigit.test(word);
}
/**
 * Finds the reference ids of the ingredients that are mentioned in an instruction text.
 *
 * The text is lower-cased and split on whitespace; each word has its surrounding punctuation stripped.
 * Words of two characters or fewer and blacklisted words are skipped. Every remaining word is compared
 * against each candidate ingredient, and the reference ids of the matching ingredients are collected.
 *
 * @param recipeIngredients all ingredients of the recipe
 * @param activeRefs reference ids that are already linked; these ingredients are never returned
 * @param text the instruction text to scan
 * @param recipeIngredientAmountsDisabled forwarded to the ingredient text matching
 * @returns the set of matched reference ids, without duplicates
 */
export function useExtractIngredientReferences(
  recipeIngredients: RecipeIngredient[],
  activeRefs: string[],
  text: string,
  recipeIngredientAmountsDisabled: boolean
): Set<string> {
  // Candidates are the ingredients that carry a reference id and are not linked yet.
  const candidates = recipeIngredients.filter(
    (ingredient) => ingredient.referenceId !== undefined && !activeRefs.includes(ingredient.referenceId as string)
  );

  // A Set deduplicates ids matched by more than one word.
  const matchedIds = new Set<string>();
  for (const rawWord of text.toLowerCase().split(/\s/)) {
    const word = normalize(rawWord);
    if (word.length <= 2 || isBlackListedWord(word)) {
      continue;
    }
    for (const ingredient of candidates) {
      if (ingredientMatchesWord(ingredient, word, recipeIngredientAmountsDisabled)) {
        matchedIds.add(ingredient.referenceId as string);
      }
    }
  }
  return matchedIds;
}