Fix bug in instruction normalization

This commit is contained in:
Richard Mitic 2021-01-06 18:37:35 +01:00
parent de17085e04
commit 8ab1bdeb4a
2 changed files with 29 additions and 44 deletions

View file

@ -28,7 +28,7 @@ def normalize_image_url(image) -> str:
def normalize_instructions(instructions) -> List[dict]:
# One long string split by (possibly multiple) new lines
if type(instructions) == str:
return [{"text": line.strip()} for line in filter(None, instructions.split("\n"))]
return [{"text": line.strip()} for line in filter(None, instructions.splitlines())]
# Plain strings in a list
elif type(instructions) == list and type(instructions[0]) == str:

View file

@ -7,48 +7,33 @@ from services.scrape_services import normalize_data, normalize_instructions
CWD = Path(__file__).parent
RAW_RECIPE_DIR = CWD.joinpath("data", "recipes-raw")
def pytest_generate_tests(metafunc):
# called once per each test function
funcarglist = metafunc.cls.params[metafunc.function.__name__]
argnames = sorted(funcarglist[0])
metafunc.parametrize(
argnames, [[funcargs[name] for name in argnames] for funcargs in funcarglist]
)
@pytest.mark.parametrize("json_file,num_steps", [
("best-homemade-salsa-recipe.json", 2),
("blue-cheese-stuffed-turkey-meatballs-with-raspberry-balsamic-glaze-2.json", 3),
("bon_appetit.json", 8),
("chunky-apple-cake.json", 4),
("dairy-free-impossible-pumpkin-pie.json", 7),
("how-to-make-instant-pot-spaghetti.json", 8),
("instant-pot-chicken-and-potatoes.json", 4),
("instant-pot-kerala-vegetable-stew.json", 13),
("jalapeno-popper-dip.json", 4),
("microwave_sweet_potatoes_04783.json", 4),
("moroccan-skirt-steak-with-roasted-pepper-couscous.json", 4),
("Pizza-Knoblauch-Champignon-Paprika-vegan.html.json", 3),
])
def test_normalize_data(json_file, num_steps):
recipe_data = normalize_data(json.load(open(RAW_RECIPE_DIR.joinpath(json_file))))
assert len(recipe_data["recipeInstructions"]) == num_steps
def raw_recipe_info(file_name: str, num_steps: int) -> dict:
return {"json_file": RAW_RECIPE_DIR.joinpath(file_name), "num_steps": num_steps}
class TestScraper:
# a map specifying multiple argument sets for a test method
params = {
"test_normalize_instructions": [
dict(instructions="A\n\nB\n\nC\n\n"),
dict(instructions=["A","B","C"]),
dict(instructions=[{"@type": "HowToStep", "text": "A"},
{"@type": "HowToStep", "text": "B"},
{"@type": "HowToStep", "text": "C"}]),
],
"test_normalize_data": [
raw_recipe_info("best-homemade-salsa-recipe.json", 2),
raw_recipe_info("blue-cheese-stuffed-turkey-meatballs-with-raspberry-balsamic-glaze-2.json", 3),
raw_recipe_info("bon_appetit.json", 8),
raw_recipe_info("chunky-apple-cake.json", 4),
raw_recipe_info("dairy-free-impossible-pumpkin-pie.json", 7),
raw_recipe_info("how-to-make-instant-pot-spaghetti.json", 8),
raw_recipe_info("instant-pot-chicken-and-potatoes.json", 4),
raw_recipe_info("instant-pot-kerala-vegetable-stew.json", 13),
raw_recipe_info("jalapeno-popper-dip.json", 4),
raw_recipe_info("microwave_sweet_potatoes_04783.json", 4),
raw_recipe_info("moroccan-skirt-steak-with-roasted-pepper-couscous.json", 4),
raw_recipe_info("Pizza-Knoblauch-Champignon-Paprika-vegan.html.json", 5),
]
}
def test_normalize_data(self, json_file, num_steps):
recipe_data = normalize_data(json.load(open(json_file)))
assert len(recipe_data["recipeInstructions"]) == num_steps
def test_normalize_instructions(self, instructions):
assert normalize_instructions(instructions) == [{"text": "A"}, {"text": "B"}, {"text": "C"}]
@pytest.mark.parametrize("instructions", [
"A\n\nB\n\nC\n\n",
"A\nB\nC\n",
"A\r\n\r\nB\r\n\r\nC\r\n\r\n",
"A\r\nB\r\nC\r\n",
["A","B","C"],
[{"@type": "HowToStep", "text": x} for x in ["A","B","C"]]
])
def test_normalize_instructions(instructions):
assert normalize_instructions(instructions) == [{"text": "A"}, {"text": "B"}, {"text": "C"}]