From bc0c6108e4ab2094efac10d80ae5408214ecb062 Mon Sep 17 00:00:00 2001 From: Gabriel Augendre Date: Sat, 29 Oct 2022 11:25:21 +0200 Subject: [PATCH] Complete path import --- .../management/commands/import_paths.py | 62 ++++++++++++++----- 1 file changed, 45 insertions(+), 17 deletions(-) diff --git a/src/character/management/commands/import_paths.py b/src/character/management/commands/import_paths.py index 532d052..a7c0588 100644 --- a/src/character/management/commands/import_paths.py +++ b/src/character/management/commands/import_paths.py @@ -1,5 +1,6 @@ from django.core.management import BaseCommand from selenium import webdriver +from selenium.webdriver import Keys from selenium.webdriver.common.by import By from character.models import Path, Profile, Race @@ -10,9 +11,13 @@ class Command(BaseCommand): url = "https://www.co-drs.org/fr/jeu/voies" self.setup_selenium() self.selenium.get(url) - anchors = self.selenium.find_elements( - By.CSS_SELECTOR, ".card-body .card-title a" - ) + anchors = [] + expected_path_count = 95 + while len(anchors) < expected_path_count: + self.selenium.find_element(By.TAG_NAME, "body").send_keys(Keys.END) + anchors = self.selenium.find_elements( + By.CSS_SELECTOR, ".card-body .card-title a" + ) urls = [anchor.get_attribute("href") for anchor in anchors] for url in urls: try: @@ -23,18 +28,27 @@ class Command(BaseCommand): def import_path(self, url: str): self.selenium.get(url) - name = self.selenium.find_element(By.TAG_NAME, "h1").text - category = self.get_category(name) + name = self.selenium.find_element(By.TAG_NAME, "h1").text.replace("’", "'") + if name == "Voie du haut-elfe": # Fix for incorrect data + category = Path.Category.RACE + else: + category = self.get_category(name) profile = None if category == Path.Category.PROFILE: - profile = self.get_profile() + profile = self.get_profile(name) race = None if category == Path.Category.RACE: - profile = self.get_race(name) + race = self.get_race(name) + notes = self.get_notes(name) path, _ = Path.objects.update_or_create( name=name, - defaults={"category": category, "profile": profile, "race": race}, + defaults={ + "category": category, + "profile": profile, + "race": race, + "notes": notes, + }, ) self.stdout.write(self.style.SUCCESS(f"Created/updated path {path}")) @@ -61,18 +75,32 @@ class Command(BaseCommand): return Path.Category.CREATURE return Path.Category(category) - def get_profile(self) -> Profile: - profile_name = self.selenium.find_element( - By.CSS_SELECTOR, ".field--name-type + strong + a" - ).text - return Profile.objects.get_by_natural_key(profile_name) + def get_profile(self, name: str) -> Profile | None: + try: + profile_name = self.selenium.find_element( + By.CSS_SELECTOR, ".field--name-type + strong + a" + ).text + return Profile.objects.get_by_natural_key(profile_name) + except Exception: + self.stdout.write(self.style.WARNING(f"Couldn't find profile for {name}")) - def get_race(self, path_name: str) -> Race: - return None - to_remove = [""] + def get_race(self, path_name: str) -> Race | None: + to_remove = ["voie de la", "voie de l'", "voie du"] + path_name = path_name.lower() for text in to_remove: path_name = path_name.replace(text, "") - return Race.objects.get(name__iexact=path_name) + try: + return Race.objects.get(name__iexact=path_name.strip()) + except Exception: + self.stdout.write(self.style.WARNING(f"Couldn't find race for {path_name}")) + + def get_notes(self, path_name: str) -> str: + try: + return self.selenium.find_element( + By.CLASS_NAME, "field--name-description" + ).text.strip() + except Exception: + return "" def setup_selenium(self): options = webdriver.FirefoxOptions()