Complete path import

This commit is contained in:
Gabriel Augendre 2022-10-29 11:25:21 +02:00
parent f601b71928
commit bc0c6108e4

View file

@ -1,5 +1,6 @@
from django.core.management import BaseCommand from django.core.management import BaseCommand
from selenium import webdriver from selenium import webdriver
from selenium.webdriver import Keys
from selenium.webdriver.common.by import By from selenium.webdriver.common.by import By
from character.models import Path, Profile, Race from character.models import Path, Profile, Race
@ -10,9 +11,13 @@ class Command(BaseCommand):
url = "https://www.co-drs.org/fr/jeu/voies" url = "https://www.co-drs.org/fr/jeu/voies"
self.setup_selenium() self.setup_selenium()
self.selenium.get(url) self.selenium.get(url)
anchors = self.selenium.find_elements( anchors = []
By.CSS_SELECTOR, ".card-body .card-title a" expected_path_count = 95
) while len(anchors) < expected_path_count:
self.selenium.find_element(By.TAG_NAME, "body").send_keys(Keys.END)
anchors = self.selenium.find_elements(
By.CSS_SELECTOR, ".card-body .card-title a"
)
urls = [anchor.get_attribute("href") for anchor in anchors] urls = [anchor.get_attribute("href") for anchor in anchors]
for url in urls: for url in urls:
try: try:
@ -23,18 +28,27 @@ class Command(BaseCommand):
def import_path(self, url: str):
    """Scrape a single path page and create or update its ``Path`` row.

    Loads ``url`` in the Selenium session, reads the path name from the
    page's ``<h1>``, resolves the category, optional profile, optional race
    and the notes through the helper methods, then upserts the ``Path``
    keyed on its name and reports the result on stdout.
    """
    self.selenium.get(url)

    # Normalise the typographic apostrophe to a plain one so the name lines
    # up with the straight-quoted "voie de l'" prefix used for race lookup.
    # NOTE(review): the search literal is written as an escape because an
    # empty-string literal here would make ``str.replace`` insert an
    # apostrophe between every character; confirm U+2019 is the character
    # the site actually uses.
    name = self.selenium.find_element(By.TAG_NAME, "h1").text.replace(
        "\u2019", "'"
    )

    if name == "Voie du haut-elfe":  # Fix for incorrect data
        category = Path.Category.RACE
    else:
        category = self.get_category(name)

    # Profile and race are only looked up for their matching category.
    profile = None
    if category == Path.Category.PROFILE:
        profile = self.get_profile(name)

    race = None
    if category == Path.Category.RACE:
        race = self.get_race(name)

    notes = self.get_notes(name)

    path, _ = Path.objects.update_or_create(
        name=name,
        defaults={
            "category": category,
            "profile": profile,
            "race": race,
            "notes": notes,
        },
    )
    self.stdout.write(self.style.SUCCESS(f"Created/updated path {path}"))
@ -61,18 +75,32 @@ class Command(BaseCommand):
return Path.Category.CREATURE return Path.Category.CREATURE
return Path.Category(category) return Path.Category(category)
def get_profile(self, name: str) -> Profile | None:
    """Return the ``Profile`` linked from the currently loaded page.

    Reads the text of the link matched by
    ``.field--name-type + strong + a`` and resolves it with
    ``Profile.objects.get_by_natural_key``.

    Args:
        name: Name of the path being imported; only used in the warning.

    Returns:
        The matching profile, or ``None`` when the lookup fails for any
        reason (a warning naming the exception type is written to stdout).
    """
    try:
        profile_name = self.selenium.find_element(
            By.CSS_SELECTOR, ".field--name-type + strong + a"
        ).text
        return Profile.objects.get_by_natural_key(profile_name)
    except Exception as exc:
        # Best effort: a path without a resolvable profile must not abort
        # the import, but say what went wrong so a missing element can be
        # told apart from a missing row or an unrelated failure.
        self.stdout.write(
            self.style.WARNING(
                f"Couldn't find profile for {name} ({type(exc).__name__})"
            )
        )
        return None
def get_race(self, path_name: str) -> Race | None:
    """Return the ``Race`` a racial path belongs to, derived from its name.

    The race name is what remains of the lower-cased path name once a
    leading "voie de la" / "voie de l'" / "voie du" has been removed; it is
    then matched case-insensitively against ``Race.name``.

    Args:
        path_name: Display name of the path, e.g. "Voie du haut-elfe".

    Returns:
        The matching race, or ``None`` when the lookup fails for any reason
        (a warning is written to stdout).
    """
    prefixes = ("voie de la", "voie de l'", "voie du")
    race_name = path_name.lower().strip()
    for prefix in prefixes:
        # Only strip a *leading* prefix; removing the phrase anywhere in
        # the string could mangle a race name that happens to contain it.
        if race_name.startswith(prefix):
            race_name = race_name[len(prefix):].strip()
            break

    try:
        return Race.objects.get(name__iexact=race_name)
    except Exception as exc:
        # Best effort: keep importing, but report the original path name
        # together with the race name that was actually searched for.
        self.stdout.write(
            self.style.WARNING(
                f"Couldn't find race for {path_name} "
                f"(looked up {race_name!r}: {type(exc).__name__})"
            )
        )
        return None
def get_notes(self, path_name: str) -> str:
    """Return the description text of the currently loaded page.

    Reads the element with class ``field--name-description`` and returns
    its text with surrounding whitespace removed. Any failure while reading
    it yields an empty string instead. ``path_name`` is accepted but not
    used by the lookup.
    """
    try:
        description = self.selenium.find_element(
            By.CLASS_NAME, "field--name-description"
        )
        notes = description.text.strip()
    except Exception:
        notes = ""
    return notes
def setup_selenium(self): def setup_selenium(self):
options = webdriver.FirefoxOptions() options = webdriver.FirefoxOptions()