2022-10-29 11:03:23 +02:00
|
|
|
|
from django.core.management import BaseCommand
|
|
|
|
|
from selenium import webdriver
|
2022-10-29 11:25:21 +02:00
|
|
|
|
from selenium.webdriver import Keys
|
2022-10-29 11:03:23 +02:00
|
|
|
|
from selenium.webdriver.common.by import By
|
|
|
|
|
|
|
|
|
|
from character.models import Path, Profile, Race
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class Command(BaseCommand):
    """Scrape the path pages linked from the co-drs.org listing into ``Path`` rows.

    The command drives a headless Firefox through Selenium: it scrolls the
    listing page until enough path links are present, then opens each link and
    creates or updates the ``Path`` that has the same name.
    """

    # Listing page that links to the individual path pages.
    LISTING_URL = "https://www.co-drs.org/fr/jeu/voies"
    # Number of path links to wait for on the listing page before scraping.
    EXPECTED_PATH_COUNT = 95
    # Upper bound on the scrolling phase, so that a listing exposing fewer links
    # than expected cannot hang the command forever.
    SCROLL_TIMEOUT_SECONDS = 120

    def handle(self, *args, **options) -> None:  # noqa: ARG002
        """Run the import: open the listing, collect path URLs, import each one.

        A failure while importing a single path is written to stderr and does
        not stop the remaining paths from being processed. The browser is always
        shut down, whether the run succeeds or raises.
        """
        self.setup_selenium()
        try:
            self.selenium.get(self.LISTING_URL)
            urls = self._collect_path_urls()
            for path_url in urls:
                try:
                    self.import_path(path_url)
                except Exception as e:
                    # Best effort: report the failure and move on to the next path.
                    self.stderr.write(f"{type(e)}: {e}")
        finally:
            # Without this the headless Firefox process outlives the command.
            self.selenium.quit()
        self.stdout.write(f"Finished processing {len(urls)} paths.")

    def _collect_path_urls(self) -> list:
        """Scroll the listing page and return the ``href`` of every path link found.

        Scrolling stops as soon as ``EXPECTED_PATH_COUNT`` links are present, or
        once ``SCROLL_TIMEOUT_SECONDS`` have elapsed; in the latter case a warning
        is written and the links found so far are returned.
        """
        import time  # local import: only this helper needs it

        deadline = time.monotonic() + self.SCROLL_TIMEOUT_SECONDS
        anchors = []
        while len(anchors) < self.EXPECTED_PATH_COUNT:
            # Sending END to the body scrolls to the bottom of the page.
            self.selenium.find_element(By.TAG_NAME, "body").send_keys(Keys.END)
            anchors = self.selenium.find_elements(
                By.CSS_SELECTOR,
                ".card-body .card-title a",
            )
            if len(anchors) < self.EXPECTED_PATH_COUNT and time.monotonic() > deadline:
                self.stdout.write(
                    self.style.WARNING(
                        f"Only found {len(anchors)} of {self.EXPECTED_PATH_COUNT} "
                        "expected paths before timing out.",
                    ),
                )
                break
        return [anchor.get_attribute("href") for anchor in anchors]

    def import_path(self, url: str):
        """Open one path page and create or update the ``Path`` with that name.

        The page's ``h1`` text is used as the path name (typographic apostrophes
        are replaced by plain ones). Category, profile, race and notes are read
        from the page and stored together with ``url``.
        """
        self.selenium.get(url)
        name = self.selenium.find_element(By.TAG_NAME, "h1").text.replace("’", "'")
        if name == "Voie du haut-elfe":  # Fix for incorrect data
            category = Path.Category.RACE
        else:
            category = self.get_category(name)

        # Only profile paths are linked to a profile, only race paths to a race.
        profile = None
        if category == Path.Category.PROFILE:
            profile = self.get_profile(name)
        race = None
        if category == Path.Category.RACE:
            race = self.get_race(name)
        notes = self.get_notes()

        path, _ = Path.objects.update_or_create(
            name=name,
            defaults={
                "category": category,
                "profile": profile,
                "race": race,
                "notes": notes,
                "url": url,
            },
        )
        self.stdout.write(self.style.SUCCESS(f"Created/updated path {path}"))

    def get_category(self, name: str) -> Path.Category | None:
        """Return the category shown on the currently loaded path page.

        ``name`` is only used in the warning message. When the category element
        cannot be read, a warning is written and ``Path.Category.PROFILE`` is
        returned. Any label other than the two special-cased ones is passed to
        ``Path.Category(...)`` as-is.
        """
        try:
            category = (
                self.selenium.find_element(
                    By.CSS_SELECTOR,
                    ".field--name-type .field__item",
                )
                .text.lower()
                .strip()
            )
        except Exception:
            self.stdout.write(
                self.style.WARNING(
                    f"Couldn't find category for {name}. Defaulting to profile.",
                ),
            )
            return Path.Category.PROFILE

        # Site labels that are mapped explicitly rather than passed through.
        if category == "personnage":
            return Path.Category.PROFILE
        if category == "créature":
            return Path.Category.CREATURE
        return Path.Category(category)

    def get_profile(self, name: str) -> Profile | None:
        """Return the ``Profile`` linked from the currently loaded path page.

        ``name`` is only used in the warning message. Returns ``None`` (after
        writing a warning) when the profile link cannot be read from the page.
        A lookup failure in ``Profile.objects.get_by_natural_key`` is not caught
        here and propagates to the caller.
        """
        try:
            profile_name = self.selenium.find_element(
                By.CSS_SELECTOR,
                ".field--name-type + strong + a",
            ).text
        except Exception:
            self.stdout.write(self.style.WARNING(f"Couldn't find profile for {name}"))
            return None
        else:
            return Profile.objects.get_by_natural_key(profile_name)

    def get_race(self, path_name: str) -> Race | None:
        """Return the ``Race`` whose name matches ``path_name`` minus its prefix.

        The name is lower-cased, the "voie de la" / "voie de l'" / "voie du"
        fragments are removed, and the remainder is matched case-insensitively
        against ``Race.name``. Returns ``None`` (after writing a warning) when
        the lookup fails.
        """
        to_remove = ["voie de la", "voie de l'", "voie du"]
        path_name = path_name.lower()
        for text in to_remove:
            path_name = path_name.replace(text, "")
        try:
            return Race.objects.get(name__iexact=path_name.strip())
        except Exception:
            self.stdout.write(self.style.WARNING(f"Couldn't find race for {path_name}"))
            return None

    def get_notes(self) -> str:
        """Return the stripped description text of the currently loaded path page.

        Returns an empty string when the description element cannot be read.
        """
        try:
            return self.selenium.find_element(
                By.CSS_SELECTOR,
                ".mt-3 > .field--name-description",
            ).text.strip()
        except Exception:
            return ""

    def setup_selenium(self):
        """Start a headless Firefox and store the driver on ``self.selenium``."""
        options = webdriver.FirefoxOptions()
        options.add_argument("-headless")
        self.selenium = webdriver.Firefox(options=options)
|