diff --git a/poetry.lock b/poetry.lock index 489344e..db3c93b 100644 --- a/poetry.lock +++ b/poetry.lock @@ -296,14 +296,6 @@ gevent = ["gevent (>=1.4.0)"] setproctitle = ["setproctitle"] tornado = ["tornado (>=0.2)"] -[[package]] -name = "html2text" -version = "2020.1.16" -description = "Turn HTML into equivalent Markdown-structured text." -category = "main" -optional = false -python-versions = ">=3.5" - [[package]] name = "identify" version = "2.4.0" @@ -844,7 +836,7 @@ multidict = ">=4.0" [metadata] lock-version = "1.1" python-versions = "^3.10" -content-hash = "30053d4662f7e86ae956249a199beeaed34b338fe420d09ad2db0ae4f9d2d8bd" +content-hash = "de3fbd11622b6f872fc18cd21b78878fe8e0b3e8faa54527400fe98f0ecb6e34" [metadata.files] asgiref = [ @@ -1015,10 +1007,6 @@ gunicorn = [ {file = "gunicorn-20.1.0-py3-none-any.whl", hash = "sha256:9dcc4547dbb1cb284accfb15ab5667a0e5d1881cc443e0677b4882a4067a807e"}, {file = "gunicorn-20.1.0.tar.gz", hash = "sha256:e0a968b5ba15f8a328fdfd7ab1fcb5af4470c28aaf7e55df02a99bc13138e6e8"}, ] -html2text = [ - {file = "html2text-2020.1.16-py3-none-any.whl", hash = "sha256:c7c629882da0cf377d66f073329ccf34a12ed2adf0169b9285ae4e63ef54c82b"}, - {file = "html2text-2020.1.16.tar.gz", hash = "sha256:e296318e16b059ddb97f7a8a1d6a5c1d7af4544049a01e261731d2d5cc277bbb"}, -] identify = [ {file = "identify-2.4.0-py2.py3-none-any.whl", hash = "sha256:eba31ca80258de6bb51453084bff4a923187cd2193b9c13710f2516ab30732cc"}, {file = "identify-2.4.0.tar.gz", hash = "sha256:a33ae873287e81651c7800ca309dc1f84679b763c9c8b30680e16fbfa82f0107"}, diff --git a/pyproject.toml b/pyproject.toml index 53ceea6..3925a47 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,7 +15,6 @@ django-anymail = {version = "^8.4", extras = ["mailgun"]} pillow = "^8.4" django-cleanup = "^5.0" requests = "^2.24" -html2text = "^2020.1.16" readtime = "^1.1.1" pylibmc = "^1.6.1" django-debug-toolbar = "^3.2" @@ -23,6 +22,7 @@ whitenoise = {extras = ["brotli"], version = "^5.2.0"} rcssmin = "^1.0.6" django-csp = "^3.7" django-two-factor-auth = {extras = ["phonenumberslite"], git = "https://github.com/Bouke/django-two-factor-auth.git", rev = "ffe4422e"} +beautifulsoup4 = "^4.10.0" [tool.poetry.dev-dependencies] pre-commit = "^2.7" diff --git a/src/articles/models.py b/src/articles/models.py index a5b9aa0..90c584c 100644 --- a/src/articles/models.py +++ b/src/articles/models.py @@ -4,6 +4,7 @@ from functools import cached_property import rcssmin import readtime +from bs4 import BeautifulSoup from django.conf import settings from django.contrib.auth.models import AbstractUser from django.contrib.contenttypes.models import ContentType @@ -16,7 +17,6 @@ from django.utils import timezone from articles.utils import ( build_full_absolute_url, format_article_content, - get_html_to_text_converter, truncate_words_after_char_count, ) @@ -88,8 +88,8 @@ class Article(models.Model): @cached_property def get_description(self): html = self.get_formatted_content - converter = get_html_to_text_converter() - text = converter.handle(html) + bs = BeautifulSoup(html, "html.parser") + text = bs.find("p", recursive=False).text return truncate_words_after_char_count(text, 160) @cached_property diff --git a/src/articles/utils.py b/src/articles/utils.py index 4ee6af4..0c8e954 100644 --- a/src/articles/utils.py +++ b/src/articles/utils.py @@ -1,6 +1,5 @@ import re -import html2text import markdown from django.conf import settings from markdown.extensions.codehilite import CodeHiliteExtension @@ -37,12 +36,3 @@ def truncate_words_after_char_count(text, char_count): text_result.append(word) total_length += len(word) + 1 return " ".join(text_result) + "..." - - -def get_html_to_text_converter(): - converter = html2text.HTML2Text() - converter.ignore_images = True - converter.ignore_links = True - converter.ignore_tables = True - converter.ignore_emphasis = True - return converter