Fix empty description

This happened with articles starting with an image. We now fetch the first paragraph containing some text.
2021-12-28 10:52:25 +01:00 · 2021-12-28 10:52:25 +01:00 · ed4c290ba5
commit ed4c290ba5
parent 01bf989c5d
2 changed files with 16 additions and 3 deletions
--- a/src/articles/models.py
+++ b/src/articles/models.py
@ -4,7 +4,6 @@ from functools import cached_property
 import rcssmin
 import readtime
 from bs4 import BeautifulSoup
 from django.conf import settings
 from django.contrib.auth.models import AbstractUser
 from django.contrib.contenttypes.models import ContentType
@ -16,6 +15,7 @@ from django.utils import timezone
 from articles.utils import (
    build_full_absolute_url,
    find_first_paragraph_with_text,
    format_article_content,
    truncate_words_after_char_count,
 )
@ -92,8 +92,7 @@ class Article(models.Model):
    @cached_property
    def get_description(self):
        """Return a short plain-text description of the article.

        Uses the text returned by ``find_first_paragraph_with_text`` for the
        article's formatted content, then shortens it with
        ``truncate_words_after_char_count`` (character count of 160).
        """
        html = self.get_formatted_content
        # The helper owns all HTML parsing; no BeautifulSoup object is built
        # here any more, so nothing in this method may reference ``bs``.
        text = find_first_paragraph_with_text(html)
        return truncate_words_after_char_count(text, 160)
    @cached_property
--- a/src/articles/utils.py
+++ b/src/articles/utils.py
@ -1,6 +1,7 @@
 import re
 import markdown
 from bs4 import BeautifulSoup
 from django.conf import settings
 from markdown.extensions.codehilite import CodeHiliteExtension
@ -36,3 +37,16 @@ def truncate_words_after_char_count(text, char_count):
        text_result.append(word)
        total_length += len(word) + 1
    return " ".join(text_result) + "..."
 def find_first_paragraph_with_text(html):
    """Return the stripped text of the first top-level ``<p>`` that has text.

    Only ``<p>`` elements that are direct children of the parsed document
    (``recursive=False``) are considered, in document order. Paragraphs
    whose text is empty or whitespace-only (for example a paragraph that
    only wraps an image) are skipped.

    Returns an empty string when no top-level paragraph contains text,
    including when ``html`` has no top-level ``<p>`` at all.
    """
    bs = BeautifulSoup(html, "html.parser")
    # Iterate the <p> tags themselves instead of walking ``next_sibling``:
    # siblings include whitespace text nodes and non-paragraph elements, and
    # the end of the document would otherwise have to be detected by
    # catching an exception.
    for paragraph in bs.find_all("p", recursive=False):
        text = paragraph.text.strip()
        if text:
            return text
    return ""