From ed4c290ba5bcdcaba4acbca3e2496ef9f2239054 Mon Sep 17 00:00:00 2001 From: Gabriel Augendre Date: Tue, 28 Dec 2021 10:52:25 +0100 Subject: [PATCH] Fix empty description This happened with articles starting with an image. We now fetch the first paragraph containing some text. --- src/articles/models.py | 5 ++--- src/articles/utils.py | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/src/articles/models.py b/src/articles/models.py index c148275..a9dc88f 100644 --- a/src/articles/models.py +++ b/src/articles/models.py @@ -4,7 +4,6 @@ from functools import cached_property import rcssmin import readtime -from bs4 import BeautifulSoup from django.conf import settings from django.contrib.auth.models import AbstractUser from django.contrib.contenttypes.models import ContentType @@ -16,6 +15,7 @@ from django.utils import timezone from articles.utils import ( build_full_absolute_url, + find_first_paragraph_with_text, format_article_content, truncate_words_after_char_count, ) @@ -92,8 +92,7 @@ class Article(models.Model): @cached_property def get_description(self): html = self.get_formatted_content - bs = BeautifulSoup(html, "html.parser") - text = bs.find("p", recursive=False).text + text = find_first_paragraph_with_text(html) return truncate_words_after_char_count(text, 160) @cached_property diff --git a/src/articles/utils.py b/src/articles/utils.py index 0c8e954..224b84c 100644 --- a/src/articles/utils.py +++ b/src/articles/utils.py @@ -1,6 +1,7 @@ import re import markdown +from bs4 import BeautifulSoup from django.conf import settings from markdown.extensions.codehilite import CodeHiliteExtension @@ -36,3 +37,16 @@ def truncate_words_after_char_count(text, char_count): text_result.append(word) total_length += len(word) + 1 return " ".join(text_result) + "..." 
+
+
+def find_first_paragraph_with_text(html):
+    """Return the stripped text of the first top-level ``<p>`` that has text.
+
+    Paragraphs that contain no text (for instance one holding only an
+    image) are skipped, so an article starting with an image still gets a
+    non-empty description.
+
+    :param html: HTML markup to inspect.
+    :return: The paragraph text without surrounding whitespace, or an empty
+        string when no top-level paragraph contains any text.
+    """
+    bs = BeautifulSoup(html, "html.parser")
+    # recursive=False limits the search to direct children of the parsed
+    # document. Iterating over find_all() also covers the "no <p> at all"
+    # case (empty result) without relying on an exception to end the loop.
+    for paragraph in bs.find_all("p", recursive=False):
+        text = paragraph.text.strip()
+        if text:
+            return text
+    return ""