Fix empty description

The description came out empty for articles whose first paragraph contained only an image.
We now fetch the first paragraph containing some text.
This commit is contained in:
Gabriel Augendre 2021-12-28 10:52:25 +01:00
parent 01bf989c5d
commit ed4c290ba5
2 changed files with 16 additions and 3 deletions

View file

@ -4,7 +4,6 @@ from functools import cached_property
import rcssmin import rcssmin
import readtime import readtime
from bs4 import BeautifulSoup
from django.conf import settings from django.conf import settings
from django.contrib.auth.models import AbstractUser from django.contrib.auth.models import AbstractUser
from django.contrib.contenttypes.models import ContentType from django.contrib.contenttypes.models import ContentType
@ -16,6 +15,7 @@ from django.utils import timezone
from articles.utils import ( from articles.utils import (
build_full_absolute_url, build_full_absolute_url,
find_first_paragraph_with_text,
format_article_content, format_article_content,
truncate_words_after_char_count, truncate_words_after_char_count,
) )
@ -92,8 +92,7 @@ class Article(models.Model):
@cached_property @cached_property
def get_description(self): def get_description(self):
html = self.get_formatted_content html = self.get_formatted_content
bs = BeautifulSoup(html, "html.parser") text = find_first_paragraph_with_text(html)
text = bs.find("p", recursive=False).text
return truncate_words_after_char_count(text, 160) return truncate_words_after_char_count(text, 160)
@cached_property @cached_property

View file

@ -1,6 +1,7 @@
import re import re
import markdown import markdown
from bs4 import BeautifulSoup
from django.conf import settings from django.conf import settings
from markdown.extensions.codehilite import CodeHiliteExtension from markdown.extensions.codehilite import CodeHiliteExtension
@ -36,3 +37,16 @@ def truncate_words_after_char_count(text, char_count):
text_result.append(word) text_result.append(word)
total_length += len(word) + 1 total_length += len(word) + 1
return " ".join(text_result) + "..." return " ".join(text_result) + "..."
def find_first_paragraph_with_text(html):
    """Return the first non-blank text found at the top level of *html*.

    The search starts at the first top-level ``<p>`` element (direct child
    of the parsed document, because ``recursive=False``) and, while the
    current node's text is blank, moves on to its next sibling.  Siblings
    are not filtered by tag name, so the returned text may come from a
    node other than a ``<p>``.

    Args:
        html: Markup to parse with BeautifulSoup's ``html.parser``.

    Returns:
        The whitespace-stripped text of the first node that has any, or
        ``""`` when there is no top-level ``<p>`` or no node with text is
        found before the siblings run out.
    """
    soup = BeautifulSoup(html, "html.parser")
    # ``find`` returns None when the document has no top-level <p>; the
    # loop condition covers that case as well as running off the end of
    # the sibling chain (``next_sibling`` is None on the last node).
    node = soup.find("p", recursive=False)
    while node is not None:
        try:
            text = node.text.strip()
        except AttributeError:
            # Sibling node that exposes no ``.text``: stop searching, as
            # the previous catch-all handler did, but without hiding
            # unrelated errors.
            return ""
        if text:
            return text
        node = node.next_sibling
    return ""