python-blog/src/articles/utils.py

import re

import markdown
from bs4 import BeautifulSoup
from django.conf import settings
from markdown.extensions.codehilite import CodeHiliteExtension
from markdown.extensions.toc import TocExtension

from articles.markdown import LazyLoadingImageExtension


def build_full_absolute_url(request, url):
    """Return an absolute URL for ``url``.

    When a request is available the URL is resolved through
    ``request.build_absolute_uri``. Otherwise it is appended to
    ``settings.BLOG["base_url"]``.

    Args:
        request: Object exposing ``build_absolute_uri``, or a falsy value
            when there is no current request.
        url: The location to make absolute.

    Returns:
        The absolute URL as a string.
    """
    if request:
        return request.build_absolute_uri(url)
    # Join on exactly one slash, whether or not the base ends with one or the
    # path starts with one. Only the seam is normalised, so the "//" of the
    # scheme separator (e.g. "https://") is never collapsed.
    return settings.BLOG["base_url"].rstrip("/") + "/" + url.lstrip("/")


def format_article_content(content):
    """Convert an article's Markdown source into its rendered output.

    Args:
        content: The Markdown text of the article.

    Returns:
        The result of ``markdown.Markdown.convert`` on the pre-processed
        content.
    """
    enabled_extensions = [
        "extra",
        "admonition",
        TocExtension(anchorlink=True),
        CodeHiliteExtension(linenums=False, guess_lang=False),
        LazyLoadingImageExtension(),
    ]
    renderer = markdown.Markdown(extensions=enabled_extensions)
    # Put a backslash in front of any "#" that follows a whitespace character
    # and is immediately followed by word characters.
    # NOTE(review): presumably this keeps hashtag-like tokens ("#python") from
    # being parsed as Markdown headings - confirm.
    escaped_content = re.sub(r"(\s)#(\w+)", r"\1\#\2", content)
    return renderer.convert(escaped_content)


def truncate_words_after_char_count(text, char_count):
    """Keep the leading whole words of ``text`` that fit in ``char_count``.

    Every word is charged its length plus one (for a separating space). Words
    are kept, in order, for as long as the running charge does not exceed
    ``char_count``. Whitespace runs in ``text`` are collapsed to single
    spaces, and ``"..."`` is always appended, even when no word was dropped.

    Args:
        text: The text to shorten.
        char_count: The character budget for the kept words.

    Returns:
        The kept words joined by single spaces, followed by ``"..."``.
    """
    words = text.split()
    consumed = 0
    cutoff = len(words)
    for position, word in enumerate(words):
        consumed += len(word) + 1
        if consumed > char_count:
            # This word no longer fits: keep only the words before it.
            cutoff = position
            break
    return " ".join(words[:cutoff]) + "..."


def find_first_paragraph_with_text(html):
    """Return the first non-empty text found from the first top-level ``<p>``.

    The search starts at the first ``<p>`` that is a direct child of the
    parsed document and, while the current node has no text once stripped,
    moves on to its next sibling.

    Args:
        html: The HTML markup to inspect.

    Returns:
        The stripped text of the first node that has some, or ``""`` when
        the document has no top-level ``<p>`` or no node with text is found.
    """
    soup = BeautifulSoup(html, "html.parser")
    # ``find`` returns None when there is no top-level <p>, and
    # ``next_sibling`` is None after the last node; both simply end the loop
    # instead of relying on a caught exception.
    node = soup.find("p", recursive=False)
    while node is not None:
        text = node.text.strip()
        if text:
            return text
        node = node.next_sibling
    return ""
Refactor articles model 2021-01-03 21:51:46 +01:00			`import re`

			`import markdown`
Fix empty description This happened with articles starting with an image. We now fetch the first paragraph containing some text. 2021-12-28 10:52:25 +01:00			`from bs4 import BeautifulSoup`
Use an absolute url for open graph image 2020-11-28 20:26:37 +01:00			`from django.conf import settings`
Refactor articles model 2021-01-03 21:51:46 +01:00			`from markdown.extensions.codehilite import CodeHiliteExtension`
Add links to titles 2021-12-28 12:40:42 +01:00			`from markdown.extensions.toc import TocExtension`
Refactor articles model 2021-01-03 21:51:46 +01:00
			`from articles.markdown import LazyLoadingImageExtension`
Use an absolute url for open graph image 2020-11-28 20:26:37 +01:00

			`def build_full_absolute_url(request, url):`
			`if request:`
			`return request.build_absolute_uri(url)`
			`else:`
Fix build full absolute url when no request 2020-12-27 19:56:50 +01:00			`return (settings.BLOG["base_url"] + url)[::-1].replace("//", "/", 1)[::-1]`
Refactor articles model 2021-01-03 21:51:46 +01:00

Revert "Remove code line numbers in RSS" This reverts commit fc7b14192ce94efe97603fb6db31634eaa3efc9e. 2021-04-15 18:25:53 +02:00			`def format_article_content(content):`
Refactor articles model 2021-01-03 21:51:46 +01:00			`md = markdown.Markdown(`
			`extensions=[`
			`"extra",`
			`"admonition",`
Add links to titles 2021-12-28 12:40:42 +01:00			`TocExtension(anchorlink=True),`
Revert "Add line numbers on code samples" This reverts commit 20b2da9cd09c36da35e470656ad9eea661f66c7a. 2021-04-15 18:25:57 +02:00			`CodeHiliteExtension(linenums=False, guess_lang=False),`
Refactor articles model 2021-01-03 21:51:46 +01:00			`LazyLoadingImageExtension(),`
			`]`
			`)`
			`content = re.sub(r"(\s)#(\w+)", r"\1\#\2", content)`
			`return md.convert(content)`


			`def truncate_words_after_char_count(text, char_count):`
			`total_length = 0`
			`text_result = []`
			`for word in text.split():`
			`if len(word) + 1 + total_length > char_count:`
			`break`
			`text_result.append(word)`
			`total_length += len(word) + 1`
			`return " ".join(text_result) + "..."`
Fix empty description This happened with articles starting with an image. We now fetch the first paragraph containing some text. 2021-12-28 10:52:25 +01:00

			`def find_first_paragraph_with_text(html):`
			`bs = BeautifulSoup(html, "html.parser")`
			`paragraph = bs.find("p", recursive=False)`
			`text = paragraph.text.strip()`
			`while not text:`
			`try:`
			`paragraph = paragraph.next_sibling`
			`text = paragraph.text.strip()`
			`except Exception:`
			`break`
			`return text`