♻ Use the Unicode paragraph separator (U+2029) instead of \n\n

This should reduce misfires of the second replace and give a more accurate character count (the separator is a single character, whereas "\n\n" counts as two), so that we avoid ellipsizing too much of the content.
tibonihoo · Jun 22, 2024 · 32b1ec3 · 32b1ec3
1 parent 08655db
commit 32b1ec3
Showing 1 changed file with 4 additions and 2 deletions.
diff --git a/wom_tributary/utils/tweet_summarizers.py b/wom_tributary/utils/tweet_summarizers.py
@@ -74,12 +74,14 @@ def from_activity_item(item, link_builder):
 
 
 def build_content_excerpt(content_unicode):
-  content_unicode = LINE_BREAK_REGEX.sub("\n\n", content_unicode)
+  unicode_paragraph = "\u2029"
+  content_unicode = LINE_BREAK_REGEX.sub(unicode_paragraph, content_unicode)
   content_unicode = html.unescape(strip_tags(content_unicode)).strip()
   excerpt = content_unicode[:MAX_CONTENT_SIZE_CHARS].strip()
   if len(excerpt) < len(content_unicode):
       excerpt += "(...)"
-  return excerpt.replace("\n\n", "<br>")
+  return excerpt.replace(unicode_paragraph, "<br>")
+
 
 def build_tweet_index_by_tag(data, keep_only_after_datetime, link_builder):
   reverse_index = defaultdict(list)