From 32b1ec37e0d1b46fd7460fcea77753bdb773ff1c Mon Sep 17 00:00:00 2001
From: Thibauld Nion <thibauld@tibonihoo.net>
Date: Sat, 22 Jun 2024 21:40:03 +0200
Subject: [PATCH] =?UTF-8?q?=E2=99=BB=20Use=20the=20unicode=20paragraph=20s?=
 =?UTF-8?q?eparator=20instead=20of=20\n\n?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This should reduce misfires of the second replace and allow for a more
correct count of characters to avoid ellipsizing too much of the content.
---
 wom_tributary/utils/tweet_summarizers.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/wom_tributary/utils/tweet_summarizers.py b/wom_tributary/utils/tweet_summarizers.py
index 078969b..ad01146 100644
--- a/wom_tributary/utils/tweet_summarizers.py
+++ b/wom_tributary/utils/tweet_summarizers.py
@@ -74,12 +74,14 @@ def from_activity_item(item, link_builder):
 
 
 def build_content_excerpt(content_unicode):
-  content_unicode = LINE_BREAK_REGEX.sub("\n\n", content_unicode)
+  unicode_paragraph = "\u2029"
+  content_unicode = LINE_BREAK_REGEX.sub(unicode_paragraph, content_unicode)
   content_unicode = html.unescape(strip_tags(content_unicode)).strip()
   excerpt = content_unicode[:MAX_CONTENT_SIZE_CHARS].strip()
   if len(excerpt) < len(content_unicode):
       excerpt += "(...)"
-  return excerpt.replace("\n\n", "<br>")
+  return excerpt.replace(unicode_paragraph, "<br>")
+
 
 def build_tweet_index_by_tag(data, keep_only_after_datetime, link_builder):
   reverse_index = defaultdict(list)