diff --git a/lib/pleroma/web/metadata/utils.ex b/lib/pleroma/web/metadata/utils.ex
index 3c0dfb6ad..a519fbf01 100644
--- a/lib/pleroma/web/metadata/utils.ex
+++ b/lib/pleroma/web/metadata/utils.ex
@@ -6,6 +6,7 @@ defmodule Pleroma.Web.Metadata.Utils do
alias Pleroma.Activity
alias Pleroma.Emoji
alias Pleroma.Formatter
+ alias Pleroma.HTML
def scrub_html_and_truncate(%{data: %{"content" => content}} = object) do
content
@@ -21,8 +22,9 @@ defmodule Pleroma.Web.Metadata.Utils do
def scrub_html_and_truncate(content, max_length \\ 200) when is_binary(content) do
content
|> Emoji.Formatter.demojify()
+ |> HTML.filter_tags(Pleroma.HTML.Scrubber.BreaksOnly)
|> HtmlEntities.decode()
- |> String.replace(~r/<br\s?\/?>/, " ")
+ |> String.replace(~r/<br\s?\/?>/, "&#10;&#13;")
|> Formatter.truncate(max_length)
end
diff --git a/priv/scrubbers/breaks_only.ex b/priv/scrubbers/breaks_only.ex
new file mode 100644
index 000000000..f952c7224
--- /dev/null
+++ b/priv/scrubbers/breaks_only.ex
@@ -0,0 +1,15 @@
+defmodule Pleroma.HTML.Scrubber.BreaksOnly do
+ @moduledoc """
+ An HTML scrubbing policy that allows only the `<br>` tag (no attributes); everything else is stripped.
+ """
+
+ require FastSanitize.Sanitizer.Meta
+ alias FastSanitize.Sanitizer.Meta
+
+ Meta.strip_comments()
+
+ # Allow `<br>` with an empty attribute list so line breaks pass through the scrubber.
+ Meta.allow_tag_with_these_attributes(:br, [])
+
+ Meta.strip_everything_not_covered()
+end
diff --git a/test/pleroma/web/metadata/utils_test.exs b/test/pleroma/web/metadata/utils_test.exs
index 8cdfb8ecc..b5d3e3566 100644
--- a/test/pleroma/web/metadata/utils_test.exs
+++ b/test/pleroma/web/metadata/utils_test.exs
@@ -30,5 +30,21 @@ defmodule Pleroma.Web.Metadata.UtilsTest do
test "it truncates to specified chars (binaries)" do
assert Utils.scrub_html_and_truncate("Pleroma's really cool!", 10) == "Pleroma..."
end
+
+ # push notifications and link previews should be able to display newlines
+ test "it replaces <br> with compatible HTML entity (binaries)" do
+ assert Utils.scrub_html_and_truncate("First line<br>Second line") ==
+ "First line&#10;&#13;Second line"
+ end
+
+ test "it strips emojis (binaries)" do
+ input = "Open the door get on the floor everybody walk the dinosaur :dinosaur:"
+ expected = "Open the door get on the floor everybody walk the dinosaur"
+ assert Utils.scrub_html_and_truncate(input) == expected
+ end
+
+ test "it strips HTML tags and other entities (binaries)" do
+ assert Utils.scrub_html_and_truncate("<title>my title</title> <p>and a paragraph&#33;</p>") == "my title and a paragraph!"
+ end
end
end