Metadata.Utils: keep line breaks when scrubbing HTML for previews

scrub_html_and_truncate/2 (binary clause) now filters its input through a
scrubber that keeps only <br> tags, decodes HTML entities, and then rewrites
each remaining <br> as a numeric line-break entity instead of a plain space,
so push notifications and link previews can show newlines.

Adds the Pleroma.HTML.Scrubber.BreaksOnly scrubbing policy and tests for the
binary clause.

NOTE(review): the copy of this patch under review had been passed through an
HTML stripper. The <br> regex, the "&#10;&#13;" replacement string and the
markup inside the test inputs were missing and are restored here by inference
from the test names and expected values -- confirm against the upstream commit
before applying.

diff --git a/lib/pleroma/web/metadata/utils.ex b/lib/pleroma/web/metadata/utils.ex
index 3c0dfb6ad..a519fbf01 100644
--- a/lib/pleroma/web/metadata/utils.ex
+++ b/lib/pleroma/web/metadata/utils.ex
@@ -6,6 +6,7 @@ defmodule Pleroma.Web.Metadata.Utils do
   alias Pleroma.Activity
   alias Pleroma.Emoji
   alias Pleroma.Formatter
+  alias Pleroma.HTML
 
   def scrub_html_and_truncate(%{data: %{"content" => content}} = object) do
     content
@@ -21,8 +22,9 @@ defmodule Pleroma.Web.Metadata.Utils do
   def scrub_html_and_truncate(content, max_length \\ 200) when is_binary(content) do
     content
     |> Emoji.Formatter.demojify()
+    |> HTML.filter_tags(Pleroma.HTML.Scrubber.BreaksOnly)
     |> HtmlEntities.decode()
-    |> String.replace(~r/<br\s?\/?>/, " ")
+    |> String.replace(~r/<br\s?\/?>/, "&#10;&#13;")
     |> Formatter.truncate(max_length)
   end
 
diff --git a/priv/scrubbers/breaks_only.ex b/priv/scrubbers/breaks_only.ex
new file mode 100644
index 000000000..f952c7224
--- /dev/null
+++ b/priv/scrubbers/breaks_only.ex
@@ -0,0 +1,15 @@
+defmodule Pleroma.HTML.Scrubber.BreaksOnly do
+  @moduledoc """
+  An HTML scrubbing policy which limits to linebreaks only.
+  """
+
+  require FastSanitize.Sanitizer.Meta
+  alias FastSanitize.Sanitizer.Meta
+
+  Meta.strip_comments()
+
+  # linebreaks only
+  Meta.allow_tag_with_these_attributes(:br, [])
+
+  Meta.strip_everything_not_covered()
+end
diff --git a/test/pleroma/web/metadata/utils_test.exs b/test/pleroma/web/metadata/utils_test.exs
index 8cdfb8ecc..b5d3e3566 100644
--- a/test/pleroma/web/metadata/utils_test.exs
+++ b/test/pleroma/web/metadata/utils_test.exs
@@ -30,5 +30,21 @@ defmodule Pleroma.Web.Metadata.UtilsTest do
     test "it truncates to specified chars (binaries)" do
       assert Utils.scrub_html_and_truncate("Pleroma's really cool!", 10) == "Pleroma..."
     end
+
+    # push notifications and link previews should be able to display newlines
+    test "it replaces <br> with compatible HTML entity (binaries)" do
+      assert Utils.scrub_html_and_truncate("First line<br>Second line") ==
+               "First line&#10;&#13;Second line"
+    end
+
+    test "it strips emojis (binaries)" do
+      assert Utils.scrub_html_and_truncate(
+               "Open the door get on the floor everybody walk the dinosaur :dinosaur:"
+             ) == "Open the door get on the floor everybody walk the dinosaur"
+    end
+
+    test "it strips HTML tags and other entities (binaries)" do
+      assert Utils.scrub_html_and_truncate("<title>my title</title> <p>and a paragraph&#33;</p>") == "my title and a paragraph!"
+    end
   end
 end