Do not rely on cached stripped content. We need control over the scrubbing so that line breaks are preserved.

This commit is contained in:
Mark Felder 2021-06-11 15:35:38 -05:00
parent baf7fd2142
commit 07064f73bc
2 changed files with 21 additions and 6 deletions

View File

@ -3,19 +3,17 @@
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Web.Metadata.Utils do
alias Pleroma.Activity
alias Pleroma.Emoji
alias Pleroma.Formatter
alias Pleroma.HTML
# Turns an object's "content" into scrubbed, truncated text by piping it
# through the steps below.
# NOTE(review): this span is a rendered diff whose +/- markers were lost, so
# pre- and post-commit lines appear together (note the two function heads).
# The head binding `object` presumably belongs with the step that still uses
# `object`; the head binding `_object` presumably belongs with the steps that
# no longer need it — confirm against the actual commit.
def scrub_html_and_truncate(%{data: %{"content" => content}} = object) do
def scrub_html_and_truncate(%{data: %{"content" => content}} = _object) do
content
# html content comes from DB already encoded, decode first and scrub after
|> HtmlEntities.decode()
# NOTE(review): replacing <br> with a space and reading the cached stripped
# HTML look like the removed behaviour (the commit message says not to rely on
# cached stripped content) — confirm.
|> String.replace(~r/<br\s?\/?>/, " ")
|> Activity.HTML.get_cached_stripped_html_for_activity(object, "metadata")
|> Emoji.Formatter.demojify()
# NOTE(review): presumably the BreaksOnly scrubber keeps <br> tags so the
# replacement below can still find them — verify against the scrubber.
|> HTML.filter_tags(Pleroma.HTML.Scrubber.BreaksOnly)
|> HtmlEntities.decode()
# <br> (optionally self-closed) becomes the entity pair &#10;&#13;, which is
# the output the "<br>" test in this commit expects.
|> String.replace(~r/<br\s?\/?>/, "&#10;&#13;")
|> Formatter.truncate()
end

View File

@ -23,6 +23,22 @@ defmodule Pleroma.Web.Metadata.UtilsTest do
assert Utils.scrub_html_and_truncate(note) == "Pleroma's really cool!"
end
# A <br> inside note content must come out as the entity pair &#10;&#13;
# between the two lines, not be dropped or collapsed.
test "it replaces <br> with compatible HTML entity (objects)" do
user = insert(:user)
note =
insert(:note, %{
data: %{
"actor" => user.ap_id,
"id" => "https://pleroma.gov/objects/whatever",
"content" => "First line<br>Second line"
}
})
assert Utils.scrub_html_and_truncate(note) ==
"First line&#10;&#13;Second line"
end
# A plain binary with no markup is returned unchanged; the apostrophe must not
# be turned into an HTML entity.
test "it returns text without encode HTML (binaries)" do
assert Utils.scrub_html_and_truncate("Pleroma's really cool!") == "Pleroma's really cool!"
end
@ -44,7 +60,8 @@ defmodule Pleroma.Web.Metadata.UtilsTest do
end
# Tags are stripped and the &#33; entity is decoded to "!".
# NOTE(review): the two assertions below are the same check; the second is the
# line-wrapped form of the first. They look like the old and new sides of a
# formatting-only diff change shown together — confirm against the commit.
test "it strips HTML tags and other entities (binaries)" do
assert Utils.scrub_html_and_truncate("<title>my title</title> <p>and a paragraph&#33;</p>") == "my title and a paragraph!"
assert Utils.scrub_html_and_truncate("<title>my title</title> <p>and a paragraph&#33;</p>") ==
"my title and a paragraph!"
end
end
end