Introduce a scrubber that filters out everything except line breaks
Add more tests for scrub_html_and_truncate/2
This commit is contained in:
parent
65137044c1
commit
baf7fd2142
@ -6,6 +6,7 @@ defmodule Pleroma.Web.Metadata.Utils do
|
||||
alias Pleroma.Activity
|
||||
alias Pleroma.Emoji
|
||||
alias Pleroma.Formatter
|
||||
alias Pleroma.HTML
|
||||
|
||||
def scrub_html_and_truncate(%{data: %{"content" => content}} = object) do
|
||||
content
|
||||
@ -21,8 +22,9 @@ defmodule Pleroma.Web.Metadata.Utils do
|
||||
# Turns `content` into text and shortens it to `max_length` (default: 200).
#
# Steps, in order: demojify the text, filter the markup through the
# `Pleroma.HTML.Scrubber.BreaksOnly` policy, decode HTML entities, replace
# every `<br>` tag with a single space, then truncate.
def scrub_html_and_truncate(content, max_length \\ 200) when is_binary(content) do
  content
  |> Emoji.Formatter.demojify()
  |> HTML.filter_tags(Pleroma.HTML.Scrubber.BreaksOnly)
  |> HtmlEntities.decode()
  # One pass is sufficient: String.replace/3 substitutes every match, so a
  # second identical replace would never find a remaining <br>.
  |> String.replace(~r/<br\s?\/?>/, " ")
  |> Formatter.truncate(max_length)
end
|
||||
|
||||
|
15
priv/scrubbers/breaks_only.ex
Normal file
15
priv/scrubbers/breaks_only.ex
Normal file
@ -0,0 +1,15 @@
|
||||
defmodule Pleroma.HTML.Scrubber.BreaksOnly do
  @moduledoc """
  An HTML scrubbing policy that keeps line breaks and nothing else.

  The policy strips comments, allows the `br` tag with an empty attribute
  list, and strips everything not covered by those rules.
  """

  alias FastSanitize.Sanitizer.Meta
  require Meta

  Meta.strip_comments()

  # The single allowed element: <br>, with no permitted attributes.
  Meta.allow_tag_with_these_attributes(:br, [])

  # Catch-all for anything the rules above do not cover.
  Meta.strip_everything_not_covered()
end
|
@ -30,5 +30,21 @@ defmodule Pleroma.Web.Metadata.UtilsTest do
|
||||
test "it truncates to specified chars (binaries)" do
  # A limit of 10 is passed explicitly instead of relying on the default.
  truncated = Utils.scrub_html_and_truncate("Pleroma's really cool!", 10)

  assert truncated == "Pleroma..."
end
|
||||
|
||||
# push notifications and link previews should be able to display newlines
|
||||
test "it replaces <br> with compatible HTML entity (binaries)" do
  # NOTE(review): the title mentions an HTML entity, but the expectation
  # below is a plain space between the two lines - confirm which is intended.
  scrubbed = Utils.scrub_html_and_truncate("First line<br>Second line")

  assert scrubbed == "First line Second line"
end
|
||||
|
||||
test "it strips emojis (binaries)" do
  # The trailing :dinosaur: shortcode must not appear in the result.
  input = "Open the door get on the floor everybody walk the dinosaur :dinosaur:"
  expected = "Open the door get on the floor everybody walk the dinosaur"

  assert Utils.scrub_html_and_truncate(input) == expected
end
|
||||
|
||||
test "it strips HTML tags and other entities (binaries)" do
  # Only the text inside <title> and <p> is expected to remain.
  markup = "<title>my title</title> <p>and a paragraph!</p>"

  assert Utils.scrub_html_and_truncate(markup) == "my title and a paragraph!"
end
|
||||
end
|
||||
end
|
||||
|
Loading…
Reference in New Issue
Block a user