Compare commits

...

11 Commits

Author SHA1 Message Date
Mark Felder
5da53d3471 Remove tests; function is no longer used 2021-06-12 13:07:42 -05:00
Mark Felder
e0a521bbfb Web Push notifications should not embed HTML for preserving newlines, so give it its own filtering 2021-06-12 12:41:12 -05:00
Mark Felder
bb4130d48c Demojify does not decode for us 2021-06-11 19:23:35 -05:00
Mark Felder
45146b0010 Demojify already decodes 2021-06-11 18:06:25 -05:00
Mark Felder
f48f3e8b46 Clarify when we are scrubbing vs when we are filtering so we can keep newlines 2021-06-11 18:03:00 -05:00
Mark Felder
04b5f19a14 More tests for filter_html_and_truncate/1 2021-06-11 16:04:06 -05:00
Mark Felder
bb4ced0eb5 scrub_html_and_truncate/1 -> filter_html_and_truncate/1
They shouldn't share the same name when /1 was used for a different type of incoming data anyway
2021-06-11 15:58:55 -05:00
Mark Felder
07064f73bc Do not rely on cached stripped content. We need the control to preserve the breaks. 2021-06-11 15:35:38 -05:00
Mark Felder
baf7fd2142 Introduce a scrubber that filters only everything except breaks
Add more tests for scrub_html_and_truncate/2
2021-06-11 14:35:05 -05:00
Mark Felder
65137044c1 strip_html/1 is only called for titles of RSS feeds now, so move it 2021-06-11 13:59:32 -05:00
Mark Felder
2ad52086b8 Fix grouping of tests and validate truncation is working 2021-06-11 13:15:29 -05:00
8 changed files with 131 additions and 46 deletions

View File

@ -7,6 +7,7 @@ defmodule Pleroma.Web.Feed.FeedView do
use Pleroma.Web, :view
alias Pleroma.Formatter
alias Pleroma.HTML
alias Pleroma.Object
alias Pleroma.User
alias Pleroma.Web.MediaProxy
@ -70,7 +71,9 @@ defmodule Pleroma.Web.Feed.FeedView do
def activity_title(%{"content" => content}, opts \\ %{}) do
content
|> Pleroma.Web.Metadata.Utils.scrub_html()
|> HtmlEntities.decode()
|> String.replace(~r/<br\s?\/?>/, " ")
|> HTML.strip_tags()
|> Pleroma.Emoji.Formatter.demojify()
|> Formatter.truncate(opts[:max_length], opts[:omission])
|> escape()

View File

@ -19,7 +19,7 @@ defmodule Pleroma.Web.Metadata.Providers.OpenGraph do
user: user
}) do
attachments = build_attachments(object)
scrubbed_content = Utils.scrub_html_and_truncate(object)
filtered_content = Utils.filter_html_and_truncate(object)
[
{:meta,
@ -31,7 +31,7 @@ defmodule Pleroma.Web.Metadata.Providers.OpenGraph do
{:meta,
[
property: "og:description",
content: scrubbed_content
content: filtered_content
], []},
{:meta, [property: "og:type", content: "article"], []}
] ++
@ -49,7 +49,7 @@ defmodule Pleroma.Web.Metadata.Providers.OpenGraph do
@impl Provider
def build_tags(%{user: user}) do
with truncated_bio = Utils.scrub_html_and_truncate(user.bio) do
with truncated_bio = Utils.filter_html_and_truncate(user.bio) do
[
{:meta,
[

View File

@ -16,11 +16,11 @@ defmodule Pleroma.Web.Metadata.Providers.TwitterCard do
@impl Provider
def build_tags(%{activity_id: id, object: object, user: user}) do
attachments = build_attachments(id, object)
scrubbed_content = Utils.scrub_html_and_truncate(object)
filtered_content = Utils.filter_html_and_truncate(object)
[
title_tag(user),
{:meta, [property: "twitter:description", content: scrubbed_content], []}
{:meta, [property: "twitter:description", content: filtered_content], []}
] ++
if attachments == [] or Metadata.activity_nsfw?(object) do
[
@ -34,7 +34,7 @@ defmodule Pleroma.Web.Metadata.Providers.TwitterCard do
@impl Provider
def build_tags(%{user: user}) do
with truncated_bio = Utils.scrub_html_and_truncate(user.bio) do
with truncated_bio = Utils.filter_html_and_truncate(user.bio) do
[
title_tag(user),
{:meta, [property: "twitter:description", content: truncated_bio], []},

View File

@ -3,39 +3,15 @@
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Web.Metadata.Utils do
alias Pleroma.Activity
alias Pleroma.Emoji
alias Pleroma.Formatter
alias Pleroma.HTML
def scrub_html_and_truncate(%{data: %{"content" => content}} = object) do
content
# html content comes from DB already encoded, decode first and scrub after
|> HtmlEntities.decode()
|> String.replace(~r/<br\s?\/?>/, " ")
|> Activity.HTML.get_cached_stripped_html_for_activity(object, "metadata")
|> Emoji.Formatter.demojify()
|> HtmlEntities.decode()
|> Formatter.truncate()
end
# Object clause: pulls the "content" field out of the object's data and hands it
# to do_filter_html_and_truncate/1, i.e. without an explicit length limit.
def filter_html_and_truncate(%{data: %{"content" => content}} = _object) do
  do_filter_html_and_truncate(content)
end
def scrub_html_and_truncate(content, max_length \\ 200) when is_binary(content) do
content
|> scrub_html
|> Emoji.Formatter.demojify()
|> HtmlEntities.decode()
|> Formatter.truncate(max_length)
end
def scrub_html(content) when is_binary(content) do
content
# html content comes from DB already encoded, decode first and scrub after
|> HtmlEntities.decode()
|> String.replace(~r/<br\s?\/?>/, " ")
|> HTML.strip_tags()
end
def scrub_html(content), do: content
# Content clause: forwards `content` together with `max_length` to
# do_filter_html_and_truncate/2. When the caller omits `max_length`, `nil` is
# what gets forwarded.
def filter_html_and_truncate(content, max_length \\ nil) do
  do_filter_html_and_truncate(content, max_length)
end
def user_name_string(user) do
"#{user.name} " <>
@ -52,4 +28,15 @@ defmodule Pleroma.Web.Metadata.Utils do
String.starts_with?(media_type, support_type)
end)
end
# Converts stored HTML `content` into the text used for metadata descriptions.
#
# Steps, in order: decode HTML entities, run `Emoji.Formatter.demojify/1`,
# filter the markup with the `Pleroma.HTML.Scrubber.BreaksOnly` policy, decode
# entities a second time, replace every remaining `<br>` with the literal text
# "&#10;&#13;", and finally truncate.
#
# `max_length` may arrive as `nil`: `filter_html_and_truncate/2` defaults its
# own `max_length` to `nil` and forwards it explicitly, which bypasses the
# `\\ 200` default declared on this function. A `nil` limit is therefore mapped
# to the same default of 200 rather than being passed on to
# `Formatter.truncate/2` as-is.
defp do_filter_html_and_truncate(content, max_length \\ 200) when is_binary(content) do
  # html content comes from DB already encoded
  content
  |> HtmlEntities.decode()
  |> Emoji.Formatter.demojify()
  |> HTML.filter_tags(Pleroma.HTML.Scrubber.BreaksOnly)
  |> HtmlEntities.decode()
  |> String.replace(~r/<br\s?\/?>/, "&#10;&#13;")
  |> Formatter.truncate(max_length || 200)
end
end

View File

@ -6,11 +6,13 @@ defmodule Pleroma.Web.Push.Impl do
@moduledoc "The module represents implementation push web notification"
alias Pleroma.Activity
alias Pleroma.Emoji
alias Pleroma.Formatter
alias Pleroma.HTML
alias Pleroma.Notification
alias Pleroma.Object
alias Pleroma.Repo
alias Pleroma.User
alias Pleroma.Web.Metadata.Utils
alias Pleroma.Web.Push.Subscription
require Logger
@ -127,7 +129,7 @@ defmodule Pleroma.Web.Push.Impl do
def format_body(_activity, actor, %{data: %{"type" => "ChatMessage", "content" => content}}, _) do
case content do
nil -> "@#{actor.nickname}: (Attachment)"
content -> "@#{actor.nickname}: #{Utils.scrub_html_and_truncate(content, 80)}"
content -> "@#{actor.nickname}: #{filter_html_and_truncate(content, 80)}"
end
end
@ -137,7 +139,7 @@ defmodule Pleroma.Web.Push.Impl do
%{data: %{"content" => content}},
_mastodon_type
) do
"@#{actor.nickname}: #{Utils.scrub_html_and_truncate(content, 80)}"
"@#{actor.nickname}: #{filter_html_and_truncate(content, 80)}"
end
def format_body(
@ -146,7 +148,7 @@ defmodule Pleroma.Web.Push.Impl do
%{data: %{"content" => content}},
_mastodon_type
) do
"@#{actor.nickname} repeated: #{Utils.scrub_html_and_truncate(content, 80)}"
"@#{actor.nickname} repeated: #{filter_html_and_truncate(content, 80)}"
end
def format_body(
@ -192,4 +194,15 @@ defmodule Pleroma.Web.Push.Impl do
type -> "New #{String.capitalize(type || "event")}"
end
end
# Builds the text body of a push notification from stored HTML `content`:
# entities are decoded, the text goes through `Emoji.Formatter.demojify/1`, the
# markup is filtered with the BreaksOnly policy, and each remaining `<br>` is
# turned into "\r\n" before truncating to `max_length`.
defp filter_html_and_truncate(content, max_length) when is_binary(content) do
  # html content comes from DB already encoded
  decoded = HtmlEntities.decode(content)
  without_emoji = Emoji.Formatter.demojify(decoded)
  breaks_only = HTML.filter_tags(without_emoji, Pleroma.HTML.Scrubber.BreaksOnly)

  text =
    breaks_only
    |> HtmlEntities.decode()
    |> String.replace(~r/<br\s?\/?>/, "\r\n")

  Formatter.truncate(text, max_length)
end
end

View File

@ -0,0 +1,15 @@
defmodule Pleroma.HTML.Scrubber.BreaksOnly do
@moduledoc """
An HTML scrubbing policy which limits the allowed markup to linebreak (`<br>`) tags only.
"""
# Meta is required so that its macros can be invoked directly in the module body below.
require FastSanitize.Sanitizer.Meta
alias FastSanitize.Sanitizer.Meta
# NOTE(review): presumably removes HTML comments from the input - confirm against FastSanitize.
Meta.strip_comments()
# linebreaks only: the :br tag is allowed, with an empty list of permitted attributes
Meta.allow_tag_with_these_attributes(:br, [])
# NOTE(review): presumably the catch-all for anything not allowed above, so it likely has to stay last - confirm.
Meta.strip_everything_not_covered()
end

View File

@ -7,8 +7,8 @@ defmodule Pleroma.Web.Metadata.UtilsTest do
import Pleroma.Factory
alias Pleroma.Web.Metadata.Utils
describe "scrub_html_and_truncate/1" do
test "it returns text without encode HTML" do
describe "filter_html_and_truncate/1" do
test "it returns text without encoded HTML entities" do
user = insert(:user)
note =
@ -20,13 +20,55 @@ defmodule Pleroma.Web.Metadata.UtilsTest do
}
})
assert Utils.scrub_html_and_truncate(note) == "Pleroma's really cool!"
end
assert Utils.filter_html_and_truncate(note) == "Pleroma's really cool!"
end
describe "scrub_html_and_truncate/2" do
test "it returns text without encode HTML" do
assert Utils.scrub_html_and_truncate("Pleroma's really cool!") == "Pleroma's really cool!"
# Checks that a <br> in a note's content comes back from
# Utils.filter_html_and_truncate/1 as the literal entity text "&#10;&#13;".
test "it replaces <br> with compatible HTML entity (meta tags, push notifications)" do
user = insert(:user)
# Note fixture whose content holds two lines separated by a single <br>.
note =
insert(:note, %{
data: %{
"actor" => user.ap_id,
"id" => "https://pleroma.gov/objects/whatever",
"content" => "First line<br>Second line"
}
})
assert Utils.filter_html_and_truncate(note) ==
"First line&#10;&#13;Second line"
end
# Checks that the ":firefox:" shortcode does not appear in the result of
# Utils.filter_html_and_truncate/1; the expected text also has no trailing space.
test "it strips emojis" do
user = insert(:user)
# Note fixture whose content ends with an emoji shortcode.
note =
insert(:note, %{
data: %{
"actor" => user.ap_id,
"id" => "https://pleroma.gov/objects/whatever",
"content" => "Mozilla Firefox :firefox:"
}
})
assert Utils.filter_html_and_truncate(note) ==
"Mozilla Firefox"
end
# Checks that <title> and <p> tags are removed while their text is kept, and
# that the "&#33;" entity comes back as "!".
test "it strips HTML tags and other entities" do
user = insert(:user)
# Note fixture mixing two different tags with a numeric character entity.
note =
insert(:note, %{
data: %{
"actor" => user.ap_id,
"id" => "https://pleroma.gov/objects/whatever",
"content" => "<title>my title</title> <p>and a paragraph&#33;</p>"
}
})
assert Utils.filter_html_and_truncate(note) ==
"my title and a paragraph!"
end
end
end

View File

@ -359,4 +359,29 @@ defmodule Pleroma.Web.Push.ImplTest do
}
end
end
# Checks that a multi-line status posted through CommonAPI.post/2 produces a
# push notification body in which the lines are separated by "\r\n".
test "body for create activity handles newlines" do
user = insert(:user, nickname: "bob")
# alice only needs to exist so that the "@alice" mention in the status refers to a user.
_user2 = insert(:user, nickname: "alice")
{:ok, activity} =
CommonAPI.post(user, %{
status: """
@alice Line one
Line two
Line three
"""
})
object = Object.normalize(activity, fetch: false)
# The expected body is prefixed with the author's nickname and has no trailing line break.
assert Impl.format_body(
%{
activity: activity
},
user,
object
) ==
"@bob: @alice Line one\r\nLine two\r\nLine three"
end
end