Compare commits

...

11 Commits

Author SHA1 Message Date
Mark Felder
5da53d3471 Remove tests; function is no longer used 2021-06-12 13:07:42 -05:00
Mark Felder
e0a521bbfb Web Push notifications should not embed HTML for preserving newlines, so give it its own filtering 2021-06-12 12:41:12 -05:00
Mark Felder
bb4130d48c Demojify does not decode for us 2021-06-11 19:23:35 -05:00
Mark Felder
45146b0010 Demojify already decodes 2021-06-11 18:06:25 -05:00
Mark Felder
f48f3e8b46 Clarify when we are scrubbing vs when we are filtering so we can keep newlines 2021-06-11 18:03:00 -05:00
Mark Felder
04b5f19a14 More tests for filter_html_and_truncate/1 2021-06-11 16:04:06 -05:00
Mark Felder
bb4ced0eb5 scrub_html_and_truncate/1 -> filter_html_and_truncate/1
They shouldn't share the same name when /1 was used for a different type of incoming data anyway
2021-06-11 15:58:55 -05:00
Mark Felder
07064f73bc Do not rely on cached stripped content. We need the control to preserve the breaks. 2021-06-11 15:35:38 -05:00
Mark Felder
baf7fd2142 Introduce a scrubber that filters out everything except line breaks
Add more tests for scrub_html_and_truncate/2
2021-06-11 14:35:05 -05:00
Mark Felder
65137044c1 strip_html/1 is only called for titles of RSS feeds now, so move it 2021-06-11 13:59:32 -05:00
Mark Felder
2ad52086b8 Fix grouping of tests and validate truncation is working 2021-06-11 13:15:29 -05:00
8 changed files with 131 additions and 46 deletions

View File

@ -7,6 +7,7 @@ defmodule Pleroma.Web.Feed.FeedView do
use Pleroma.Web, :view use Pleroma.Web, :view
alias Pleroma.Formatter alias Pleroma.Formatter
alias Pleroma.HTML
alias Pleroma.Object alias Pleroma.Object
alias Pleroma.User alias Pleroma.User
alias Pleroma.Web.MediaProxy alias Pleroma.Web.MediaProxy
@ -70,7 +71,9 @@ defmodule Pleroma.Web.Feed.FeedView do
def activity_title(%{"content" => content}, opts \\ %{}) do def activity_title(%{"content" => content}, opts \\ %{}) do
content content
|> Pleroma.Web.Metadata.Utils.scrub_html() |> HtmlEntities.decode()
|> String.replace(~r/<br\s?\/?>/, " ")
|> HTML.strip_tags()
|> Pleroma.Emoji.Formatter.demojify() |> Pleroma.Emoji.Formatter.demojify()
|> Formatter.truncate(opts[:max_length], opts[:omission]) |> Formatter.truncate(opts[:max_length], opts[:omission])
|> escape() |> escape()

View File

@ -19,7 +19,7 @@ defmodule Pleroma.Web.Metadata.Providers.OpenGraph do
user: user user: user
}) do }) do
attachments = build_attachments(object) attachments = build_attachments(object)
scrubbed_content = Utils.scrub_html_and_truncate(object) filtered_content = Utils.filter_html_and_truncate(object)
[ [
{:meta, {:meta,
@ -31,7 +31,7 @@ defmodule Pleroma.Web.Metadata.Providers.OpenGraph do
{:meta, {:meta,
[ [
property: "og:description", property: "og:description",
content: scrubbed_content content: filtered_content
], []}, ], []},
{:meta, [property: "og:type", content: "article"], []} {:meta, [property: "og:type", content: "article"], []}
] ++ ] ++
@ -49,7 +49,7 @@ defmodule Pleroma.Web.Metadata.Providers.OpenGraph do
@impl Provider @impl Provider
def build_tags(%{user: user}) do def build_tags(%{user: user}) do
with truncated_bio = Utils.scrub_html_and_truncate(user.bio) do with truncated_bio = Utils.filter_html_and_truncate(user.bio) do
[ [
{:meta, {:meta,
[ [

View File

@ -16,11 +16,11 @@ defmodule Pleroma.Web.Metadata.Providers.TwitterCard do
@impl Provider @impl Provider
def build_tags(%{activity_id: id, object: object, user: user}) do def build_tags(%{activity_id: id, object: object, user: user}) do
attachments = build_attachments(id, object) attachments = build_attachments(id, object)
scrubbed_content = Utils.scrub_html_and_truncate(object) filtered_content = Utils.filter_html_and_truncate(object)
[ [
title_tag(user), title_tag(user),
{:meta, [property: "twitter:description", content: scrubbed_content], []} {:meta, [property: "twitter:description", content: filtered_content], []}
] ++ ] ++
if attachments == [] or Metadata.activity_nsfw?(object) do if attachments == [] or Metadata.activity_nsfw?(object) do
[ [
@ -34,7 +34,7 @@ defmodule Pleroma.Web.Metadata.Providers.TwitterCard do
@impl Provider @impl Provider
def build_tags(%{user: user}) do def build_tags(%{user: user}) do
with truncated_bio = Utils.scrub_html_and_truncate(user.bio) do with truncated_bio = Utils.filter_html_and_truncate(user.bio) do
[ [
title_tag(user), title_tag(user),
{:meta, [property: "twitter:description", content: truncated_bio], []}, {:meta, [property: "twitter:description", content: truncated_bio], []},

View File

@ -3,39 +3,15 @@
# SPDX-License-Identifier: AGPL-3.0-only # SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Web.Metadata.Utils do defmodule Pleroma.Web.Metadata.Utils do
alias Pleroma.Activity
alias Pleroma.Emoji alias Pleroma.Emoji
alias Pleroma.Formatter alias Pleroma.Formatter
alias Pleroma.HTML alias Pleroma.HTML
def scrub_html_and_truncate(%{data: %{"content" => content}} = object) do def filter_html_and_truncate(%{data: %{"content" => content}} = _object),
content do: do_filter_html_and_truncate(content)
# html content comes from DB already encoded, decode first and scrub after
|> HtmlEntities.decode()
|> String.replace(~r/<br\s?\/?>/, " ")
|> Activity.HTML.get_cached_stripped_html_for_activity(object, "metadata")
|> Emoji.Formatter.demojify()
|> HtmlEntities.decode()
|> Formatter.truncate()
end
def scrub_html_and_truncate(content, max_length \\ 200) when is_binary(content) do def filter_html_and_truncate(content, max_length \\ nil),
content do: do_filter_html_and_truncate(content, max_length)
|> scrub_html
|> Emoji.Formatter.demojify()
|> HtmlEntities.decode()
|> Formatter.truncate(max_length)
end
def scrub_html(content) when is_binary(content) do
content
# html content comes from DB already encoded, decode first and scrub after
|> HtmlEntities.decode()
|> String.replace(~r/<br\s?\/?>/, " ")
|> HTML.strip_tags()
end
def scrub_html(content), do: content
def user_name_string(user) do def user_name_string(user) do
"#{user.name} " <> "#{user.name} " <>
@ -52,4 +28,15 @@ defmodule Pleroma.Web.Metadata.Utils do
String.starts_with?(media_type, support_type) String.starts_with?(media_type, support_type)
end) end)
end end
# Reduces stored HTML content to truncated text for metadata descriptions.
#
# Every tag except <br> is filtered out, custom emoji shortcodes are removed,
# and each remaining <br> is rewritten as the numeric entities "&#10;&#13;".
# `max_length` is handed straight to `Formatter.truncate/2` and defaults to 200.
defp do_filter_html_and_truncate(content, max_length \\ 200) when is_binary(content) do
  # Content arrives from the DB entity-encoded, so decode it before filtering.
  decoded = HtmlEntities.decode(content)

  # Strip emoji shortcodes first, then let the BreaksOnly policy drop all
  # markup other than line breaks.
  breaks_only =
    decoded
    |> Emoji.Formatter.demojify()
    |> HTML.filter_tags(Pleroma.HTML.Scrubber.BreaksOnly)

  # A second decode pass runs after filtering; only then are the surviving
  # <br> tags swapped for entities.
  # NOTE(review): the entity order is LF (10) then CR (13), i.e. the reverse of
  # a conventional CRLF pair. The test suite pins this exact value, so it is
  # reproduced unchanged here; confirm whether "&#13;&#10;" was intended.
  with_entities =
    String.replace(HtmlEntities.decode(breaks_only), ~r/<br\s?\/?>/, "&#10;&#13;")

  Formatter.truncate(with_entities, max_length)
end
end end

View File

@ -6,11 +6,13 @@ defmodule Pleroma.Web.Push.Impl do
@moduledoc "The module represents implementation push web notification" @moduledoc "The module represents implementation push web notification"
alias Pleroma.Activity alias Pleroma.Activity
alias Pleroma.Emoji
alias Pleroma.Formatter
alias Pleroma.HTML
alias Pleroma.Notification alias Pleroma.Notification
alias Pleroma.Object alias Pleroma.Object
alias Pleroma.Repo alias Pleroma.Repo
alias Pleroma.User alias Pleroma.User
alias Pleroma.Web.Metadata.Utils
alias Pleroma.Web.Push.Subscription alias Pleroma.Web.Push.Subscription
require Logger require Logger
@ -127,7 +129,7 @@ defmodule Pleroma.Web.Push.Impl do
def format_body(_activity, actor, %{data: %{"type" => "ChatMessage", "content" => content}}, _) do def format_body(_activity, actor, %{data: %{"type" => "ChatMessage", "content" => content}}, _) do
case content do case content do
nil -> "@#{actor.nickname}: (Attachment)" nil -> "@#{actor.nickname}: (Attachment)"
content -> "@#{actor.nickname}: #{Utils.scrub_html_and_truncate(content, 80)}" content -> "@#{actor.nickname}: #{filter_html_and_truncate(content, 80)}"
end end
end end
@ -137,7 +139,7 @@ defmodule Pleroma.Web.Push.Impl do
%{data: %{"content" => content}}, %{data: %{"content" => content}},
_mastodon_type _mastodon_type
) do ) do
"@#{actor.nickname}: #{Utils.scrub_html_and_truncate(content, 80)}" "@#{actor.nickname}: #{filter_html_and_truncate(content, 80)}"
end end
def format_body( def format_body(
@ -146,7 +148,7 @@ defmodule Pleroma.Web.Push.Impl do
%{data: %{"content" => content}}, %{data: %{"content" => content}},
_mastodon_type _mastodon_type
) do ) do
"@#{actor.nickname} repeated: #{Utils.scrub_html_and_truncate(content, 80)}" "@#{actor.nickname} repeated: #{filter_html_and_truncate(content, 80)}"
end end
def format_body( def format_body(
@ -192,4 +194,15 @@ defmodule Pleroma.Web.Push.Impl do
type -> "New #{String.capitalize(type || "event")}" type -> "New #{String.capitalize(type || "event")}"
end end
end end
# Builds the plain-text excerpt used in Web Push notification bodies.
#
# Unlike the metadata variant, line breaks become a literal "\r\n" rather than
# HTML entities, because push payloads are not rendered as HTML.
defp filter_html_and_truncate(content, max_length) when is_binary(content) do
  # Content arrives from the DB entity-encoded, so decode it before filtering.
  decoded = HtmlEntities.decode(content)

  # Remove emoji shortcodes, then filter out every tag except <br>.
  breaks_only =
    decoded
    |> Emoji.Formatter.demojify()
    |> HTML.filter_tags(Pleroma.HTML.Scrubber.BreaksOnly)

  # Decode once more after filtering, then turn each surviving <br> into CRLF.
  plain_text = String.replace(HtmlEntities.decode(breaks_only), ~r/<br\s?\/?>/, "\r\n")

  Formatter.truncate(plain_text, max_length)
end
end end

View File

@ -0,0 +1,15 @@
defmodule Pleroma.HTML.Scrubber.BreaksOnly do
@moduledoc """
An HTML scrubbing policy that keeps line-break (`<br>`) tags and nothing else.

The policy is assembled at compile time from `FastSanitize.Sanitizer.Meta`
macros. It is passed as the scrubber argument to `Pleroma.HTML.filter_tags/2`
by callers that need to turn HTML content into plain text while still knowing
where the line breaks were.
"""
# Meta is a macro module, so it has to be required before its macros are invoked.
require FastSanitize.Sanitizer.Meta
alias FastSanitize.Sanitizer.Meta
# Presumably drops HTML comments from the input - confirm against FastSanitize docs.
Meta.strip_comments()
# The only allowed tag is <br>, and it is allowed with an empty attribute list.
Meta.allow_tag_with_these_attributes(:br, [])
# Catch-all for everything not explicitly allowed above; kept as the last rule.
Meta.strip_everything_not_covered()
end

View File

@ -7,8 +7,8 @@ defmodule Pleroma.Web.Metadata.UtilsTest do
import Pleroma.Factory import Pleroma.Factory
alias Pleroma.Web.Metadata.Utils alias Pleroma.Web.Metadata.Utils
describe "scrub_html_and_truncate/1" do describe "filter_html_and_truncate/1" do
test "it returns text without encode HTML" do test "it returns text without encoded HTML entities" do
user = insert(:user) user = insert(:user)
note = note =
@ -20,13 +20,55 @@ defmodule Pleroma.Web.Metadata.UtilsTest do
} }
}) })
assert Utils.scrub_html_and_truncate(note) == "Pleroma's really cool!" assert Utils.filter_html_and_truncate(note) == "Pleroma's really cool!"
end end
end
describe "scrub_html_and_truncate/2" do test "it replaces <br> with compatible HTML entity (meta tags, push notifications)" do
test "it returns text without encode HTML" do user = insert(:user)
assert Utils.scrub_html_and_truncate("Pleroma's really cool!") == "Pleroma's really cool!"
note =
insert(:note, %{
data: %{
"actor" => user.ap_id,
"id" => "https://pleroma.gov/objects/whatever",
"content" => "First line<br>Second line"
}
})
assert Utils.filter_html_and_truncate(note) ==
"First line&#10;&#13;Second line"
end
test "it strips emojis" do
user = insert(:user)
note =
insert(:note, %{
data: %{
"actor" => user.ap_id,
"id" => "https://pleroma.gov/objects/whatever",
"content" => "Mozilla Firefox :firefox:"
}
})
assert Utils.filter_html_and_truncate(note) ==
"Mozilla Firefox"
end
test "it strips HTML tags and other entities" do
user = insert(:user)
note =
insert(:note, %{
data: %{
"actor" => user.ap_id,
"id" => "https://pleroma.gov/objects/whatever",
"content" => "<title>my title</title> <p>and a paragraph&#33;</p>"
}
})
assert Utils.filter_html_and_truncate(note) ==
"my title and a paragraph!"
end end
end end
end end

View File

@ -359,4 +359,29 @@ defmodule Pleroma.Web.Push.ImplTest do
} }
end end
end end
# Checks that a multi-line status yields a push notification body whose lines
# are separated by a literal "\r\n" instead of HTML markup.
test "body for create activity handles newlines" do
user = insert(:user, nickname: "bob")
# The binding is unused; presumably alice is inserted so the "@alice" mention
# in the status below refers to an existing user - confirm.
_user2 = insert(:user, nickname: "alice")
{:ok, activity} =
CommonAPI.post(user, %{
# Three-line status supplied as a heredoc.
status: """
@alice Line one
Line two
Line three
"""
})
# fetch: false - normalize the activity's object without fetching it.
object = Object.normalize(activity, fetch: false)
# format_body is called with three arguments here, i.e. without a mastodon type.
assert Impl.format_body(
%{
activity: activity
},
user,
object
) ==
"@bob: @alice Line one\r\nLine two\r\nLine three"
end
end end