Clarify when we are scrubbing vs when we are filtering so we can keep newlines

This commit is contained in:
Mark Felder 2021-06-11 18:03:00 -05:00
parent 04b5f19a14
commit f48f3e8b46
3 changed files with 37 additions and 24 deletions

View File

@ -19,7 +19,7 @@ defmodule Pleroma.Web.Metadata.Providers.OpenGraph do
user: user user: user
}) do }) do
attachments = build_attachments(object) attachments = build_attachments(object)
scrubbed_content = Utils.scrub_html_and_truncate(object) filtered_content = Utils.filter_html_and_truncate(object)
[ [
{:meta, {:meta,
@ -31,7 +31,7 @@ defmodule Pleroma.Web.Metadata.Providers.OpenGraph do
{:meta, {:meta,
[ [
property: "og:description", property: "og:description",
content: scrubbed_content content: filtered_content
], []}, ], []},
{:meta, [property: "og:type", content: "article"], []} {:meta, [property: "og:type", content: "article"], []}
] ++ ] ++
@ -49,7 +49,7 @@ defmodule Pleroma.Web.Metadata.Providers.OpenGraph do
@impl Provider @impl Provider
def build_tags(%{user: user}) do def build_tags(%{user: user}) do
with truncated_bio = Utils.scrub_html_and_truncate(user.bio) do with truncated_bio = Utils.filter_html_and_truncate(user.bio) do
[ [
{:meta, {:meta,
[ [

View File

@ -16,11 +16,11 @@ defmodule Pleroma.Web.Metadata.Providers.TwitterCard do
@impl Provider @impl Provider
def build_tags(%{activity_id: id, object: object, user: user}) do def build_tags(%{activity_id: id, object: object, user: user}) do
attachments = build_attachments(id, object) attachments = build_attachments(id, object)
scrubbed_content = Utils.scrub_html_and_truncate(object) filtered_content = Utils.filter_html_and_truncate(object)
[ [
title_tag(user), title_tag(user),
{:meta, [property: "twitter:description", content: scrubbed_content], []} {:meta, [property: "twitter:description", content: filtered_content], []}
] ++ ] ++
if attachments == [] or Metadata.activity_nsfw?(object) do if attachments == [] or Metadata.activity_nsfw?(object) do
[ [
@ -34,7 +34,7 @@ defmodule Pleroma.Web.Metadata.Providers.TwitterCard do
@impl Provider @impl Provider
def build_tags(%{user: user}) do def build_tags(%{user: user}) do
with truncated_bio = Utils.scrub_html_and_truncate(user.bio) do with truncated_bio = Utils.filter_html_and_truncate(user.bio) do
[ [
title_tag(user), title_tag(user),
{:meta, [property: "twitter:description", content: truncated_bio], []}, {:meta, [property: "twitter:description", content: truncated_bio], []},

View File

@ -7,25 +7,17 @@ defmodule Pleroma.Web.Metadata.Utils do
alias Pleroma.Formatter alias Pleroma.Formatter
alias Pleroma.HTML alias Pleroma.HTML
def filter_html_and_truncate(%{data: %{"content" => content}} = _object) do def filter_html_and_truncate(%{data: %{"content" => content}} = _object),
content do: do_filter_html_and_truncate(content)
# html content comes from DB already encoded, decode first and scrub after
|> Emoji.Formatter.demojify()
|> HTML.filter_tags(Pleroma.HTML.Scrubber.BreaksOnly)
|> HtmlEntities.decode()
|> String.replace(~r/<br\s?\/?>/, "&#10;&#13;")
|> Formatter.truncate()
end
def scrub_html_and_truncate(content, max_length \\ 200) when is_binary(content) do def filter_html_and_truncate(content, max_length \\ nil),
content do: do_filter_html_and_truncate(content, max_length)
|> Emoji.Formatter.demojify()
|> HtmlEntities.decode() def scrub_html_and_truncate(%{data: %{"content" => content}} = _object),
|> String.replace(~r/<br\s?\/?>/, " ") do: do_scrub_html_and_truncate(content)
|> HTML.strip_tags()
|> HtmlEntities.decode() def scrub_html_and_truncate(content, max_length \\ nil),
|> Formatter.truncate(max_length) do: do_scrub_html_and_truncate(content, max_length)
end
def user_name_string(user) do def user_name_string(user) do
"#{user.name} " <> "#{user.name} " <>
@ -42,4 +34,25 @@ defmodule Pleroma.Web.Metadata.Utils do
String.starts_with?(media_type, support_type) String.starts_with?(media_type, support_type)
end) end)
end end
# Filters HTML `content` down to text plus line breaks, then truncates it.
#
#   * `content`    - a binary (enforced by the `is_binary/1` guard).
#   * `max_length` - limit handed to `Formatter.truncate/2`. Defaults to 200.
#     An explicit `nil` also means "use 200": callers that forward an unset
#     length pass `nil` as a real argument, which would otherwise bypass the
#     `\\ 200` default and send `nil` to the truncation step.
#     NOTE(review): presumably `Formatter.truncate/2` expects an integer
#     limit - confirm against its definition.
#
# Returns whatever `Formatter.truncate/2` returns for the filtered text.
defp do_filter_html_and_truncate(content, max_length \\ 200) when is_binary(content) do
  # html content comes from DB already encoded, but demojify decodes for us
  content
  |> Emoji.Formatter.demojify()
  # Presumably leaves only <br> tags in place (scrubber is named BreaksOnly);
  # verify against Pleroma.HTML.Scrubber.BreaksOnly.
  |> HTML.filter_tags(Pleroma.HTML.Scrubber.BreaksOnly)
  |> HtmlEntities.decode()
  # Runs after decoding, so the inserted entity text stays encoded in the result.
  |> String.replace(~r/<br\s?\/?>/, "&#10;&#13;")
  |> Formatter.truncate(max_length || 200)
end
# Strips all HTML from `content`, turning line breaks into spaces, then
# truncates the result.
#
#   * `content`    - a binary (enforced by the `is_binary/1` guard).
#   * `max_length` - limit handed to `Formatter.truncate/2`. Defaults to 200.
#     An explicit `nil` also means "use 200": callers that forward an unset
#     length pass `nil` as a real argument, which would otherwise bypass the
#     `\\ 200` default and send `nil` to the truncation step.
#     NOTE(review): presumably `Formatter.truncate/2` expects an integer
#     limit - confirm against its definition.
#
# Returns whatever `Formatter.truncate/2` returns for the stripped text.
defp do_scrub_html_and_truncate(content, max_length \\ 200) when is_binary(content) do
  # html content comes from DB already encoded, but demojify decodes for us
  content
  |> Emoji.Formatter.demojify()
  |> HtmlEntities.decode()
  # Swap <br> for a space before tags are stripped, so text on either side
  # of a break does not run together.
  |> String.replace(~r/<br\s?\/?>/, " ")
  |> HTML.strip_tags()
  # Second decode pass over the tag-free text.
  |> HtmlEntities.decode()
  |> Formatter.truncate(max_length || 200)
end
end end