Clarify when we are scrubbing vs when we are filtering so we can keep newlines

This commit is contained in:
Mark Felder 2021-06-11 18:03:00 -05:00
parent 04b5f19a14
commit f48f3e8b46
3 changed files with 37 additions and 24 deletions

View File

@ -19,7 +19,7 @@ defmodule Pleroma.Web.Metadata.Providers.OpenGraph do
user: user
}) do
attachments = build_attachments(object)
scrubbed_content = Utils.scrub_html_and_truncate(object)
filtered_content = Utils.filter_html_and_truncate(object)
[
{:meta,
@ -31,7 +31,7 @@ defmodule Pleroma.Web.Metadata.Providers.OpenGraph do
{:meta,
[
property: "og:description",
content: scrubbed_content
content: filtered_content
], []},
{:meta, [property: "og:type", content: "article"], []}
] ++
@ -49,7 +49,7 @@ defmodule Pleroma.Web.Metadata.Providers.OpenGraph do
@impl Provider
def build_tags(%{user: user}) do
with truncated_bio = Utils.scrub_html_and_truncate(user.bio) do
with truncated_bio = Utils.filter_html_and_truncate(user.bio) do
[
{:meta,
[

View File

@ -16,11 +16,11 @@ defmodule Pleroma.Web.Metadata.Providers.TwitterCard do
@impl Provider
def build_tags(%{activity_id: id, object: object, user: user}) do
attachments = build_attachments(id, object)
scrubbed_content = Utils.scrub_html_and_truncate(object)
filtered_content = Utils.filter_html_and_truncate(object)
[
title_tag(user),
{:meta, [property: "twitter:description", content: scrubbed_content], []}
{:meta, [property: "twitter:description", content: filtered_content], []}
] ++
if attachments == [] or Metadata.activity_nsfw?(object) do
[
@ -34,7 +34,7 @@ defmodule Pleroma.Web.Metadata.Providers.TwitterCard do
@impl Provider
def build_tags(%{user: user}) do
with truncated_bio = Utils.scrub_html_and_truncate(user.bio) do
with truncated_bio = Utils.filter_html_and_truncate(user.bio) do
[
title_tag(user),
{:meta, [property: "twitter:description", content: truncated_bio], []},

View File

@ -7,25 +7,17 @@ defmodule Pleroma.Web.Metadata.Utils do
alias Pleroma.Formatter
alias Pleroma.HTML
# Earlier body of filter_html_and_truncate/1 as shown in this diff view; the
# one-line clause that follows it has the same head and delegates to
# do_filter_html_and_truncate/1 instead.
#
# Takes an object whose data map has a "content" string and runs that string
# through: demojify -> HTML.filter_tags with the BreaksOnly scrubber
# (presumably leaving only <br> tags -- confirm against the scrubber) ->
# entity decode -> replace each <br> with the literal text "&#10;&#13;" ->
# Formatter.truncate/1 (that function's own default length applies).
def filter_html_and_truncate(%{data: %{"content" => content}} = _object) do
content
# html content comes from DB already encoded, decode first and scrub after
|> Emoji.Formatter.demojify()
|> HTML.filter_tags(Pleroma.HTML.Scrubber.BreaksOnly)
|> HtmlEntities.decode()
# <br>, <br/>, <br /> (lowercase only, at most one whitespace char) become "&#10;&#13;"
|> String.replace(~r/<br\s?\/?>/, "&#10;&#13;")
|> Formatter.truncate()
end
# Object clause: takes the "content" string out of the object's data map and
# hands it to the shared filtering helper, using that helper's default length.
def filter_html_and_truncate(%{data: %{"content" => html}}) do
  do_filter_html_and_truncate(html)
end
# Earlier body of scrub_html_and_truncate/2 as shown in this diff view; the
# same pipeline appears further down in do_scrub_html_and_truncate/2.
#
# Accepts a binary `content` and an optional `max_length` (default 200) and
# returns the result of Formatter.truncate/2 on the scrubbed text.
def scrub_html_and_truncate(content, max_length \\ 200) when is_binary(content) do
content
|> Emoji.Formatter.demojify()
|> HtmlEntities.decode()
# line breaks become a single space so adjacent lines do not run together
|> String.replace(~r/<br\s?\/?>/, " ")
|> HTML.strip_tags()
# decoded a second time after stripping -- presumably for entities that were
# double-encoded or revealed by strip_tags; confirm before removing
|> HtmlEntities.decode()
|> Formatter.truncate(max_length)
end
# Binary clause: filters `content` through do_filter_html_and_truncate/2 and
# truncates the result.
#
# `max_length` is optional. When it is omitted or `nil`, 200 is used, the same
# value as the default declared on do_filter_html_and_truncate/2. Forwarding
# `nil` unchanged would replace that default (defaults only apply to omitted
# arguments), so the helper would pass `nil` on to Formatter.truncate/2.
def filter_html_and_truncate(content, max_length \\ nil),
  do: do_filter_html_and_truncate(content, max_length || 200)
# Object clause: takes the "content" string out of the object's data map and
# hands it to the shared scrubbing helper, using that helper's default length.
def scrub_html_and_truncate(%{data: %{"content" => html}}) do
  do_scrub_html_and_truncate(html)
end
# Binary clause: scrubs `content` through do_scrub_html_and_truncate/2 and
# truncates the result.
#
# `max_length` is optional. When it is omitted or `nil`, 200 is used, the same
# value as the default declared on do_scrub_html_and_truncate/2. Forwarding
# `nil` unchanged would replace that default (defaults only apply to omitted
# arguments), so the helper would pass `nil` on to Formatter.truncate/2.
def scrub_html_and_truncate(content, max_length \\ nil),
  do: do_scrub_html_and_truncate(content, max_length || 200)
def user_name_string(user) do
"#{user.name} " <>
@ -42,4 +34,25 @@ defmodule Pleroma.Web.Metadata.Utils do
String.starts_with?(media_type, support_type)
end)
end
# Shared implementation behind filter_html_and_truncate.
#
# `content` must be a binary; `max_length` defaults to 200 and is passed
# straight to Formatter.truncate/2. Unlike the scrub variant, line breaks are
# kept: each <br> ends up as the literal text "&#10;&#13;".
defp do_filter_html_and_truncate(content, max_length \\ 200) when is_binary(content) do
  # Stored HTML is entity-encoded; the original author's note says demojify
  # already decodes it (not verifiable from this file).
  without_emoji = Emoji.Formatter.demojify(content)

  # Filter tags through the BreaksOnly scrubber, then decode entities.
  breaks_only = HTML.filter_tags(without_emoji, Pleroma.HTML.Scrubber.BreaksOnly)
  decoded = HtmlEntities.decode(breaks_only)

  # Swap the surviving <br> tags for the newline entity text.
  with_newlines = String.replace(decoded, ~r/<br\s?\/?>/, "&#10;&#13;")

  Formatter.truncate(with_newlines, max_length)
end
# Shared implementation behind scrub_html_and_truncate.
#
# `content` must be a binary; `max_length` defaults to 200 and is passed
# straight to Formatter.truncate/2. All markup is removed; line breaks are
# flattened to single spaces.
defp do_scrub_html_and_truncate(content, max_length \\ 200) when is_binary(content) do
  # Stored HTML is entity-encoded; the original author's note says demojify
  # already decodes it (not verifiable from this file).
  without_emoji = Emoji.Formatter.demojify(content)
  decoded = HtmlEntities.decode(without_emoji)

  # Turn <br> into a space before stripping so adjacent lines stay separated.
  spaced = String.replace(decoded, ~r/<br\s?\/?>/, " ")
  stripped = HTML.strip_tags(spaced)

  # Entities are decoded once more after the tags are gone, as in the
  # original pipeline.
  plain = HtmlEntities.decode(stripped)

  Formatter.truncate(plain, max_length)
end
end