From d6c89068f3c6765b7a3ef63199725b7833c34c3a Mon Sep 17 00:00:00 2001 From: rinpatch Date: Sun, 8 Dec 2019 19:42:40 +0300 Subject: [PATCH] HTML: Compile Scrubbers on boot This makes it possible to configure their behavior on OTP releases. --- .formatter.exs | 2 +- CHANGELOG.md | 1 + lib/pleroma/application.ex | 1 + lib/pleroma/html.ex | 232 ++++------------------------------------- priv/scrubbers/default.ex | 93 +++++++++++++++++ priv/scrubbers/links_only.ex | 27 +++++ priv/scrubbers/media_proxy.ex | 32 ++++++ priv/scrubbers/twitter_text.ex | 57 ++++++++++ 8 files changed, 231 insertions(+), 214 deletions(-) create mode 100644 priv/scrubbers/default.ex create mode 100644 priv/scrubbers/links_only.ex create mode 100644 priv/scrubbers/media_proxy.ex create mode 100644 priv/scrubbers/twitter_text.ex diff --git a/.formatter.exs b/.formatter.exs index 7fa95a619..5799ac127 100644 --- a/.formatter.exs +++ b/.formatter.exs @@ -1,3 +1,3 @@ [ - inputs: ["mix.exs", "{config,lib,test}/**/*.{ex,exs}", "priv/repo/migrations/*.exs"] + inputs: ["mix.exs", "{config,lib,test}/**/*.{ex,exs}", "priv/repo/migrations/*.exs", "priv/scrubbers/*.ex"] ] diff --git a/CHANGELOG.md b/CHANGELOG.md index d00097748..3d578caef 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -82,6 +82,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). - Report emails now include functional links to profiles of remote user accounts - Not being able to log in to some third-party apps when logged in to MastoFE - MRF: `Delete` activities being exempt from MRF policies +- OTP releases: Not being able to configure HTML sanitization policy
API Changes diff --git a/lib/pleroma/application.ex b/lib/pleroma/application.ex index 57462740c..5b844aa41 100644 --- a/lib/pleroma/application.ex +++ b/lib/pleroma/application.ex @@ -30,6 +30,7 @@ defmodule Pleroma.Application do # See http://elixir-lang.org/docs/stable/elixir/Application.html # for more information on OTP Applications def start(_type, _args) do + Pleroma.HTML.compile_scrubbers() Pleroma.Config.DeprecationWarnings.warn() setup_instrumenters() diff --git a/lib/pleroma/html.ex b/lib/pleroma/html.ex index 71c53ce0e..2cae29f35 100644 --- a/lib/pleroma/html.ex +++ b/lib/pleroma/html.ex @@ -3,6 +3,25 @@ # SPDX-License-Identifier: AGPL-3.0-only defmodule Pleroma.HTML do + # Scrubbers are compiled on boot so they can be configured in OTP releases + # @on_load :compile_scrubbers + + def compile_scrubbers do + dir = Path.join(:code.priv_dir(:pleroma), "scrubbers") + + dir + |> File.ls!() + |> Enum.map(&Path.join(dir, &1)) + |> Kernel.ParallelCompiler.compile() + |> case do + {:error, _errors, _warnings} -> + raise "Compiling scrubbers failed" + + {:ok, _modules, _warnings} -> + :ok + end + end + defp get_scrubbers(scrubber) when is_atom(scrubber), do: [scrubber] defp get_scrubbers(scrubbers) when is_list(scrubbers), do: scrubbers defp get_scrubbers(_), do: [Pleroma.HTML.Scrubber.Default] @@ -99,216 +118,3 @@ defmodule Pleroma.HTML do end) end end - -defmodule Pleroma.HTML.Scrubber.TwitterText do - @moduledoc """ - An HTML scrubbing policy which limits to twitter-style text. Only - paragraphs, breaks and links are allowed through the filter. - """ - - @valid_schemes Pleroma.Config.get([:uri_schemes, :valid_schemes], []) - - require FastSanitize.Sanitizer.Meta - alias FastSanitize.Sanitizer.Meta - - Meta.strip_comments() - - # links - Meta.allow_tag_with_uri_attributes(:a, ["href", "data-user", "data-tag"], @valid_schemes) - - Meta.allow_tag_with_this_attribute_values(:a, "class", [ - "hashtag", - "u-url", - "mention", - "u-url mention", - "mention u-url" - ]) - - Meta.allow_tag_with_this_attribute_values(:a, "rel", [ - "tag", - "nofollow", - "noopener", - "noreferrer" - ]) - - Meta.allow_tag_with_these_attributes(:a, ["name", "title"]) - - # paragraphs and linebreaks - Meta.allow_tag_with_these_attributes(:br, []) - Meta.allow_tag_with_these_attributes(:p, []) - - # microformats - Meta.allow_tag_with_this_attribute_values(:span, "class", ["h-card"]) - Meta.allow_tag_with_these_attributes(:span, []) - - # allow inline images for custom emoji - if Pleroma.Config.get([:markup, :allow_inline_images]) do - # restrict img tags to http/https only, because of MediaProxy. - Meta.allow_tag_with_uri_attributes(:img, ["src"], ["http", "https"]) - - Meta.allow_tag_with_these_attributes(:img, [ - "width", - "height", - "class", - "title", - "alt" - ]) - end - - Meta.strip_everything_not_covered() -end - -defmodule Pleroma.HTML.Scrubber.Default do - @doc "The default HTML scrubbing policy: no " - - require FastSanitize.Sanitizer.Meta - alias FastSanitize.Sanitizer.Meta - - # credo:disable-for-previous-line - # No idea how to fix this one… - - @valid_schemes Pleroma.Config.get([:uri_schemes, :valid_schemes], []) - - Meta.strip_comments() - - Meta.allow_tag_with_uri_attributes(:a, ["href", "data-user", "data-tag"], @valid_schemes) - - Meta.allow_tag_with_this_attribute_values(:a, "class", [ - "hashtag", - "u-url", - "mention", - "u-url mention", - "mention u-url" - ]) - - Meta.allow_tag_with_this_attribute_values(:a, "rel", [ - "tag", - "nofollow", - "noopener", - "noreferrer", - "ugc" - ]) - - Meta.allow_tag_with_these_attributes(:a, ["name", "title"]) - - Meta.allow_tag_with_these_attributes(:abbr, ["title"]) - - Meta.allow_tag_with_these_attributes(:b, []) - Meta.allow_tag_with_these_attributes(:blockquote, []) - Meta.allow_tag_with_these_attributes(:br, []) - Meta.allow_tag_with_these_attributes(:code, []) - Meta.allow_tag_with_these_attributes(:del, []) - Meta.allow_tag_with_these_attributes(:em, []) - Meta.allow_tag_with_these_attributes(:i, []) - Meta.allow_tag_with_these_attributes(:li, []) - Meta.allow_tag_with_these_attributes(:ol, []) - Meta.allow_tag_with_these_attributes(:p, []) - Meta.allow_tag_with_these_attributes(:pre, []) - Meta.allow_tag_with_these_attributes(:strong, []) - Meta.allow_tag_with_these_attributes(:sub, []) - Meta.allow_tag_with_these_attributes(:sup, []) - Meta.allow_tag_with_these_attributes(:u, []) - Meta.allow_tag_with_these_attributes(:ul, []) - - Meta.allow_tag_with_this_attribute_values(:span, "class", ["h-card"]) - Meta.allow_tag_with_these_attributes(:span, []) - - @allow_inline_images Pleroma.Config.get([:markup, :allow_inline_images]) - - if @allow_inline_images do - # restrict img tags to http/https only, because of MediaProxy. - Meta.allow_tag_with_uri_attributes(:img, ["src"], ["http", "https"]) - - Meta.allow_tag_with_these_attributes(:img, [ - "width", - "height", - "class", - "title", - "alt" - ]) - end - - if Pleroma.Config.get([:markup, :allow_tables]) do - Meta.allow_tag_with_these_attributes(:table, []) - Meta.allow_tag_with_these_attributes(:tbody, []) - Meta.allow_tag_with_these_attributes(:td, []) - Meta.allow_tag_with_these_attributes(:th, []) - Meta.allow_tag_with_these_attributes(:thead, []) - Meta.allow_tag_with_these_attributes(:tr, []) - end - - if Pleroma.Config.get([:markup, :allow_headings]) do - Meta.allow_tag_with_these_attributes(:h1, []) - Meta.allow_tag_with_these_attributes(:h2, []) - Meta.allow_tag_with_these_attributes(:h3, []) - Meta.allow_tag_with_these_attributes(:h4, []) - Meta.allow_tag_with_these_attributes(:h5, []) - end - - if Pleroma.Config.get([:markup, :allow_fonts]) do - Meta.allow_tag_with_these_attributes(:font, ["face"]) - end - - Meta.strip_everything_not_covered() -end - -defmodule Pleroma.HTML.Transform.MediaProxy do - @moduledoc "Transforms inline image URIs to use MediaProxy." - - alias Pleroma.Web.MediaProxy - - def before_scrub(html), do: html - - def scrub_attribute(:img, {"src", "http" <> target}) do - media_url = - ("http" <> target) - |> MediaProxy.url() - - {"src", media_url} - end - - def scrub_attribute(_tag, attribute), do: attribute - - def scrub({:img, attributes, children}) do - attributes = - attributes - |> Enum.map(fn attr -> scrub_attribute(:img, attr) end) - |> Enum.reject(&is_nil(&1)) - - {:img, attributes, children} - end - - def scrub({:comment, _text, _children}), do: "" - - def scrub({tag, attributes, children}), do: {tag, attributes, children} - def scrub({_tag, children}), do: children - def scrub(text), do: text -end - -defmodule Pleroma.HTML.Scrubber.LinksOnly do - @moduledoc """ - An HTML scrubbing policy which limits to links only. - """ - - @valid_schemes Pleroma.Config.get([:uri_schemes, :valid_schemes], []) - - require FastSanitize.Sanitizer.Meta - alias FastSanitize.Sanitizer.Meta - - Meta.strip_comments() - - # links - Meta.allow_tag_with_uri_attributes(:a, ["href"], @valid_schemes) - - Meta.allow_tag_with_this_attribute_values(:a, "rel", [ - "tag", - "nofollow", - "noopener", - "noreferrer", - "me", - "ugc" - ]) - - Meta.allow_tag_with_these_attributes(:a, ["name", "title"]) - Meta.strip_everything_not_covered() -end diff --git a/priv/scrubbers/default.ex b/priv/scrubbers/default.ex new file mode 100644 index 000000000..ea0480dcd --- /dev/null +++ b/priv/scrubbers/default.ex @@ -0,0 +1,93 @@ +defmodule Pleroma.HTML.Scrubber.Default do + @doc "The default HTML scrubbing policy: no " + + require FastSanitize.Sanitizer.Meta + alias FastSanitize.Sanitizer.Meta + + # credo:disable-for-previous-line + # No idea how to fix this one… + + @valid_schemes Pleroma.Config.get([:uri_schemes, :valid_schemes], []) + + Meta.strip_comments() + + Meta.allow_tag_with_uri_attributes(:a, ["href", "data-user", "data-tag"], @valid_schemes) + + Meta.allow_tag_with_this_attribute_values(:a, "class", [ + "hashtag", + "u-url", + "mention", + "u-url mention", + "mention u-url" + ]) + + Meta.allow_tag_with_this_attribute_values(:a, "rel", [ + "tag", + "nofollow", + "noopener", + "noreferrer", + "ugc" + ]) + + Meta.allow_tag_with_these_attributes(:a, ["name", "title"]) + + Meta.allow_tag_with_these_attributes(:abbr, ["title"]) + + Meta.allow_tag_with_these_attributes(:b, []) + Meta.allow_tag_with_these_attributes(:blockquote, []) + Meta.allow_tag_with_these_attributes(:br, []) + Meta.allow_tag_with_these_attributes(:code, []) + Meta.allow_tag_with_these_attributes(:del, []) + Meta.allow_tag_with_these_attributes(:em, []) + Meta.allow_tag_with_these_attributes(:i, []) + Meta.allow_tag_with_these_attributes(:li, []) + Meta.allow_tag_with_these_attributes(:ol, []) + Meta.allow_tag_with_these_attributes(:p, []) + Meta.allow_tag_with_these_attributes(:pre, []) + Meta.allow_tag_with_these_attributes(:strong, []) + Meta.allow_tag_with_these_attributes(:sub, []) + Meta.allow_tag_with_these_attributes(:sup, []) + Meta.allow_tag_with_these_attributes(:u, []) + Meta.allow_tag_with_these_attributes(:ul, []) + + Meta.allow_tag_with_this_attribute_values(:span, "class", ["h-card"]) + Meta.allow_tag_with_these_attributes(:span, []) + + @allow_inline_images Pleroma.Config.get([:markup, :allow_inline_images]) + + if @allow_inline_images do + # restrict img tags to http/https only, because of MediaProxy. + Meta.allow_tag_with_uri_attributes(:img, ["src"], ["http", "https"]) + + Meta.allow_tag_with_these_attributes(:img, [ + "width", + "height", + "class", + "title", + "alt" + ]) + end + + if Pleroma.Config.get([:markup, :allow_tables]) do + Meta.allow_tag_with_these_attributes(:table, []) + Meta.allow_tag_with_these_attributes(:tbody, []) + Meta.allow_tag_with_these_attributes(:td, []) + Meta.allow_tag_with_these_attributes(:th, []) + Meta.allow_tag_with_these_attributes(:thead, []) + Meta.allow_tag_with_these_attributes(:tr, []) + end + + if Pleroma.Config.get([:markup, :allow_headings]) do + Meta.allow_tag_with_these_attributes(:h1, []) + Meta.allow_tag_with_these_attributes(:h2, []) + Meta.allow_tag_with_these_attributes(:h3, []) + Meta.allow_tag_with_these_attributes(:h4, []) + Meta.allow_tag_with_these_attributes(:h5, []) + end + + if Pleroma.Config.get([:markup, :allow_fonts]) do + Meta.allow_tag_with_these_attributes(:font, ["face"]) + end + + Meta.strip_everything_not_covered() +end diff --git a/priv/scrubbers/links_only.ex b/priv/scrubbers/links_only.ex new file mode 100644 index 000000000..b30a00589 --- /dev/null +++ b/priv/scrubbers/links_only.ex @@ -0,0 +1,27 @@ +defmodule Pleroma.HTML.Scrubber.LinksOnly do + @moduledoc """ + An HTML scrubbing policy which limits to links only. + """ + + @valid_schemes Pleroma.Config.get([:uri_schemes, :valid_schemes], []) + + require FastSanitize.Sanitizer.Meta + alias FastSanitize.Sanitizer.Meta + + Meta.strip_comments() + + # links + Meta.allow_tag_with_uri_attributes(:a, ["href"], @valid_schemes) + + Meta.allow_tag_with_this_attribute_values(:a, "rel", [ + "tag", + "nofollow", + "noopener", + "noreferrer", + "me", + "ugc" + ]) + + Meta.allow_tag_with_these_attributes(:a, ["name", "title"]) + Meta.strip_everything_not_covered() +end diff --git a/priv/scrubbers/media_proxy.ex b/priv/scrubbers/media_proxy.ex new file mode 100644 index 000000000..5dbe57666 --- /dev/null +++ b/priv/scrubbers/media_proxy.ex @@ -0,0 +1,32 @@ +defmodule Pleroma.HTML.Transform.MediaProxy do + @moduledoc "Transforms inline image URIs to use MediaProxy." + + alias Pleroma.Web.MediaProxy + + def before_scrub(html), do: html + + def scrub_attribute(:img, {"src", "http" <> target}) do + media_url = + ("http" <> target) + |> MediaProxy.url() + + {"src", media_url} + end + + def scrub_attribute(_tag, attribute), do: attribute + + def scrub({:img, attributes, children}) do + attributes = + attributes + |> Enum.map(fn attr -> scrub_attribute(:img, attr) end) + |> Enum.reject(&is_nil(&1)) + + {:img, attributes, children} + end + + def scrub({:comment, _text, _children}), do: "" + + def scrub({tag, attributes, children}), do: {tag, attributes, children} + def scrub({_tag, children}), do: children + def scrub(text), do: text +end diff --git a/priv/scrubbers/twitter_text.ex b/priv/scrubbers/twitter_text.ex new file mode 100644 index 000000000..c4e796cad --- /dev/null +++ b/priv/scrubbers/twitter_text.ex @@ -0,0 +1,57 @@ +defmodule Pleroma.HTML.Scrubber.TwitterText do + @moduledoc """ + An HTML scrubbing policy which limits to twitter-style text. Only + paragraphs, breaks and links are allowed through the filter. + """ + + @valid_schemes Pleroma.Config.get([:uri_schemes, :valid_schemes], []) + + require FastSanitize.Sanitizer.Meta + alias FastSanitize.Sanitizer.Meta + + Meta.strip_comments() + + # links + Meta.allow_tag_with_uri_attributes(:a, ["href", "data-user", "data-tag"], @valid_schemes) + + Meta.allow_tag_with_this_attribute_values(:a, "class", [ + "hashtag", + "u-url", + "mention", + "u-url mention", + "mention u-url" + ]) + + Meta.allow_tag_with_this_attribute_values(:a, "rel", [ + "tag", + "nofollow", + "noopener", + "noreferrer" + ]) + + Meta.allow_tag_with_these_attributes(:a, ["name", "title"]) + + # paragraphs and linebreaks + Meta.allow_tag_with_these_attributes(:br, []) + Meta.allow_tag_with_these_attributes(:p, []) + + # microformats + Meta.allow_tag_with_this_attribute_values(:span, "class", ["h-card"]) + Meta.allow_tag_with_these_attributes(:span, []) + + # allow inline images for custom emoji + if Pleroma.Config.get([:markup, :allow_inline_images]) do + # restrict img tags to http/https only, because of MediaProxy. + Meta.allow_tag_with_uri_attributes(:img, ["src"], ["http", "https"]) + + Meta.allow_tag_with_these_attributes(:img, [ + "width", + "height", + "class", + "title", + "alt" + ]) + end + + Meta.strip_everything_not_covered() +end