From 50e0a9ae56ad2704240956d1f93cc04bafcb8b75 Mon Sep 17 00:00:00 2001 From: "Haelwenn (lanodan) Monnier" Date: Tue, 16 Oct 2018 03:00:37 +0200 Subject: [PATCH 1/4] lib/pleroma/html.ex: Fix scheme lists MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Gosh please don’t break ourselves… Also this is copy-paste of the list in lib/pleroma/formatter.ex, I think this should be put in a common variable, but where? --- lib/pleroma/html.ex | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/lib/pleroma/html.ex b/lib/pleroma/html.ex index cf18f070c..f86855671 100644 --- a/lib/pleroma/html.ex +++ b/lib/pleroma/html.ex @@ -39,7 +39,22 @@ defmodule Pleroma.HTML.Scrubber.TwitterText do require HtmlSanitizeEx.Scrubber.Meta alias HtmlSanitizeEx.Scrubber.Meta - @valid_schemes ["http", "https"] + @valid_schemes [ + "https://", + "http://", + "dat://", + "dweb://", + "gopher://", + "ipfs://", + "ipns://", + "irc:", + "ircs:", + "magnet:", + "mailto:", + "mumble:", + "ssb://", + "xmpp:" + ] Meta.remove_cdata_sections_before_scrub() Meta.strip_comments() From d7654c77de1f027091a380630559bbda9abb9540 Mon Sep 17 00:00:00 2001 From: "Haelwenn (lanodan) Monnier" Date: Tue, 16 Oct 2018 03:34:33 +0200 Subject: [PATCH 2/4] lib/pleroma/html.ex: Use a function as a variable (broken for some reason) --- lib/pleroma/html.ex | 46 +++++++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/lib/pleroma/html.ex b/lib/pleroma/html.ex index f86855671..8a5ede614 100644 --- a/lib/pleroma/html.ex +++ b/lib/pleroma/html.ex @@ -3,6 +3,25 @@ defmodule Pleroma.HTML do @markup Application.get_env(:pleroma, :markup) + def valid_schemes() do + [ + "https://", + "http://", + "dat://", + "dweb://", + "gopher://", + "ipfs://", + "ipns://", + "irc:", + "ircs:", + "magnet:", + "mailto:", + "mumble:", + "ssb://", + "xmpp:" + ] + end + defp get_scrubbers(scrubber) when is_atom(scrubber), do: [scrubber] defp get_scrubbers(scrubbers) when is_list(scrubbers), do: scrubbers defp get_scrubbers(_), do: [Pleroma.HTML.Scrubber.Default] @@ -39,28 +58,13 @@ defmodule Pleroma.HTML.Scrubber.TwitterText do require HtmlSanitizeEx.Scrubber.Meta alias HtmlSanitizeEx.Scrubber.Meta - @valid_schemes [ - "https://", - "http://", - "dat://", - "dweb://", - "gopher://", - "ipfs://", - "ipns://", - "irc:", - "ircs:", - "magnet:", - "mailto:", - "mumble:", - "ssb://", - "xmpp:" - ] + alias Pleroma.HTML Meta.remove_cdata_sections_before_scrub() Meta.strip_comments() # links - Meta.allow_tag_with_uri_attributes("a", ["href"], @valid_schemes) + Meta.allow_tag_with_uri_attributes("a", ["href"], HTML.valid_schemes()) Meta.allow_tag_with_these_attributes("a", ["name", "title"]) # paragraphs and linebreaks @@ -75,7 +79,7 @@ defmodule Pleroma.HTML.Scrubber.TwitterText do @allow_inline_images Keyword.get(@markup, :allow_inline_images) if @allow_inline_images do - Meta.allow_tag_with_uri_attributes("img", ["src"], @valid_schemes) + Meta.allow_tag_with_uri_attributes("img", ["src"], HTML.valid_schemes()) Meta.allow_tag_with_these_attributes("img", [ "width", @@ -94,12 +98,12 @@ defmodule Pleroma.HTML.Scrubber.Default do require HtmlSanitizeEx.Scrubber.Meta alias HtmlSanitizeEx.Scrubber.Meta - @valid_schemes ["http", "https"] + alias Pleroma.HTML Meta.remove_cdata_sections_before_scrub() Meta.strip_comments() - Meta.allow_tag_with_uri_attributes("a", ["href"], @valid_schemes) + Meta.allow_tag_with_uri_attributes("a", ["href"], HTML.valid_schemes()) Meta.allow_tag_with_these_attributes("a", ["name", "title"]) Meta.allow_tag_with_these_attributes("b", []) @@ -122,7 +126,7 @@ defmodule Pleroma.HTML.Scrubber.Default do @allow_inline_images Keyword.get(@markup, :allow_inline_images) if @allow_inline_images do - Meta.allow_tag_with_uri_attributes("img", ["src"], @valid_schemes) + Meta.allow_tag_with_uri_attributes("img", ["src"], HTML.valid_schemes()) Meta.allow_tag_with_these_attributes("img", [ "width", From 2154c5dcd891cf2a85c0251e07424b5681aa88a2 Mon Sep 17 00:00:00 2001 From: "Haelwenn (lanodan) Monnier" Date: Thu, 18 Oct 2018 07:36:58 +0200 Subject: [PATCH 3/4] lib/pleroma/html.ex: Use macros for valid_schemes, change config for schemes --- config/config.exs | 18 +++++++++++++++++- lib/pleroma/formatter.ex | 27 +++------------------------ lib/pleroma/html.ex | 37 ++++++++++++------------------------- 3 files changed, 32 insertions(+), 50 deletions(-) diff --git a/config/config.exs b/config/config.exs index 320296991..e5f0b4f6f 100644 --- a/config/config.exs +++ b/config/config.exs @@ -24,7 +24,23 @@ config :pleroma, Pleroma.Uploaders.S3, config :pleroma, :emoji, shortcode_globs: ["/emoji/custom/**/*.png"] -config :pleroma, :uri_schemes, additionnal_schemes: [] +config :pleroma, :uri_schemes, + valid_schemes: [ + "https", + "http", + "dat", + "dweb", + "gopher", + "ipfs", + "ipns", + "irc", + "ircs", + "magnet", + "mailto", + "mumble", + "ssb", + "xmpp" + ] # Configures the endpoint config :pleroma, Pleroma.Web.Endpoint, diff --git a/lib/pleroma/formatter.ex b/lib/pleroma/formatter.ex index 5b63fb795..d7de5b483 100644 --- a/lib/pleroma/formatter.ex +++ b/lib/pleroma/formatter.ex @@ -171,25 +171,8 @@ defmodule Pleroma.Formatter do @link_regex ~r/[0-9a-z+\-\.]+:[0-9a-z$-_.+!*'(),]+/ui - # IANA got a list https://www.iana.org/assignments/uri-schemes/ but - # Stuff like ipfs isn’t in it - # There is very niche stuff - @uri_schemes [ - "https://", - "http://", - "dat://", - "dweb://", - "gopher://", - "ipfs://", - "ipns://", - "irc:", - "ircs:", - "magnet:", - "mailto:", - "mumble:", - "ssb://", - "xmpp:" - ] + @uri_schemes Application.get_env(:pleroma, :uri_schemes, []) + @valid_schemes Keyword.get(@uri_schemes, :valid_schemes, []) # TODO: make it use something other than @link_regex def html_escape(text, "text/html") do @@ -207,14 +190,10 @@ defmodule Pleroma.Formatter do @doc "changes scheme:... urls to html links" def add_links({subs, text}) do - additionnal_schemes = - Application.get_env(:pleroma, :uri_schemes, []) - |> Keyword.get(:additionnal_schemes, []) - links = text |> String.split([" ", "\t", "
"]) - |> Enum.filter(fn word -> String.starts_with?(word, @uri_schemes ++ additionnal_schemes) end) + |> Enum.filter(fn word -> String.starts_with?(word, @valid_schemes) end) |> Enum.filter(fn word -> Regex.match?(@link_regex, word) end) |> Enum.map(fn url -> {Ecto.UUID.generate(), url} end) |> Enum.sort_by(fn {_, url} -> -String.length(url) end) diff --git a/lib/pleroma/html.ex b/lib/pleroma/html.ex index 8a5ede614..2d2155b82 100644 --- a/lib/pleroma/html.ex +++ b/lib/pleroma/html.ex @@ -3,25 +3,6 @@ defmodule Pleroma.HTML do @markup Application.get_env(:pleroma, :markup) - def valid_schemes() do - [ - "https://", - "http://", - "dat://", - "dweb://", - "gopher://", - "ipfs://", - "ipns://", - "irc:", - "ircs:", - "magnet:", - "mailto:", - "mumble:", - "ssb://", - "xmpp:" - ] - end - defp get_scrubbers(scrubber) when is_atom(scrubber), do: [scrubber] defp get_scrubbers(scrubbers) when is_list(scrubbers), do: scrubbers defp get_scrubbers(_), do: [Pleroma.HTML.Scrubber.Default] @@ -55,6 +36,10 @@ defmodule Pleroma.HTML.Scrubber.TwitterText do paragraphs, breaks and links are allowed through the filter. """ + @markup Application.get_env(:pleroma, :markup) + @uri_schemes Application.get_env(:pleroma, :uri_schemes, []) + @valid_schemes Keyword.get(@uri_schemes, :valid_schemes, []) + require HtmlSanitizeEx.Scrubber.Meta alias HtmlSanitizeEx.Scrubber.Meta @@ -64,7 +49,7 @@ defmodule Pleroma.HTML.Scrubber.TwitterText do Meta.strip_comments() # links - Meta.allow_tag_with_uri_attributes("a", ["href"], HTML.valid_schemes()) + Meta.allow_tag_with_uri_attributes("a", ["href"], @valid_schemes) Meta.allow_tag_with_these_attributes("a", ["name", "title"]) # paragraphs and linebreaks @@ -75,11 +60,10 @@ defmodule Pleroma.HTML.Scrubber.TwitterText do Meta.allow_tag_with_these_attributes("span", []) # allow inline images for custom emoji - @markup Application.get_env(:pleroma, :markup) @allow_inline_images Keyword.get(@markup, :allow_inline_images) if @allow_inline_images do - Meta.allow_tag_with_uri_attributes("img", ["src"], HTML.valid_schemes()) + Meta.allow_tag_with_uri_attributes("img", ["src"], @valid_schemes) Meta.allow_tag_with_these_attributes("img", [ "width", @@ -100,10 +84,14 @@ defmodule Pleroma.HTML.Scrubber.Default do alias Pleroma.HTML + @markup Application.get_env(:pleroma, :markup) + @uri_schemes Application.get_env(:pleroma, :uri_schemes, []) + @valid_schemes Keyword.get(@uri_schemes, :valid_schemes, []) + Meta.remove_cdata_sections_before_scrub() Meta.strip_comments() - Meta.allow_tag_with_uri_attributes("a", ["href"], HTML.valid_schemes()) + Meta.allow_tag_with_uri_attributes("a", ["href"], @valid_schemes) Meta.allow_tag_with_these_attributes("a", ["name", "title"]) Meta.allow_tag_with_these_attributes("b", []) @@ -122,11 +110,10 @@ defmodule Pleroma.HTML.Scrubber.Default do Meta.allow_tag_with_these_attributes("u", []) Meta.allow_tag_with_these_attributes("ul", []) - @markup Application.get_env(:pleroma, :markup) @allow_inline_images Keyword.get(@markup, :allow_inline_images) if @allow_inline_images do - Meta.allow_tag_with_uri_attributes("img", ["src"], HTML.valid_schemes()) + Meta.allow_tag_with_uri_attributes("img", ["src"], @valid_schemes) Meta.allow_tag_with_these_attributes("img", [ "width", From 595d855f0ebd88faede51bf3e08384e956465722 Mon Sep 17 00:00:00 2001 From: William Pitcock Date: Thu, 18 Oct 2018 14:29:31 +0000 Subject: [PATCH 4/4] html scrubbing policies: restrict img tags to http/https only for mediaproxy compatibility --- lib/pleroma/html.ex | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/pleroma/html.ex b/lib/pleroma/html.ex index 2d2155b82..fdc5e7dab 100644 --- a/lib/pleroma/html.ex +++ b/lib/pleroma/html.ex @@ -63,7 +63,8 @@ defmodule Pleroma.HTML.Scrubber.TwitterText do @allow_inline_images Keyword.get(@markup, :allow_inline_images) if @allow_inline_images do - Meta.allow_tag_with_uri_attributes("img", ["src"], @valid_schemes) + # restrict img tags to http/https only, because of MediaProxy. + Meta.allow_tag_with_uri_attributes("img", ["src"], ["http", "https"]) Meta.allow_tag_with_these_attributes("img", [ "width", @@ -113,7 +114,8 @@ defmodule Pleroma.HTML.Scrubber.Default do @allow_inline_images Keyword.get(@markup, :allow_inline_images) if @allow_inline_images do - Meta.allow_tag_with_uri_attributes("img", ["src"], @valid_schemes) + # restrict img tags to http/https only, because of MediaProxy. + Meta.allow_tag_with_uri_attributes("img", ["src"], ["http", "https"]) Meta.allow_tag_with_these_attributes("img", [ "width",