From 46aa8c18a211034bc102cfffec61c9cc8c3cdf02 Mon Sep 17 00:00:00 2001 From: rinpatch Date: Fri, 8 Feb 2019 12:38:24 +0300 Subject: [PATCH 1/8] Add keyword policy --- docs/config.md | 5 ++ lib/pleroma/web/activity_pub/mrf/keyword_policy.ex | 74 ++++++++++++++++++++++ lib/pleroma/web/nodeinfo/nodeinfo_controller.ex | 28 ++++++++ 3 files changed, 107 insertions(+) create mode 100644 lib/pleroma/web/activity_pub/mrf/keyword_policy.ex diff --git a/docs/config.md b/docs/config.md index e042d216f..55d618925 100644 --- a/docs/config.md +++ b/docs/config.md @@ -151,6 +151,11 @@ This section is used to configure Pleroma-FE, unless ``:managed_config`` in ``:i * `delist_threshold`: Number of mentioned users after which the message gets delisted (the message can still be seen, but it will not show up in public timelines and mentioned users won't get notifications about it). Set to 0 to disable. * `reject_threshold`: Number of mentioned users after which the messaged gets rejected. Set to 0 to disable. +## :mrf_keyword +* `reject`: A list of patterns which result in message being rejected, each pattern can be a string or a [regular expression](https://hexdocs.pm/elixir/Regex.html) +* `ftl_removal`: A list of patterns which result in message being removed from federated timelines(a.k.a unlisted), each pattern can be a string or a [regular expression](https://hexdocs.pm/elixir/Regex.html) +* `replace`: A list of tuples containing `{pattern, replacement`, `pattern` can be a string or a [regular expression](https://hexdocs.pm/elixir/Regex.html) + ## :media_proxy * `enabled`: Enables proxying of remote media to the instance’s proxy * `base_url`: The base URL to access a user-uploaded file. Useful when you want to proxy the media files via another host/CDN fronts. diff --git a/lib/pleroma/web/activity_pub/mrf/keyword_policy.ex b/lib/pleroma/web/activity_pub/mrf/keyword_policy.ex new file mode 100644 index 000000000..6e2673e9c --- /dev/null +++ b/lib/pleroma/web/activity_pub/mrf/keyword_policy.ex @@ -0,0 +1,74 @@ +# Pleroma: A lightweight social networking server +# Copyright © 2017-2019 Pleroma Authors +# SPDX-License-Identifier: AGPL-3.0-only + +defmodule Pleroma.Web.ActivityPub.MRF.KeywordPolicy do + @behaviour Pleroma.Web.ActivityPub.MRF + defp string_matches?(string, pattern) when is_binary(pattern) do + String.contains?(string, pattern) + end + + defp string_matches?(string, pattern) do + String.match?(string, pattern) + end + + defp check_reject(%{"object" => %{"content" => content}} = message) do + if Enum.any?(Pleroma.Config.get([:mrf_keyword, :reject]), fn pattern -> + string_matches?(content, pattern) + end) do + {:reject, nil} + else + {:ok, message} + end + end + + defp check_ftl_removal(%{"to" => to, "object" => %{"content" => content}} = message) do + if "https://www.w3.org/ns/activitystreams#Public" in to and + Enum.any?(Pleroma.Config.get([:mrf_keyword, :ftl_removal]), fn pattern -> + string_matches?(content, pattern) + end) do + to = List.delete(to, "https://www.w3.org/ns/activitystreams#Public") + cc = ["https://www.w3.org/ns/activitystreams#Public" | [message["cc"] || []]] + + message = + message + |> Map.put("to", to) + |> Map.put("cc", cc) + + IO.inspect(message) + {:ok, message} + else + {:ok, message} + end + end + + defp check_replace(%{"object" => %{"content" => content}} = message) do + content = + Enum.reduce(Pleroma.Config.get([:mrf_keyword, :replace]), content, fn {pattern, replacement}, + acc -> + String.replace(acc, pattern, replacement) + end) + + {:ok, put_in(message["object"]["content"], content)} + end + + @impl true + def filter(%{"object" => %{"content" => nil}} = message) do + {:ok, message} + end + + @impl true + def filter(%{"type" => "Create", "object" => %{"content" => _content}} = message) do + with {:ok, message} <- check_reject(message), + {:ok, message} <- check_ftl_removal(message), + {:ok, message} <- check_replace(message) do + {:ok, message} + else + _e -> + {:reject, nil} + end + end + + @impl true + def filter(message), do: {:ok, message} +end diff --git a/lib/pleroma/web/nodeinfo/nodeinfo_controller.ex b/lib/pleroma/web/nodeinfo/nodeinfo_controller.ex index 21694a5ee..7c24d4761 100644 --- a/lib/pleroma/web/nodeinfo/nodeinfo_controller.ex +++ b/lib/pleroma/web/nodeinfo/nodeinfo_controller.ex @@ -44,6 +44,33 @@ defmodule Pleroma.Web.Nodeinfo.NodeinfoController do Application.get_env(:pleroma, :mrf_simple) |> Enum.into(%{}) + # This horror is needed to convert regex sigils to strings + mrf_keyword = + Application.get_env(:pleroma, :mrf_keyword) + |> Enum.map(fn {key, value} -> + {key, + Enum.map(value, fn + {pattern, replacement} -> + %{ + "pattern" => + if not is_binary(pattern) do + inspect(pattern) + else + pattern + end, + "replacement" => replacement + } + + pattern -> + if not is_binary(pattern) do + inspect(pattern) + else + pattern + end + end)} + end) + |> Enum.into(%{}) + mrf_policies = MRF.get_policies() |> Enum.map(fn policy -> to_string(policy) |> String.split(".") |> List.last() end) @@ -73,6 +100,7 @@ defmodule Pleroma.Web.Nodeinfo.NodeinfoController do %{ mrf_policies: mrf_policies, mrf_simple: mrf_simple, + mrf_keyword: mrf_keyword, mrf_user_allowlist: mrf_user_allowlist, quarantined_instances: quarantined } From 2174f6eb4f2b9b970e9823fe0846643fb274f009 Mon Sep 17 00:00:00 2001 From: rinpatch Date: Fri, 8 Feb 2019 12:48:39 +0300 Subject: [PATCH 2/8] Add default config for keyword policy --- config/config.exs | 5 +++++ lib/pleroma/web/nodeinfo/nodeinfo_controller.ex | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/config/config.exs b/config/config.exs index 8b42a5351..223ab0c52 100644 --- a/config/config.exs +++ b/config/config.exs @@ -238,6 +238,11 @@ config :pleroma, :mrf_simple, reject: [], accept: [] +config :pleroma, :mrf_keyword, + reject: [], + ftl_removal: [], + replace: [] + config :pleroma, :rich_media, enabled: true config :pleroma, :media_proxy, diff --git a/lib/pleroma/web/nodeinfo/nodeinfo_controller.ex b/lib/pleroma/web/nodeinfo/nodeinfo_controller.ex index 7c24d4761..8c7df5b90 100644 --- a/lib/pleroma/web/nodeinfo/nodeinfo_controller.ex +++ b/lib/pleroma/web/nodeinfo/nodeinfo_controller.ex @@ -46,7 +46,7 @@ defmodule Pleroma.Web.Nodeinfo.NodeinfoController do # This horror is needed to convert regex sigils to strings mrf_keyword = - Application.get_env(:pleroma, :mrf_keyword) + Application.get_env(:pleroma, :mrf_keyword, []) |> Enum.map(fn {key, value} -> {key, Enum.map(value, fn From 8a0b755c19ef9c896f69de2b1bf22418a3aedf6f Mon Sep 17 00:00:00 2001 From: rinpatch Date: Fri, 8 Feb 2019 13:12:09 +0300 Subject: [PATCH 3/8] rename ftl_removal to federated_timeline_removal to keep consistent naming with SimplePolicy --- config/config.exs | 2 +- docs/config.md | 2 +- lib/pleroma/web/activity_pub/mrf/keyword_policy.ex | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/config/config.exs b/config/config.exs index 223ab0c52..33bb4b015 100644 --- a/config/config.exs +++ b/config/config.exs @@ -240,7 +240,7 @@ config :pleroma, :mrf_simple, config :pleroma, :mrf_keyword, reject: [], - ftl_removal: [], + federated_timeline_removal: [], replace: [] config :pleroma, :rich_media, enabled: true diff --git a/docs/config.md b/docs/config.md index 55d618925..c0eb4ceb4 100644 --- a/docs/config.md +++ b/docs/config.md @@ -153,7 +153,7 @@ This section is used to configure Pleroma-FE, unless ``:managed_config`` in ``:i ## :mrf_keyword * `reject`: A list of patterns which result in message being rejected, each pattern can be a string or a [regular expression](https://hexdocs.pm/elixir/Regex.html) -* `ftl_removal`: A list of patterns which result in message being removed from federated timelines(a.k.a unlisted), each pattern can be a string or a [regular expression](https://hexdocs.pm/elixir/Regex.html) +* `federated_timeline_removal`: A list of patterns which result in message being removed from federated timelines(a.k.a unlisted), each pattern can be a string or a [regular expression](https://hexdocs.pm/elixir/Regex.html) * `replace`: A list of tuples containing `{pattern, replacement`, `pattern` can be a string or a [regular expression](https://hexdocs.pm/elixir/Regex.html) ## :media_proxy diff --git a/lib/pleroma/web/activity_pub/mrf/keyword_policy.ex b/lib/pleroma/web/activity_pub/mrf/keyword_policy.ex index 6e2673e9c..073b86372 100644 --- a/lib/pleroma/web/activity_pub/mrf/keyword_policy.ex +++ b/lib/pleroma/web/activity_pub/mrf/keyword_policy.ex @@ -24,7 +24,7 @@ defmodule Pleroma.Web.ActivityPub.MRF.KeywordPolicy do defp check_ftl_removal(%{"to" => to, "object" => %{"content" => content}} = message) do if "https://www.w3.org/ns/activitystreams#Public" in to and - Enum.any?(Pleroma.Config.get([:mrf_keyword, :ftl_removal]), fn pattern -> + Enum.any?(Pleroma.Config.get([:mrf_keyword, :federated_timeline_removal]), fn pattern -> string_matches?(content, pattern) end) do to = List.delete(to, "https://www.w3.org/ns/activitystreams#Public") From f88dec8b33f96de64c3a9ee212ea265a87608b3b Mon Sep 17 00:00:00 2001 From: rinpatch Date: Fri, 8 Feb 2019 13:16:50 +0300 Subject: [PATCH 4/8] What idiot did that? (me) --- lib/pleroma/web/activity_pub/mrf/keyword_policy.ex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/pleroma/web/activity_pub/mrf/keyword_policy.ex b/lib/pleroma/web/activity_pub/mrf/keyword_policy.ex index 073b86372..8f49d8bfb 100644 --- a/lib/pleroma/web/activity_pub/mrf/keyword_policy.ex +++ b/lib/pleroma/web/activity_pub/mrf/keyword_policy.ex @@ -28,7 +28,7 @@ defmodule Pleroma.Web.ActivityPub.MRF.KeywordPolicy do string_matches?(content, pattern) end) do to = List.delete(to, "https://www.w3.org/ns/activitystreams#Public") - cc = ["https://www.w3.org/ns/activitystreams#Public" | [message["cc"] || []]] + cc = ["https://www.w3.org/ns/activitystreams#Public" | message["cc"] || []] message = message From 73566592734320bce1262ff7e38199bdcdd2c7c9 Mon Sep 17 00:00:00 2001 From: rinpatch Date: Fri, 8 Feb 2019 15:12:13 +0300 Subject: [PATCH 5/8] wow --- lib/pleroma/web/activity_pub/mrf/keyword_policy.ex | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/pleroma/web/activity_pub/mrf/keyword_policy.ex b/lib/pleroma/web/activity_pub/mrf/keyword_policy.ex index 8f49d8bfb..ce6d2e529 100644 --- a/lib/pleroma/web/activity_pub/mrf/keyword_policy.ex +++ b/lib/pleroma/web/activity_pub/mrf/keyword_policy.ex @@ -35,7 +35,6 @@ defmodule Pleroma.Web.ActivityPub.MRF.KeywordPolicy do |> Map.put("to", to) |> Map.put("cc", cc) - IO.inspect(message) {:ok, message} else {:ok, message} From 38ff9b3568c637bd69a4bdadbdeb147d7871b09a Mon Sep 17 00:00:00 2001 From: rinpatch Date: Fri, 8 Feb 2019 15:12:44 +0300 Subject: [PATCH 6/8] fix typo in config.md --- docs/config.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/config.md b/docs/config.md index 08523ac53..89393449c 100644 --- a/docs/config.md +++ b/docs/config.md @@ -173,7 +173,7 @@ This section is used to configure Pleroma-FE, unless ``:managed_config`` in ``:i ## :mrf_keyword * `reject`: A list of patterns which result in message being rejected, each pattern can be a string or a [regular expression](https://hexdocs.pm/elixir/Regex.html) -* `federated_timeline_removal`: A list of patterns which result in message being removed from federated timelines(a.k.a unlisted), each pattern can be a string or a [regular expression](https://hexdocs.pm/elixir/Regex.html) +* `federated_timeline_removal`: A list of patterns which result in message being removed from federated timelines (a.k.a unlisted), each pattern can be a string or a [regular expression](https://hexdocs.pm/elixir/Regex.html) * `replace`: A list of tuples containing `{pattern, replacement`, `pattern` can be a string or a [regular expression](https://hexdocs.pm/elixir/Regex.html) ## :media_proxy From 9a23f8f3ea9e788978055f0ad3a10db105f68cc6 Mon Sep 17 00:00:00 2001 From: rinpatch Date: Fri, 8 Feb 2019 20:23:26 +0300 Subject: [PATCH 7/8] Add tests and fix a typo in docs --- docs/config.md | 2 +- test/web/activity_pub/mrf/keyword_policy_test.exs | 113 ++++++++++++++++++++++ 2 files changed, 114 insertions(+), 1 deletion(-) create mode 100644 test/web/activity_pub/mrf/keyword_policy_test.exs diff --git a/docs/config.md b/docs/config.md index 89393449c..74badd0da 100644 --- a/docs/config.md +++ b/docs/config.md @@ -174,7 +174,7 @@ This section is used to configure Pleroma-FE, unless ``:managed_config`` in ``:i ## :mrf_keyword * `reject`: A list of patterns which result in message being rejected, each pattern can be a string or a [regular expression](https://hexdocs.pm/elixir/Regex.html) * `federated_timeline_removal`: A list of patterns which result in message being removed from federated timelines (a.k.a unlisted), each pattern can be a string or a [regular expression](https://hexdocs.pm/elixir/Regex.html) -* `replace`: A list of tuples containing `{pattern, replacement`, `pattern` can be a string or a [regular expression](https://hexdocs.pm/elixir/Regex.html) +* `replace`: A list of tuples containing `{pattern, replacement}`, `pattern` can be a string or a [regular expression](https://hexdocs.pm/elixir/Regex.html) ## :media_proxy * `enabled`: Enables proxying of remote media to the instance’s proxy diff --git a/test/web/activity_pub/mrf/keyword_policy_test.exs b/test/web/activity_pub/mrf/keyword_policy_test.exs new file mode 100644 index 000000000..77e2a08c9 --- /dev/null +++ b/test/web/activity_pub/mrf/keyword_policy_test.exs @@ -0,0 +1,113 @@ +# Pleroma: A lightweight social networking server +# Copyright © 2019 Pleroma Authors +# SPDX-License-Identifier: AGPL-3.0-only + +defmodule Pleroma.Web.ActivityPub.MRF.KeywordPolicyTest do + use Pleroma.DataCase + + alias Pleroma.Web.ActivityPub.MRF.KeywordPolicy + + setup do + Pleroma.Config.put([:mrf_keyword], %{reject: [], federated_timeline_removal: [], replace: []}) + end + + describe "rejecting based on keywords" do + test "rejects if string matches" do + Pleroma.Config.put([:mrf_keyword, :reject], ["pun"]) + + message = %{ + "type" => "Create", + "object" => %{"content" => "just a daily reminder that compLAINer is a good pun"} + } + + assert {:reject, nil} == KeywordPolicy.filter(message) + end + + test "rejects if regex matches" do + Pleroma.Config.put([:mrf_keyword, :reject], [~r/comp[lL][aA][iI][nN]er/]) + + assert true == + Enum.all?(["complainer", "compLainer", "compLAiNer", "compLAINer"], fn content -> + message = %{ + "type" => "Create", + "object" => %{ + "content" => "just a daily reminder that #{content} is a good pun" + } + } + + {:reject, nil} == KeywordPolicy.filter(message) + end) + end + end + + describe "delisting from ftl based on keywords" do + test "delists if string matches" do + Pleroma.Config.put([:mrf_keyword, :federated_timeline_removal], ["pun"]) + + message = %{ + "to" => ["https://www.w3.org/ns/activitystreams#Public"], + "type" => "Create", + "object" => %{"content" => "just a daily reminder that compLAINer is a good pun"} + } + + {:ok, result} = KeywordPolicy.filter(message) + assert ["https://www.w3.org/ns/activitystreams#Public"] == result["cc"] + refute ["https://www.w3.org/ns/activitystreams#Public"] == result["to"] + end + + test "delists if regex matches" do + Pleroma.Config.put([:mrf_keyword, :federated_timeline_removal], [~r/comp[lL][aA][iI][nN]er/]) + + assert true == + Enum.all?(["complainer", "compLainer", "compLAiNer", "compLAINer"], fn content -> + message = %{ + "type" => "Create", + "to" => ["https://www.w3.org/ns/activitystreams#Public"], + "object" => %{ + "content" => "just a daily reminder that #{content} is a good pun" + } + } + + {:ok, result} = KeywordPolicy.filter(message) + + ["https://www.w3.org/ns/activitystreams#Public"] == result["cc"] and + not (["https://www.w3.org/ns/activitystreams#Public"] == result["to"]) + end) + end + end + + describe "replacing keywords" do + test "replaces keyword if string matches" do + Pleroma.Config.put([:mrf_keyword, :replace], [{"opensource", "free software"}]) + + message = %{ + "type" => "Create", + "to" => ["https://www.w3.org/ns/activitystreams#Public"], + "object" => %{"content" => "ZFS is opensource"} + } + + {:ok, %{"object" => %{"content" => result}}} = KeywordPolicy.filter(message) + assert result == "ZFS is free software" + end + + test "replaces keyword if regex matches" do + Pleroma.Config.put([:mrf_keyword, :replace], [ + {~r/open(-|\s)?source\s?(software)?/, "free software"} + ]) + + assert true == + Enum.all?(["opensource", "open-source", "open source"], fn content -> + message = %{ + "type" => "Create", + "to" => ["https://www.w3.org/ns/activitystreams#Public"], + "object" => %{"content" => "ZFS is #{content}"} + } + + {:ok, %{"object" => %{"content" => result}}} = KeywordPolicy.filter(message) + IO.inspect(content) + IO.inspect(result) + result == "ZFS is free software" + end) + end + end +end From b05a3411871fbfb8d1a5525d6875a635917abffa Mon Sep 17 00:00:00 2001 From: rinpatch Date: Sat, 9 Feb 2019 08:12:30 +0300 Subject: [PATCH 8/8] oof --- test/web/activity_pub/mrf/keyword_policy_test.exs | 2 -- 1 file changed, 2 deletions(-) diff --git a/test/web/activity_pub/mrf/keyword_policy_test.exs b/test/web/activity_pub/mrf/keyword_policy_test.exs index 77e2a08c9..67a5858d7 100644 --- a/test/web/activity_pub/mrf/keyword_policy_test.exs +++ b/test/web/activity_pub/mrf/keyword_policy_test.exs @@ -104,8 +104,6 @@ defmodule Pleroma.Web.ActivityPub.MRF.KeywordPolicyTest do } {:ok, %{"object" => %{"content" => result}}} = KeywordPolicy.filter(message) - IO.inspect(content) - IO.inspect(result) result == "ZFS is free software" end) end