From cbb19d0e1882f5ce641f30b51d7156336f81aba9 Mon Sep 17 00:00:00 2001 From: Ivan Tashkinov Date: Sat, 26 Dec 2020 22:20:55 +0300 Subject: [PATCH] [#3213] Hashtag-filtering functions in ActivityPub. Mix task for migrating hashtags to `hashtags` table. --- lib/mix/tasks/pleroma/database.ex | 64 ++++++++ lib/pleroma/web/activity_pub/activity_pub.ex | 171 +++++++++++++++------ .../pleroma/web/activity_pub/activity_pub_test.exs | 48 +++--- 3 files changed, 218 insertions(+), 65 deletions(-) diff --git a/lib/mix/tasks/pleroma/database.ex b/lib/mix/tasks/pleroma/database.ex index 22151ce08..093c7dd30 100644 --- a/lib/mix/tasks/pleroma/database.ex +++ b/lib/mix/tasks/pleroma/database.ex @@ -4,14 +4,18 @@ defmodule Mix.Tasks.Pleroma.Database do alias Pleroma.Conversation + alias Pleroma.Hashtag alias Pleroma.Maintenance alias Pleroma.Object alias Pleroma.Repo alias Pleroma.User + require Logger require Pleroma.Constants + import Ecto.Query import Mix.Pleroma + use Mix.Task @shortdoc "A collection of database related tasks" @@ -128,6 +132,66 @@ defmodule Mix.Tasks.Pleroma.Database do |> Stream.run() end + def run(["transfer_hashtags"]) do + import Ecto.Query + + start_pleroma() + + from( + object in Object, + left_join: hashtag in assoc(object, :hashtags), + where: is_nil(hashtag.id), + where: fragment("(?)->>'tag' != '[]'", object.data), + select: %{ + id: object.id, + inserted_at: object.inserted_at, + tag: fragment("(?)->>'tag'", object.data) + }, + order_by: [desc: object.id] + ) + |> Pleroma.Repo.chunk_stream(100, :batches) + |> Stream.each(fn objects -> + chunk_start = List.first(objects) + chunk_end = List.last(objects) + + Logger.info( + "transfer_hashtags: " <> + "#{chunk_start.id} (#{chunk_start.inserted_at}) -- " <> + "#{chunk_end.id} (#{chunk_end.inserted_at})" + ) + + Enum.map( + objects, + fn object -> + hashtags = + object.tag + |> Jason.decode!() + |> Enum.filter(&is_bitstring(&1)) + + with {:ok, hashtag_records} <- Hashtag.get_or_create_by_names(hashtags) do + Repo.transaction(fn -> + for hashtag_record <- hashtag_records do + with {:error, _} <- + Ecto.Adapters.SQL.query( + Repo, + "insert into hashtags_objects(hashtag_id, object_id) values " <> + "(#{hashtag_record.id}, #{object.id});" + ) do + Logger.warn( + "ERROR: could not link object #{object.id} and hashtag #{hashtag_record.id}" + ) + end + end + end) + else + e -> Logger.warn("ERROR: could not process object #{object.id}: #{inspect(e)}") + end + end + ) + end) + |> Stream.run() + end + def run(["vacuum", args]) do start_pleroma() diff --git a/lib/pleroma/web/activity_pub/activity_pub.ex b/lib/pleroma/web/activity_pub/activity_pub.ex index 1c91bc074..2e25412c6 100644 --- a/lib/pleroma/web/activity_pub/activity_pub.ex +++ b/lib/pleroma/web/activity_pub/activity_pub.ex @@ -660,33 +660,41 @@ defmodule Pleroma.Web.ActivityPub.ActivityPub do defp restrict_since(query, _), do: query defp restrict_tag_reject(_query, %{tag_reject: _tag_reject, skip_preload: true}) do - raise "Can't use the child object without preloading!" + raise_on_missing_preload() end - defp restrict_tag_reject(query, %{tag_reject: [_ | _] = tag_reject}) do + defp restrict_tag_reject(query, %{tag_reject: tag_reject}) when is_list(tag_reject) do from( [_activity, object] in query, where: fragment("not (?)->'tag' \\?| (?)", object.data, ^tag_reject) ) end + defp restrict_tag_reject(query, %{tag_reject: tag_reject}) when is_binary(tag_reject) do + restrict_tag_reject(query, %{tag_reject: [tag_reject]}) + end + defp restrict_tag_reject(query, _), do: query defp restrict_tag_all(_query, %{tag_all: _tag_all, skip_preload: true}) do - raise "Can't use the child object without preloading!" + raise_on_missing_preload() end - defp restrict_tag_all(query, %{tag_all: [_ | _] = tag_all}) do + defp restrict_tag_all(query, %{tag_all: tag_all}) when is_list(tag_all) do from( [_activity, object] in query, where: fragment("(?)->'tag' \\?& (?)", object.data, ^tag_all) ) end + defp restrict_tag_all(query, %{tag_all: tag}) when is_binary(tag) do + restrict_tag(query, %{tag: tag}) + end + defp restrict_tag_all(query, _), do: query defp restrict_tag(_query, %{tag: _tag, skip_preload: true}) do - raise "Can't use the child object without preloading!" + raise_on_missing_preload() end defp restrict_tag(query, %{tag: tag}) when is_list(tag) do @@ -697,14 +705,80 @@ defmodule Pleroma.Web.ActivityPub.ActivityPub do end defp restrict_tag(query, %{tag: tag}) when is_binary(tag) do - from( - [_activity, object] in query, - where: fragment("(?)->'tag' \\? (?)", object.data, ^tag) - ) + restrict_tag(query, %{tag: [tag]}) end defp restrict_tag(query, _), do: query + defp restrict_hashtag_reject_any(_query, %{tag_reject: _tag_reject, skip_preload: true}) do + raise_on_missing_preload() + end + + defp restrict_hashtag_reject_any(query, %{tag_reject: tags_reject}) when is_list(tags_reject) do + if has_named_binding?(query, :thread_mute) do + from( + [activity, object, thread_mute] in query, + group_by: [activity.id, object.id, thread_mute.id] + ) + else + from( + [activity, object] in query, + group_by: [activity.id, object.id] + ) + end + |> join(:left, [_activity, object], hashtag in assoc(object, :hashtags), as: :hashtag) + |> having( + [hashtag: hashtag], + fragment("not(array_agg(?) && (?))", hashtag.name, ^tags_reject) + ) + end + + defp restrict_hashtag_reject_any(query, %{tag_reject: tag_reject}) when is_binary(tag_reject) do + restrict_hashtag_reject_any(query, %{tag_reject: [tag_reject]}) + end + + defp restrict_hashtag_reject_any(query, _), do: query + + defp restrict_hashtag_all(_query, %{tag_all: _tag, skip_preload: true}) do + raise_on_missing_preload() + end + + defp restrict_hashtag_all(query, %{tag_all: tags}) when is_list(tags) do + Enum.reduce( + tags, + query, + fn tag, acc -> restrict_hashtag_any(acc, %{tag: tag}) end + ) + end + + defp restrict_hashtag_all(query, %{tag_all: tag}) when is_binary(tag) do + restrict_hashtag_any(query, %{tag: tag}) + end + + defp restrict_hashtag_all(query, _), do: query + + defp restrict_hashtag_any(_query, %{tag: _tag, skip_preload: true}) do + raise_on_missing_preload() + end + + defp restrict_hashtag_any(query, %{tag: tags}) when is_list(tags) do + from( + [_activity, object] in query, + join: hashtag in assoc(object, :hashtags), + where: hashtag.name in ^tags + ) + end + + defp restrict_hashtag_any(query, %{tag: tag}) when is_binary(tag) do + restrict_hashtag_any(query, %{tag: [tag]}) + end + + defp restrict_hashtag_any(query, _), do: query + + defp raise_on_missing_preload do + raise "Can't use the child object without preloading!" + end + defp restrict_recipients(query, [], _user), do: query defp restrict_recipients(query, recipients, nil) do @@ -1088,40 +1162,51 @@ defmodule Pleroma.Web.ActivityPub.ActivityPub do skip_thread_containment: Config.get([:instance, :skip_thread_containment]) } - Activity - |> maybe_preload_objects(opts) - |> maybe_preload_bookmarks(opts) - |> maybe_preload_report_notes(opts) - |> maybe_set_thread_muted_field(opts) - |> maybe_order(opts) - |> restrict_recipients(recipients, opts[:user]) - |> restrict_replies(opts) - |> restrict_tag(opts) - |> restrict_tag_reject(opts) - |> restrict_tag_all(opts) - |> restrict_since(opts) - |> restrict_local(opts) - |> restrict_actor(opts) - |> restrict_type(opts) - |> restrict_state(opts) - |> restrict_favorited_by(opts) - |> restrict_blocked(restrict_blocked_opts) - |> restrict_muted(restrict_muted_opts) - |> restrict_filtered(opts) - |> restrict_media(opts) - |> restrict_visibility(opts) - |> restrict_thread_visibility(opts, config) - |> restrict_reblogs(opts) - |> restrict_pinned(opts) - |> restrict_muted_reblogs(restrict_muted_reblogs_opts) - |> restrict_instance(opts) - |> restrict_announce_object_actor(opts) - |> restrict_filtered(opts) - |> Activity.restrict_deactivated_users() - |> exclude_poll_votes(opts) - |> exclude_chat_messages(opts) - |> exclude_invisible_actors(opts) - |> exclude_visibility(opts) + query = + Activity + |> distinct([a], true) + |> maybe_preload_objects(opts) + |> maybe_preload_bookmarks(opts) + |> maybe_preload_report_notes(opts) + |> maybe_set_thread_muted_field(opts) + |> maybe_order(opts) + |> restrict_recipients(recipients, opts[:user]) + |> restrict_replies(opts) + |> restrict_since(opts) + |> restrict_local(opts) + |> restrict_actor(opts) + |> restrict_type(opts) + |> restrict_state(opts) + |> restrict_favorited_by(opts) + |> restrict_blocked(restrict_blocked_opts) + |> restrict_muted(restrict_muted_opts) + |> restrict_filtered(opts) + |> restrict_media(opts) + |> restrict_visibility(opts) + |> restrict_thread_visibility(opts, config) + |> restrict_reblogs(opts) + |> restrict_pinned(opts) + |> restrict_muted_reblogs(restrict_muted_reblogs_opts) + |> restrict_instance(opts) + |> restrict_announce_object_actor(opts) + |> restrict_filtered(opts) + |> Activity.restrict_deactivated_users() + |> exclude_poll_votes(opts) + |> exclude_chat_messages(opts) + |> exclude_invisible_actors(opts) + |> exclude_visibility(opts) + + if Config.get([:instance, :improved_hashtag_timeline]) do + query + |> restrict_hashtag_any(opts) + |> restrict_hashtag_all(opts) + |> restrict_hashtag_reject_any(opts) + else + query + |> restrict_tag(opts) + |> restrict_tag_reject(opts) + |> restrict_tag_all(opts) + end end def fetch_activities(recipients, opts \\ %{}, pagination \\ :keyset) do diff --git a/test/pleroma/web/activity_pub/activity_pub_test.exs b/test/pleroma/web/activity_pub/activity_pub_test.exs index bfec32042..573b26d66 100644 --- a/test/pleroma/web/activity_pub/activity_pub_test.exs +++ b/test/pleroma/web/activity_pub/activity_pub_test.exs @@ -199,33 +199,37 @@ defmodule Pleroma.Web.ActivityPub.ActivityPubTest do {:ok, status_two} = CommonAPI.post(user, %{status: ". #essais"}) {:ok, status_three} = CommonAPI.post(user, %{status: ". #test #reject"}) - fetch_one = ActivityPub.fetch_activities([], %{type: "Create", tag: "test"}) + for new_timeline_enabled <- [true, false] do + clear_config([:instance, :improved_hashtag_timeline], new_timeline_enabled) - fetch_two = ActivityPub.fetch_activities([], %{type: "Create", tag: ["test", "essais"]}) + fetch_one = ActivityPub.fetch_activities([], %{type: "Create", tag: "test"}) - fetch_three = - ActivityPub.fetch_activities([], %{ - type: "Create", - tag: ["test", "essais"], - tag_reject: ["reject"] - }) + fetch_two = ActivityPub.fetch_activities([], %{type: "Create", tag: ["test", "essais"]}) - fetch_four = - ActivityPub.fetch_activities([], %{ - type: "Create", - tag: ["test"], - tag_all: ["test", "reject"] - }) + fetch_three = + ActivityPub.fetch_activities([], %{ + type: "Create", + tag: ["test", "essais"], + tag_reject: ["reject"] + }) - [fetch_one, fetch_two, fetch_three, fetch_four] = - Enum.map([fetch_one, fetch_two, fetch_three, fetch_four], fn statuses -> - Enum.map(statuses, fn s -> Repo.preload(s, object: :hashtags) end) - end) + fetch_four = + ActivityPub.fetch_activities([], %{ + type: "Create", + tag: ["test"], + tag_all: ["test", "reject"] + }) - assert fetch_one == [status_one, status_three] - assert fetch_two == [status_one, status_two, status_three] - assert fetch_three == [status_one, status_two] - assert fetch_four == [status_three] + [fetch_one, fetch_two, fetch_three, fetch_four] = + Enum.map([fetch_one, fetch_two, fetch_three, fetch_four], fn statuses -> + Enum.map(statuses, fn s -> Repo.preload(s, object: :hashtags) end) + end) + + assert fetch_one == [status_one, status_three] + assert fetch_two == [status_one, status_two, status_three] + assert fetch_three == [status_one, status_two] + assert fetch_four == [status_three] + end end describe "insertion" do