Different search backends, in particular meilisearch as an additional one. See merge request pleroma/pleroma!3529 (merge-requests/3529/merge)
@@ -847,9 +847,14 @@ config :pleroma, Pleroma.User.Backup, | |||
config :pleroma, ConcurrentLimiter, [ | |||
{Pleroma.Web.RichMedia.Helpers, [max_running: 5, max_waiting: 5]}, | |||
{Pleroma.Web.ActivityPub.MRF.MediaProxyWarmingPolicy, [max_running: 5, max_waiting: 5]} | |||
{Pleroma.Web.ActivityPub.MRF.MediaProxyWarmingPolicy, [max_running: 5, max_waiting: 5]}, | |||
{Pleroma.Search, [max_running: 30, max_waiting: 50]} | |||
] | |||
# Search backend selection: Pleroma.Search reads `:module` at call time and dispatches
# add_to_index/1 and remove_from_index/1 to the module configured here.
config :pleroma, Pleroma.Search, module: Pleroma.Activity.Search | |||
# Connection settings read by Pleroma.Search.Meilisearch: `:url` is the base address requests
# are sent to; when `:private_key` is nil no X-Meili-API-Key header is added.
config :pleroma, Pleroma.Search.Meilisearch, url: "http://127.0.0.1:7700/", private_key: nil | |||
# Import environment specific config. This must remain at the bottom | |||
# of this file so it overrides the configuration defined above. | |||
import_config "#{Mix.env()}.exs" |
@@ -133,6 +133,10 @@ config :pleroma, :side_effects, | |||
ap_streamer: Pleroma.Web.ActivityPub.ActivityPubMock, | |||
logger: Pleroma.LoggerMock | |||
# Search backend used by default under test; Pleroma.Search resolves this module at call time.
config :pleroma, Pleroma.Search, module: Pleroma.Activity | |||
# Base address and (absent) private key read by Pleroma.Search.Meilisearch when it builds requests.
config :pleroma, Pleroma.Search.Meilisearch, url: "http://127.0.0.1:7700/", private_key: nil | |||
# Reduce recompilation time | |||
# https://dashbit.co/blog/speeding-up-re-compilation-of-elixir-projects | |||
config :phoenix, :plug_init_mode, :runtime | |||
@@ -0,0 +1,108 @@ | |||
# Configuring search | |||
{! backend/administration/CLI_tasks/general_cli_task_info.include !} | |||
## Built-in search | |||
To use built-in search that has no external dependencies, set the search module to `Pleroma.Activity`: | |||
> config :pleroma, Pleroma.Search, module: Pleroma.Activity | |||
While it has no external dependencies, it has problems with performance and relevancy. | |||
## Meilisearch | |||
Note that it's quite a bit more memory hungry than PostgreSQL (around 4-5G for ~1.2 million | |||
posts while idle and up to 7G while indexing initially). The disk usage for this additional index is also | |||
around 4 gigabytes. Like [RUM](./cheatsheet.md#rum-indexing-for-full-text-search) indexes, it offers considerably | |||
higher performance and ordering by timestamp in a reasonable amount of time. | |||
Additionally, the search results seem to be more accurate. | |||
Due to high memory usage, it may be best to set it up on a different machine, if running pleroma on a low-resource | |||
computer, and use private key authentication to secure the remote search instance. | |||
To use [meilisearch](https://www.meilisearch.com/), set the search module to `Pleroma.Search.Meilisearch`: | |||
> config :pleroma, Pleroma.Search, module: Pleroma.Search.Meilisearch | |||
You then need to set the address of the meilisearch instance, and optionally the private key for authentication. | |||
> config :pleroma, Pleroma.Search.Meilisearch, | |||
> url: "http://127.0.0.1:7700/", | |||
> private_key: "private key" | |||
Information about setting up meilisearch can be found in the | |||
[official documentation](https://docs.meilisearch.com/learn/getting_started/installation.html). | |||
You probably want to start it with the `MEILI_NO_ANALYTICS=true` and `MEILI_NO_SENTRY=true` environment variables, | |||
to disable analytics and crash reporting. | |||
### Private key authentication (optional) | |||
To set the private key, use the `MEILI_MASTER_KEY` environment variable when starting. After setting the _master key_, | |||
you have to get the _private key_, which is actually used for authentication. | |||
=== "OTP" | |||
```sh | |||
./bin/pleroma_ctl search.meilisearch show-private-key <your master key here> | |||
``` | |||
=== "From Source" | |||
```sh | |||
mix pleroma.search.meilisearch show-private-key <your master key here> | |||
``` | |||
This is the key you actually put into your configuration file. | |||
### Initial indexing | |||
After setting up the configuration, you'll want to index all of your already existing posts. Only public posts are indexed. You'll only | |||
have to do it one time, but it might take a while, depending on the amount of posts your instance has seen. This is also a fairly RAM | |||
consuming process for `meilisearch`, and it will take a lot of RAM when running if you have a lot of posts (seems to be around 5G for ~1.2 | |||
million posts while idle and up to 7G while indexing initially, but your experience may be different). | |||
To start the initial indexing, run the `index` command: | |||
=== "OTP" | |||
```sh | |||
./bin/pleroma_ctl search.meilisearch index | |||
``` | |||
=== "From Source" | |||
```sh | |||
mix pleroma.search.meilisearch index | |||
``` | |||
This will show you the total amount of posts to index, and then show you the amount of posts indexed currently, until the numbers eventually | |||
become the same. The posts are indexed in big batches and meilisearch will take some time to actually index them, even after you have | |||
inserted all the posts into it. Depending on the amount of posts, this may be as long as several hours. To get information about the status | |||
of indexing and how many posts have actually been indexed, use the `stats` command: | |||
=== "OTP" | |||
```sh | |||
./bin/pleroma_ctl search.meilisearch stats | |||
``` | |||
=== "From Source" | |||
```sh | |||
mix pleroma.search.meilisearch stats | |||
``` | |||
### Clearing the index | |||
In case you need to clear the index (for example, to re-index from scratch, if that needs to happen for some reason), you can | |||
use the `clear` command: | |||
=== "OTP" | |||
```sh | |||
./bin/pleroma_ctl search.meilisearch clear | |||
``` | |||
=== "From Source" | |||
```sh | |||
mix pleroma.search.meilisearch clear | |||
``` | |||
This will clear **all** the posts from the search index. Note that deleted posts are also removed from the index by the instance itself, so | |||
there is no need to actually clear the whole index, unless you want **all** of it gone. That said, the index does not hold any information | |||
that cannot be re-created from the database, and it should also generally be a lot smaller than the size of your database. Still, the size | |||
depends on the amount of text in posts. |
@@ -0,0 +1,122 @@ | |||
# Pleroma: A lightweight social networking server | |||
# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/> | |||
# SPDX-License-Identifier: AGPL-3.0-only | |||
defmodule Mix.Tasks.Pleroma.Search.Meilisearch do
  @moduledoc """
  Maintenance commands for the Meilisearch `objects` index used by `Pleroma.Search.Meilisearch`.

  Recognised argument lists:

    * `["index"]` - pushes the ranking and searchable-attribute settings, then submits every
      public, non-empty `Note` object to the index in batches
    * `["clear"]` - deletes all documents from the index
    * `["show-private-key", master_key]` - prints the private key reported for the master key
    * `["stats"]` - prints the document count and whether Meilisearch is still indexing
  """

  require Logger
  require Pleroma.Constants

  import Mix.Pleroma
  import Ecto.Query

  import Pleroma.Search.Meilisearch, only: [meili_post!: 2, meili_delete!: 1, meili_get!: 1]

  # Number of documents sent to Meilisearch per request while indexing.
  @chunk_size 10_000

  def run(["index"]) do
    start_pleroma()

    # Newest posts first, then the textual relevancy rules.
    meili_post!(
      "/indexes/objects/settings/ranking-rules",
      [
        "desc(published)",
        "words",
        "exactness",
        "proximity",
        "wordsPosition",
        "typo",
        "attribute"
      ]
    )

    # Only the `content` field is declared searchable.
    meili_post!(
      "/indexes/objects/settings/searchable-attributes",
      [
        "content"
      ]
    )

    # The stream is consumed inside a transaction, with timeouts lifted because a full
    # pass over the objects table can take a long time.
    Pleroma.Repo.transaction(
      fn ->
        query =
          from(Pleroma.Object,
            # Only index public posts which are notes and have some text
            where:
              fragment("data->>'type' = 'Note'") and
                fragment("LENGTH(data->>'content') > 0") and
                fragment("data->'to' \\? ?", ^Pleroma.Constants.as_public()),
            order_by: [desc: fragment("data->'published'")]
          )

        count = Pleroma.Repo.aggregate(query, :count, :data)
        IO.puts("Entries to index: #{count}")

        query
        |> Pleroma.Repo.stream(timeout: :infinity)
        |> Stream.map(&Pleroma.Search.Meilisearch.object_to_search_data/1)
        |> Stream.reject(&is_nil/1)
        |> Stream.chunk_every(@chunk_size)
        |> Enum.reduce(0, &submit_chunk/2)
      end,
      timeout: :infinity
    )

    IO.write("\n")
  end

  def run(["clear"]) do
    start_pleroma()

    meili_delete!("/indexes/objects/documents")
  end

  def run(["show-private-key", master_key]) do
    start_pleroma()

    endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url])

    # The master key (not the configured private key) authenticates this request.
    with {:ok, response} <-
           Pleroma.HTTP.get(Path.join(endpoint, "/keys"), [{"X-Meili-API-Key", master_key}]),
         {:ok, decoded} <- Jason.decode(response.body) do
      case decoded do
        %{"private" => private_key} when private_key not in [nil, false] ->
          IO.puts(private_key)

        _ ->
          IO.puts("Error fetching the key, check the master key is correct: #{inspect(decoded)}")
      end
    else
      # Connection failure or a body that is not JSON: report it instead of crashing.
      error ->
        IO.puts("Error fetching the key, meilisearch could not be queried: #{inspect(error)}")
    end
  end

  def run(["stats"]) do
    start_pleroma()

    result = meili_get!("/indexes/objects/stats")
    IO.puts("Number of entries: #{result["numberOfDocuments"]}")
    IO.puts("Indexing? #{result["isIndexing"]}")
  end

  # Submits one batch of documents and returns the running total of submitted documents.
  # Only batches that Meilisearch acknowledged with an "updateId" are counted, so the
  # progress line never reports documents that were rejected.
  defp submit_chunk(objects, submitted) do
    result = meili_post!("/indexes/objects/documents", objects)

    if is_map(result) and Map.has_key?(result, "updateId") do
      total = submitted + length(objects)

      # Reset to the beginning of the line and rewrite it
      IO.write("\r")
      IO.write("Indexed #{total} entries")

      total
    else
      IO.puts("Failed to index: #{inspect(result)}")
      submitted
    end
  end
end
@@ -45,6 +45,9 @@ defmodule Pleroma.Activity.Search do | |||
end | |||
end | |||
# No-op: this backend ignores the activity and always returns nil.
def add_to_index(_activity), do: nil | |||
# No-op: this backend ignores the object and always returns nil.
def remove_from_index(_object), do: nil | |||
def maybe_restrict_author(query, %User{} = author) do | |||
Activity.Queries.by_author(query, author) | |||
end | |||
@@ -57,7 +60,7 @@ defmodule Pleroma.Activity.Search do | |||
def maybe_restrict_blocked(query, _), do: query | |||
defp restrict_public(q) do | |||
def restrict_public(q) do | |||
from([a, o] in q, | |||
where: fragment("?->>'type' = 'Create'", a.data), | |||
where: ^Pleroma.Constants.as_public() in a.recipients | |||
@@ -124,7 +127,7 @@ defmodule Pleroma.Activity.Search do | |||
) | |||
end | |||
defp maybe_restrict_local(q, user) do | |||
def maybe_restrict_local(q, user) do | |||
limit = Pleroma.Config.get([:instance, :limit_to_local_content], :unauthenticated) | |||
case {limit, user} do | |||
@@ -137,7 +140,7 @@ defmodule Pleroma.Activity.Search do | |||
defp restrict_local(q), do: where(q, local: true) | |||
defp maybe_fetch(activities, user, search_query) do | |||
def maybe_fetch(activities, user, search_query) do | |||
with true <- Regex.match?(~r/https?:/, search_query), | |||
{:ok, object} <- Fetcher.fetch_object_from_id(search_query), | |||
%Activity{} = activity <- Activity.get_create_by_object_ap_id(object.data["id"]), | |||
@@ -303,7 +303,11 @@ defmodule Pleroma.Application do | |||
def limiters_setup do | |||
config = Config.get(ConcurrentLimiter, []) | |||
[Pleroma.Web.RichMedia.Helpers, Pleroma.Web.ActivityPub.MRF.MediaProxyWarmingPolicy] | |||
[ | |||
Pleroma.Web.RichMedia.Helpers, | |||
Pleroma.Web.ActivityPub.MRF.MediaProxyWarmingPolicy, | |||
Pleroma.Search | |||
] | |||
|> Enum.each(fn module -> | |||
mod_config = Keyword.get(config, module, []) | |||
@@ -0,0 +1,134 @@ | |||
defmodule Pleroma.Search.Meilisearch do
  @moduledoc """
  Search backend that keeps public notes in a Meilisearch `objects` index.

  The instance address and the optional private key are read from the
  `Pleroma.Search.Meilisearch` configuration (`:url` and `:private_key`).
  """

  require Logger
  require Pleroma.Constants

  alias Pleroma.Activity

  # Import only the query helpers used below; the blanket import also brought in
  # search/3, add_to_index/1 and remove_from_index/1, which this module defines itself.
  import Pleroma.Activity.Search,
    only: [
      maybe_restrict_local: 2,
      maybe_restrict_author: 2,
      maybe_restrict_blocked: 2,
      maybe_fetch: 3
    ]

  import Ecto.Query

  # Upper bound for the number of hits requested per search.
  @max_limit 40

  # Absolute URL for an API path, based on the configured instance address.
  defp meili_url(path) do
    [Pleroma.Search.Meilisearch, :url]
    |> Pleroma.Config.get()
    |> Path.join(path)
  end

  # Request headers; the API key header is only sent when a private key is configured.
  defp meili_headers do
    content_type = {"Content-Type", "application/json"}

    case Pleroma.Config.get([Pleroma.Search.Meilisearch, :private_key]) do
      nil -> [content_type]
      private_key -> [content_type, {"X-Meili-API-Key", private_key}]
    end
  end

  @doc """
  Sends a GET request to `path` and returns the decoded JSON body.

  Raises if the request does not return `{:ok, response}` or the body is not valid JSON.
  """
  def meili_get!(path) do
    {:ok, result} = Pleroma.HTTP.get(meili_url(path), meili_headers())

    Jason.decode!(result.body)
  end

  @doc """
  Sends `params` JSON-encoded in a POST request to `path` and returns the decoded JSON body.

  Raises if the request does not return `{:ok, response}` or the body is not valid JSON.
  """
  def meili_post!(path, params) do
    {:ok, result} = Pleroma.HTTP.post(meili_url(path), Jason.encode!(params), meili_headers())

    Jason.decode!(result.body)
  end

  @doc """
  Sends a DELETE request to `path`.

  Raises if the request does not return an `{:ok, _}` tuple; otherwise returns that tuple.
  """
  def meili_delete!(path) do
    {:ok, _} = Pleroma.HTTP.request(:delete, meili_url(path), "", meili_headers(), [])
  end

  @doc """
  Searches the `objects` index for `query` and loads the matching Create activities.

  Options:

    * `:limit` - maximum number of hits, capped at #{@max_limit} (also the default)
    * `:offset` - number of hits to skip, defaults to `0`
    * `:author` - passed to `maybe_restrict_author/2`

  The result of the database lookup is handed to `maybe_fetch/3` together with `user` and
  `query`, and whatever that returns is the result of the search.
  """
  def search(user, query, options \\ []) do
    limit =
      case Keyword.get(options, :limit) do
        requested when is_integer(requested) -> min(requested, @max_limit)
        _ -> @max_limit
      end

    offset = Keyword.get(options, :offset, 0)
    author = Keyword.get(options, :author)

    result =
      meili_post!(
        "/indexes/objects/search",
        %{q: query, offset: offset, limit: limit}
      )

    # An error response carries no "hits"; treat it as an empty result instead of crashing.
    hits =
      case result do
        %{"hits" => hits} when is_list(hits) ->
          Enum.map(hits, & &1["ap"])

        _ ->
          Logger.error("Unexpected search response from meilisearch: #{inspect(result)}")
          []
      end

    activities =
      try do
        hits
        |> Activity.create_by_object_ap_id()
        |> Activity.with_preloaded_object()
        |> Activity.restrict_deactivated_users()
        |> maybe_restrict_local(user)
        |> maybe_restrict_author(author)
        |> maybe_restrict_blocked(user)
        |> order_by([object: obj], desc: obj.data["published"])
        |> Pleroma.Repo.all()
      rescue
        error ->
          Logger.error("Failed to load meilisearch hits: #{inspect(error)}")
          []
      end

    # maybe_fetch/3 takes a list of activities, so it has to run on the loaded results
    # rather than in the middle of the Ecto query pipeline.
    # NOTE(review): presumably it prepends the remotely fetched post when `query` is a URL;
    # confirm against Pleroma.Activity.Search.
    maybe_fetch(activities, user, query)
  end

  @doc """
  Builds the document stored in the index for `object`.

  Returns a map with `:id`, `:content` (tags stripped and trimmed), `:ap` (the object's
  ActivityPub id) and `:published` (Unix timestamp), or `nil` when the object is not a public
  `Note`, has at most one character of text, or carries data that cannot be indexed
  (unparsable `published`, content that cannot be sanitized).
  """
  def object_to_search_data(%{data: %{"type" => "Note"} = data} = object) do
    with true <- public?(data),
         content when is_binary(content) <- searchable_content(data["content"]),
         true <- String.length(content) > 1,
         {:ok, published, _offset} <- parse_published(data["published"]) do
      %{
        id: object.id,
        content: content,
        ap: data["id"],
        published: DateTime.to_unix(published)
      }
    else
      _ -> nil
    end
  end

  def object_to_search_data(_object), do: nil

  # True when the public collection is addressed in "to"; tolerates a missing or scalar "to".
  defp public?(data), do: Pleroma.Constants.as_public() in List.wrap(data["to"])

  # Plain-text version of the post content, or nil when it cannot be produced.
  defp searchable_content(content) do
    content_str =
      case content do
        [nil | rest] -> to_string(rest)
        str -> str
      end

    with true <- is_binary(content_str),
         {:ok, scrubbed} <- FastSanitize.strip_tags(content_str) do
      String.trim(scrubbed)
    else
      _ -> nil
    end
  end

  # DateTime.from_iso8601/1 only accepts binaries, so guard against a missing timestamp.
  defp parse_published(published) when is_binary(published),
    do: DateTime.from_iso8601(published)

  defp parse_published(_published), do: :error

  @doc """
  Adds the object of a `Create` activity to the index when it yields search data.

  Returns `nil` when nothing was indexed or the document was accepted; a rejected document
  is logged as an error.
  """
  def add_to_index(%{data: %{"type" => "Create"}} = activity) do
    case object_to_search_data(activity.object) do
      nil ->
        nil

      search_data ->
        result =
          meili_post!(
            "/indexes/objects/documents",
            [search_data]
          )

        if not (is_map(result) and Map.has_key?(result, "updateId")) do
          Logger.error("Failed to add activity #{activity.id} to index: #{inspect(result)}")
        end
    end
  end

  def add_to_index(_activity), do: nil

  @doc """
  Deletes the document stored under `object.id` from the index.
  """
  def remove_from_index(object) do
    meili_delete!("/indexes/objects/documents/#{object.id}")
  end
end
@@ -0,0 +1,18 @@ | |||
defmodule Pleroma.Search do
  @moduledoc """
  Entry point for search index maintenance.

  Both functions look up the backend configured under `Pleroma.Search, :module` and start
  a task, from within the `Pleroma.Search` concurrent limiter, that invokes the function
  of the same name on that backend.
  """

  def add_to_index(activity), do: dispatch(:add_to_index, activity)

  # Also delete from search index
  def remove_from_index(object), do: dispatch(:remove_from_index, object)

  # Resolves the configured backend, then starts a task calling `function` with `argument`.
  defp dispatch(function, argument) do
    backend = Pleroma.Config.get([Pleroma.Search, :module])

    ConcurrentLimiter.limit(Pleroma.Search, fn ->
      Task.start(fn -> apply(backend, function, [argument]) end)
    end)
  end
end
@@ -136,6 +136,9 @@ defmodule Pleroma.Web.ActivityPub.ActivityPub do | |||
Task.start(fn -> Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity) end) | |||
end) | |||
# Add local posts to search index | |||
if local, do: Pleroma.Search.add_to_index(activity) | |||
{:ok, activity} | |||
else | |||
%Activity{} = activity -> | |||
@@ -193,6 +193,7 @@ defmodule Pleroma.Web.ActivityPub.SideEffects do | |||
# - Increase replies count | |||
# - Set up ActivityExpiration | |||
# - Set up notifications | |||
# - Index incoming posts for search (if needed) | |||
@impl true | |||
def handle(%{data: %{"type" => "Create"}} = activity, meta) do | |||
with {:ok, object, meta} <- handle_object_creation(meta[:object_data], activity, meta), | |||
@@ -221,6 +222,8 @@ defmodule Pleroma.Web.ActivityPub.SideEffects do | |||
Task.start(fn -> Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity) end) | |||
end) | |||
Pleroma.Search.add_to_index(Map.put(activity, :object, object)) | |||
meta = | |||
meta | |||
|> add_notifications(notifications) | |||
@@ -281,6 +284,7 @@ defmodule Pleroma.Web.ActivityPub.SideEffects do | |||
# - Reduce the user note count | |||
# - Reduce the reply count | |||
# - Stream out the activity | |||
# - Removes posts from search index (if needed) | |||
@impl true | |||
def handle(%{data: %{"type" => "Delete", "object" => deleted_object}} = object, meta) do | |||
deleted_object = | |||
@@ -320,6 +324,9 @@ defmodule Pleroma.Web.ActivityPub.SideEffects do | |||
if result == :ok do | |||
Notification.create_notifications(object) | |||
Pleroma.Search.remove_from_index(deleted_object) | |||
{:ok, object, meta} | |||
else | |||
{:error, result} | |||
@@ -5,7 +5,6 @@ | |||
defmodule Pleroma.Web.MastodonAPI.SearchController do | |||
use Pleroma.Web, :controller | |||
alias Pleroma.Activity | |||
alias Pleroma.Repo | |||
alias Pleroma.User | |||
alias Pleroma.Web.ControllerHelper | |||
@@ -98,7 +97,9 @@ defmodule Pleroma.Web.MastodonAPI.SearchController do | |||
end | |||
defp resource_search(_, "statuses", query, options) do | |||
statuses = with_fallback(fn -> Activity.search(options[:for_user], query, options) end) | |||
search_module = Pleroma.Config.get([Pleroma.Search, :module], Pleroma.Activity) | |||
statuses = with_fallback(fn -> search_module.search(options[:for_user], query, options) end) | |||
StatusView.render("index.json", | |||
activities: statuses, | |||
@@ -0,0 +1,108 @@ | |||
# Pleroma: A lightweight social networking server | |||
# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/> | |||
# SPDX-License-Identifier: AGPL-3.0-only | |||
defmodule Pleroma.Search.MeilisearchTest do
  require Pleroma.Constants

  use Pleroma.DataCase
  import Pleroma.Factory
  import Tesla.Mock
  import Mock

  alias Pleroma.Web.CommonAPI
  alias Pleroma.Search.Meilisearch

  # Every HTTP request not overridden by a test goes through the shared request mock.
  setup_all do
    Tesla.Mock.mock_global(fn env -> HttpRequestMock.request(env) end)
    :ok
  end

  describe "meilisearch" do
    setup do: clear_config([Pleroma.Search, :module], Meilisearch)

    # Record calls to the index functions while still running the real implementation.
    setup_with_mocks(
      [
        {Meilisearch, [:passthrough],
         [
           add_to_index: fn a -> passthrough([a]) end,
           remove_from_index: fn a -> passthrough([a]) end
         ]}
      ],
      context,
      do: {:ok, context}
    )

    test "indexes a local post on creation" do
      author = insert(:user)

      mock_global(fn
        %{method: :post, url: "http://127.0.0.1:7700/indexes/objects/documents", body: body} ->
          documents = Jason.decode!(body)

          assert match?(
                   [%{"content" => "guys i just don't wanna leave the swamp"}],
                   documents
                 )

          json(%{updateId: 1})
      end)

      post_params = %{
        status: "guys i just don't wanna leave the swamp",
        visibility: "public"
      }

      {:ok, activity} = CommonAPI.post(author, post_params)

      assert_called(Meilisearch.add_to_index(activity))
    end

    test "doesn't index posts that are not public" do
      author = insert(:user)

      for visibility <- ["unlisted", "private", "direct"] do
        {:ok, _} =
          CommonAPI.post(author, %{
            status: "guys i just don't wanna leave the swamp",
            visibility: visibility
          })
      end

      calls = call_history(Meilisearch)
      assert length(calls) == 3

      # Each recorded call must have returned nil, i.e. nothing was indexed.
      Enum.each(calls, fn {_pid, _call, returned} ->
        assert is_nil(returned)
      end)
    end

    test "deletes posts from index when deleted locally" do
      author = insert(:user)

      mock_global(fn
        %{method: :post, url: "http://127.0.0.1:7700/indexes/objects/documents", body: body} ->
          documents = Jason.decode!(body)

          assert match?(
                   [%{"content" => "guys i just don't wanna leave the swamp"}],
                   documents
                 )

          json(%{updateId: 1})

        %{method: :delete, url: "http://127.0.0.1:7700/indexes/objects/documents/" <> id} ->
          assert String.length(id) > 1
          json(%{updateId: 2})
      end)

      post_params = %{
        status: "guys i just don't wanna leave the swamp",
        visibility: "public"
      }

      {:ok, activity} = CommonAPI.post(author, post_params)

      assert_called(Meilisearch.add_to_index(activity))

      {:ok, _} = CommonAPI.delete(activity.id, author)

      assert_called(Meilisearch.remove_from_index(:_))
    end
  end
end