Browse Source

Make the indexing batch differently and more, show number indexed

merge-requests/3529/head
Ekaterina Vaartis 2 years ago
parent
commit
f70e7dffd1
1 changed files with 38 additions and 25 deletions
  1. +38
    -25
      lib/mix/tasks/pleroma/search/meilisearch.ex

+ 38
- 25
lib/mix/tasks/pleroma/search/meilisearch.ex View File

@@ -28,33 +28,46 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do
])
)

Pleroma.Repo.chunk_stream(
from(Pleroma.Object,
# Only index public posts which are notes and have some text
where:
fragment("data->>'type' = 'Note'") and
fragment("LENGTH(data->>'source') > 0") and
fragment("data->'to' \\? ?", ^Pleroma.Constants.as_public())
),
200,
:batches
)
|> Stream.map(fn objects ->
Enum.map(objects, fn object ->
data = object.data
%{id: object.id, source: data["source"], ap: data["id"]}
end)
end)
|> Stream.each(fn objects ->
{:ok, _} =
Pleroma.HTTP.post(
"#{endpoint}/indexes/objects/documents",
Jason.encode!(objects)
chunk_size = 100_000

Pleroma.Repo.transaction(
fn ->
Pleroma.Repo.stream(
from(Pleroma.Object,
# Only index public posts which are notes and have some text
where:
fragment("data->>'type' = 'Note'") and
fragment("LENGTH(data->>'source') > 0") and
fragment("data->'to' \\? ?", ^Pleroma.Constants.as_public()),
order_by: fragment("data->'published' DESC")
),
timeout: :infinity
)
|> Stream.chunk_every(chunk_size)
|> Stream.transform(0, fn objects, acc ->
new_acc = acc + Enum.count(objects)

IO.puts("Indexed #{Enum.count(objects)} entries")
end)
|> Stream.run()
IO.puts("Indexed #{new_acc} entries")

{[objects], new_acc}
end)
|> Stream.map(fn objects ->
Enum.map(objects, fn object ->
data = object.data
%{id: object.id, source: data["source"], ap: data["id"]}
end)
end)
|> Stream.each(fn objects ->
{:ok, _} =
Pleroma.HTTP.post(
"#{endpoint}/indexes/objects/documents",
Jason.encode!(objects)
)
end)
|> Stream.run()
end,
timeout: :infinity
)
end

def run(["clear"]) do


Loading…
Cancel
Save