added deleted_urls in AttachmentsCleanupWorker

This commit is contained in:
Maksim Pechnikov 2020-05-26 09:49:20 +03:00
parent 65d9692975
commit 04a26ab0a8
5 changed files with 66 additions and 57 deletions

View File

@ -23,8 +23,6 @@ defmodule Pleroma.Workers.AttachmentsCleanupWorker do
Enum.map(attachment["url"], & &1["href"])
end)
names = Enum.map(attachments, & &1["name"])
uploader = Pleroma.Config.get([Pleroma.Upload, :uploader])
prefix =
@ -40,67 +38,78 @@ defmodule Pleroma.Workers.AttachmentsCleanupWorker do
)
# find all objects for copies of the attachments; name and actor don't matter here
object_ids_and_hrefs =
from(o in Object,
where:
fragment(
"to_jsonb(array(select jsonb_array_elements((?)#>'{url}') ->> 'href' where jsonb_typeof((?)#>'{url}') = 'array'))::jsonb \\?| (?)",
o.data,
o.data,
^hrefs
)
)
# The query above can be time consumptive on large instances until we
# refactor how uploads are stored
|> Repo.all(timeout: :infinity)
# we should delete 1 object for any given attachment, but don't delete
# files if there are more than 1 object for it
|> Enum.reduce(%{}, fn %{
id: id,
data: %{
"url" => [%{"href" => href}],
"actor" => obj_actor,
"name" => name
}
},
acc ->
Map.update(acc, href, %{id: id, count: 1}, fn val ->
case obj_actor == actor and name in names do
true ->
# set id of the actor's object that will be deleted
%{val | id: id, count: val.count + 1}
false ->
# another actor's object, just increase count to not delete file
%{val | count: val.count + 1}
end
end)
end)
|> Enum.map(fn {href, %{id: id, count: count}} ->
# only delete files that have single instance
{object_ids, attachment_urls} =
hrefs
|> fetch_objects
|> prepare_objects(actor, Enum.map(attachments, & &1["name"]))
|> Enum.reduce({[], []}, fn {href, %{id: id, count: count}}, {ids, hrefs} ->
with 1 <- count do
href
|> String.trim_leading("#{base_url}/#{prefix}")
|> uploader.delete_file()
{id, href}
{ids ++ [id], hrefs ++ [href]}
else
_ -> {id, nil}
_ -> {ids ++ [id], hrefs}
end
end)
object_ids = Enum.map(object_ids_and_hrefs, fn {id, _} -> id end)
Pleroma.Web.MediaProxy.put_in_deleted_urls(attachment_urls)
from(o in Object, where: o.id in ^object_ids)
|> Repo.delete_all()
Enum.each(attachment_urls, fn href ->
href
|> String.trim_leading("#{base_url}/#{prefix}")
|> uploader.delete_file()
end)
object_ids_and_hrefs
|> Enum.filter(fn {_, href} -> not is_nil(href) end)
|> Enum.map(&elem(&1, 1))
|> Pleroma.Web.MediaProxy.Invalidation.purge()
Repo.delete_all(from(o in Object, where: o.id in ^object_ids))
cache_purge(attachment_urls)
{:ok, :success}
end
# Catch-all for "cleanup_attachments" jobs that reach this clause: the object
# payload is ignored and the job is reported as skipped.
def perform(%{"op" => "cleanup_attachments", "object" => _object}, _job) do
  {:ok, :skip}
end
# Hands the given attachment URLs to the media proxy invalidation module so it
# can purge them.
defp cache_purge(attachment_urls),
  do: Pleroma.Web.MediaProxy.Invalidation.purge(attachment_urls)
# Tallies, per attachment href, how many objects reference it and which object
# id is the deletion candidate.
#
# Returns a map of `href => %{id: object_id, count: references}`. Callers use
# the count to decide whether the file itself may go: a file should only be
# removed when a single object points at it.
def prepare_objects(objects, actor, names) do
  Enum.reduce(objects, %{}, fn
    %{id: object_id, data: %{"url" => [%{"href" => href}], "actor" => owner, "name" => label}},
    tally ->
      case Map.fetch(tally, href) do
        :error ->
          # first object seen for this href
          Map.put(tally, href, %{id: object_id, count: 1})

        {:ok, entry} ->
          # only an object owned by `actor` (with a matching name) replaces the
          # remembered id; any other object merely bumps the reference count so
          # the file is kept
          owned? = owner == actor and label in names
          kept_id = if owned?, do: object_id, else: entry.id
          Map.put(tally, href, %{entry | id: kept_id, count: entry.count + 1})
      end
  end)
end
# Loads every Object whose `data.url` array contains at least one entry whose
# "href" is among the given hrefs.
defp fetch_objects(hrefs) do
  matching_objects =
    from(o in Object,
      where:
        fragment(
          "to_jsonb(array(select jsonb_array_elements((?)#>'{url}') ->> 'href' where jsonb_typeof((?)#>'{url}') = 'array'))::jsonb \\?| (?)",
          o.data,
          o.data,
          ^hrefs
        )
    )

  # This query can be time-consuming on large instances until the way uploads
  # are stored is refactored, hence no timeout.
  Repo.all(matching_objects, timeout: :infinity)
end
end

View File

@ -12,7 +12,7 @@ defmodule Pleroma.Web.MediaProxy.InvalidationTest do
setup do: clear_config([:media_proxy])
setup do
on_exit(fn -> Cachex.purge(:deleted_urls_cache) end)
on_exit(fn -> Cachex.clear(:deleted_urls_cache) end)
:ok
end

View File

@ -6,7 +6,7 @@ defmodule Pleroma.Web.MediaProxy.Invalidation.HttpTest do
import Tesla.Mock
setup do
on_exit(fn -> Cachex.purge(:deleted_urls_cache) end)
on_exit(fn -> Cachex.clear(:deleted_urls_cache) end)
:ok
end

View File

@ -5,7 +5,7 @@ defmodule Pleroma.Web.MediaProxy.Invalidation.ScriptTest do
import ExUnit.CaptureLog
setup do
on_exit(fn -> Cachex.purge(:deleted_urls_cache) end)
on_exit(fn -> Cachex.clear(:deleted_urls_cache) end)
:ok
end

View File

@ -11,7 +11,7 @@ defmodule Pleroma.Web.MediaProxy.MediaProxyControllerTest do
setup do: clear_config([Pleroma.Web.Endpoint, :secret_key_base])
setup do
on_exit(fn -> Cachex.purge(:deleted_urls_cache) end)
on_exit(fn -> Cachex.clear(:deleted_urls_cache) end)
:ok
end