diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6f268d110..a9906a05f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -50,6 +50,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
- Configuration: Filter.AnonymizeFilename added ability to retain file extension with custom text
- Admin API: changed json structure for saving config settings.
- RichMedia: parsers and their order are configured in `rich_media` config.
+- RichMedia: add the rich media ttl based on image expiration time.
## [1.0.1] - 2019-07-14
### Security
diff --git a/config/config.exs b/config/config.exs
index bda92e45d..569411866 100644
--- a/config/config.exs
+++ b/config/config.exs
@@ -345,7 +345,8 @@ config :pleroma, :rich_media,
Pleroma.Web.RichMedia.Parsers.TwitterCard,
Pleroma.Web.RichMedia.Parsers.OGP,
Pleroma.Web.RichMedia.Parsers.OEmbed
- ]
+ ],
+ ttl_setters: [Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrl]
config :pleroma, :media_proxy,
enabled: false,
diff --git a/docs/config/howto_set_richmedia_cache_ttl_based_on_image.md b/docs/config/howto_set_richmedia_cache_ttl_based_on_image.md
new file mode 100644
index 000000000..bfee5a9e6
--- /dev/null
+++ b/docs/config/howto_set_richmedia_cache_ttl_based_on_image.md
@@ -0,0 +1,33 @@
+# How to set rich media cache ttl based on image ttl
+## Explanation
+
+Richmedia are cached without the ttl but the rich media may have image which can expire, like aws signed url.
+In such cases the old image url (expired) is returned from the media cache.
+
+So to avoid such situation we can define a module that will set ttl based on image.
+The module must adopt behaviour `Pleroma.Web.RichMedia.Parser.TTL`
+
+### Example
+
+```exs
+defmodule MyModule do
+ @behaviour Pleroma.Web.RichMedia.Parser.TTL
+
+ @impl Pleroma.Web.RichMedia.Parser.TTL
+ def ttl(data, url) do
+ image_url = Map.get(data, :image)
+ # do some parsing in the url and get the ttl of the image
+ # return ttl is unix time
+ parse_ttl_from_url(image_url)
+ end
+end
+```
+
+And update the config
+
+```exs
+config :pleroma, :rich_media,
+ ttl_setters: [Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrl, MyModule]
+```
+
+> For reference there is a parser for AWS signed URL `Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrl`, it's enabled by default.
diff --git a/lib/pleroma/web/rich_media/parser.ex b/lib/pleroma/web/rich_media/parser.ex
index 0d2523338..b69b2be61 100644
--- a/lib/pleroma/web/rich_media/parser.ex
+++ b/lib/pleroma/web/rich_media/parser.ex
@@ -24,6 +24,7 @@ defmodule Pleroma.Web.RichMedia.Parser do
Cachex.fetch!(:rich_media_cache, url, fn _ ->
{:commit, parse_url(url)}
end)
+ |> set_ttl_based_on_image(url)
rescue
e ->
{:error, "Cachex error: #{inspect(e)}"}
@@ -31,6 +32,50 @@ defmodule Pleroma.Web.RichMedia.Parser do
end
end
+ @doc """
+ Set the rich media cache based on the expiration time of image.
+
+ Adopt behaviour `Pleroma.Web.RichMedia.Parser.TTL`
+
+ ## Example
+
+ defmodule MyModule do
+ @behaviour Pleroma.Web.RichMedia.Parser.TTL
+ def ttl(data, url) do
+ image_url = Map.get(data, :image)
+ # do some parsing in the url and get the ttl of the image
+ # and return ttl is unix time
+ parse_ttl_from_url(image_url)
+ end
+ end
+
+ Define the module in the config
+
+ config :pleroma, :rich_media,
+ ttl_setters: [MyModule]
+ """
+ def set_ttl_based_on_image({:ok, data}, url) do
+ with {:ok, nil} <- Cachex.ttl(:rich_media_cache, url) do
+ ttl = get_ttl_from_image(data, url)
+ Cachex.expire_at(:rich_media_cache, url, ttl * 1000)
+ {:ok, data}
+ else
+ _ ->
+ {:ok, data}
+ end
+ end
+
+ defp get_ttl_from_image(data, url) do
+ Pleroma.Config.get([:rich_media, :ttl_setters])
+ |> Enum.reduce({:ok, nil}, fn
+ module, {:ok, _ttl} ->
+ module.ttl(data, url)
+
+ _, error ->
+ error
+ end)
+ end
+
defp parse_url(url) do
try do
{:ok, %Tesla.Env{body: html}} = Pleroma.HTTP.get(url, [], adapter: @hackney_options)
diff --git a/lib/pleroma/web/rich_media/parsers/ttl/aws_signed_url.ex b/lib/pleroma/web/rich_media/parsers/ttl/aws_signed_url.ex
new file mode 100644
index 000000000..014c0935f
--- /dev/null
+++ b/lib/pleroma/web/rich_media/parsers/ttl/aws_signed_url.ex
@@ -0,0 +1,52 @@
+defmodule Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrl do
+ @behaviour Pleroma.Web.RichMedia.Parser.TTL
+
+ @impl Pleroma.Web.RichMedia.Parser.TTL
+ def ttl(data, _url) do
+ image = Map.get(data, :image)
+
+ if is_aws_signed_url(image) do
+ image
+ |> parse_query_params()
+ |> format_query_params()
+ |> get_expiration_timestamp()
+ end
+ end
+
+ defp is_aws_signed_url(""), do: nil
+ defp is_aws_signed_url(nil), do: nil
+
+ defp is_aws_signed_url(image) when is_binary(image) do
+ %URI{host: host, query: query} = URI.parse(image)
+
+ if String.contains?(host, "amazonaws.com") and
+ String.contains?(query, "X-Amz-Expires") do
+ image
+ else
+ nil
+ end
+ end
+
+ defp is_aws_signed_url(_), do: nil
+
+ defp parse_query_params(image) do
+ %URI{query: query} = URI.parse(image)
+ query
+ end
+
+ defp format_query_params(query) do
+ query
+ |> String.split(~r/&|=/)
+ |> Enum.chunk_every(2)
+ |> Map.new(fn [k, v] -> {k, v} end)
+ end
+
+ defp get_expiration_timestamp(params) when is_map(params) do
+ {:ok, date} =
+ params
+ |> Map.get("X-Amz-Date")
+ |> Timex.parse("{ISO:Basic:Z}")
+
+ Timex.to_unix(date) + String.to_integer(Map.get(params, "X-Amz-Expires"))
+ end
+end
diff --git a/lib/pleroma/web/rich_media/parsers/ttl/ttl.ex b/lib/pleroma/web/rich_media/parsers/ttl/ttl.ex
new file mode 100644
index 000000000..6b3ec6d30
--- /dev/null
+++ b/lib/pleroma/web/rich_media/parsers/ttl/ttl.ex
@@ -0,0 +1,3 @@
+defmodule Pleroma.Web.RichMedia.Parser.TTL do
+ @callback ttl(Map.t(), String.t()) :: {:ok, Integer.t()} | {:error, String.t()}
+end
diff --git a/test/fixtures/rich_media/amz.html b/test/fixtures/rich_media/amz.html
new file mode 100644
index 000000000..d4f8bd1a3
--- /dev/null
+++ b/test/fixtures/rich_media/amz.html
@@ -0,0 +1,5 @@
+
+
+
+
+
diff --git a/test/web/rich_media/aws_signed_url_test.exs b/test/web/rich_media/aws_signed_url_test.exs
new file mode 100644
index 000000000..122787bc2
--- /dev/null
+++ b/test/web/rich_media/aws_signed_url_test.exs
@@ -0,0 +1,81 @@
+# Pleroma: A lightweight social networking server
+# Copyright © 2017-2019 Pleroma Authors
+# SPDX-License-Identifier: AGPL-3.0-only
+
+defmodule Pleroma.Web.RichMedia.TTL.AwsSignedUrlTest do
+ use ExUnit.Case, async: true
+
+ test "s3 signed url is parsed correct for expiration time" do
+ url = "https://pleroma.social/amz"
+
+ {:ok, timestamp} =
+ Timex.now()
+ |> DateTime.truncate(:second)
+ |> Timex.format("{ISO:Basic:Z}")
+
+ # in seconds
+ valid_till = 30
+
+ metadata = construct_metadata(timestamp, valid_till, url)
+
+ expire_time =
+ Timex.parse!(timestamp, "{ISO:Basic:Z}") |> Timex.to_unix() |> Kernel.+(valid_till)
+
+ assert expire_time == Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrl.ttl(metadata, url)
+ end
+
+ test "s3 signed url is parsed and correct ttl is set for rich media" do
+ url = "https://pleroma.social/amz"
+
+ {:ok, timestamp} =
+ Timex.now()
+ |> DateTime.truncate(:second)
+ |> Timex.format("{ISO:Basic:Z}")
+
+ # in seconds
+ valid_till = 30
+
+ metadata = construct_metadata(timestamp, valid_till, url)
+
+ body = """
+
+
+
+
+
+ """
+
+ Tesla.Mock.mock(fn
+ %{
+ method: :get,
+ url: "https://pleroma.social/amz"
+ } ->
+ %Tesla.Env{status: 200, body: body}
+ end)
+
+ Cachex.put(:rich_media_cache, url, metadata)
+
+ Pleroma.Web.RichMedia.Parser.set_ttl_based_on_image({:ok, metadata}, url)
+
+ {:ok, cache_ttl} = Cachex.ttl(:rich_media_cache, url)
+
+ # as there is delay in setting and pulling the data from cache we ignore 1 second
+ assert_in_delta(valid_till * 1000, cache_ttl, 1000)
+ end
+
+ defp construct_s3_url(timestamp, valid_till) do
+ "https://pleroma.s3.ap-southeast-1.amazonaws.com/sachin%20%281%29%20_a%20-%25%2Aasdasd%20BNN%20bnnn%20.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIBLWWK6RGDQXDLJQ%2F20190716%2Fap-southeast-1%2Fs3%2Faws4_request&X-Amz-Date=#{
+ timestamp
+ }&X-Amz-Expires=#{valid_till}&X-Amz-Signature=04ffd6b98634f4b1bbabc62e0fac4879093cd54a6eed24fe8eb38e8369526bbf&X-Amz-SignedHeaders=host"
+ end
+
+ defp construct_metadata(timestamp, valid_till, url) do
+ %{
+ image: construct_s3_url(timestamp, valid_till),
+ site: "Pleroma",
+ title: "Pleroma",
+ description: "Pleroma",
+ url: url
+ }
+ end
+end