Browse Source

Merge branch 'feature/richmedia-ttl' into 'develop'

add the rich media ttl based on image exp time

See merge request pleroma/pleroma!1438
tags/v1.1.4
kaniini 5 years ago
parent
commit
33729bbb28
8 changed files with 222 additions and 1 deletions
  1. +1
    -0
      CHANGELOG.md
  2. +2
    -1
      config/config.exs
  3. +33
    -0
      docs/config/howto_set_richmedia_cache_ttl_based_on_image.md
  4. +45
    -0
      lib/pleroma/web/rich_media/parser.ex
  5. +52
    -0
      lib/pleroma/web/rich_media/parsers/ttl/aws_signed_url.ex
  6. +3
    -0
      lib/pleroma/web/rich_media/parsers/ttl/ttl.ex
  7. +5
    -0
      test/fixtures/rich_media/amz.html
  8. +81
    -0
      test/web/rich_media/aws_signed_url_test.exs

+ 1
- 0
CHANGELOG.md View File

@@ -50,6 +50,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
- Configuration: Filter.AnonymizeFilename added ability to retain file extension with custom text
- Admin API: changed json structure for saving config settings.
- RichMedia: parsers and their order are configured in `rich_media` config.
- RichMedia: add the rich media ttl based on image expiration time.

## [1.0.1] - 2019-07-14
### Security


+ 2
- 1
config/config.exs View File

@@ -345,7 +345,8 @@ config :pleroma, :rich_media,
Pleroma.Web.RichMedia.Parsers.TwitterCard,
Pleroma.Web.RichMedia.Parsers.OGP,
Pleroma.Web.RichMedia.Parsers.OEmbed
]
],
ttl_setters: [Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrl]

config :pleroma, :media_proxy,
enabled: false,


+ 33
- 0
docs/config/howto_set_richmedia_cache_ttl_based_on_image.md View File

@@ -0,0 +1,33 @@
# How to set rich media cache ttl based on image ttl
## Explanation

Rich media previews are cached without a TTL, but a preview may reference an image whose URL expires, such as an AWS signed URL.
In that case the cache keeps returning the old (expired) image URL.

To avoid this, define a module that sets the cache TTL based on the image.
The module must adopt the behaviour `Pleroma.Web.RichMedia.Parser.TTL`.

### Example

```exs
defmodule MyModule do
  @behaviour Pleroma.Web.RichMedia.Parser.TTL

  # `_url` (the address of the page the preview was built from) is not needed
  # here; the underscore prefix avoids an "unused variable" compiler warning.
  @impl Pleroma.Web.RichMedia.Parser.TTL
  def ttl(data, _url) do
    image_url = Map.get(data, :image)
    # Work out from the image URL when the image expires and return that
    # moment as a Unix timestamp in seconds.
    # `parse_ttl_from_url/1` is a placeholder for your own implementation.
    parse_ttl_from_url(image_url)
  end
end
```

And update the config

```exs
config :pleroma, :rich_media,
ttl_setters: [Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrl, MyModule]
```

> For reference, there is a TTL setter for AWS signed URLs, `Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrl`, which is enabled by default.

+ 45
- 0
lib/pleroma/web/rich_media/parser.ex View File

@@ -24,6 +24,7 @@ defmodule Pleroma.Web.RichMedia.Parser do
Cachex.fetch!(:rich_media_cache, url, fn _ ->
{:commit, parse_url(url)}
end)
|> set_ttl_based_on_image(url)
rescue
e ->
{:error, "Cachex error: #{inspect(e)}"}
@@ -31,6 +32,50 @@ defmodule Pleroma.Web.RichMedia.Parser do
end
end

@doc """
Sets the expiration of the rich media cache entry for `url` based on the
expiration time of its image.

The expiration is only set when the cache entry has no TTL yet and a
configured TTL setter yields an integer Unix timestamp (in seconds). In every
other case the cache entry is left untouched. The first argument is always
returned unchanged, so the function can sit at the end of a pipeline.

TTL setters adopt the behaviour `Pleroma.Web.RichMedia.Parser.TTL`.

## Example

    defmodule MyModule do
      @behaviour Pleroma.Web.RichMedia.Parser.TTL

      @impl Pleroma.Web.RichMedia.Parser.TTL
      def ttl(data, _url) do
        image_url = Map.get(data, :image)
        # do some parsing in the url and get the ttl of the image
        # and return the ttl as unix time (seconds)
        parse_ttl_from_url(image_url)
      end
    end

Define the module in the config

    config :pleroma, :rich_media,
      ttl_setters: [MyModule]
"""
def set_ttl_based_on_image({:ok, data}, url) do
  with {:ok, nil} <- Cachex.ttl(:rich_media_cache, url),
       # Only an integer timestamp is usable; nil (or any other value) means
       # no setter could determine an expiration for this image.
       ttl when is_integer(ttl) <- get_ttl_from_image(data, url) do
    # The setter reports seconds; `Cachex.expire_at/3` expects milliseconds.
    Cachex.expire_at(:rich_media_cache, url, ttl * 1000)
    {:ok, data}
  else
    _ ->
      {:ok, data}
  end
end

# Anything that is not a successful parse result (e.g. `{:error, reason}`) is
# passed through unchanged instead of raising a FunctionClauseError.
def set_ttl_based_on_image(result, _url), do: result

# Asks each configured TTL setter, in order, for the expiration time of the
# image in `data` and returns the first integer Unix timestamp (seconds) found.
#
# Returns nil when no setter is configured or none yields a timestamp.
# A setter may return either a bare integer or `{:ok, integer}`; any other
# result (nil, `{:error, _}`, ...) makes the next setter get a chance.
defp get_ttl_from_image(data, url) do
  # The config key may be absent, in which case there is nothing to consult.
  setters = Pleroma.Config.get([:rich_media, :ttl_setters]) || []

  Enum.find_value(setters, fn module ->
    case module.ttl(data, url) do
      ttl when is_integer(ttl) -> ttl
      {:ok, ttl} when is_integer(ttl) -> ttl
      _ -> nil
    end
  end)
end

defp parse_url(url) do
try do
{:ok, %Tesla.Env{body: html}} = Pleroma.HTTP.get(url, [], adapter: @hackney_options)


+ 52
- 0
lib/pleroma/web/rich_media/parsers/ttl/aws_signed_url.ex View File

@@ -0,0 +1,52 @@
defmodule Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrl do
  @moduledoc """
  TTL setter for rich media whose image is an AWS signed URL.

  The expiration is computed from the `X-Amz-Date` and `X-Amz-Expires` query
  parameters of the image URL.
  """

  @behaviour Pleroma.Web.RichMedia.Parser.TTL

  @doc """
  Returns the Unix timestamp (in seconds) at which the image in `data` expires.

  Returns `nil` when `data` has no `:image`, when the image is not an AWS
  signed URL, or when its `X-Amz-Date` / `X-Amz-Expires` parameters are missing
  or cannot be parsed.
  """
  @impl Pleroma.Web.RichMedia.Parser.TTL
  def ttl(data, _url) do
    image = Map.get(data, :image)

    if aws_signed_url?(image) do
      image
      |> parse_query_params()
      |> format_query_params()
      |> get_expiration_timestamp()
    end
  end

  # True only for string URLs that have both a host containing "amazonaws.com"
  # and a query string mentioning "X-Amz-Expires".
  defp aws_signed_url?(image) when is_binary(image) do
    case URI.parse(image) do
      # Relative URLs have no host and unsigned URLs may have no query; both
      # are nil in that case and must not reach String.contains?/2.
      %URI{host: host, query: query} when is_binary(host) and is_binary(query) ->
        String.contains?(host, "amazonaws.com") and
          String.contains?(query, "X-Amz-Expires")

      _ ->
        false
    end
  end

  defp aws_signed_url?(_), do: false

  # Extracts the raw query string of the image URL.
  defp parse_query_params(image) do
    %URI{query: query} = URI.parse(image)
    query
  end

  # Turns "a=1&b=2" into %{"a" => "1", "b" => "2"}.
  # Each pair is split on the first "=" only, so values containing "=" and
  # parameters without a value do not shift or break the remaining pairs.
  defp format_query_params(query) do
    query
    |> String.split("&", trim: true)
    |> Map.new(fn pair ->
      case String.split(pair, "=", parts: 2) do
        [key, value] -> {key, value}
        [key] -> {key, ""}
      end
    end)
  end

  # Signing time plus validity period, as a Unix timestamp in seconds.
  # Returns nil instead of raising when either parameter is missing or invalid.
  defp get_expiration_timestamp(params) when is_map(params) do
    with date_string when is_binary(date_string) <- Map.get(params, "X-Amz-Date"),
         {:ok, date} <- Timex.parse(date_string, "{ISO:Basic:Z}"),
         {expires_in, ""} <- Integer.parse(Map.get(params, "X-Amz-Expires", "")) do
      Timex.to_unix(date) + expires_in
    else
      _ -> nil
    end
  end
end

+ 3
- 0
lib/pleroma/web/rich_media/parsers/ttl/ttl.ex View File

@@ -0,0 +1,3 @@
defmodule Pleroma.Web.RichMedia.Parser.TTL do
  @moduledoc """
  Behaviour for modules that derive an expiration time for a rich media cache
  entry from the parsed rich media data.

  Implementations are listed under `ttl_setters` in the `:rich_media` config.
  """

  @doc """
  Receives the parsed rich media `data` and the `url` it was fetched from.

  Returns the expiration time as a Unix timestamp in seconds, or `nil` when
  the implementation cannot determine one for the given data.
  """
  @callback ttl(data :: map(), url :: String.t()) :: integer() | nil
end

+ 5
- 0
test/fixtures/rich_media/amz.html View File

@@ -0,0 +1,5 @@
<meta name="twitter:card" content="summary" />
<meta name="twitter:site" content="@flickr" />
<meta name="twitter:title" content="Small Island Developing States Photo Submission" />
<meta name="twitter:description" content="View the album on Flickr." />
<meta name="twitter:image" content="https://pleroma.s3.ap-southeast-1.amazonaws.com/sachin%20%281%29%20_a%20-%25%2Aasdasd%20BNN%20bnnn%20.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIBLWWK6RGDQXDLJQ%2F20190716%2Fap-southeast-1%2Fs3%2Faws4_request&X-Amz-Date=20190716T175105Z&X-Amz-Expires=300000&X-Amz-Signature=04ffd6b98634f4b1bbabc62e0fac4879093cd54a6eed24fe8eb38e8369526bbf&X-Amz-SignedHeaders=host" />

+ 81
- 0
test/web/rich_media/aws_signed_url_test.exs View File

@@ -0,0 +1,81 @@
# Pleroma: A lightweight social networking server
# Copyright © 2017-2019 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only

defmodule Pleroma.Web.RichMedia.TTL.AwsSignedUrlTest do
  # Not async: the second test writes to the shared, application-wide
  # :rich_media_cache, which concurrently running tests could also touch.
  use ExUnit.Case

  test "s3 signed url is parsed correct for expiration time" do
    url = "https://pleroma.social/amz"

    {:ok, timestamp} =
      Timex.now()
      |> DateTime.truncate(:second)
      |> Timex.format("{ISO:Basic:Z}")

    # in seconds
    valid_till = 30

    metadata = construct_metadata(timestamp, valid_till, url)

    expire_time =
      Timex.parse!(timestamp, "{ISO:Basic:Z}") |> Timex.to_unix() |> Kernel.+(valid_till)

    assert expire_time == Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrl.ttl(metadata, url)
  end

  test "s3 signed url is parsed and correct ttl is set for rich media" do
    url = "https://pleroma.social/amz"

    {:ok, timestamp} =
      Timex.now()
      |> DateTime.truncate(:second)
      |> Timex.format("{ISO:Basic:Z}")

    # in seconds
    valid_till = 30

    metadata = construct_metadata(timestamp, valid_till, url)

    body = """
    <meta name="twitter:card" content="Pleroma" />
    <meta name="twitter:site" content="Pleroma" />
    <meta name="twitter:title" content="Pleroma" />
    <meta name="twitter:description" content="Pleroma" />
    <meta name="twitter:image" content="#{Map.get(metadata, :image)}" />
    """

    Tesla.Mock.mock(fn
      %{
        method: :get,
        url: "https://pleroma.social/amz"
      } ->
        %Tesla.Env{status: 200, body: body}
    end)

    Cachex.put(:rich_media_cache, url, metadata)
    # Do not leave the entry behind for other tests using the same cache.
    on_exit(fn -> Cachex.del(:rich_media_cache, url) end)

    Pleroma.Web.RichMedia.Parser.set_ttl_based_on_image({:ok, metadata}, url)

    {:ok, cache_ttl} = Cachex.ttl(:rich_media_cache, url)

    # Truncating the signing time to whole seconds can lose up to one second,
    # and setting/reading the cache adds a small delay on top, so allow two
    # seconds of difference.
    assert_in_delta(valid_till * 1000, cache_ttl, 2000)
  end

  # Builds an S3 signed image URL with the given signing time (ISO basic
  # format) and validity period (seconds).
  defp construct_s3_url(timestamp, valid_till) do
    "https://pleroma.s3.ap-southeast-1.amazonaws.com/sachin%20%281%29%20_a%20-%25%2Aasdasd%20BNN%20bnnn%20.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIBLWWK6RGDQXDLJQ%2F20190716%2Fap-southeast-1%2Fs3%2Faws4_request&X-Amz-Date=#{timestamp}&X-Amz-Expires=#{valid_till}&X-Amz-Signature=04ffd6b98634f4b1bbabc62e0fac4879093cd54a6eed24fe8eb38e8369526bbf&X-Amz-SignedHeaders=host"
  end

  # Rich media metadata, as held in the cache, whose image is a signed S3 URL.
  defp construct_metadata(timestamp, valid_till, url) do
    %{
      image: construct_s3_url(timestamp, valid_till),
      site: "Pleroma",
      title: "Pleroma",
      description: "Pleroma",
      url: url
    }
  end
end

Loading…
Cancel
Save