Fork of Pleroma with site-specific changes and feature branches https://git.pleroma.social/pleroma/pleroma
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

129 lines
3.3KB

# Pleroma: A lightweight social networking server
# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only
  4. defmodule Pleroma.HTML do
  5. # Scrubbers are compiled on boot so they can be configured in OTP releases
  6. # @on_load :compile_scrubbers
  7. @cachex Pleroma.Config.get([:cachex, :provider], Cachex)
  8. def compile_scrubbers do
  9. dir = Path.join(:code.priv_dir(:pleroma), "scrubbers")
  10. dir
  11. |> Pleroma.Utils.compile_dir()
  12. |> case do
  13. {:error, _errors, _warnings} ->
  14. raise "Compiling scrubbers failed"
  15. {:ok, _modules, _warnings} ->
  16. :ok
  17. end
  18. end
  19. defp get_scrubbers(scrubber) when is_atom(scrubber), do: [scrubber]
  20. defp get_scrubbers(scrubbers) when is_list(scrubbers), do: scrubbers
  21. defp get_scrubbers(_), do: [Pleroma.HTML.Scrubber.Default]
  22. def get_scrubbers do
  23. Pleroma.Config.get([:markup, :scrub_policy])
  24. |> get_scrubbers
  25. end
  26. def filter_tags(html, nil) do
  27. filter_tags(html, get_scrubbers())
  28. end
  29. def filter_tags(html, scrubbers) when is_list(scrubbers) do
  30. Enum.reduce(scrubbers, html, fn scrubber, html ->
  31. filter_tags(html, scrubber)
  32. end)
  33. end
  34. def filter_tags(html, scrubber) do
  35. {:ok, content} = FastSanitize.Sanitizer.scrub(html, scrubber)
  36. content
  37. end
  38. def filter_tags(html), do: filter_tags(html, nil)
  39. def strip_tags(html), do: filter_tags(html, FastSanitize.Sanitizer.StripTags)
  40. def get_cached_scrubbed_html_for_activity(
  41. content,
  42. scrubbers,
  43. activity,
  44. key \\ "",
  45. callback \\ fn x -> x end
  46. ) do
  47. key = "#{key}#{generate_scrubber_signature(scrubbers)}|#{activity.id}"
  48. @cachex.fetch!(:scrubber_cache, key, fn _key ->
  49. object = Pleroma.Object.normalize(activity, fetch: false)
  50. ensure_scrubbed_html(content, scrubbers, object.data["fake"] || false, callback)
  51. end)
  52. end
  53. def get_cached_stripped_html_for_activity(content, activity, key) do
  54. get_cached_scrubbed_html_for_activity(
  55. content,
  56. FastSanitize.Sanitizer.StripTags,
  57. activity,
  58. key,
  59. &HtmlEntities.decode/1
  60. )
  61. end
  62. def ensure_scrubbed_html(
  63. content,
  64. scrubbers,
  65. fake,
  66. callback
  67. ) do
  68. content =
  69. content
  70. |> filter_tags(scrubbers)
  71. |> callback.()
  72. if fake do
  73. {:ignore, content}
  74. else
  75. {:commit, content}
  76. end
  77. end
  78. defp generate_scrubber_signature(scrubber) when is_atom(scrubber) do
  79. generate_scrubber_signature([scrubber])
  80. end
  81. defp generate_scrubber_signature(scrubbers) do
  82. Enum.reduce(scrubbers, "", fn scrubber, signature ->
  83. "#{signature}#{to_string(scrubber)}"
  84. end)
  85. end
  86. def extract_first_external_url_from_object(%{data: %{"content" => content}} = object)
  87. when is_binary(content) do
  88. unless object.data["fake"] do
  89. key = "URL|#{object.id}"
  90. @cachex.fetch!(:scrubber_cache, key, fn _key ->
  91. {:commit, {:ok, extract_first_external_url(content)}}
  92. end)
  93. else
  94. {:ok, extract_first_external_url(content)}
  95. end
  96. end
  97. def extract_first_external_url_from_object(_), do: {:error, :no_content}
  98. def extract_first_external_url(content) do
  99. content
  100. |> Floki.parse_fragment!()
  101. |> Floki.find("a:not(.mention,.hashtag,.attachment,[rel~=\"tag\"])")
  102. |> Enum.take(1)
  103. |> Floki.attribute("href")
  104. |> Enum.at(0)
  105. end
  106. end