Fork of Pleroma with site-specific changes and feature branches https://git.pleroma.social/pleroma/pleroma
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

94 lines
2.4KB

  # Pleroma: A lightweight social networking server
  # Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
  # SPDX-License-Identifier: AGPL-3.0-only
  defmodule Pleroma.HTML do
    @moduledoc """
    Helpers for running HTML through scrubber modules (via
    `FastSanitize.Sanitizer.scrub/2`) and for pulling the first non-mention,
    non-hashtag, non-attachment link out of an HTML fragment.
    """

    # Scrubbers are compiled on boot so they can be configured in OTP releases
    #  @on_load :compile_scrubbers

    # Module attributes are evaluated when this module is compiled, so the cache
    # provider is fixed at compile time; it falls back to `Cachex` when
    # `[:cachex, :provider]` is not configured.
    @cachex Pleroma.Config.get([:cachex, :provider], Cachex)

    # Scrubber used when `[:markup, :scrub_policy]` is unset or has an unexpected shape.
    @default_scrubber Pleroma.HTML.Scrubber.Default

    @doc """
    Compiles every file under the `scrubbers` directory of the `:pleroma`
    application's `priv` dir using `Pleroma.Utils.compile_dir/1`.

    Returns `:ok` on success and raises a `RuntimeError`
    ("Compiling scrubbers failed") when compilation reports an error.
    """
    @spec compile_scrubbers() :: :ok
    def compile_scrubbers do
      scrubber_dir = Path.join(:code.priv_dir(:pleroma), "scrubbers")

      case Pleroma.Utils.compile_dir(scrubber_dir) do
        {:error, _errors, _warnings} ->
          raise "Compiling scrubbers failed"

        {:ok, _modules, _warnings} ->
          :ok
      end
    end

    @doc """
    Returns the scrubbers configured under `[:markup, :scrub_policy]` as a list.

    A single scrubber module is wrapped in a list, a list is returned as-is, and
    an unset (`nil`) or otherwise unexpected value yields
    `[Pleroma.HTML.Scrubber.Default]`.
    """
    def get_scrubbers do
      [:markup, :scrub_policy]
      |> Pleroma.Config.get()
      |> get_scrubbers()
    end

    # `nil` is an atom, so it must be matched before the `is_atom/1` clause;
    # otherwise an unset policy would become `[nil]`, and `filter_tags/2` would
    # recurse forever (`nil` -> `[nil]` -> `nil` -> ...).
    defp get_scrubbers(nil), do: [@default_scrubber]
    defp get_scrubbers(scrubber) when is_atom(scrubber), do: [scrubber]
    defp get_scrubbers(scrubbers) when is_list(scrubbers), do: scrubbers
    defp get_scrubbers(_), do: [@default_scrubber]

    @doc """
    Scrubs `html` with the configured scrubbers (see `get_scrubbers/0`).
    """
    def filter_tags(html), do: filter_tags(html, nil)

    @doc """
    Scrubs `html` with the given scrubber(s) and returns the scrubbed content.

    `scrubbers` may be:

      * `nil` - the configured scrubbers from `get_scrubbers/0` are used;
      * a list - each scrubber is applied in order, each one receiving the
        output of the previous one;
      * anything else - passed directly to `FastSanitize.Sanitizer.scrub/2`.

    Raises a `MatchError` if `FastSanitize.Sanitizer.scrub/2` returns anything
    other than `{:ok, content}`.
    """
    def filter_tags(html, nil), do: filter_tags(html, get_scrubbers())

    def filter_tags(html, scrubbers) when is_list(scrubbers) do
      Enum.reduce(scrubbers, html, fn scrubber, acc -> filter_tags(acc, scrubber) end)
    end

    def filter_tags(html, scrubber) do
      {:ok, content} = FastSanitize.Sanitizer.scrub(html, scrubber)
      content
    end

    @doc """
    Scrubs `html` with `FastSanitize.Sanitizer.StripTags`.
    """
    def strip_tags(html), do: filter_tags(html, FastSanitize.Sanitizer.StripTags)

    @doc """
    Scrubs `content` with `scrubbers`, passes the result through `callback`,
    and tags the outcome.

    Returns `{:ignore, result}` when `fake` is truthy and `{:commit, result}`
    otherwise.

    NOTE(review): the `:commit` / `:ignore` tags look like the return shape of a
    cache fetch fallback - confirm against the callers.
    """
    def ensure_scrubbed_html(content, scrubbers, fake, callback) do
      scrubbed = callback.(filter_tags(content, scrubbers))

      if fake do
        {:ignore, scrubbed}
      else
        {:commit, scrubbed}
      end
    end

    @doc """
    Extracts the first external URL from an object's `"content"`.

    For an object whose `data["fake"]` is truthy the URL is computed directly
    and returned as `{:ok, url_or_nil}`. Otherwise the lookup goes through the
    cache provider's `fetch!/3` on `:scrubber_cache`, keyed by
    `"URL|" <> object id`, with a fallback that commits `{:ok, url_or_nil}`.

    Returns `{:error, :no_content}` for anything that does not carry a binary
    `"content"` under `:data`.
    """
    def extract_first_external_url_from_object(%{data: %{"content" => content}} = object)
        when is_binary(content) do
      if object.data["fake"] do
        # Fake objects bypass the cache entirely.
        {:ok, extract_first_external_url(content)}
      else
        key = "URL|#{object.id}"

        @cachex.fetch!(:scrubber_cache, key, fn _key ->
          {:commit, {:ok, extract_first_external_url(content)}}
        end)
      end
    end

    def extract_first_external_url_from_object(_), do: {:error, :no_content}

    @doc """
    Parses `content` as an HTML fragment and returns the `href` of the first
    `<a>` element that is not a mention, hashtag or attachment link and does not
    have `rel~="tag"`.

    Only the first matching anchor is inspected: the result is `nil` when there
    is no such anchor or when that anchor yields no `href`. Raises if
    `Floki.parse_fragment!/1` cannot parse `content`.
    """
    def extract_first_external_url(content) do
      content
      |> Floki.parse_fragment!()
      |> Floki.find("a:not(.mention,.hashtag,.attachment,[rel~=\"tag\"])")
      |> Enum.take(1)
      |> Floki.attribute("href")
      |> List.first()
    end
  end