diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index a857e4a..162ae45 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -7,9 +7,17 @@ jobs: strategy: matrix: include: + - elixir: 1.20.x + otp: 29.x + check_formatted: true + - elixir: 1.19.x + otp: 28.x + - elixir: 1.18.x + otp: 27.x + - elixir: 1.17.x + otp: 27.x - elixir: 1.16.x otp: 26.x - check_formatted: true - elixir: 1.15.x otp: 25.x - elixir: 1.14.x @@ -25,7 +33,7 @@ jobs: elixir-version: ${{ matrix.elixir }} - name: Retrieve Mix Dependencies Cache - uses: actions/cache@v1 + uses: actions/cache@v4 id: mix-cache with: path: deps diff --git a/lib/simplex_format.ex b/lib/simplex_format.ex index 7a54cc8..34739ef 100644 --- a/lib/simplex_format.ex +++ b/lib/simplex_format.ex @@ -3,7 +3,7 @@ defmodule SimplexFormat do Helpers related to formatting text. """ - import Phoenix.HTML, only: [html_escape: 1, safe_to_string: 1, raw: 1] + import Phoenix.HTML, only: [html_escape: 1, raw: 1] import PhoenixHTMLHelpers.Tag, only: [content_tag: 3, tag: 1] @doc ~S""" @@ -48,103 +48,65 @@ defmodule SimplexFormat do url_attrs = Keyword.get(opts, :url_attributes, []) string - |> maybe_html_escape(escape?) |> String.split(["\n\n", "\r\n\r\n"], trim: true) |> Enum.filter(¬_blank?/1) - |> Enum.map(&wrap_paragraph(&1, wrapper_tag, attributes, insert_brs?, auto_link?, url_attrs)) + |> Enum.map( + &wrap_paragraph(&1, wrapper_tag, attributes, escape?, insert_brs?, auto_link?, url_attrs) + ) |> html_escape() end - defp maybe_html_escape(string, true) do - string - |> html_escape() - |> safe_to_string() - end - - defp maybe_html_escape(string, false), do: string - defp not_blank?("\r\n" <> rest), do: not_blank?(rest) defp not_blank?("\n" <> rest), do: not_blank?(rest) defp not_blank?(" " <> rest), do: not_blank?(rest) defp not_blank?(""), do: false defp not_blank?(_), do: true - defp wrap_paragraph(text, tag, attributes, insert_brs?, auto_link?, url_attrs) do - prepared_text = + defp wrap_paragraph(text, tag, attributes, escape?, insert_brs?, auto_link?, url_attrs) do + content = text - |> insert_brs(insert_brs?) - |> auto_link(auto_link?, url_attrs) - - [content_tag(tag, prepared_text, attributes), ?\n] - end + |> split_lines() + |> Enum.map(&format_line(&1, escape?, auto_link?, url_attrs)) + |> join_lines(insert_brs?) - defp insert_brs(text, false) do - text - |> split_lines() - |> Enum.intersperse(?\s) - |> raw() - end - - defp insert_brs(text, true) do - text - |> split_lines() - |> Enum.map(&raw/1) - |> Enum.intersperse([tag(:br), ?\n]) + [content_tag(tag, content, attributes), ?\n] end defp split_lines(text) do String.split(text, ["\n", "\r\n"], trim: true) end - defp auto_link(lines, false, _), do: lines - - defp auto_link(lines, true, url_attrs) do - assemble_links([], lines, url_attrs) - end - - @url_regex ~r/((http(s)?(\:\/\/))+(www\.)?([\w\-\.\/])*(\.[a-zA-Z]{2,3}\/?))[^\s\b\n|]*[^.,;:\?\!\@\^\$ -]/ - - defp assemble_links(runs, [], _), do: runs - - defp assemble_links(runs, [line | lines], url_attrs) when is_list(line) do - assemble_links(runs ++ [line], lines, url_attrs) - end - - defp assemble_links(runs, [line | lines], url_attrs) do - text = safe_to_string(line) - - case url_indices(text) do - nil -> - assemble_links(runs ++ [line], lines, url_attrs) - - indices -> - {leading, url, trailing} = split_at_indices(text, indices) - safe_leading = raw(leading) - safe_trailing = raw(trailing) - safe_url = wrap_url(url, url_attrs) - - assemble_links(runs ++ [safe_leading, safe_url], [safe_trailing] ++ lines, url_attrs) - end - end - - defp url_indices(""), do: nil - - defp url_indices(text) do - case Regex.run(@url_regex, text, return: :index, captures: :all_but_first) do - nil -> nil - matches -> matches |> Enum.at(0) - end - end - - defp split_at_indices(text, {index, split_length}) do - leading = binary_part(text, 0, index) - middle = binary_part(text, index, split_length) - - trailing_index = index + split_length - trailing_length = byte_size(text) - trailing_index - trailing = binary_part(text, trailing_index, trailing_length) - - {leading, middle, trailing} + defp join_lines(lines, true), do: Enum.intersperse(lines, [tag(:br), ?\n]) + defp join_lines(lines, false), do: Enum.intersperse(lines, ?\s) + + defp format_line(line, escape?, false, _url_attrs), do: escape_text(line, escape?) + defp format_line(line, escape?, true, url_attrs), do: link_urls(line, escape?, url_attrs) + + # Escaping happens here, at the leaves, so that auto-linked URLs reach + # `content_tag/3` unescaped and are therefore escaped exactly once. + defp escape_text(text, true), do: html_escape(text) + defp escape_text(text, false), do: raw(text) + + # The URL body runs up to the first whitespace or pipe; the final character + # class trims trailing sentence punctuation so "see http://x.com." excludes + # the period. + @url_regex ~r/((http(s)?(\:\/\/))+(www\.)?([\w\-\.\/])*(\.[a-zA-Z]{2,3}\/?))[^\s|]*[^.,;:\?\!\@\^\$ -]/ + + # Finds every URL in a single linear pass, interleaving the escaped text + # between matches with the anchor tags built from each (unescaped) URL. + defp link_urls(text, escape?, url_attrs) do + {segments, offset} = + @url_regex + |> Regex.scan(text, return: :index) + |> Enum.reduce({[], 0}, fn [{index, length} | _], {segments, offset} -> + leading = binary_part(text, offset, index - offset) + url = binary_part(text, index, length) + + {[wrap_url(url, url_attrs), escape_text(leading, escape?) | segments], index + length} + end) + + trailing = binary_part(text, offset, byte_size(text) - offset) + Enum.reverse([escape_text(trailing, escape?) | segments]) end defp wrap_url(url, url_attributes) do diff --git a/mix.lock b/mix.lock index 59cc2fc..0074b9d 100644 --- a/mix.lock +++ b/mix.lock @@ -1,10 +1,10 @@ %{ - "earmark_parser": {:hex, :earmark_parser, "1.4.39", "424642f8335b05bb9eb611aa1564c148a8ee35c9c8a8bba6e129d51a3e3c6769", [:mix], [], "hexpm", "06553a88d1f1846da9ef066b87b57c6f605552cfbe40d20bd8d59cc6bde41944"}, - "ex_doc": {:hex, :ex_doc, "0.31.2", "8b06d0a5ac69e1a54df35519c951f1f44a7b7ca9a5bb7a260cd8a174d6322ece", [:mix], [{:earmark_parser, "~> 1.4.39", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_c, ">= 0.1.1", [hex: :makeup_c, repo: "hexpm", optional: true]}, {:makeup_elixir, "~> 0.14", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1", [hex: :makeup_erlang, repo: "hexpm", optional: false]}], "hexpm", "317346c14febaba9ca40fd97b5b5919f7751fb85d399cc8e7e8872049f37e0af"}, - "makeup": {:hex, :makeup, "1.1.1", "fa0bc768698053b2b3869fa8a62616501ff9d11a562f3ce39580d60860c3a55e", [:mix], [{:nimble_parsec, "~> 1.2.2 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "5dc62fbdd0de44de194898b6710692490be74baa02d9d108bc29f007783b0b48"}, - "makeup_elixir": {:hex, :makeup_elixir, "0.16.2", "627e84b8e8bf22e60a2579dad15067c755531fea049ae26ef1020cad58fe9578", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.2.3 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "41193978704763f6bbe6cc2758b84909e62984c7752b3784bd3c218bb341706b"}, - "makeup_erlang": {:hex, :makeup_erlang, "0.1.5", "e0ff5a7c708dda34311f7522a8758e23bfcd7d8d8068dc312b5eb41c6fd76eba", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "94d2e986428585a21516d7d7149781480013c56e30c6a233534bedf38867a59a"}, - "nimble_parsec": {:hex, :nimble_parsec, "1.4.0", "51f9b613ea62cfa97b25ccc2c1b4216e81df970acd8e16e8d1bdc58fef21370d", [:mix], [], "hexpm", "9c565862810fb383e9838c1dd2d7d2c437b3d13b267414ba6af33e50d2d1cf28"}, - "phoenix_html": {:hex, :phoenix_html, "4.1.1", "4c064fd3873d12ebb1388425a8f2a19348cef56e7289e1998e2d2fa758aa982e", [:mix], [], "hexpm", "f2f2df5a72bc9a2f510b21497fd7d2b86d932ec0598f0210fed4114adc546c6f"}, + "earmark_parser": {:hex, :earmark_parser, "1.4.45", "cba8369ab2a1342e419bc2760eec731b17be828941dcf494045d44766227e1d5", [:mix], [], "hexpm", "d3ec045bf122965db20c0bdb420e19ee1415843135327124918473feb4b328e8"}, + "ex_doc": {:hex, :ex_doc, "0.40.3", "4a972ffe64bc07dc605af487e98fc19b72a4185f55ca031b94c0552d6071c1d9", [:mix], [{:earmark_parser, "~> 1.4.44", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_c, ">= 0.1.0", [hex: :makeup_c, repo: "hexpm", optional: true]}, {:makeup_elixir, "~> 0.14 or ~> 1.0", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1 or ~> 1.0", [hex: :makeup_erlang, repo: "hexpm", optional: false]}, {:makeup_html, ">= 0.1.0", [hex: :makeup_html, repo: "hexpm", optional: true]}], "hexpm", "2756e357742fecd9749b489b85d67c9ce99c465f2e75728d9e6dc8d704b973de"}, + "makeup": {:hex, :makeup, "1.2.1", "e90ac1c65589ef354378def3ba19d401e739ee7ee06fb47f94c687016e3713d1", [:mix], [{:nimble_parsec, "~> 1.4", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "d36484867b0bae0fea568d10131197a4c2e47056a6fbe84922bf6ba71c8d17ce"}, + "makeup_elixir": {:hex, :makeup_elixir, "1.0.1", "e928a4f984e795e41e3abd27bfc09f51db16ab8ba1aebdba2b3a575437efafc2", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.2.3 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "7284900d412a3e5cfd97fdaed4f5ed389b8f2b4cb49efc0eb3bd10e2febf9507"}, + "makeup_erlang": {:hex, :makeup_erlang, "1.1.0", "835f7e60792e08824cda445639555d7bf1bbbddb1b60b306e33cb6f6db24dc74", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "1cd6780fb1dd1a03979abaed0fe82712b0625118fd5257d3ebbf73f960c73c3c"}, + "nimble_parsec": {:hex, :nimble_parsec, "1.4.2", "8efba0122db06df95bfaa78f791344a89352ba04baedd3849593bfce4d0dc1c6", [:mix], [], "hexpm", "4b21398942dda052b403bbe1da991ccd03a053668d147d53fb8c4e0efe09c973"}, + "phoenix_html": {:hex, :phoenix_html, "4.3.0", "d3577a5df4b6954cd7890c84d955c470b5310bb49647f0a114a6eeecc850f7ad", [:mix], [], "hexpm", "3eaa290a78bab0f075f791a46a981bbe769d94bc776869f4f3063a14f30497ad"}, "phoenix_html_helpers": {:hex, :phoenix_html_helpers, "1.0.1", "7eed85c52eff80a179391036931791ee5d2f713d76a81d0d2c6ebafe1e11e5ec", [:mix], [{:phoenix_html, "~> 4.0", [hex: :phoenix_html, repo: "hexpm", optional: false]}, {:plug, "~> 1.5", [hex: :plug, repo: "hexpm", optional: true]}], "hexpm", "cffd2385d1fa4f78b04432df69ab8da63dc5cf63e07b713a4dcf36a3740e3090"}, } diff --git a/test/simplex_format_test.exs b/test/simplex_format_test.exs index 01ff1af..fec857c 100644 --- a/test/simplex_format_test.exs +++ b/test/simplex_format_test.exs @@ -204,10 +204,24 @@ defmodule SimplexFormatTest do ) assert formatted == """ -
<script src=http://www.example.com/malicious-code.js></script>
+<script src=http://www.example.com/malicious-code.js></script>
""" end + test "auto_link does not double-escape entities in a URL" do + formatted = format("See http://example.com/a?b=1&c=2 here", auto_link: true) + + assert formatted == + ~s(See http://example.com/a?b=1&c=2 here
\n) + end + + test "auto_link terminates a URL at whitespace" do + formatted = format("a http://one.com http://two.com b", auto_link: true) + + assert formatted == + ~s(\n) + end + defp format(text, opts \\ []) do text |> text_to_html(opts) |> safe_to_string end