Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,17 @@ jobs:
strategy:
matrix:
include:
- elixir: 1.20.x
otp: 29.x
check_formatted: true
- elixir: 1.19.x
otp: 28.x
- elixir: 1.18.x
otp: 27.x
- elixir: 1.17.x
otp: 27.x
- elixir: 1.16.x
otp: 26.x
check_formatted: true
- elixir: 1.15.x
otp: 25.x
- elixir: 1.14.x
Expand All @@ -25,7 +33,7 @@ jobs:
elixir-version: ${{ matrix.elixir }}

- name: Retrieve Mix Dependencies Cache
uses: actions/cache@v1
uses: actions/cache@v4
id: mix-cache
with:
path: deps
Expand Down
120 changes: 41 additions & 79 deletions lib/simplex_format.ex
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ defmodule SimplexFormat do
Helpers related to formatting text.
"""

import Phoenix.HTML, only: [html_escape: 1, safe_to_string: 1, raw: 1]
import Phoenix.HTML, only: [html_escape: 1, raw: 1]
import PhoenixHTMLHelpers.Tag, only: [content_tag: 3, tag: 1]

@doc ~S"""
Expand Down Expand Up @@ -48,103 +48,65 @@ defmodule SimplexFormat do
url_attrs = Keyword.get(opts, :url_attributes, [])

string
|> maybe_html_escape(escape?)
|> String.split(["\n\n", "\r\n\r\n"], trim: true)
|> Enum.filter(&not_blank?/1)
|> Enum.map(&wrap_paragraph(&1, wrapper_tag, attributes, insert_brs?, auto_link?, url_attrs))
|> Enum.map(
&wrap_paragraph(&1, wrapper_tag, attributes, escape?, insert_brs?, auto_link?, url_attrs)
)
|> html_escape()
end

defp maybe_html_escape(string, true) do
string
|> html_escape()
|> safe_to_string()
end

defp maybe_html_escape(string, false), do: string

# Reports whether a chunk contains anything other than the filler consumed
# below: CRLF pairs, bare line feeds, and spaces. Those are peeled off the
# front one at a time; reaching the empty binary means the chunk was blank,
# while hitting any other byte (a lone "\r" or a tab included) means it was not.
defp not_blank?(<<>>), do: false
defp not_blank?(<<?\r, ?\n, tail::binary>>), do: not_blank?(tail)
defp not_blank?(<<char, tail::binary>>) when char in [?\n, ?\s], do: not_blank?(tail)
defp not_blank?(_other), do: true

defp wrap_paragraph(text, tag, attributes, insert_brs?, auto_link?, url_attrs) do
prepared_text =
defp wrap_paragraph(text, tag, attributes, escape?, insert_brs?, auto_link?, url_attrs) do
content =
text
|> insert_brs(insert_brs?)
|> auto_link(auto_link?, url_attrs)

[content_tag(tag, prepared_text, attributes), ?\n]
end
|> split_lines()
|> Enum.map(&format_line(&1, escape?, auto_link?, url_attrs))
|> join_lines(insert_brs?)

defp insert_brs(text, false) do
text
|> split_lines()
|> Enum.intersperse(?\s)
|> raw()
end

defp insert_brs(text, true) do
text
|> split_lines()
|> Enum.map(&raw/1)
|> Enum.intersperse([tag(:br), ?\n])
[content_tag(tag, content, attributes), ?\n]
end

# Breaks a paragraph into its individual lines. Both LF and CRLF count as a
# line break, and `trim: true` drops the empty strings produced by leading,
# trailing, or consecutive breaks.
defp split_lines(text) do
  line_breaks = ["\n", "\r\n"]
  String.split(text, line_breaks, trim: true)
end

defp auto_link(lines, false, _), do: lines

defp auto_link(lines, true, url_attrs) do
assemble_links([], lines, url_attrs)
end

@url_regex ~r/((http(s)?(\:\/\/))+(www\.)?([\w\-\.\/])*(\.[a-zA-Z]{2,3}\/?))[^\s\b\n|]*[^.,;:\?\!\@\^\$ -]/

defp assemble_links(runs, [], _), do: runs

defp assemble_links(runs, [line | lines], url_attrs) when is_list(line) do
assemble_links(runs ++ [line], lines, url_attrs)
end

defp assemble_links(runs, [line | lines], url_attrs) do
text = safe_to_string(line)

case url_indices(text) do
nil ->
assemble_links(runs ++ [line], lines, url_attrs)

indices ->
{leading, url, trailing} = split_at_indices(text, indices)
safe_leading = raw(leading)
safe_trailing = raw(trailing)
safe_url = wrap_url(url, url_attrs)

assemble_links(runs ++ [safe_leading, safe_url], [safe_trailing] ++ lines, url_attrs)
end
end

defp url_indices(""), do: nil

defp url_indices(text) do
case Regex.run(@url_regex, text, return: :index, captures: :all_but_first) do
nil -> nil
matches -> matches |> Enum.at(0)
end
end

defp split_at_indices(text, {index, split_length}) do
leading = binary_part(text, 0, index)
middle = binary_part(text, index, split_length)

trailing_index = index + split_length
trailing_length = byte_size(text) - trailing_index
trailing = binary_part(text, trailing_index, trailing_length)

{leading, middle, trailing}
# Places a separator between consecutive rendered lines: a `<br>` tag followed
# by a newline when `insert_brs?` is true, a single space when it is false.
defp join_lines(lines, insert_brs?) do
  separator =
    case insert_brs? do
      true -> [tag(:br), ?\n]
      false -> ?\s
    end

  Enum.intersperse(lines, separator)
end

# Renders one line of a paragraph. With auto-linking on, the line goes through
# `link_urls/3`; otherwise the whole line is handed to `escape_text/2`.
defp format_line(line, escape?, auto_link?, url_attrs) do
  case auto_link? do
    true -> link_urls(line, escape?, url_attrs)
    false -> escape_text(line, escape?)
  end
end

# Leaf-level escaping: plain text fragments are escaped (or marked as raw)
# here rather than up front, so that auto-linked URLs reach `content_tag/3`
# unescaped and are therefore escaped exactly once.
defp escape_text(text, escape?) do
  case escape? do
    true -> html_escape(text)
    false -> raw(text)
  end
end

# Pattern used to locate URLs inside a single line of text. Reading the
# pattern left to right:
#
#   * `(http(s)?(\:\/\/))+` - `http` or `https` followed by `://`, with the
#     whole scheme group allowed to repeat;
#   * `(www\.)?` - an optional `www.` prefix;
#   * `([\w\-\.\/])*` - any run of word characters, `-`, `.` and `/`;
#   * `(\.[a-zA-Z]{2,3}\/?)` - a dot followed by two or three ASCII letters
#     and an optional `/`;
#   * `[^\s|]*` - the rest of the URL body, which runs up to the first
#     whitespace or pipe;
#   * `[^.,;:\?\!\@\^\$ -]` - the match must end on a character that is not
#     one of `. , ; : ? ! @ ^ $`, a space, or `-`. This final character class
#     trims trailing sentence punctuation so "see http://x.com." excludes
#     the period.
@url_regex ~r/((http(s)?(\:\/\/))+(www\.)?([\w\-\.\/])*(\.[a-zA-Z]{2,3}\/?))[^\s|]*[^.,;:\?\!\@\^\$ -]/

# Turns one line into a list of rendered segments, alternating between the
# text that surrounds each URL and the anchor built for that URL.
#
# `Regex.scan/3` with `return: :index` yields, per match, a list whose head is
# the `{byte_offset, byte_length}` of the whole match. Walking those matches
# in order while carrying a cursor (the byte offset just past the previous
# match) lets each step slice out the text before the URL and the URL itself.
# The surrounding text goes through `escape_text/2`; the URL is passed to
# `wrap_url/2` unescaped. Whatever follows the last match is appended at the
# end, so a line without URLs yields a single escaped segment.
defp link_urls(text, escape?, url_attrs) do
  matches = Regex.scan(@url_regex, text, return: :index)

  {pieces, cursor} =
    Enum.flat_map_reduce(matches, 0, fn [{start, size} | _groups], cursor ->
      before = binary_part(text, cursor, start - cursor)
      link = binary_part(text, start, size)

      {[escape_text(before, escape?), wrap_url(link, url_attrs)], start + size}
    end)

  rest = binary_part(text, cursor, byte_size(text) - cursor)
  pieces ++ [escape_text(rest, escape?)]
end

defp wrap_url(url, url_attributes) do
Expand Down
14 changes: 7 additions & 7 deletions mix.lock
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
%{
"earmark_parser": {:hex, :earmark_parser, "1.4.39", "424642f8335b05bb9eb611aa1564c148a8ee35c9c8a8bba6e129d51a3e3c6769", [:mix], [], "hexpm", "06553a88d1f1846da9ef066b87b57c6f605552cfbe40d20bd8d59cc6bde41944"},
"ex_doc": {:hex, :ex_doc, "0.31.2", "8b06d0a5ac69e1a54df35519c951f1f44a7b7ca9a5bb7a260cd8a174d6322ece", [:mix], [{:earmark_parser, "~> 1.4.39", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_c, ">= 0.1.1", [hex: :makeup_c, repo: "hexpm", optional: true]}, {:makeup_elixir, "~> 0.14", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1", [hex: :makeup_erlang, repo: "hexpm", optional: false]}], "hexpm", "317346c14febaba9ca40fd97b5b5919f7751fb85d399cc8e7e8872049f37e0af"},
"makeup": {:hex, :makeup, "1.1.1", "fa0bc768698053b2b3869fa8a62616501ff9d11a562f3ce39580d60860c3a55e", [:mix], [{:nimble_parsec, "~> 1.2.2 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "5dc62fbdd0de44de194898b6710692490be74baa02d9d108bc29f007783b0b48"},
"makeup_elixir": {:hex, :makeup_elixir, "0.16.2", "627e84b8e8bf22e60a2579dad15067c755531fea049ae26ef1020cad58fe9578", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.2.3 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "41193978704763f6bbe6cc2758b84909e62984c7752b3784bd3c218bb341706b"},
"makeup_erlang": {:hex, :makeup_erlang, "0.1.5", "e0ff5a7c708dda34311f7522a8758e23bfcd7d8d8068dc312b5eb41c6fd76eba", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "94d2e986428585a21516d7d7149781480013c56e30c6a233534bedf38867a59a"},
"nimble_parsec": {:hex, :nimble_parsec, "1.4.0", "51f9b613ea62cfa97b25ccc2c1b4216e81df970acd8e16e8d1bdc58fef21370d", [:mix], [], "hexpm", "9c565862810fb383e9838c1dd2d7d2c437b3d13b267414ba6af33e50d2d1cf28"},
"phoenix_html": {:hex, :phoenix_html, "4.1.1", "4c064fd3873d12ebb1388425a8f2a19348cef56e7289e1998e2d2fa758aa982e", [:mix], [], "hexpm", "f2f2df5a72bc9a2f510b21497fd7d2b86d932ec0598f0210fed4114adc546c6f"},
"earmark_parser": {:hex, :earmark_parser, "1.4.45", "cba8369ab2a1342e419bc2760eec731b17be828941dcf494045d44766227e1d5", [:mix], [], "hexpm", "d3ec045bf122965db20c0bdb420e19ee1415843135327124918473feb4b328e8"},
"ex_doc": {:hex, :ex_doc, "0.40.3", "4a972ffe64bc07dc605af487e98fc19b72a4185f55ca031b94c0552d6071c1d9", [:mix], [{:earmark_parser, "~> 1.4.44", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_c, ">= 0.1.0", [hex: :makeup_c, repo: "hexpm", optional: true]}, {:makeup_elixir, "~> 0.14 or ~> 1.0", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1 or ~> 1.0", [hex: :makeup_erlang, repo: "hexpm", optional: false]}, {:makeup_html, ">= 0.1.0", [hex: :makeup_html, repo: "hexpm", optional: true]}], "hexpm", "2756e357742fecd9749b489b85d67c9ce99c465f2e75728d9e6dc8d704b973de"},
"makeup": {:hex, :makeup, "1.2.1", "e90ac1c65589ef354378def3ba19d401e739ee7ee06fb47f94c687016e3713d1", [:mix], [{:nimble_parsec, "~> 1.4", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "d36484867b0bae0fea568d10131197a4c2e47056a6fbe84922bf6ba71c8d17ce"},
"makeup_elixir": {:hex, :makeup_elixir, "1.0.1", "e928a4f984e795e41e3abd27bfc09f51db16ab8ba1aebdba2b3a575437efafc2", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.2.3 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "7284900d412a3e5cfd97fdaed4f5ed389b8f2b4cb49efc0eb3bd10e2febf9507"},
"makeup_erlang": {:hex, :makeup_erlang, "1.1.0", "835f7e60792e08824cda445639555d7bf1bbbddb1b60b306e33cb6f6db24dc74", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "1cd6780fb1dd1a03979abaed0fe82712b0625118fd5257d3ebbf73f960c73c3c"},
"nimble_parsec": {:hex, :nimble_parsec, "1.4.2", "8efba0122db06df95bfaa78f791344a89352ba04baedd3849593bfce4d0dc1c6", [:mix], [], "hexpm", "4b21398942dda052b403bbe1da991ccd03a053668d147d53fb8c4e0efe09c973"},
"phoenix_html": {:hex, :phoenix_html, "4.3.0", "d3577a5df4b6954cd7890c84d955c470b5310bb49647f0a114a6eeecc850f7ad", [:mix], [], "hexpm", "3eaa290a78bab0f075f791a46a981bbe769d94bc776869f4f3063a14f30497ad"},
"phoenix_html_helpers": {:hex, :phoenix_html_helpers, "1.0.1", "7eed85c52eff80a179391036931791ee5d2f713d76a81d0d2c6ebafe1e11e5ec", [:mix], [{:phoenix_html, "~> 4.0", [hex: :phoenix_html, repo: "hexpm", optional: false]}, {:plug, "~> 1.5", [hex: :plug, repo: "hexpm", optional: true]}], "hexpm", "cffd2385d1fa4f78b04432df69ab8da63dc5cf63e07b713a4dcf36a3740e3090"},
}
16 changes: 15 additions & 1 deletion test/simplex_format_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -204,10 +204,24 @@ defmodule SimplexFormatTest do
)

assert formatted == """
<p>&lt;script src=<a href=\"http://www.example.com/malicious-code.js&amp;gt;&amp;lt;/script&amp;gt\">http://www.example.com/malicious-code.js&amp;gt;&amp;lt;/script&amp;gt</a>;</p>
<p>&lt;script src=<a href=\"http://www.example.com/malicious-code.js&gt;&lt;/script&gt;\">http://www.example.com/malicious-code.js&gt;&lt;/script&gt;</a></p>
"""
end

test "auto_link does not double-escape entities in a URL" do
  # The query string carries a raw `&`; it must come out as a single `&amp;`
  # in both the href and the link text, never as `&amp;amp;`.
  expected =
    ~s(<p>See <a href="http://example.com/a?b=1&amp;c=2">http://example.com/a?b=1&amp;c=2</a> here</p>\n)

  assert format("See http://example.com/a?b=1&c=2 here", auto_link: true) == expected
end

test "auto_link terminates a URL at whitespace" do
  # Two URLs separated by a single space must become two separate anchors,
  # with the surrounding words left as plain text.
  input = "a http://one.com http://two.com b"

  expected =
    ~s(<p>a <a href="http://one.com">http://one.com</a> <a href="http://two.com">http://two.com</a> b</p>\n)

  assert format(input, auto_link: true) == expected
end

# Test helper: runs `text_to_html/2` with the given options and converts the
# safe result to a plain string, so assertions can compare it with a literal.
defp format(text, opts \\ []) do
  safe_html = text_to_html(text, opts)
  safe_to_string(safe_html)
end
Expand Down
Loading