From 8bbc2575c286421488b2d7e460bfc6493018ed4d Mon Sep 17 00:00:00 2001 From: Martin Leduc <31558169+DecimalTurn@users.noreply.github.com> Date: Sat, 2 May 2026 18:22:34 +0000 Subject: [PATCH 01/19] feat: add characters only rule into simplification array --- generate-railroad.js | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/generate-railroad.js b/generate-railroad.js index 9ba7b55..e6b7324 100644 --- a/generate-railroad.js +++ b/generate-railroad.js @@ -15,7 +15,12 @@ const INLINE_HEX_RULES = [ "multi-line-comment-start", "multi-line-comment-end", "asterisk", - "escape" + "escape", + "single-line-comment-start", + "decimal-point", + "minus", + "plus", + "zero", ]; function escapeRegExp(value) { From 4cdc01d4071d61e4aed4f105dcdfcf268e1ec4bd Mon Sep 17 00:00:00 2001 From: Martin Leduc <31558169+DecimalTurn@users.noreply.github.com> Date: Sat, 2 May 2026 19:17:21 +0000 Subject: [PATCH 02/19] feat: remove LS and PS notions from abnf --- grammar/jsonc.abnf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/grammar/jsonc.abnf b/grammar/jsonc.abnf index d8ff0af..39e4378 100644 --- a/grammar/jsonc.abnf +++ b/grammar/jsonc.abnf @@ -23,8 +23,8 @@ comment = single-line-comment / multi-line-comment source-character = %x00-10FFFF ; Comment terminators and sequences (based on ECMAScript line terminators) -comment-terminator = %x0A / %x0D / %x2028 / %x2029 ; LF / CR / LS / PS -comment-terminator-sequence = %x0D.0A / %x0A / %x0D / %x2028 / %x2029 +comment-terminator = %x0A / %x0D ; LF / CR +comment-terminator-sequence = %x0D.0A / %x0A / %x0D ; Single-line comment: starts with //, continues until line ending ; Terminator is not part of the comment body. 
@@ -32,7 +32,7 @@ comment-terminator-sequence = %x0D.0A / %x0A / %x0D / %x2028 / %x2029 single-line-comment-start = %x2F.2F ; // double solidus single-line-comment-end = comment-terminator-sequence single-line-comment = single-line-comment-start *single-line-comment-char [ single-line-comment-end ] -single-line-comment-char = %x00-09 / %x0B-0C / %x0E-2027 / %x202A-10FFFF ; Any source character except comment terminators +single-line-comment-char = %x00-09 / %x0B-0C / %x0E-10FFFF ; Any source character except comment terminators ; Multi-line comment: /* ... */ ; Cannot be nested. The first */ closes the comment. From 6356c82e476437261fe3ebb7cd11d4eddf1c0e53 Mon Sep 17 00:00:00 2001 From: Martin Leduc <31558169+DecimalTurn@users.noreply.github.com> Date: Sat, 2 May 2026 19:17:41 +0000 Subject: [PATCH 03/19] refactor: make abnf self-contained --- grammar/jsonc.abnf | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/grammar/jsonc.abnf b/grammar/jsonc.abnf index 39e4378..a04f3ed 100644 --- a/grammar/jsonc.abnf +++ b/grammar/jsonc.abnf @@ -1,9 +1,8 @@ ; JSONC grammar with comments support (RFC 8259 extended with JavaScript-style comments) ; ; Notes: -; - Rule names and structure follow RFC 8259 ABNF snippets. -; - DIGIT and HEXDIG are core rules from RFC 5234. -; - comments are an extension not in RFC 8259. +; - Rule names and structure follow RFC 8259 ABNF. +; - Comments are an extension not in RFC 8259. ; - Trailing commas are NOT supported in this grammar. ; A JSONC-text is a serialized value surrounded by optional whitespace and comments. @@ -75,14 +74,23 @@ array = begin-array [ value *( value-separator value ) ] end-array ; Numbers number = [ minus ] int [ frac ] [ exp ] decimal-point = %x2E ; . 
+digit0-9 = %x30-39 ; 0-9 digit1-9 = %x31-39 ; 1-9 + e = %x65 / %x45 ; e E -exp = e [ minus / plus ] 1*DIGIT -frac = decimal-point 1*DIGIT -int = zero / ( digit1-9 *DIGIT ) +exp = e [ minus / plus ] 1*digit0-9 +frac = decimal-point 1*digit0-9 +int = zero / ( digit1-9 *digit0-9 ) minus = %x2D ; - plus = %x2B ; + zero = %x30 ; 0 +hexdigit = digit0-9 / + %x41 / ; A + %x42 / ; B + %x43 / ; C + %x44 / ; D + %x45 / ; E + %x46 ; F ; Strings string = quotation-mark *char quotation-mark @@ -97,7 +105,7 @@ char = unescaped / %x6E / ; n line feed U+000A %x72 / ; r carriage return U+000D %x74 / ; t tab U+0009 - %x75 4HEXDIG ; uXXXX U+XXXX + %x75 4hexdigit ; uXXXX U+XXXX ) escape = %x5C ; \ From d718091e5556c8e4f2ecb717924034afc58d9259 Mon Sep 17 00:00:00 2001 From: Martin Leduc <31558169+DecimalTurn@users.noreply.github.com> Date: Sat, 2 May 2026 20:32:25 +0000 Subject: [PATCH 04/19] chore: update railroad diagram --- grammar/railroad-diagram.html | 127 ++++++++++------------- submodules/railroad-diagram-generator-js | 2 +- 2 files changed, 54 insertions(+), 75 deletions(-) diff --git a/grammar/railroad-diagram.html b/grammar/railroad-diagram.html index 27256f2..f01faf9 100644 --- a/grammar/railroad-diagram.html +++ b/grammar/railroad-diagram.html @@ -56,28 +56,19 @@
comment-terminator := %x0A / %x0D / %x2028 / %x2029 ; LF / CR / LS / PS
- comment-terminator-sequence := %x0D.0A / %x0A / %x0D / %x2028 / %x2029
- single-line-comment-start := %x2F.2F ; // double solidus
- comment-terminator-sequence := %x0D.0A / %x0A / %x0D
+ single-line-comment := single-line-comment-start *single-line-comment-char [ single-line-comment-end ]
- single-line-comment := "//" *single-line-comment-char [ single-line-comment-end ]
+ single-line-comment-char := %x00-09 / %x0B-0C / %x0E-2027 / %x202A-10FFFF ; Any source character except comment terminators
- single-line-comment-char := %x00-09 / %x0B-0C / %x0E-10FFFF ; Any source character except comment terminators
+ number := [ minus ] int [ frac ] [ exp ]
- number := [ "-" ] int [ frac ] [ exp ]
+ decimal-point := %x2E ; .
- digit0-9 := %x30-39 ; 0-9
+ exp := e [ minus / plus ] 1*DIGIT
- exp := e [ "-" / "+" ] 1*digit0-9
+ frac := decimal-point 1*DIGIT
- frac := "." 1*digit0-9
+ int := zero / ( digit1-9 *DIGIT )
- int := "0" / ( digit1-9 *digit0-9 )
+ minus := %x2D ; -
- plus := %x2B ; +
- zero := %x30 ; 0
- hexdigit := digit0-9 /
+ %x41 / ; A
+ %x42 / ; B
+ %x43 / ; C
+ %x44 / ; D
+ %x45 / ; E
+ %x46 ; F
+ value := false / null / true / object / array / number / string
- value := object / array / number / string / "true" / "false" / "null"
+ false := %x66.61.6C.73.65 ; false
- true := %x74.72.75.65 ; true
- null := %x6E.75.6C.6C ; null
- digit0-9 := %x30-39 ; 0-9
+ digit := %x30-39 ; 0-9
exp := e [ "-" / "+" ] 1*digit0-9
- exp := e [ "-" / "+" ] 1*digit
+ frac := "." 1*digit0-9
- frac := "." 1*digit
+ int := "0" / ( digit1-9 *digit0-9 )
- int := "0" / ( digit1-9 *digit )
+ hexdigit := digit0-9 /
+ hexdigit := digit /
%x41 / ; A
%x42 / ; B
%x43 / ; C
%x44 / ; D
%x45 / ; E
%x46 ; F
-
-
-digit0-9 A B C D E F
+
+
+digit A B C D E F
+
+
+
+
+ four-hexdigits
+ four-hexdigits := 4hexdigit
+
+
+hexdigit hexdigit hexdigit hexdigit
string
- string := quotation-mark *char quotation-mark
-
-
-quotation-mark char quotation-mark
+ string := %x22 *char %x22
+
+
+" char "
@@ -354,28 +336,19 @@ string
char
char := unescaped /
%x5C (
- %x22 / ; " quotation mark U+0022
- %x5C / ; \ reverse solidus U+005C
- %x2F / ; / solidus U+002F
- %x62 / ; b backspace U+0008
- %x66 / ; f form feed U+000C
- %x6E / ; n line feed U+000A
- %x72 / ; r carriage return U+000D
- %x74 / ; t tab U+0009
- %x75 4hexdigit ; uXXXX U+XXXX
+ %x22 / ; " quotation mark U+0022
+ %x5C / ; \ reverse solidus U+005C
+ %x2F / ; / solidus U+002F
+ %x62 / ; b backspace U+0008
+ %x66 / ; f form feed U+000C
+ %x6E / ; n line feed U+000A
+ %x72 / ; r carriage return U+000D
+ %x74 / ; t tab U+0009
+ %x75 four-hexdigits ; uXXXX U+XXXX
)
-
-
-unescaped \ " quotation mark U+0022 \ reverse solidus U+005C / solidus U+002F b backspace U+0008 f form feed U+000C n line feed U+000A r carriage return U+000D t tab U+0009 u U+XXXX hexdigit hexdigit hexdigit hexdigit
-
-
-
-
- quotation-mark
- quotation-mark := %x22 ; "
-
-
-"
+
+
+unescaped \ " quotation mark U+0022 \ reverse solidus U+005C / solidus U+002F b backspace U+0008 f form feed U+000C n line feed U+000A r carriage return U+000D t tab U+0009 u U+XXXX four-hexdigits
From 4656a012582ba04978a0da3f9ad8d5fe321c9764 Mon Sep 17 00:00:00 2001
From: Martin Leduc <31558169+DecimalTurn@users.noreply.github.com>
Date: Sun, 3 May 2026 04:58:47 +0000
Subject: [PATCH 10/19] chore: update submodule railroad-diagram-generator-js
to latest commit
---
submodules/railroad-diagram-generator-js | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/submodules/railroad-diagram-generator-js b/submodules/railroad-diagram-generator-js
index 411f16c..30c46a5 160000
--- a/submodules/railroad-diagram-generator-js
+++ b/submodules/railroad-diagram-generator-js
@@ -1 +1 @@
-Subproject commit 411f16c244f8290dc45407a1337c339fe2009a6b
+Subproject commit 30c46a509addb2bd577e38e25651297ce940def7
From 4b05329302c1090c443801ba0795ba67ae6addf3 Mon Sep 17 00:00:00 2001
From: Martin Leduc <31558169+DecimalTurn@users.noreply.github.com>
Date: Sun, 3 May 2026 05:52:28 +0000
Subject: [PATCH 11/19] feat: simplify single-line-comment-end rules
---
grammar/jsonc.abnf | 14 +++-----------
1 file changed, 3 insertions(+), 11 deletions(-)
diff --git a/grammar/jsonc.abnf b/grammar/jsonc.abnf
index dd7b6a9..19b9e1e 100644
--- a/grammar/jsonc.abnf
+++ b/grammar/jsonc.abnf
@@ -10,7 +10,7 @@
JSONC-text = wsc value wsc
; Whitespace with Comments: zero or more whitespace characters or comments
-wsc = *(ws-char / comment)
+wsc = *(ws-char / comment) ; White space and comments
; Single whitespace character (space, tab, line feed, carriage return)
ws-char = %x20 / %x09 / %x0A / %x0D ; space / tab / LF / CR
@@ -18,20 +18,12 @@ ws-char = %x20 / %x09 / %x0A / %x0D ; space / tab / LF / CR
; Comments: single-line or multi-line
comment = single-line-comment / multi-line-comment
-; Source character: any Unicode code point, as per ECMAScript.
-source-character = %x00-10FFFF
-
-; Comment terminators and sequences (based on ECMAScript line terminators)
-comment-terminator = %x0A / %x0D ; LF / CR
-comment-terminator-sequence = %x0D.0A / %x0A / %x0D
-
; Single-line comment: starts with //, continues until line ending
-; Terminator is not part of the comment body.
; Note that the single-line-comment-end is optional, allowing comments to end at the end of the file without a line terminator.
single-line-comment-start = %x2F.2F ; // double solidus
-single-line-comment-end = comment-terminator-sequence
+single-line-comment-end = %x0D.0A / %x0A / %x0D
single-line-comment = single-line-comment-start *single-line-comment-char [ single-line-comment-end ]
-single-line-comment-char = %x00-09 / %x0B-0C / %x0E-10FFFF ; Any source character except comment terminators
+single-line-comment-char = %x00-09 / %x0B-0C / %x0E-10FFFF ; Any source character except CR and LF (line terminator)
; Multi-line comment: /* ... */
; Cannot be nested. The first */ closes the comment.
From e712a50bbc900153aec715838f4fb79af79efb96 Mon Sep 17 00:00:00 2001
From: Martin Leduc <31558169+DecimalTurn@users.noreply.github.com>
Date: Sun, 3 May 2026 05:54:31 +0000
Subject: [PATCH 12/19] feat: abnf simplified for numbers
---
grammar/jsonc.abnf | 6 +-----
1 file changed, 1 insertion(+), 5 deletions(-)
diff --git a/grammar/jsonc.abnf b/grammar/jsonc.abnf
index 19b9e1e..b3a2806 100644
--- a/grammar/jsonc.abnf
+++ b/grammar/jsonc.abnf
@@ -64,15 +64,11 @@ member = string name-separator value
array = begin-array [ value *( value-separator value ) ] end-array
; Numbers
-number = [ minus ] int [ frac ] [ exp ]
+number = [ minus ] ( zero / ( digit1-9 *digit ) ) [ decimal-point 1*digit ] [ ( %x65 / %x45 ) [ minus / plus ] 1*digit ]
decimal-point = %x2E ; .
digit = %x30-39 ; 0-9
digit1-9 = %x31-39 ; 1-9
-e = %x65 / %x45 ; e E
-exp = e [ minus / plus ] 1*digit
-frac = decimal-point 1*digit
-int = zero / ( digit1-9 *digit )
minus = %x2D ; -
plus = %x2B ; +
zero = %x30 ; 0
From 718f0af4dbaa0dd9a64355c9c4296807637e90e5 Mon Sep 17 00:00:00 2001
From: Martin Leduc <31558169+DecimalTurn@users.noreply.github.com>
Date: Sun, 3 May 2026 06:05:53 +0000
Subject: [PATCH 13/19] feat: add missing lowercase letters to hexdigits
---
grammar/jsonc.abnf | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/grammar/jsonc.abnf b/grammar/jsonc.abnf
index b3a2806..78082a8 100644
--- a/grammar/jsonc.abnf
+++ b/grammar/jsonc.abnf
@@ -73,12 +73,12 @@ minus = %x2D ; -
plus = %x2B ; +
zero = %x30 ; 0
hexdigit = digit /
- %x41 / ; A
- %x42 / ; B
- %x43 / ; C
- %x44 / ; D
- %x45 / ; E
- %x46 ; F
+ %x41 / %x61 / ; A a
+ %x42 / %x62 / ; B b
+ %x43 / %x63 / ; C c
+ %x44 / %x64 / ; D d
+ %x45 / %x65 / ; E e
+ %x46 / %x66 ; F f
four-hexdigits = 4hexdigit
; Strings
From d67cab66e964d467ba4b8e61cd644d1418333db7 Mon Sep 17 00:00:00 2001
From: Martin Leduc <31558169+DecimalTurn@users.noreply.github.com>
Date: Sun, 3 May 2026 06:06:44 +0000
Subject: [PATCH 14/19] feat: add re-orderings in railroad diagrams
---
generate-railroad.js | 80 ++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 80 insertions(+)
diff --git a/generate-railroad.js b/generate-railroad.js
index 9058b33..93e3a10 100644
--- a/generate-railroad.js
+++ b/generate-railroad.js
@@ -33,6 +33,51 @@ const INLINE_LITERAL_REFS = [
},
];
+// Move selected rule definitions after another rule in the processed ABNF.
+// Add more entries here to control rule ordering in generated output.
+const REPOSITION_RULES_AFTER = [
+ {
+ ruleName: "begin-array",
+ afterRule: "array",
+ },
+ {
+ ruleName: "end-array",
+ afterRule: "begin-array",
+ },
+ {
+ ruleName: "begin-object",
+ afterRule: "object",
+ },
+ {
+ ruleName: "end-object",
+ afterRule: "begin-object",
+ },
+ {
+ ruleName: "name-separator",
+ afterRule: "member",
+ },
+ {
+ ruleName: "value-separator",
+ afterRule: "value",
+ },
+ {
+ ruleName: "digit",
+ afterRule: "unescaped",
+ },
+ {
+ ruleName: "digit1-9",
+ afterRule: "digit",
+ },
+ {
+ ruleName: "hexdigit",
+ afterRule: "digit1-9",
+ },
+ {
+ ruleName: "four-hexdigits",
+ afterRule: "hexdigit",
+ }
+];
+
function escapeRegExp(value) {
return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}
@@ -159,6 +204,39 @@ function removeRuleDefinitions(source, ruleNames) {
.join("\n");
}
+function findRuleBlock(lines, ruleName) {
+ const ruleStartRegex = new RegExp(`^\\s*${escapeRegExp(ruleName)}\\s*=`);
+ const startIndex = lines.findIndex((line) => ruleStartRegex.test(line));
+ if (startIndex === -1) {
+ throw new Error(`Rule ${ruleName} was not found.`);
+ }
+
+ let endIndex = startIndex + 1;
+ while (endIndex < lines.length && /^\s/.test(lines[endIndex])) {
+ endIndex += 1;
+ }
+
+ return {
+ startIndex,
+ endIndex,
+ blockLines: lines.slice(startIndex, endIndex),
+ };
+}
+
+function repositionRulesAfter(source, reorderings) {
+ let lines = source.split(/\r?\n/);
+
+ for (const { ruleName, afterRule } of reorderings) {
+ const ruleBlock = findRuleBlock(lines, ruleName);
+ lines.splice(ruleBlock.startIndex, ruleBlock.endIndex - ruleBlock.startIndex);
+
+ const afterRuleBlock = findRuleBlock(lines, afterRule);
+ lines.splice(afterRuleBlock.endIndex, 0, ...ruleBlock.blockLines);
+ }
+
+ return lines.join("\n");
+}
+
function processAbnfSource(source) {
let processed = source;
@@ -171,6 +249,8 @@ function processAbnfSource(source) {
processed = removeRuleDefinitions(processed, referencedRules);
}
+ processed = repositionRulesAfter(processed, REPOSITION_RULES_AFTER);
+
return processed;
}
From 78c7effc63e71a587bd81d5838eac23567cf9b97 Mon Sep 17 00:00:00 2001
From: Martin Leduc <31558169+DecimalTurn@users.noreply.github.com>
Date: Sun, 3 May 2026 06:07:15 +0000
Subject: [PATCH 15/19] chore: update railroad diagram
---
grammar/railroad-diagram.html | 257 +++++++++++++---------------------
1 file changed, 97 insertions(+), 160 deletions(-)
diff --git a/grammar/railroad-diagram.html b/grammar/railroad-diagram.html
index 34328bb..7f631e4 100644
--- a/grammar/railroad-diagram.html
+++ b/grammar/railroad-diagram.html
@@ -20,7 +20,7 @@ JSONC-text
wsc
- wsc := *(ws-char / comment)
+ wsc := *(ws-char / comment) ; White space and comments
ws-char comment
@@ -45,39 +45,12 @@ comment
-
- source-character
- source-character := %x00-10FFFF
-
-
-%x00-10FFFF
-
-
-
-
- comment-terminator
- comment-terminator := %x0A / %x0D ; LF / CR
-
-
-<LF> <CR>
-
-
-
-
- comment-terminator-sequence
- comment-terminator-sequence := %x0D.0A / %x0A / %x0D
+
+ single-line-comment-end
+ single-line-comment-end := %x0D.0A / %x0A / %x0D
<CR><LF> <LF> <CR>
-
-
-
-
- single-line-comment-end
- single-line-comment-end := comment-terminator-sequence
-
-
-comment-terminator-sequence
@@ -92,7 +65,7 @@ single-line-comment
single-line-comment-char
- single-line-comment-char := %x00-09 / %x0B-0C / %x0E-10FFFF ; Any source character except comment terminators
+ single-line-comment-char := %x00-09 / %x0B-0C / %x0E-10FFFF ; Any source character except CR and LF (line terminator)
%x00-09 %x0B-0C %x0E-10FFFF
@@ -146,12 +119,30 @@ not-forward-slash-or-asterisk-char
-
- begin-array
- begin-array := wsc %x5B wsc ; [ left square bracket
-
+
+ value
+ value := object / array / number / string / "true" / "false" / "null"
+
+
+object array number string true false null
+
+
+
+
+ value-separator
+ value-separator := wsc %x2C wsc ; , comma
+
-wsc [ wsc
+wsc , wsc
+
+
+
+
+ object
+ object := begin-object [ member *( value-separator member ) ] end-object
+
+
+begin-object member value-separator member end-object
@@ -161,15 +152,6 @@ begin-object
wsc { wsc
-
-
-
-
- end-array
- end-array := wsc %x5D wsc ; ] right square bracket
-
-
-wsc ] wsc
@@ -179,42 +161,6 @@ end-object
wsc } wsc
-
-
-
-
- name-separator
- name-separator := wsc %x3A wsc ; : colon
-
-
-wsc : wsc
-
-
-
-
- value-separator
- value-separator := wsc %x2C wsc ; , comma
-
-
-wsc , wsc
-
-
-
-
- value
- value := object / array / number / string / "true" / "false" / "null"
-
-
-object array number string true false null
-
-
-
-
- object
- object := begin-object [ member *( value-separator member ) ] end-object
-
-
-begin-object member value-separator member end-object
@@ -224,6 +170,15 @@ member
string name-separator value
+
+
+ name-separator := wsc %x3A wsc ; : colon
+ number := [ "-" ] int [ frac ] [ exp ]
- digit := %x30-39 ; 0-9
- digit1-9 := %x31-39 ; 1-9
- e := %x65 / %x45 ; e E
- exp := e [ "-" / "+" ] 1*digit
- frac := "." 1*digit
- int := "0" / ( digit1-9 *digit )
- begin-array := wsc %x5B wsc ; [ left square bracket
+ hexdigit := digit /
- %x41 / ; A
- %x42 / ; B
- %x43 / ; C
- %x44 / ; D
- %x45 / ; E
- %x46 ; F
- end-array := wsc %x5D wsc ; ] right square bracket
+ four-hexdigits := 4hexdigit
- number := [ "-" ] ( "0" / ( digit1-9 *digit ) ) [ "." 1*digit ] [ ( %x65 / %x45 ) [ "-" / "+" ] 1*digit ]
+ digit := %x30-39 ; 0-9
+ digit1-9 := %x31-39 ; 1-9
+ hexdigit := digit /
+ %x41 / %x61 / ; A a
+ %x42 / %x62 / ; B b
+ %x43 / %x63 / ; C c
+ %x44 / %x64 / ; D d
+ %x45 / %x65 / ; E e
+ %x46 / %x66 ; F f
+ four-hexdigits := 4hexdigit
+