diff --git a/generate-railroad.js b/generate-railroad.js index 9ba7b55..448b145 100644 --- a/generate-railroad.js +++ b/generate-railroad.js @@ -5,9 +5,10 @@ const { spawnSync } = require("node:child_process"); const path = require("node:path"); // Customization section -const DEFAULT_INPUT_ABNF = "grammar/jsonc.abnf"; +const DEFAULT_INPUT_ABNF = "grammar/JSONC.abnf"; const DEFAULT_PROCESSED_ABNF = "grammar/jsonc-processed.abnf"; const DEFAULT_OUTPUT_HTML = "grammar/railroad-diagram.html"; +const FORCED_HTML_HEADER = "JSONC GRAMMAR"; // Rules to inline from their %x... definitions as literal ABNF strings. // Add more rule names here to apply the same transformation. @@ -15,7 +16,67 @@ const INLINE_HEX_RULES = [ "multi-line-comment-start", "multi-line-comment-end", "asterisk", - "escape" + "escape", + "single-line-comment-start", + "quotation-mark", + "decimal-point", + "minus", + "plus", + "zero", +]; + +// Inline selected rule references as quoted literals in specific target rules. +// Add more mappings here to reuse this transformation pattern. +const INLINE_LITERAL_REFS = [ + { + targetRule: "value", + referencedRules: ["false", "true", "null"], + }, +]; + +// Move selected rule definitions after another rule in the processed ABNF. +// Add more entries here to control rule ordering in generated output. +const REPOSITION_RULES_AFTER = [ + { + ruleName: "begin-array", + afterRule: "array", + }, + { + ruleName: "end-array", + afterRule: "begin-array", + }, + { + ruleName: "begin-object", + afterRule: "object", + }, + { + ruleName: "end-object", + afterRule: "begin-object", + }, + { + ruleName: "name-separator", + afterRule: "member", + }, + { + ruleName: "value-separator", + afterRule: "value", + }, + { + ruleName: "digit", + afterRule: "unescaped", + }, + { + ruleName: "digit1-9", + afterRule: "digit", + }, + { + ruleName: "hexdigit", + afterRule: "digit1-9", + }, + { + ruleName: "four-hexdigits", + afterRule: "hexdigit", + } ]; function escapeRegExp(value) { @@ -36,6 +97,20 @@ function decodeAbnfHexSequence(value) { return String.fromCodePoint(...bytes); } +function getHexRuleLiteral(source, ruleName) { + const escapedRuleName = escapeRegExp(ruleName); + const ruleRegex = new RegExp( + `^\\s*${escapedRuleName}\\s*=\\s*(%x[0-9A-Fa-f]+(?:\\.[0-9A-Fa-f]+)*)\\b.*$`, + "m", + ); + const ruleMatch = source.match(ruleRegex); + if (!ruleMatch) { + throw new Error(`Rule ${ruleName} was not found.`); + } + + return decodeAbnfHexSequence(ruleMatch[1]); +} + function inlineHexRuleAsLiteral(source, ruleName) { const escapedRuleName = escapeRegExp(ruleName); const ruleRegex = new RegExp( @@ -50,10 +125,10 @@ function inlineHexRuleAsLiteral(source, ruleName) { const hexSequence = ruleMatch[1]; const literalChars = decodeAbnfHexSequence(hexSequence); - // For backslash or other problematic characters, keep them as hex format - // ABNF doesn't support backslash escaping in quoted strings + // Keep hex format for characters that cannot be represented safely + // as a single ABNF quoted string literal. let replacement; - if (literalChars === "\\") { + if (literalChars === "\\" || literalChars === '"') { replacement = hexSequence; } else { // For other characters, escape only double quotes (not backslashes) @@ -90,6 +165,79 @@ function inlineHexRuleAsLiteral(source, ruleName) { .join("\n"); } +function inlineLiteralRefsInTargetRule(source, targetRule, referencedRules) { + const escapedTargetRule = escapeRegExp(targetRule); + const targetRuleRegex = new RegExp(`^(\\s*${escapedTargetRule}\\s*=\\s*)(.*)$`, "m"); + const match = source.match(targetRuleRegex); + if (!match) { + throw new Error(`Rule ${targetRule} was not found.`); + } + + const targetRulePrefix = match[1]; + const targetRuleRhs = match[2]; + + let updatedRhs = targetRuleRhs; + for (const referencedRule of referencedRules) { + const literalValue = getHexRuleLiteral(source, referencedRule); + const replacementLiteral = `"${literalValue.replace(/"/g, '\\"')}"`; + const referencedRuleRegex = new RegExp( + `(? { + const match = line.match(/^\s*([A-Za-z][A-Za-z0-9-]*)\s*=/); + if (!match) { + return true; + } + return !removalSet.has(match[1]); + }) + .join("\n"); +} + +function findRuleBlock(lines, ruleName) { + const ruleStartRegex = new RegExp(`^\\s*${escapeRegExp(ruleName)}\\s*=`); + const startIndex = lines.findIndex((line) => ruleStartRegex.test(line)); + if (startIndex === -1) { + throw new Error(`Rule ${ruleName} was not found.`); + } + + let endIndex = startIndex + 1; + while (endIndex < lines.length && /^\s/.test(lines[endIndex])) { + endIndex += 1; + } + + return { + startIndex, + endIndex, + blockLines: lines.slice(startIndex, endIndex), + }; +} + +function repositionRulesAfter(source, reorderings) { + let lines = source.split(/\r?\n/); + + for (const { ruleName, afterRule } of reorderings) { + const ruleBlock = findRuleBlock(lines, ruleName); + lines.splice(ruleBlock.startIndex, ruleBlock.endIndex - ruleBlock.startIndex); + + const afterRuleBlock = findRuleBlock(lines, afterRule); + lines.splice(afterRuleBlock.endIndex, 0, ...ruleBlock.blockLines); + } + + return lines.join("\n"); +} + function processAbnfSource(source) { let processed = source; @@ -97,9 +245,25 @@ function processAbnfSource(source) { processed = inlineHexRuleAsLiteral(processed, ruleName); } + for (const { targetRule, referencedRules } of INLINE_LITERAL_REFS) { + processed = inlineLiteralRefsInTargetRule(processed, targetRule, referencedRules); + processed = removeRuleDefinitions(processed, referencedRules); + } + + processed = repositionRulesAfter(processed, REPOSITION_RULES_AFTER); + return processed; } +function postProcessGeneratedHtml(htmlPath) { + const html = fs.readFileSync(htmlPath, "utf8"); + const updated = html.replace(/

[^<]*<\/h1>/, `

${FORCED_HTML_HEADER}

`); + + if (updated !== html) { + fs.writeFileSync(htmlPath, updated, "utf8"); + } +} + const args = process.argv.slice(2); const titleIndex = args.indexOf("--title"); @@ -173,4 +337,15 @@ if (result.error) { process.exit(1); } -process.exit(result.status === null ? 1 : result.status); \ No newline at end of file +if (result.status !== 0) { + process.exit(result.status === null ? 1 : result.status); +} + +try { + postProcessGeneratedHtml(outputPath); +} catch (error) { + console.error(`Failed to post-process generated HTML: ${error.message}`); + process.exit(1); +} + +process.exit(0); \ No newline at end of file diff --git a/grammar/jsonc.abnf b/grammar/JSONC.abnf similarity index 69% rename from grammar/jsonc.abnf rename to grammar/JSONC.abnf index d8ff0af..e89ae02 100644 --- a/grammar/jsonc.abnf +++ b/grammar/JSONC.abnf @@ -1,9 +1,8 @@ ; JSONC grammar with comments support (RFC 8259 extended with JavaScript-style comments) ; ; Notes: -; - Rule names and structure follow RFC 8259 ABNF snippets. -; - DIGIT and HEXDIG are core rules from RFC 5234. -; - comments are an extension not in RFC 8259. +; - Rule names and structure follow RFC 8259 ABNF. +; - Comments are an extension not in RFC 8259. ; - Trailing commas are NOT supported in this grammar. ; A JSONC-text is a serialized value surrounded by optional whitespace and comments. @@ -11,7 +10,7 @@ JSONC-text = wsc value wsc ; Whitespace with Comments: zero or more whitespace characters or comments -wsc = *(ws-char / comment) +wsc = *(ws-char / comment) ; Whitespace and/or comments ; Single whitespace character (space, tab, line feed, carriage return) ws-char = %x20 / %x09 / %x0A / %x0D ; space / tab / LF / CR @@ -19,20 +18,12 @@ ws-char = %x20 / %x09 / %x0A / %x0D ; space / tab / LF / CR ; Comments: single-line or multi-line comment = single-line-comment / multi-line-comment -; Source character: any Unicode code point, as per ECMAScript. -source-character = %x00-10FFFF - -; Comment terminators and sequences (based on ECMAScript line terminators) -comment-terminator = %x0A / %x0D / %x2028 / %x2029 ; LF / CR / LS / PS -comment-terminator-sequence = %x0D.0A / %x0A / %x0D / %x2028 / %x2029 - ; Single-line comment: starts with //, continues until line ending -; Terminator is not part of the comment body. ; Note that the single-line-comment-end is optional, allowing comments to end at the end of the file without a line terminator. single-line-comment-start = %x2F.2F ; // double solidus -single-line-comment-end = comment-terminator-sequence +single-line-comment-end = %x0D.0A / %x0A / %x0D single-line-comment = single-line-comment-start *single-line-comment-char [ single-line-comment-end ] -single-line-comment-char = %x00-09 / %x0B-0C / %x0E-2027 / %x202A-10FFFF ; Any source character except comment terminators +single-line-comment-char = %x00-09 / %x0B-0C / %x0E-10FFFF ; Any source character except CR and LF (line terminator) ; Multi-line comment: /* ... */ ; Cannot be nested. The first */ closes the comment. @@ -58,7 +49,7 @@ name-separator = wsc %x3A wsc ; : colon value-separator = wsc %x2C wsc ; , comma ; Any JSON value -value = false / null / true / object / array / number / string +value = object / array / number / string / true / false / null ; Literal names (boolean values and null) false = %x66.61.6C.73.65 ; false @@ -73,31 +64,37 @@ member = string name-separator value array = begin-array [ value *( value-separator value ) ] end-array ; Numbers -number = [ minus ] int [ frac ] [ exp ] +number = [ minus ] ( zero / ( digit1-9 *digit ) ) [ decimal-point 1*digit ] [ ( %x65 / %x45 ) [ minus / plus ] 1*digit ] decimal-point = %x2E ; . +digit = %x30-39 ; 0-9 digit1-9 = %x31-39 ; 1-9 -e = %x65 / %x45 ; e E -exp = e [ minus / plus ] 1*DIGIT -frac = decimal-point 1*DIGIT -int = zero / ( digit1-9 *DIGIT ) + minus = %x2D ; - plus = %x2B ; + zero = %x30 ; 0 +hexdigit = digit / + %x41 / %x61 / ; A a + %x42 / %x62 / ; B b + %x43 / %x63 / ; C c + %x44 / %x64 / ; D d + %x45 / %x65 / ; E e + %x46 / %x66 ; F f +four-hexdigits = 4hexdigit ; Strings string = quotation-mark *char quotation-mark char = unescaped / escape ( - %x22 / ; " quotation mark U+0022 - %x5C / ; \ reverse solidus U+005C - %x2F / ; / solidus U+002F - %x62 / ; b backspace U+0008 - %x66 / ; f form feed U+000C - %x6E / ; n line feed U+000A - %x72 / ; r carriage return U+000D - %x74 / ; t tab U+0009 - %x75 4HEXDIG ; uXXXX U+XXXX + %x22 / ; " quotation mark U+0022 + %x5C / ; \ reverse solidus U+005C + %x2F / ; / solidus U+002F + %x62 / ; b backspace U+0008 + %x66 / ; f form feed U+000C + %x6E / ; n line feed U+000A + %x72 / ; r carriage return U+000D + %x74 / ; t tab U+0009 + %x75 four-hexdigits ; uXXXX U+XXXX ) escape = %x5C ; \ diff --git a/grammar/README.md b/grammar/README.md index c852672..af7cd7a 100644 --- a/grammar/README.md +++ b/grammar/README.md @@ -4,7 +4,7 @@ This directory contains the ABNF grammar for JSONC, along with plans for generat ## Railroad Diagram Generation Plan -Generate railroad diagrams from `grammar/jsonc.abnf` using a simple one-file Node.js script. +Generate railroad diagrams from `grammar/JSONC.abnf` using a simple one-file Node.js script. Instead of building a custom ABNF parser and converter to Tab Atkins constructor calls, use: @@ -18,7 +18,7 @@ The wrapper script should: 1. Accept input ABNF path and optional output HTML path. 2. Default to: - - input: `grammar/jsonc.abnf` + - input: `grammar/JSONC.abnf` - output: `grammar/railroad-diagram.html` 3. Optionally accept `--title` to set the HTML title. 4. Execute the upstream CLI from our installed dependency. @@ -53,13 +53,13 @@ npm run railroad Generate from a specific input and output: ```bash -npm run railroad -- grammar/jsonc.abnf grammar/railroad-diagram.html +npm run railroad -- grammar/JSONC.abnf grammar/railroad-diagram.html ``` Generate with a custom title: ```bash -npm run railroad -- grammar/jsonc.abnf grammar/railroad-diagram.html --title "JSONC Grammar" +npm run railroad -- grammar/JSONC.abnf grammar/railroad-diagram.html --title "JSONC Grammar" ``` ### Notes on EOF for single-line comments diff --git a/grammar/railroad-diagram.css b/grammar/railroad-diagram.css index 87adf27..8ea78d3 100644 --- a/grammar/railroad-diagram.css +++ b/grammar/railroad-diagram.css @@ -41,17 +41,19 @@ .textbox { stroke-width: var(--text-border); stroke: black; - rx: calc(var(--grid-size) * var(--text-box-corner-radius-factor)); - ry: calc(var(--grid-size) * var(--text-box-corner-radius-factor)); + rx: 0; + ry: 0; fill: none; } .textbox.terminal { - fill: rgb(200, 200, 200); + fill: rgb(205, 240, 205); + rx: calc(var(--grid-size) * var(--text-box-corner-radius-factor)); + ry: calc(var(--grid-size) * var(--text-box-corner-radius-factor)); } .textbox.nonterminal { - fill: rgb(210, 210, 210); + fill: rgb(205, 240, 205); } .textbox-text { @@ -64,6 +66,7 @@ } .textbox-text.nonterminal { + font-weight: 700; text-decoration: underline; /* Underline non-terminals */ cursor: pointer; /* Show it's clickable */ pointer-events: auto; /* Ensure text can receive click events */ @@ -73,6 +76,12 @@ fill: #0066cc; /* Blue on hover */ } +.terminal-label { + font-family: Arial, sans-serif; + font-size: 12px; + fill: #555; +} + /* CSS-based debug visualization using HTML overlays */ .debug-overlay { position: absolute !important; @@ -176,7 +185,7 @@ } .textbox.nonterminal:hover { - fill: rgb(190, 190, 190); + fill: rgb(190, 230, 190); stroke-width: calc(var(--text-border) * 1.5); } diff --git a/grammar/railroad-diagram.html b/grammar/railroad-diagram.html index 27256f2..3c9179a 100644 --- a/grammar/railroad-diagram.html +++ b/grammar/railroad-diagram.html @@ -7,7 +7,7 @@ -

jsonc-processed Grammar

+

JSONC GRAMMAR

JSONC-text

@@ -20,7 +20,7 @@

JSONC-text

wsc

-
wsc := *(ws-char / comment)
+
wsc := *(ws-char / comment)   ; White space and comments
ws-charcomment @@ -42,69 +42,33 @@

comment

single-line-commentmulti-line-comment - -
-
-
-

source-character

-
source-character := %x00-10FFFF
-
- -%x00-10FFFF - -
-
-
-

comment-terminator

-
comment-terminator := %x0A / %x0D / %x2028 / %x2029          ; LF / CR / LS / PS
-
- -<LF><CR><LS><PS> - -
-
-
-

comment-terminator-sequence

-
comment-terminator-sequence := %x0D.0A / %x0A / %x0D / %x2028 / %x2029
-
- -<CR><LF><LF><CR><LS><PS> - -
-
-
-

single-line-comment-start

-
single-line-comment-start := %x2F.2F             ; // double solidus
-
- -//

single-line-comment-end

-
single-line-comment-end := comment-terminator-sequence
-
- -comment-terminator-sequence +
single-line-comment-end := %x0D.0A / %x0A / %x0D
+
+ +<CR><LF><LF><CR>

single-line-comment

-
single-line-comment := single-line-comment-start *single-line-comment-char [ single-line-comment-end ]
-
- -single-line-comment-startsingle-line-comment-charsingle-line-comment-end +
single-line-comment := "//" *single-line-comment-char [ single-line-comment-end ]
+
+ +//single-line-comment-charsingle-line-comment-end

single-line-comment-char

-
single-line-comment-char := %x00-09 / %x0B-0C / %x0E-2027 / %x202A-10FFFF ; Any source character except comment terminators
-
- -%x00-09%x0B-0C%x0E-2027%x202A-10FFFF +
single-line-comment-char := %x00-09 / %x0B-0C / %x0E-10FFFF ; Any source character except CR and LF (line terminator)
+
+ +%x00-09%x0B-0C%x0E-10FFFF
@@ -155,12 +119,30 @@

not-forward-slash-or-asterisk-char

-
-

begin-array

-
begin-array := wsc %x5B wsc  ; [ left square bracket
-
+
+

value

+
value := object / array / number / string / "true" / "false" / "null"
+
+ +objectarraynumberstringtruefalsenull + +
+
+
+

value-separator

+
value-separator := wsc %x2C wsc  ; , comma
+
-wsc[wsc +wsc,wsc + +
+
+
+

object

+
object := begin-object [ member *( value-separator member ) ] end-object
+
+ +begin-objectmembervalue-separatormemberend-object
@@ -170,15 +152,6 @@

begin-object

wsc{wsc - -
-
-
-

end-array

-
end-array := wsc %x5D wsc  ; ] right square bracket
-
- -wsc]wsc
@@ -188,69 +161,6 @@

end-object

wsc}wsc - -
-
-
-

name-separator

-
name-separator := wsc %x3A wsc  ; : colon
-
- -wsc:wsc - -
-
-
-

value-separator

-
value-separator := wsc %x2C wsc  ; , comma
-
- -wsc,wsc - -
-
-
-

value

-
value := false / null / true / object / array / number / string
-
- -falsenulltrueobjectarraynumberstring - -
-
-
-

false

-
false := %x66.61.6C.73.65   ; false
-
- -false - -
-
-
-

true

-
true := %x74.72.75.65      ; true
-
- -true - -
-
-
-

null

-
null := %x6E.75.6C.6C      ; null
-
- -null - -
-
-
-

object

-
object := begin-object [ member *( value-separator member ) ] end-object
-
- -begin-objectmembervalue-separatormemberend-object
@@ -260,6 +170,15 @@

member

stringname-separatorvalue + +
+
+
+

name-separator

+
name-separator := wsc %x3A wsc  ; : colon
+
+ +wsc:wsc
@@ -272,102 +191,39 @@

array

-
-

number

-
number := [ minus ] int [ frac ] [ exp ]
-
- -minusintfracexp - -
-
-
-

decimal-point

-
decimal-point := %x2E        ; .
-
- -. - -
-
-
-

digit1-9

-
digit1-9 := %x31-39          ; 1-9
-
- -%x31-39 - -
-
-
-

e

-
e := %x65 / %x45             ; e E
-
- -eE - -
-
-
-

exp

-
exp := e [ minus / plus ] 1*DIGIT
-
- -eminusplusDIGIT - -
-
-
-

frac

-
frac := decimal-point 1*DIGIT
-
- -decimal-pointDIGIT - -
-
-
-

int

-
int := zero / ( digit1-9 *DIGIT )
-
- -zerodigit1-9DIGIT - -
-
-
-

minus

-
minus := %x2D                ; -
-
- -- +
+

begin-array

+
begin-array := wsc %x5B wsc  ; [ left square bracket
+
+ +wsc[wsc
-
-

plus

-
plus := %x2B                 ; +
-
- -+ +
+

end-array

+
end-array := wsc %x5D wsc  ; ] right square bracket
+
+ +wsc]wsc
-
-

zero

-
zero := %x30                 ; 0
-
- -0 +
+

number

+
number := [ "-" ] ( "0" / ( digit1-9 *digit ) ) [ "." 1*digit ] [ ( %x65 / %x45 ) [ "-" / "+" ] 1*digit ]
+
+ +-0digit1-9digit.digiteE-+digit

string

-
string := quotation-mark *char quotation-mark
-
- -quotation-markcharquotation-mark +
string := %x22 *char %x22
+
+ +"char"
@@ -375,28 +231,19 @@

string

char

char := unescaped /
  %x5C (
- %x22 /             ; "    quotation mark  U+0022
- %x5C /             ; \    reverse solidus U+005C
- %x2F /             ; /    solidus         U+002F
- %x62 /             ; b    backspace       U+0008
- %x66 /             ; f    form feed       U+000C
- %x6E /             ; n    line feed       U+000A
- %x72 /             ; r    carriage return U+000D
- %x74 /             ; t    tab             U+0009
- %x75 4HEXDIG       ; uXXXX                U+XXXX
+ %x22 /              ; "    quotation mark  U+0022
+ %x5C /              ; \    reverse solidus U+005C
+ %x2F /              ; /    solidus         U+002F
+ %x62 /              ; b    backspace       U+0008
+ %x66 /              ; f    form feed       U+000C
+ %x6E /              ; n    line feed       U+000A
+ %x72 /              ; r    carriage return U+000D
+ %x74 /              ; t    tab             U+0009
+ %x75 four-hexdigits ; uXXXX                U+XXXX
  )
-
- -unescaped\"" quotation mark U+0022\\ reverse solidus U+005C// solidus U+002Fbb backspace U+0008ff form feed U+000Cnn line feed U+000Arr carriage return U+000Dtt tab U+0009uuXXXX U+XXXXHEXDIGHEXDIGHEXDIGHEXDIG - -
-
-
-

quotation-mark

-
quotation-mark := %x22       ; "
-
- -" +
+ +unescaped\"quotation markU+0022\reverse solidusU+005C/solidusU+002FbbackspaceU+0008fform feedU+000Cnline feedU+000Arcarriage returnU+000DttabU+0009uU+XXXXfour-hexdigits
@@ -406,6 +253,48 @@

unescaped

%x20-21%x23-5B%x5D-10FFFF + +
+
+
+

digit

+
digit := %x30-39          ; 0-9
+
+ +%x30-39 + +
+
+
+

digit1-9

+
digit1-9 := %x31-39          ; 1-9
+
+ +%x31-39 + +
+
+
+

hexdigit

+
hexdigit := digit /
+  %x41 / %x61 /             ; A a
+  %x42 / %x62 /             ; B b
+  %x43 / %x63 /             ; C c
+  %x44 / %x64 /             ; D d
+  %x45 / %x65 /             ; E e
+  %x46 / %x66               ; F f
+
+ +digitAaBbCcDdEeFf + +
+
+
+

four-hexdigits

+
four-hexdigits := 4hexdigit
+
+ +hexdigithexdigithexdigithexdigit
diff --git a/index.markdown b/index.markdown index c92441a..84dae61 100644 --- a/index.markdown +++ b/index.markdown @@ -109,6 +109,16 @@ Consumers that support JSONC SHOULD accept `application/jsonc`. - Configuration Files: JSONC is useful for configuration files where comments can provide explanations or instructions. - Data Annotation: JSONC allows developers to annotate JSON data with comments for better understanding and maintenance. +## Formal grammar + +The formal grammar of JSONC is available here: + +- [JSONC ABNF Grammar]({{ '/grammar/JSONC.abnf' | relative_url }}) + +For a visual representation of the grammar rules, see the JSONC railroad diagram page: + +- [JSONC Railroad Diagram]({{ '/grammar/railroad-diagram.html' | relative_url }}) + ## Tools and Libraries Several tools and libraries support JSONC, enabling developers to parse and generate JSONC data easily. diff --git a/submodules/railroad-diagram-generator-js b/submodules/railroad-diagram-generator-js index e3e2166..30c46a5 160000 --- a/submodules/railroad-diagram-generator-js +++ b/submodules/railroad-diagram-generator-js @@ -1 +1 @@ -Subproject commit e3e21669af406cad1c94c75774d891a9f6447363 +Subproject commit 30c46a509addb2bd577e38e25651297ce940def7