# Source: cpp-peglib/grammar/cpp-peglib.peg
# (plain text; 160 lines / 3.8 KiB in the original listing)

# Setup PEG syntax parser
# Entry point: leading whitespace/comments, one or more rule definitions,
# then end of input.
Grammar    <- Spacing Definition+ EndOfFile

# A rule definition. The first alternative is the parameterized form
# `Name(P1, P2) <- ...` (the name is an IdentCont, so `(` must follow it
# directly); the second is the plain form `Name <- ...`. Both accept an
# optional `~` prefix and an optional trailing `{ ... }` instruction block.
Definition <- Ignore IdentCont Parameters LEFTARROW Expression Instruction?
            / Ignore Identifier LEFTARROW Expression Instruction?
# Ordered choice: one or more sequences separated by `/`.
Expression      <- Sequence (SLASH Sequence)*

# A (possibly empty) run of cut operators and prefixed items.
Sequence        <- (CUT / Prefix)*

# An item with an optional `&` or `!` in front of it.
Prefix          <- (AND / NOT)? SuffixWithLabel

# An item with an optional label (`^name` or `⇑name`) after it.
SuffixWithLabel <- Suffix (LABEL Identifier)?

# A primary with an optional repetition operator: `?`, `*`, `+` or `{...}`.
Suffix          <- Primary Loop?
Loop            <- QUESTION / STAR / PLUS / Repetition
# Atomic expressions, tried in this order:
#   1. rule reference with arguments  `Name(expr, ...)`
#   2. plain rule reference           `Name`
#      (1 and 2 are rejected when a LEFTARROW follows, i.e. when the
#      identifier is really the start of the next definition)
#   3. parenthesized group            `( ... )`
#   4. token boundary                 `< ... >`
#   5. capture scope                  `$( ... )`
#   6. named capture                  `$name< ... >`
#   7. capture scope via the CapScope rule (see note below)
#   8. back reference                 `$name`
#   9. dictionaries, literals and character classes; each case-insensitive
#      (`...i`) form is listed before its plain counterpart, because the
#      plain form would match the same text up to the closing quote/bracket
#      and leave the trailing `i` behind
#  10. any character                  `.`
#
# NOTE(review): CapScope is defined as `BeginCapScope Expression EndCapScope`,
# which is exactly alternative 5, so alternative 7 can never succeed where
# alternative 5 has failed. Both are kept here so the order of alternatives
# is unchanged; confirm which of the two is intended.
Primary <-
    Ignore IdentCont Arguments !LEFTARROW
  / Ignore Identifier !(Parameters? LEFTARROW)
  / OPEN Expression CLOSE
  / BeginTok Expression EndTok
  / BeginCapScope Expression EndCapScope
  / BeginCap Expression EndCap
  / CapScope
  / BackRef
  / DictionaryI
  / LiteralI
  / Dictionary
  / Literal
  / NegatedClassI
  / NegatedClass
  / ClassI
  / Class
  / DOT
# An identifier followed by optional whitespace/comments.
Identifier <- IdentCont Spacing

# The identifier text itself, wrapped in `< >`, with no trailing spacing.
IdentCont  <- <IdentStart IdentRest*>

# First character: an ASCII letter, `_`, `%`, or a code point in
# U+0080..U+FFFF. The characters used by CUT (`↑`) and LABEL (`⇑`) are
# excluded explicitly, since they fall inside that code point range.
IdentStart <- !"↑" !"⇑" ([a-zA-Z_%] / [\u0080-\uFFFF])

# Following characters may also be ASCII digits.
IdentRest  <- IdentStart / [0-9]
# Dictionary: two or more literals separated by `|`.
Dictionary  <- LiteralD (PIPE LiteralD)+

# Case-insensitive dictionary: two or more `...i` literals separated by `|`.
# At least one `|` is required, as in Dictionary. With `*` instead of `+`
# a lone case-insensitive literal would always be consumed by this rule, and
# the LiteralI alternative listed right after DictionaryI in Primary could
# never be selected.
DictionaryI <- LiteralID (PIPE LiteralID)+
# Quoted literal: '...' or "...". The text between the quotes is wrapped in
# `< >`; the closing quote is followed by optional whitespace/comments.
lit_ope <-
    ['] <(!['] Char)*> ['] Spacing
  / ["] <(!["] Char)*> ["] Spacing

# Literal is referenced from Primary; LiteralD from Dictionary and
# ErrorMessage. Both share the same definition.
Literal  <- lit_ope
LiteralD <- lit_ope

# Case-insensitive literal: as above, but an `i` must directly follow the
# closing quote.
lit_case_ignore_ope <-
    ['] <(!['] Char)*> "'i" Spacing
  / ["] <(!["] Char)*> '"i' Spacing

# LiteralI is referenced from Primary; LiteralID from DictionaryI.
LiteralI  <- lit_case_ignore_ope
LiteralID <- lit_case_ignore_ope
# Character classes: plain, case-insensitive (`]i`), negated (`[^`), and
# negated case-insensitive.
# NOTE: Bryan Ford's original paper allows zero or more ranges between the
# brackets ('zom'); this grammar requires one or more ('oom').
Class         <- '[' !'^' <(!']' Range)+> ']' Spacing
ClassI        <- '[' !'^' <(!']' Range)+> ']i' Spacing
NegatedClass  <- "[^" <(!']' Range)+> ']' Spacing
NegatedClassI <- "[^" <(!']' Range)+> ']i' Spacing

# Either `a-z` or a single character. The `!']'` after '-' makes a '-'
# directly before the closing bracket an ordinary character, so `[+-]` is
# a valid class.
Range         <- (Char '-' !']' Char) / Char
# One character of a literal or character class, escaped or not.
Char <-
    '\\' [nrt'\"[\]\\^]              # backslash + one of: n r t ' " [ ] \ ^
  / '\\' [0-3] [0-7] [0-7]           # three-digit octal escape
  / '\\' [0-7] [0-7]?                # one- or two-digit octal escape
  / "\\x" [0-9a-fA-F] [0-9a-fA-F]?   # hex escape, one or two digits
  # Unicode escape: six hex digits starting with 00..0F or 10, otherwise
  # four or five hex digits.
  / "\\u" (((('0' [0-9a-fA-F]) / "10") [0-9a-fA-F]{4,4}) / [0-9a-fA-F]{4,5})
  / !'\\' .                          # any character except a backslash
# Bounded repetition in braces: `{n,m}`, `{n,}`, `{n}` or `{,m}`.
Repetition      <- BeginBracket RepetitionRange EndBracket
RepetitionRange <- Number COMMA Number
                 / Number COMMA
                 / Number
                 / COMMA Number

# One or more decimal digits, followed by optional whitespace/comments.
Number          <- [0-9]+ Spacing
# Capture scope: an expression enclosed in `$(` ... `)`.
CapScope <- BeginCapScope Expression EndCapScope
# Operator tokens. Every token consumes the whitespace/comments after it.
# Definitions written with a leading `~` carry the ignore prefix.
LEFTARROW <- ("<-" / "←") Spacing
~SLASH    <- '/' Spacing
~PIPE     <- '|' Spacing
AND       <- '&' Spacing
NOT       <- '!' Spacing
QUESTION  <- '?' Spacing
STAR      <- '*' Spacing
PLUS      <- '+' Spacing
~OPEN     <- '(' Spacing
~CLOSE    <- ')' Spacing
DOT       <- '.' Spacing
CUT       <- "↑" Spacing
~LABEL    <- ('^' / "⇑") Spacing
# Any amount of whitespace and comments (possibly none).
~Spacing  <- (Space / Comment)*

# `#` and everything up to, but not including, the end of the line.
Comment   <- '#' (!EndOfLine .)*

Space     <- ' ' / '\t' / EndOfLine
EndOfLine <- "\r\n" / '\n' / '\r'

# Succeeds only when no input is left.
EndOfFile <- !.
# Token boundary delimiters: `<` ... `>`.
~BeginTok      <- '<' Spacing
~EndTok        <- '>' Spacing

# Capture scope delimiters: `$(` ... `)`.
~BeginCapScope <- '$' '(' Spacing
~EndCapScope   <- ')' Spacing

# Named capture delimiters: `$name<` ... `>`.
BeginCap       <- '$' <IdentCont> '<' Spacing
~EndCap        <- '>' Spacing

# Back reference: `$name`.
BackRef        <- '$' <IdentCont> Spacing
# Optional `~` prefix, accepted in front of a definition name or a rule
# reference.
IGNORE     <- '~'
Ignore     <- IGNORE?

# Parameter list of a parameterized definition: `(A, B, ...)`.
Parameters <- OPEN Identifier (COMMA Identifier)* CLOSE

# Argument list of a parameterized rule reference: `(expr, expr, ...)`.
Arguments  <- OPEN Expression (COMMA Expression)* CLOSE

~COMMA     <- ',' Spacing
# Instruction grammars
# An instruction block is `{ item; item; ... }`; the item list may be empty.
Instruction <-
    BeginBracket
    (InstructionItem (InstructionItemSeparator InstructionItem)*)?
    EndBracket
InstructionItem           <- PrecedenceClimbing / ErrorMessage / NoAstOpt
~InstructionItemSeparator <- ';' Spacing

# Whitespace only (no comments): zero-or-more and one-or-more variants.
~SpacesZom                <- Space*
~SpacesOom                <- Space+

~BeginBracket             <- '{' Spacing
~EndBracket               <- '}' Spacing
# PrecedenceClimbing instruction
# The keyword `precedence`, then one or more whitespace-separated groups.
# Each group is an `L` or `R` followed by one or more operators.
PrecedenceClimbing <- "precedence" SpacesOom PrecedenceInfo (SpacesOom PrecedenceInfo)* SpacesZom
PrecedenceInfo     <- PrecedenceAssoc (~SpacesOom PrecedenceOpe)+

# An operator is either a quoted string containing no whitespace, or a bare
# run of characters that stops at whitespace, `}`, `L` or `R`.
PrecedenceOpe <-
    ['] <(!(Space / [']) Char)*> [']
  / ["] <(!(Space / ["]) Char)*> ["]
  / <(!(PrecedenceAssoc / Space / '}') .)+>

PrecedenceAssoc    <- [LR]
# Error message instruction: the keyword `message`, whitespace, then a
# quoted literal.
ErrorMessage <- "message" SpacesOom LiteralD SpacesZom

# No Ast node optimization instruction: the keyword `no_ast_opt`.
NoAstOpt     <- "no_ast_opt" SpacesZom