# Mirror of https://github.com/yhirose/cpp-peglib.git
# Synced 2024-12-22 11:55:30 +00:00 (160 lines, 3.8 KiB, plain text)
# Setup PEG syntax parser
#
# This file describes the PEG notation itself, written in that same notation.
# NOTE(review): the lone `|` lines throughout this copy look like residue from
# a web code viewer rather than grammar syntax, and the alternatives of
# multi-line rules appear to have lost their indentation -- confirm against the
# upstream file before feeding this text to a parser.
|
|
# Grammar: optional leading spacing, one or more definitions, then end of input.
Grammar <- Spacing Definition+ EndOfFile
|
|
|
|
# Definition: one rule. Both alternatives start with the optional Ignore marker
# and end with an optional Instruction block.
#   1st alternative: rule name followed directly by a Parameters list.
#   2nd alternative: plain rule name (Identifier also consumes trailing spacing).
# The parameterised form is tried first because PEG choice is ordered.
Definition <-
|
|
Ignore IdentCont Parameters LEFTARROW Expression Instruction?
|
|
/ Ignore Identifier LEFTARROW Expression Instruction?
|
|
|
|
# Expression: ordered choice -- one or more Sequences separated by SLASH.
Expression <- Sequence (SLASH Sequence)*
|
|
|
|
# Sequence: zero or more items, each either a CUT marker or a Prefix.
# Because of the `*`, an empty sequence is accepted.
Sequence <- (CUT / Prefix)*
|
|
|
|
# Prefix: optional AND ('&') or NOT ('!') operator in front of a suffixed item.
Prefix <- (AND / NOT)? SuffixWithLabel
|
|
|
|
# SuffixWithLabel: a Suffix optionally followed by LABEL and an Identifier.
SuffixWithLabel <- Suffix (LABEL Identifier)?
|
|
|
|
# Suffix: a Primary with an optional repetition operator.
Suffix <- Primary Loop?
|
|
|
|
# Loop: '?', '*', '+', or a braced Repetition range.
Loop <- QUESTION / STAR / PLUS / Repetition
|
|
|
|
# Primary: the atomic operands of an expression. Alternatives are tried in the
# order listed (PEG ordered choice), so the order below is significant.
Primary <-
|
|
# Reference to a parameterised rule with Arguments; the !LEFTARROW guard
# rejects text that is actually the start of the next Definition.
Ignore IdentCont Arguments !LEFTARROW
|
|
# Plain rule reference; the guard rejects a name that begins a new Definition
# (optionally with Parameters).
/ Ignore Identifier !(Parameters? LEFTARROW)
|
|
# Parenthesised sub-expression.
/ OPEN Expression CLOSE
|
|
# Token boundary: < ... >
/ BeginTok Expression EndTok
|
|
# Capture scope: $( ... )
# NOTE(review): this alternative spells out the same sequence as the CapScope
# rule, so the later `/ CapScope` alternative looks unreachable in practice --
# confirm which of the two is intended.
/ BeginCapScope Expression EndCapScope
|
|
# Named capture: $name< ... >
/ BeginCap Expression EndCap
|
|
/ CapScope
|
|
# Back reference: $name
/ BackRef
|
|
# Literal-like operands. The case-insensitive ('i'-suffixed) forms and the
# dictionary forms are listed ahead of their plain counterparts.
/ DictionaryI
|
|
/ LiteralI
|
|
/ Dictionary
|
|
/ Literal
|
|
# Character classes; negated and case-insensitive variants are tried first.
/ NegatedClassI
|
|
/ NegatedClass
|
|
/ ClassI
|
|
/ Class
|
|
# Any-character operator '.'
/ DOT
|
|
|
|
# Identifier: an identifier token followed by optional spacing.
Identifier <- IdentCont Spacing
|
|
|
|
# IdentCont: the identifier text itself, wrapped in a < > token boundary;
# no trailing spacing is consumed here.
IdentCont <- <IdentStart IdentRest*>
|
|
|
|
# IdentStart: first character of an identifier. It must not be one of the
# arrow characters "↑" or "⇑" (used by CUT and LABEL), and must be an ASCII
# letter, '_', '%', or a character in the range \u0080-\uFFFF.
IdentStart <- !"↑" !"⇑" ([a-zA-Z_%] / [\u0080-\uFFFF])
|
|
|
|
# IdentRest: any IdentStart character or a decimal digit.
IdentRest <- IdentStart / [0-9]
|
|
|
|
# Dictionary: two or more literals separated by PIPE ('|'). The `+` means a
# single literal on its own does not match this rule.
Dictionary <- LiteralD (PIPE LiteralD)+
|
|
|
|
# DictionaryI: case-insensitive dictionary -- two or more 'i'-suffixed literals
# separated by PIPE ('|'). The repetition is one-or-more, matching Dictionary:
# with zero-or-more, a lone `'x'i` literal would be consumed here and the
# LiteralI alternative that follows DictionaryI in Primary could never match.
DictionaryI <- LiteralID (PIPE LiteralID)+
|
|
|
|
# lit_ope: a quoted literal, single- or double-quoted, followed by optional
# spacing. The text between the quotes is captured as a token (< >) and is
# built from Char, so escape sequences are allowed inside it.
lit_ope <-
|
|
['] <(!['] Char)*> ['] Spacing
|
|
/ ["] <(!["] Char)*> ["] Spacing
|
|
|
|
# Literal: a quoted literal used as a stand-alone operand.
Literal <- lit_ope
|
|
|
|
# LiteralD: same syntax as Literal, under a separate name (it is the rule
# referenced by Dictionary and ErrorMessage).
LiteralD <- lit_ope
|
|
|
|
# lit_case_ignore_ope: like lit_ope, but the closing quote is immediately
# followed by the letter 'i' (e.g. 'abc'i or "abc"i).
lit_case_ignore_ope <-
|
|
['] <(!['] Char)*> "'i" Spacing
|
|
/ ["] <(!["] Char)*> '"i' Spacing
|
|
|
|
# LiteralI: an 'i'-suffixed literal used as a stand-alone operand.
LiteralI <- lit_case_ignore_ope
|
|
|
|
# LiteralID: same syntax as LiteralI, under a separate name (it is the rule
# referenced by DictionaryI).
LiteralID <- lit_case_ignore_ope
|
|
|
|
# NOTE: Bryan Ford's original PEG paper uses zero-or-more ('zom', `*`) for the
# body of a character class; the rules below use one-or-more ('oom', `+`), so
# an empty class `[]` is not accepted.
|
|
# Class: [ ... ] not starting with '^'; the ranges are captured as a token.
Class <- '[' !'^' <(!']' Range)+> ']' Spacing
|
|
# ClassI: as Class, with an 'i' directly after the closing bracket.
ClassI <- '[' !'^' <(!']' Range)+> ']i' Spacing
|
|
# NegatedClass: [^ ... ]
NegatedClass <- "[^" <(!']' Range)+> ']' Spacing
|
|
# NegatedClassI: [^ ... ]i
NegatedClassI <- "[^" <(!']' Range)+> ']i' Spacing
|
|
|
|
# Range: either `a-b` (two Chars joined by '-') or a single Char. The !']'
# guard makes a '-' that sits right before the closing bracket fall through to
# the single-Char alternative instead of starting a range.
Range <- (Char '-' ! ']' Char) / Char
|
|
|
|
# Char: one logical character inside a literal or character class.
# Alternatives are ordered; the longer escape forms are tried first.
Char <-
|
|
# Backslash followed by one of: n r t ' " [ ] \ ^
'\\' [nrt'\"[\]\\^]
|
|
# Three-digit octal escape whose first digit is 0-3.
/ '\\' [0-3] [0-7] [0-7]
|
|
# One- or two-digit octal escape.
/ '\\' [0-7] [0-7]?
|
|
# Hex escape: \x followed by one or two hex digits.
/ "\\x" [0-9a-fA-F] [0-9a-fA-F]?
|
|
# \u escape: either ('0' + one hex digit, or "10") followed by exactly four hex
# digits, or else four to five hex digits.
/ "\\u" (((('0' [0-9a-fA-F]) / "10") [0-9a-fA-F]{4,4}) / [0-9a-fA-F]{4,5})
|
|
# Any single character other than a backslash.
/ !'\\' .
|
|
|
|
# Repetition: a braced repetition count, e.g. {2,4}.
Repetition <- BeginBracket RepetitionRange EndBracket
|
|
|
|
# RepetitionRange: the four accepted forms, tried in this order:
#   n,m   n,   n   ,m
# `Number COMMA Number` must precede `Number COMMA`, and that must precede
# bare `Number`, so that the longest form wins under ordered choice.
RepetitionRange <-
|
|
Number COMMA Number
|
|
/ Number COMMA
|
|
/ Number
|
|
/ COMMA Number
|
|
|
|
# Number: one or more decimal digits followed by optional spacing.
Number <- [0-9]+ Spacing
|
|
|
|
# CapScope: an Expression enclosed in `$(` ... `)`.
CapScope <- BeginCapScope Expression EndCapScope
|
|
|
|
# Operator tokens. Each consumes its symbol plus any trailing Spacing.
# Rules whose name is prefixed with `~` carry the Ignore marker defined by the
# IGNORE rule (presumably cpp-peglib's "discard this rule's value" flag --
# confirm against the library documentation).
#
# LEFTARROW: the definition arrow, ASCII "<-" or the Unicode arrow "←".
LEFTARROW <- ("<-" / "←") Spacing
|
|
|
|
# Ordered-choice separator.
~SLASH <- '/' Spacing
|
|
# Dictionary separator.
~PIPE <- '|' Spacing
|
|
# And-predicate prefix.
AND <- '&' Spacing
|
|
# Not-predicate prefix.
NOT <- '!' Spacing
|
|
# Optional suffix.
QUESTION <- '?' Spacing
|
|
# Zero-or-more suffix.
STAR <- '*' Spacing
|
|
# One-or-more suffix.
PLUS <- '+' Spacing
|
|
# Grouping parentheses.
~OPEN <- '(' Spacing
|
|
~CLOSE <- ')' Spacing
|
|
# Any-character operator.
DOT <- '.' Spacing
|
|
|
|
# CUT: the "↑" marker accepted as a Sequence item.
CUT <- "↑" Spacing
|
|
# LABEL: '^' or "⇑", used by SuffixWithLabel to introduce a label Identifier.
~LABEL <- ('^' / "⇑") Spacing
|
|
|
|
# Spacing: any run (possibly empty) of whitespace and comments.
~Spacing <- (Space / Comment)*
|
|
# Comment: '#' up to, but not including, the end of the line.
Comment <- '#' (!EndOfLine . )*
|
|
# Space: a blank, a tab, or a line terminator.
Space <- ' ' / '\t' / EndOfLine
|
|
# EndOfLine: CRLF is tried before lone LF / CR so a CRLF pair is consumed whole.
EndOfLine <- "\r\n" / '\n' / '\r'
|
|
# EndOfFile: succeeds only when no character is left.
EndOfFile <- ! .
|
|
|
|
# Token-boundary delimiters: < ... >
~BeginTok <- '<' Spacing
|
|
~EndTok <- '>' Spacing
|
|
|
|
# Capture-scope delimiters: $( ... )
# Note that '$' and '(' are separate literals with no Spacing between them.
~BeginCapScope <- '$' '(' Spacing
|
|
~EndCapScope <- ')' Spacing
|
|
|
|
# Named-capture delimiters: $name< ... >
# The name is captured as a token; '<' must follow it immediately.
BeginCap <- '$' <IdentCont> '<' Spacing
|
|
~EndCap <- '>' Spacing
|
|
|
|
# BackRef: $name -- '$' followed by an identifier captured as a token.
BackRef <- '$' <IdentCont> Spacing
|
|
|
|
# IGNORE: the '~' marker character (no trailing Spacing is consumed).
IGNORE <- '~'
|
|
|
|
# Ignore: optional '~' marker, used in front of rule names in Definition and
# in front of rule references in Primary.
Ignore <- IGNORE?
|
|
# Parameters: parenthesised, comma-separated list of one or more Identifiers
# (the formal parameters of a parameterised rule Definition).
Parameters <- OPEN Identifier (COMMA Identifier)* CLOSE
|
|
# Arguments: parenthesised, comma-separated list of one or more Expressions
# (the actual arguments of a parameterised rule reference in Primary).
Arguments <- OPEN Expression (COMMA Expression)* CLOSE
|
|
# List separator.
~COMMA <- ',' Spacing
|
|
|
|
# Instruction grammars
#
# An Instruction is the optional `{ ... }` block that may follow a Definition.
|
|
# Instruction: braces around zero or more InstructionItems separated by ';'.
# The item list as a whole is optional, so `{}` is accepted.
Instruction <-
|
|
BeginBracket (InstructionItem (InstructionItemSeparator InstructionItem)*)? EndBracket
|
|
# InstructionItem: one of the three instruction kinds defined below.
InstructionItem <- PrecedenceClimbing / ErrorMessage / NoAstOpt
|
|
~InstructionItemSeparator <- ';' Spacing
|
|
|
|
# Whitespace helpers used inside instructions. Unlike Spacing, these are built
# from Space only, so comments are not accepted where they are used.
# SpacesZom = zero or more, SpacesOom = one or more.
~SpacesZom <- Space*
|
|
~SpacesOom <- Space+
|
|
# Brace delimiters (also used by Repetition).
~BeginBracket <- '{' Spacing
|
|
~EndBracket <- '}' Spacing
|
|
|
|
# PrecedenceClimbing instruction
#
# Syntax: the keyword "precedence", then one or more PrecedenceInfo groups
# separated by whitespace.
|
|
PrecedenceClimbing <- "precedence" SpacesOom PrecedenceInfo (SpacesOom PrecedenceInfo)* SpacesZom
|
|
# PrecedenceInfo: an associativity letter followed by one or more operators,
# each preceded by at least one Space.
PrecedenceInfo <- PrecedenceAssoc (~SpacesOom PrecedenceOpe)+
|
|
# PrecedenceOpe: one operator, written as
#   - a single-quoted string containing no Space,
#   - a double-quoted string containing no Space, or
#   - a bare run of one or more characters, none of which is 'L', 'R'
#     (PrecedenceAssoc), a Space, or '}'.
# In every form the operator text is captured as a token.
PrecedenceOpe <-
|
|
['] <(!(Space / [']) Char)*> [']
|
|
/ ["] <(!(Space / ["]) Char)*> ["]
|
|
/ <(!(PrecedenceAssoc / Space / '}') . )+>
|
|
# PrecedenceAssoc: the single letter 'L' or 'R' (presumably left / right
# associativity -- confirm against the library documentation).
PrecedenceAssoc <- [LR]
|
|
|
|
# Error message instruction
#
# Syntax: the keyword "message", at least one Space, then a quoted literal
# (LiteralD), then optional trailing Spaces.
|
|
|
ErrorMessage <- "message" SpacesOom LiteralD SpacesZom
|
|
|
|
# No Ast node optimization instruction
#
# Syntax: the bare keyword "no_ast_opt" followed by optional Spaces; it takes
# no arguments.
|
|
NoAstOpt <- "no_ast_opt" SpacesZom
|