Add cpp-peglib grammar extracted from peglib.h (#222)

This commit is contained in:
Domingo Alvarez Duarte 2022-06-25 19:19:52 +02:00 committed by GitHub
parent fed85fe14d
commit 0366380091
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

145
grammar/cpp-peglib.peg Normal file
View File

@ -0,0 +1,145 @@
# Setup PEG syntax parser
# Grammar: the whole input is leading Spacing, one or more Definitions, and
# then end of input (EndOfFile), so unparsed trailing text is a failure.
Grammar <- Spacing Definition+ EndOfFile
# Definition: a single rule. Ordered choice, first match wins:
#   1. parameterized rule: the name is followed directly by a Parameters list
#      (IdentCont has no trailing Spacing, so no gap is allowed before '(').
#   2. plain rule: name, arrow, body.
# Both forms accept an optional leading '~' (Ignore) and an optional trailing
# '{ ... }' Instruction block.
Definition <-
Ignore IdentCont Parameters LEFTARROW Expression Instruction?
/ Ignore Identifier LEFTARROW Expression Instruction?
# Expression: one or more Sequences separated by '/'.
Expression <- Sequence (SLASH Sequence)*
# Sequence: zero or more items, each either a cut marker or a prefixed element.
# Because the repetition is '*', an empty sequence is accepted.
Sequence <- (CUT / Prefix)*
# Prefix: an element optionally preceded by '&' or '!'.
Prefix <- (AND / NOT)? SuffixWithLabel
# SuffixWithLabel: an element optionally followed by a label marker and a name.
SuffixWithLabel <- Suffix (LABEL Identifier)?
# Suffix: a primary optionally followed by exactly one repetition operator.
Suffix <- Primary Loop?
# Loop: '?', '*', '+', or a brace-delimited Repetition.
Loop <- QUESTION / STAR / PLUS / Repetition
# Primary: the atomic elements of an expression. Ordered choice -- the order of
# the alternatives is significant:
#   - A name followed directly by an Arguments list (no Spacing before '(',
#     since IdentCont is used) is a reference with arguments. It is rejected
#     when an arrow follows, because that is the head of a parameterized
#     Definition.
#   - A plain name is a rule reference. The negative lookahead rejects it when
#     it is followed by an optional Parameters list and an arrow, so the name
#     that starts the next Definition is not consumed as part of this body.
#   - BeginCap ('$name<') is tried before BackRef ('$name'), which would
#     otherwise match the same prefix.
#   - LiteralI and Dictionary are tried before Literal; all three begin with a
#     quoted string, and Literal alone would stop before the trailing 'i' or
#     the '|'.
Primary <-
Ignore IdentCont Arguments !LEFTARROW
/ Ignore Identifier !(Parameters? LEFTARROW)
/ OPEN Expression CLOSE
/ BeginTok Expression EndTok
/ BeginCapScope Expression EndCapScope
/ BeginCap Expression EndCap
/ BackRef
/ LiteralI
/ Dictionary
/ Literal
/ NegatedClass
/ Class
/ DOT
# Identifier: a name followed by optional Spacing. Rules that must not allow
# whitespace after the name use IdentCont directly.
Identifier <- IdentCont Spacing
# IdentCont: the bare name -- one start character, then any number of rest
# characters.
IdentCont <- IdentStart IdentRest*
# IdentStart: an ASCII letter, '_' or '%', or any character in U+0080..U+FFFF.
# The two arrows are excluded explicitly: they fall inside that range but are
# used as the CUT and LABEL operators.
IdentStart <- !"↑" !"⇑" ([a-zA-Z_%] / [\u0080-\uFFFF])
# IdentRest: anything allowed at the start, plus decimal digits.
IdentRest <- IdentStart / [0-9]
# Dictionary: two or more quoted literals separated by '|'.
Dictionary <- LiteralD (PIPE LiteralD)+
# lit_ope: a single- or double-quoted string followed by Spacing. The text
# between the quotes is wrapped in '< >'. The body is any run of Char that
# does not start with the closing quote; an escaped quote such as \' starts
# with a backslash, so Char's escape alternative consumes it and it does not
# end the literal.
lit_ope <-
['] <(!['] Char)*> ['] Spacing
/ ["] <(!["] Char)*> ["] Spacing
# Literal and LiteralD share one syntax; they are separate rules so that the
# plain-literal and dictionary-entry contexts have distinct names.
Literal <- lit_ope
LiteralD <- lit_ope
# LiteralI: same shape as lit_ope, but the closing quote must be followed
# directly by 'i' (presumably the case-insensitive literal form -- confirm
# against the parser implementation).
LiteralI <-
['] <(!['] Char)*> "'i" Spacing
/ ["] <(!["] Char)*> '"i' Spacing
# NOTE: Brian Ford's original PEG paper uses zero-or-more ('zom', '*') for the
# class body, whereas this grammar uses one-or-more ('oom', '+'), so an empty
# class '[]' is not accepted here.
# Class: '[' not followed by '^', one or more Ranges (wrapped in '< >'), ']'.
Class <- '[' !'^' <(!']' Range)+> ']' Spacing
# NegatedClass: same body as Class, but opened with '[^'.
NegatedClass <- "[^" <(!']' Range)+> ']' Spacing
# Range: either 'a-b' or a single Char. The '-' is only treated as a range
# operator when it is not immediately followed by ']'. This lets a trailing
# '-' be an ordinary member, e.g. '[+-]' is the two characters '+' and '-'.
Range <- (Char '-' ! ']' Char) / Char
# Char: one logical character inside a literal or character class.
# Ordered choice, tried top to bottom:
#   1. backslash + one of  n r t ' " [ ] \ ^
#   2. backslash + three octal digits, the first limited to 0-3
#   3. backslash + one or two octal digits
#   4. \x + one or two hex digits
#   5. \u + either six hex digits beginning '0h' or '10', or four to five
#      hex digits
#   6. any single character other than a backslash
# The three-digit octal form precedes the shorter one so that a sequence such
# as \101 is consumed whole and not as \10 followed by '1'. Similarly, the
# six-digit \u form precedes the four-to-five-digit form.
Char <-
'\\' [nrt'\"[\]\\^]
/ '\\' [0-3] [0-7] [0-7]
/ '\\' [0-7] [0-7]?
/ "\\x" [0-9a-fA-F] [0-9a-fA-F]?
/ "\\u" (((('0' [0-9a-fA-F]) / "10") [0-9a-fA-F]{4,4}) / [0-9a-fA-F]{4,5})
/ !'\\' .
# Repetition: a brace-delimited repeat count, e.g. the '{4,5}' used in Char.
# It shares its '{' / '}' delimiters with Instruction.
Repetition <- BeginBlacket RepetitionRange EndBlacket
# RepetitionRange: the four accepted shapes are 'n,m', 'n,', 'n' and ',m'
# (presumably min,max / at-least / exactly / at-most -- confirm against the
# parser implementation). The longer 'Number ...' forms are listed first
# because a bare Number would otherwise match their prefix and leave the
# comma unconsumed.
RepetitionRange <-
Number COMMA Number
/ Number COMMA
/ Number
/ COMMA Number
# Number: one or more decimal digits, then optional Spacing.
Number <- [0-9]+ Spacing
# Operator tokens. Each one is its punctuation followed by optional Spacing,
# so callers never need to skip whitespace themselves.
# Some are declared with a leading '~' (the ignore marker matched by the
# Ignore rule) and some are not. NOTE(review): presumably the unmarked ones
# are those whose identity the consumer needs to inspect -- confirm against
# the parser implementation.
# LEFTARROW: the rule-definition arrow, ASCII '<-' or the Unicode arrow.
LEFTARROW <- ("<-" / "←") Spacing
~SLASH <- '/' Spacing
~PIPE <- '|' Spacing
AND <- '&' Spacing
NOT <- '!' Spacing
QUESTION <- '?' Spacing
STAR <- '*' Spacing
PLUS <- '+' Spacing
~OPEN <- '(' Spacing
~CLOSE <- ')' Spacing
DOT <- '.' Spacing
# CUT: the upwards-arrow cut marker accepted inside a Sequence.
CUT <- "↑" Spacing
# LABEL: introduces the name in SuffixWithLabel; ASCII '^' or the Unicode
# double arrow.
~LABEL <- ('^' / "⇑") Spacing
# Spacing: any mix of whitespace and comments, possibly empty.
~Spacing <- (Space / Comment)*
# Comment: '#' up to, but not including, the end of the line. The line
# terminator itself is then consumed as a Space by the enclosing Spacing loop.
Comment <- '#' (!EndOfLine . )*
# Space: a blank, a tab, or a line terminator.
Space <- ' ' / '\t' / EndOfLine
# EndOfLine: CRLF is tried first so that it is consumed as a single terminator
# rather than as '\r' followed by '\n'.
EndOfLine <- "\r\n" / '\n' / '\r'
# EndOfFile: succeeds only when no character remains; consumes nothing.
EndOfFile <- ! .
# Delimiters for the bracketed forms accepted by Primary.
# BeginTok / EndTok: the '<' ... '>' pair.
~BeginTok <- '<' Spacing
~EndTok <- '>' Spacing
# BeginCapScope / EndCapScope: the '$(' ... ')' pair. No Spacing is allowed
# between '$' and '('.
~BeginCapScope <- '$' '(' Spacing
~EndCapScope <- ')' Spacing
# BeginCap / EndCap: the '$name<' ... '>' pair. The name is wrapped in '< >'
# and must be followed directly by '<' (IdentCont has no trailing Spacing).
BeginCap <- '$' <IdentCont> '<' Spacing
~EndCap <- '>' Spacing
# BackRef: '$name' on its own. Primary tries BeginCap first, so this only
# matches when the name is not followed by '<'.
BackRef <- '$' <IdentCont> Spacing
# IGNORE: the '~' marker itself. It has no trailing Spacing, so the marker
# must touch the name that follows it.
IGNORE <- '~'
# Ignore: the optional marker as used in front of a rule name in Definition
# and Primary.
Ignore <- IGNORE?
# Parameters: the formal parameter list of a parameterized Definition --
# one or more comma-separated names in parentheses.
Parameters <- OPEN Identifier (COMMA Identifier)* CLOSE
# Arguments: the actual argument list of a reference -- one or more
# comma-separated Expressions in parentheses.
Arguments <- OPEN Expression (COMMA Expression)* CLOSE
~COMMA <- ',' Spacing
# Instruction grammars
# Instruction: the optional '{ ... }' block after a Definition body. It holds
# zero or more items separated by ';'.
Instruction <-
BeginBlacket (InstructionItem (InstructionItemSeparator InstructionItem)*)? EndBlacket
# InstructionItem: one of the three supported instructions.
InstructionItem <- PrecedenceClimbing / ErrorMessage / NoAstOpt
~InstructionItemSeparator <- ';' Spacing
# SpacesZom / SpacesOom: zero-or-more / one-or-more Space. Unlike Spacing,
# these do not accept comments.
~SpacesZom <- Space*
~SpacesOom <- Space+
# BeginBlacket / EndBlacket: the '{' and '}' delimiters, also used by
# Repetition. NOTE(review): "Blacket" is presumably a misspelling of
# "Bracket"; the names are kept because other rules reference them.
~BeginBlacket <- '{' Spacing
~EndBlacket <- '}' Spacing
# PrecedenceClimbing instruction
# PrecedenceClimbing: the keyword 'precedence', then one or more
# PrecedenceInfo groups separated by whitespace.
PrecedenceClimbing <- "precedence" SpacesOom PrecedenceInfo (SpacesOom PrecedenceInfo)* SpacesZom
# PrecedenceInfo: an associativity letter followed by one or more
# whitespace-separated operators.
PrecedenceInfo <- PrecedenceAssoc (~SpacesOom PrecedenceOpe)+
# PrecedenceOpe: a single operator, written as
#   - a single-quoted string containing no whitespace,
#   - a double-quoted string containing no whitespace, or
#   - a bare run of characters that stops at whitespace, at '}', or at an
#     associativity letter.
# Because the bare form stops at 'L' or 'R', an operator containing either
# letter must be quoted.
PrecedenceOpe <-
['] <(!(Space / [']) Char)*> [']
/ ["] <(!(Space / ["]) Char)*> ["]
/ <(!(PrecedenceAssoc / Space / '}') . )+>
# PrecedenceAssoc: 'L' or 'R' (presumably left / right associativity --
# confirm against the parser implementation).
PrecedenceAssoc <- [LR]
# Error message instruction
# ErrorMessage: the keyword 'message', at least one Space, then a quoted
# literal holding the message text.
ErrorMessage <- "message" SpacesOom LiteralD SpacesZom
# No AST node optimization instruction
# NoAstOpt: the bare keyword 'no_ast_opt'; it takes no argument.
NoAstOpt <- "no_ast_opt" SpacesZom