diff --git a/grammar/cpp-peglib.peg b/grammar/cpp-peglib.peg
new file mode 100644
index 0000000..060e1d6
--- /dev/null
+++ b/grammar/cpp-peglib.peg
@@ -0,0 +1,145 @@
+# PEG syntax parser: the grammar of PEG definitions, written in PEG itself
+Grammar <- Spacing Definition+ EndOfFile  # one or more definitions, then end of input
+
+Definition <-
+    Ignore IdentCont Parameters LEFTARROW Expression Instruction?  # rule with a parameter list
+  / Ignore Identifier LEFTARROW Expression Instruction?            # plain rule
+
+Expression <- Sequence (SLASH Sequence)*  # alternatives separated by SLASH
+
+Sequence <- (CUT / Prefix)*  # may be empty
+
+Prefix <- (AND / NOT)? SuffixWithLabel  # optional predicate operator in front
+
+SuffixWithLabel <- Suffix (LABEL Identifier)?
+
+Suffix <- Primary Loop?
+
+Loop <- QUESTION / STAR / PLUS / Repetition
+
+Primary <-
+    Ignore IdentCont Arguments !LEFTARROW          # reference with an argument list
+  / Ignore Identifier !(Parameters? LEFTARROW)     # plain reference, not the head of a following definition
+  / OPEN Expression CLOSE
+  / BeginTok Expression EndTok
+  / BeginCapScope Expression EndCapScope
+  / BeginCap Expression EndCap
+  / BackRef
+  / LiteralI                                       # tried before Literal
+  / Dictionary                                     # tried before Literal
+  / Literal
+  / NegatedClass
+  / Class
+  / DOT
+
+Identifier <- IdentCont Spacing
+
+IdentCont <- IdentStart IdentRest*
+
+IdentStart <- !"↑" !"⇑" ([a-zA-Z_%] / [\u0080-\uFFFF])  # the two arrow glyphs can never start a name
+
+IdentRest <- IdentStart / [0-9]
+
+Dictionary <- LiteralD (PIPE LiteralD)+  # at least two literals
+
+lit_ope <-
+    ['] <(!['] Char)*> ['] Spacing  # single-quoted
+  / ["] <(!["] Char)*> ["] Spacing  # double-quoted
+
+Literal <- lit_ope
+
+LiteralD <- lit_ope
+
+LiteralI <-
+    ['] <(!['] Char)*> "'i" Spacing  # single-quoted, closing quote followed by i
+  / ["] <(!["] Char)*> '"i' Spacing  # double-quoted, closing quote followed by i
+
+# NOTE: Brian Ford's original paper uses 'zom' (zero or more) here instead of 'oom' (one or more).
+Class <- '[' !'^' <(!']' Range)+> ']' Spacing
+NegatedClass <- "[^" <(!']' Range)+> ']' Spacing
+
+Range <- (Char '-' !']' Char) / Char  # a '-' directly before ']' is not a range operator
+
+Char <-
+    '\\' [nrt'\"[\]\\^]                # backslash plus one listed character
+  / '\\' [0-3] [0-7] [0-7]             # three-digit octal escape
+  / '\\' [0-7] [0-7]?                  # one- or two-digit octal escape
+  / "\\x" [0-9a-fA-F] [0-9a-fA-F]?     # hex escape, one or two digits
+  / "\\u" (((('0' [0-9a-fA-F]) / "10") [0-9a-fA-F]{4,4}) / [0-9a-fA-F]{4,5})  # 0X or 10 plus four hex digits, else four or five hex digits
+  / !'\\' .                            # any character other than a backslash
+
+Repetition <- BeginBlacket RepetitionRange EndBlacket  # brace-delimited repeat count
+
+RepetitionRange <-
+    Number COMMA Number  # {n,m}
+  / Number COMMA         # {n,}
+  / Number               # {n}
+  / COMMA Number         # {,m}
+
+Number <- [0-9]+ Spacing
+
+LEFTARROW <- ("<-" / "←") Spacing
+
+~SLASH <- '/' Spacing
+~PIPE <- '|' Spacing
+AND <- '&' Spacing
+NOT <- '!' Spacing
+QUESTION <- '?' Spacing
+STAR <- '*' Spacing
+PLUS <- '+' Spacing
+~OPEN <- '(' Spacing
+~CLOSE <- ')' Spacing
+DOT <- '.' Spacing
+
+CUT <- "↑" Spacing
+~LABEL <- ('^' / "⇑") Spacing
+
+~Spacing <- (Space / Comment)*  # may match nothing
+Comment <- '#' (!EndOfLine . )*  # stops before the line terminator
+Space <- ' ' / '\t' / EndOfLine
+EndOfLine <- "\r\n" / '\n' / '\r'
+EndOfFile <- ! .  # succeeds only when no input remains
+
+~BeginTok <- '<' Spacing
+~EndTok <- '>' Spacing
+
+~BeginCapScope <- '$' '(' Spacing
+~EndCapScope <- ')' Spacing
+
+BeginCap <- '$' <IdentCont> '<' Spacing  # "$name<": the name token sits between '$' and '<'
+~EndCap <- '>' Spacing
+
+BackRef <- '$' <IdentCont> Spacing  # "$name": a bare '$' is not a back-reference
+
+IGNORE <- '~'
+
+Ignore <- IGNORE?
+Parameters <- OPEN Identifier (COMMA Identifier)* CLOSE
+Arguments <- OPEN Expression (COMMA Expression)* CLOSE
+~COMMA <- ',' Spacing
+
+# Instruction grammars
+Instruction <-
+    BeginBlacket (InstructionItem (InstructionItemSeparator InstructionItem)*)? EndBlacket  # "{ item; item }", the item list may be empty
+InstructionItem <- PrecedenceClimbing / ErrorMessage / NoAstOpt
+~InstructionItemSeparator <- ';' Spacing
+
+~SpacesZom <- Space*
+~SpacesOom <- Space+
+~BeginBlacket <- '{' Spacing
+~EndBlacket <- '}' Spacing
+
+# PrecedenceClimbing instruction
+PrecedenceClimbing <- "precedence" SpacesOom PrecedenceInfo (SpacesOom PrecedenceInfo)* SpacesZom
+PrecedenceInfo <- PrecedenceAssoc (~SpacesOom PrecedenceOpe)+
+PrecedenceOpe <-
+    ['] <(!(Space / [']) Char)*> [']               # single-quoted operator, no whitespace inside
+  / ["] <(!(Space / ["]) Char)*> ["]               # double-quoted operator, no whitespace inside
+  / <(!(PrecedenceAssoc / Space / '}') . )+>       # bare operator: runs until L, R, whitespace or '}'
+PrecedenceAssoc <- [LR]  # presumably Left / Right associativity - confirm against the parser actions
+
+# Error message instruction
+ErrorMessage <- "message" SpacesOom LiteralD SpacesZom
+
+# No AST node optimization instruction
+NoAstOpt <- "no_ast_opt" SpacesZom