## A small parser-combinator toolkit.
##
## A `Parser` value is an immutable snapshot: the input stream, the index of
## the last consumed character and the tokens collected so far. Every
## combinator has the shape `Parser -> ParserResult` and returns either a new
## snapshot or a `ParserError`, so combinators chain with `flatMap`.

import std/options
import std/strutils
import std/strformat
import std/collections/sequtils
import std/sugar
import std/collections/tables
import results
import fusion/matching
import fp/maybe

{.experimental: "caseStmtMacros".}

type
  ParserState* = ref object
    stream: string
    ## `position` is the index of the last consumed character; -1 means
    ## nothing has been consumed yet.
    position, lastPosition: int

  Token* = ref object
    value*: char

  Parser* = ref object
    state: ParserState
    tokens: seq[Token]

  ParseErrorKind = enum
    choiceMismatchErr
    charMismatchErr
    endOfStringErr

  ParserError = ref object
    kind: ParseErrorKind
    unexpected: string
    expected: string
    index: int      ## Index into `parser.state.stream` the error refers to.
    parser: Parser  ## Snapshot the failing combinator was applied to.

  ParserResult* = Result[Parser, ParserError]

  Builder*[T] = tuple[
    parser: Parser,
    tree: seq[T]
  ]

  BuilderResult*[T] = Result[Builder[T], (Builder[T], string)]

proc indentKey(x: string, count: int): string =
  ## Indents every line of `x` by `count` spaces, then strips the indent from
  ## the first line so the text can follow a `key: ` prefix on the same line.
  var y = x.indent(count)
  y.delete(0..count - 1)
  y

proc `$`*(x: Token): string =
  &"""Token(
  value: {x.value},
)"""

proc `$`*(x: ParserState): string =
  &"""ParserState(
  stream: "{x.stream}",
  position: {x.position},
  lastPosition: {x.lastPosition},
)"""

proc `$`*(x: Parser): string =
  &"""Parser(
  state: {indentKey($x.state, 2)},
  tokens: {indentKey($x.tokens, 2)},
)"""

proc prettyExpectedSet(x: set[char]): string =
  ## Human-readable name for the well-known `strutils` character sets; any
  ## other set falls back to its default `$` rendering.
  ##
  ## Raw string literals are used so the escapes are shown as text instead of
  ## embedding real NUL/CR/LF/... bytes in the error message.
  case x:
    of AllChars: r"AllChars {'\x00'..'\xFF'}"
    of Digits: r"Digits {'0'..'9'}"
    of HexDigits: r"HexDigits {'0'..'9', 'A'..'F', 'a'..'f'}"
    of Letters: r"Letters {'A'..'Z', 'a'..'z'}"
    of Newlines: r"Newlines {'\r', '\n'}"
    of Whitespace: r"Whitespace {' ', '\t', '\v', '\r', '\n', '\f'}"
    else: $x

proc renderError(
  title: string,
  parser: Parser,
  index: int,
  detail: string,
): string =
  ## Formats an error as a title line, the input stream, and a caret line
  ## pointing at `index`.
  # TODO: Only works for single line right now
  let caretPad = " ".repeat(max(0, index))
  &"Parsing Error ({title}):\n{parser.state.stream}\n{caretPad}^ {detail}"

proc `$`*(x: ParserError): string =
  ## Renders the error together with the offending input and a caret marker.
  case x:
    of charMismatchErr(
      expected: @expected,
      parser: @parser,
      index: @index,
      unexpected: @unexpected
    ):
      renderError(
        "Character Mismatch Error", parser, index,
        &"Expected '{expected}' but got '{unexpected}'")
    of choiceMismatchErr(
      expected: @expected,
      parser: @parser,
      index: @index,
      unexpected: @unexpected
    ):
      renderError(
        "Choice Mismatch Error", parser, index,
        &"Expected '{expected}' but got '{unexpected}'")
    of endOfStringErr(parser: @parser, index: @index):
      # NOTE(review): `ch` also raises endOfStringErr when it runs out of
      # input, in which case "Expected 'EndOfString'" reads oddly -- confirm
      # whether that case deserves its own error kind.
      renderError(
        "EndOfString Expected", parser, index, "Expected 'EndOfString'")
    else:
      "ParseError"

proc initParser*(str: string): Parser =
  ## Creates a parser positioned before the first character of `str`.
  Parser(
    state: ParserState(
      stream: str,
      position: -1,
      lastPosition: 0,
    ),
    tokens: newSeq[Token](),
  )

proc initParserResult*(str: string): ParserResult =
  ## Like `initParser`, but already wrapped in a successful `ParserResult`.
  ParserResult.ok(initParser(str))

func consume(
  parser: Parser,
  accepted: set[char],
  expected: string,
): ParserResult =
  ## Shared implementation of both `ch` overloads: consumes the next character
  ## when it is a member of `accepted`. `expected` is the description stored
  ## in the error on failure.
  let state = parser.state
  let nextIndex = state.position + 1

  if nextIndex >= state.stream.len:
    return err(ParserError(
      kind: endOfStringErr,
      expected: expected,
      index: nextIndex,
      parser: parser,
    ))

  let found = state.stream[nextIndex]
  if found notin accepted:
    return err(ParserError(
      kind: charMismatchErr,
      unexpected: $found,
      expected: expected,
      index: nextIndex,
      parser: parser,
    ))

  # Success: build a fresh snapshot, the input parser is left untouched.
  ok(Parser(
    state: ParserState(
      stream: state.stream,
      position: nextIndex,
      lastPosition: state.position,
    ),
    tokens: parser.tokens & Token(value: found),
  ))

func ch*(expectedChars: set[char]): (Parser -> ParserResult) {.inline.} =
  ## Parser that consumes one character if it belongs to `expectedChars`.
  ## Fails with `charMismatchErr` on any other character and with
  ## `endOfStringErr` when no input is left.
  let expected = prettyExpectedSet(expectedChars)
  return func(parser: Parser): ParserResult =
    consume(parser, expectedChars, expected)

func ch*(expectedChar: char): (Parser -> ParserResult) {.inline.} =
  ## Parser that consumes exactly the character `expectedChar`.
  ## Fails with `charMismatchErr` on any other character and with
  ## `endOfStringErr` when no input is left.
  let expected = $expectedChar
  return func(parser: Parser): ParserResult =
    consume(parser, {expectedChar}, expected)

func str*(s: string): (Parser -> ParserResult) {.inline.} =
  ## Parser that consumes the characters of `s` one after another, producing
  ## one token per character. An empty `s` always succeeds.
  return func(parser: Parser): ParserResult =
    var p = parser.ok()
    for c in s.items:
      p = p.flatMap(ch(c))
    return p

proc endOfStream*(parser: Parser): ParserResult =
  ## Succeeds (consuming nothing) when all input has been consumed.
  if parser.state.position == parser.state.stream.len - 1:
    ok(parser)
  else:
    err(ParserError(
      kind: endOfStringErr,
      expected: "EndOfString",
      # Point at the first unconsumed character, matching what `ch` reports.
      index: parser.state.position + 1,
      parser: parser,
    ))

proc newline*(parser: Parser): ParserResult =
  ## Succeeds at the end of the stream (consuming nothing) or on a single
  ## newline character (consumed as a token).
  ##
  ## The previous implementation required end-of-stream *and then* a newline
  ## character, which can never succeed.
  ## NOTE(review): "end of stream OR newline" is inferred from the self-test
  ## at the bottom of this file -- confirm the intended semantics.
  let atEnd = parser.endOfStream()
  if atEnd.isOk():
    atEnd
  else:
    ch(Newlines)(parser)

func ignore*(
  parserFn: Parser -> ParserResult,
): (Parser -> ParserResult) {.inline.} =
  ## Parse characters but throw success tokens away
  return proc(parser: Parser): ParserResult =
    return parserFn(parser)
      .map((x: Parser) => Parser(
        state: x.state,
        tokens: parser.tokens,
      ))

func optional*(
  parserFn: Parser -> ParserResult,
): (Parser -> ParserResult) {.inline.} =
  ## Parse characters and ignore failure
  return proc(parser: Parser): ParserResult =
    let newParser = parserFn(parser)
    if newParser.isOk():
      newParser
    else:
      parser.ok()

func manyUntil*(
  acceptFn: Parser -> ParserResult,
  stopFn: Parser -> ParserResult,
): (Parser -> ParserResult) {.inline.} =
  ## Applies `acceptFn` at least once, then keeps applying it for as long as
  ## `stopFn` fails on the current position. `stopFn` is only used as a
  ## look-ahead: whatever it would consume is not part of the result.
  ## Returns the error of `acceptFn` if it fails before `stopFn` matches.
  return proc(parser: Parser): ParserResult =
    var res: ParserResult = acceptFn(parser)
    while res.isOk() and res.flatMap(stopFn).isErr():
      res = res.flatMap(acceptFn)
    return res

func anyUntil*(
  stopFn: Parser -> ParserResult,
): (Parser -> ParserResult) {.inline.} =
  ## `manyUntil` accepting any character.
  manyUntil(ch(AllChars), stopFn)

func choice*(
  parsers: seq[Parser -> ParserResult],
): (Parser -> ParserResult) {.inline.} =
  ## Tries each parser in order on the same input and returns the first
  ## success. When all alternatives fail (or `parsers` is empty) the result is
  ## a `choiceMismatchErr` listing what every alternative expected; the
  ## `unexpected` text and `index` are taken from the first alternative.
  return proc(parser: Parser): ParserResult =
    var expectations = newSeqOfCap[string](parsers.len)
    var firstUnexpected = ""
    # Default for an empty `parsers` list: the first unconsumed character.
    var failIndex = parser.state.position + 1

    for i, fn in parsers:
      let attempt = fn(parser)
      if attempt.isOk():
        return attempt
      let failure = attempt.error()
      if i == 0:
        firstUnexpected = failure.unexpected
        failIndex = failure.index
      expectations.add(failure.expected)

    return err(ParserError(
      kind: choiceMismatchErr,
      expected: &"Choice ({expectations})",
      unexpected: firstUnexpected,
      index: failIndex,
      parser: parser,
    ))

proc `+`*(
  parserFnA: Parser -> ParserResult,
  parserFnB: Parser -> ParserResult,
): (Parser -> ParserResult) {.inline.} =
  ## Sequencing: runs `parserFnA`, then `parserFnB` on its result. Fails with
  ## the first error encountered.
  return proc(parser: Parser): ParserResult =
    parserFnA(parser).flatMap(parserFnB)

proc parseSeq*(
  parser: ParserResult,
  xs: seq[Parser -> ParserResult],
): ParserResult =
  ## Runs the parsers in `xs` left to right, starting from `parser`.
  xs.foldl(a.flatMap(b), parser)

proc foldTokens*[T](
  parserResult: ParserResult,
  onError: ParserError -> T,
  onSuccess: seq[Token] -> T,
): T =
  ## Collapses a `ParserResult` into a `T`: `onSuccess` receives the collected
  ## tokens, `onError` receives the error.
  if parserResult.isOk():
    onSuccess(parserResult.unsafeGet().tokens)
  else:
    onError(parserResult.error())

proc merge[T](t: Builder[T], parser: Parser, tree: seq[T]): Builder[T] =
  ## Builds a new `Builder` from `parser` and `tree`. The receiver `t` is not
  ## read; it is only there to allow method-call syntax.
  Builder[T]((parser, tree))

proc mapTree[T](
  builder: BuilderResult[T],
  fn: seq[T] -> seq[T],
): BuilderResult[T] =
  ## Applies `fn` to the tree of a successful builder; errors pass through.
  builder.map(proc(b: Builder[T]): Builder[T] =
    Builder[T]((parser: b[0], tree: fn(b[1]))))

proc applyParsers*[T](
  builder: Builder[T],
  parsers: seq[Parser -> ParserResult],
  tokenFoldFn: (seq[Token], seq[T]) -> seq[T],
): BuilderResult[T] =
  ## Runs `parsers` from the builder's current position with an empty token
  ## list, then lets `tokenFoldFn` fold the freshly parsed tokens into the
  ## builder's tree. On failure the untouched `builder` is returned together
  ## with the rendered parse error.
  let newParser = ParserResult.ok(Parser(
    state: builder[0].state,
    tokens: @[]
  ))
    .parseSeq(parsers)

  newParser
    .foldTokens(
      (e: ParserError) => BuilderResult[T].err((builder, $e)),
      (newTokens: seq[Token]) => BuilderResult[T].ok(
        builder.merge(
          newParser.unsafeGet(),
          tokenFoldFn(newTokens, builder[1]))
      ),
    )

proc applyParsersSeq*[T](
  builder: BuilderResult[T],
  xs: seq[tuple[
    parsers: seq[Parser -> ParserResult],
    tokenFoldFn: (seq[Token], seq[T]) -> seq[T],
  ]]): BuilderResult[T] =
  ## Applies each `(parsers, tokenFoldFn)` step in order via `applyParsers`,
  ## stopping at the first failure.
  xs.foldl(
    a.flatMap((x: Builder[T]) => x.applyParsers(b[0], b[1])),
    builder)

proc foldBuilder*[T, T2](
  builderResult: BuilderResult[T],
  onError: string -> T2,
  onSuccess: seq[T] -> T2,
): T2 =
  ## Collapses a `BuilderResult` into a `T2`: `onSuccess` receives the built
  ## tree, `onError` receives the error message.
  ##
  ## The return type used to be declared as `T`, which only type-checked when
  ## `T` and `T2` happened to be the same type.
  if builderResult.isOk():
    onSuccess(builderResult.unsafeGet().tree)
  else:
    let err = builderResult.error()
    onError(err[1])

when isMainModule:
  proc getTokens(x: ParserResult): seq[char] =
    x.foldTokens(
      proc(err: ParserError): seq[char] =
        echo err
        @[],
      proc(xs: seq[Token]): seq[char] = xs.map((x: Token) => x.value),
    )

  proc testParser(x: string, ps: seq[Parser -> ParserResult]): seq[char] =
    initParserResult(x).parseSeq(ps).getTokens()

  let optionalPrefixParser = @[
    optional(ch('_')),
    str("ABC")
  ]

  doAssert("_ABC".testParser(optionalPrefixParser) == @['_', 'A', 'B', 'C'])
  doAssert("ABC".testParser(optionalPrefixParser) == @['A', 'B', 'C'])

  let andParser = @[
    (ch('A') + ch('B')),
    ch('C'),
  ]

  doAssert("ABC".testParser(andParser) == @['A', 'B', 'C'])

  let newlineParser = @[
    str("ABC"),
    newline
  ]

  echo "ABC".testParser(newlineParser)
  doAssert("ABC".testParser(newlineParser) == @['A', 'B', 'C'])
  doAssert("ABC\n".testParser(newlineParser) == @['A', 'B', 'C', '\n'])