## A small parser-combinator toolkit.
##
## A parser function has the shape `Parser -> ParserResult`: it receives the
## current `Parser` (input stream, cursor and tokens collected so far) and
## returns either a new `Parser` or a `ParserError`. Combinators such as
## `str`, `choice`, `optional`, `manyUntil` and `+` build bigger parser
## functions out of smaller ones.

import std/options
import std/strutils
import std/strformat
import std/collections/sequtils
import std/sugar
import std/collections/tables

import results
import fusion/matching
import fp/maybe

import ../utils/str

{.experimental: "caseStmtMacros".}

type
  ParserState* = ref object
    ## Input stream plus cursor. `position` is the index of the last consumed
    ## character; `initParser` starts it at -1 (nothing consumed yet).
    stream: string
    position, lastPosition: int

  parserTokenCharValueT* = char
  parserTokenStringValueT* = string
  parserTokenKeyValuePairValueT* = tuple[k: string, v: string]

  ParserTokenKind* = enum
    parserTokenChar
    parserTokenString
    # parserTokenKeyValuePair

  ParserToken* = ref object
    ## A single token produced by a parser function.
    case kind*: ParserTokenKind
    of parserTokenChar:
      charValue*: parserTokenCharValueT
    of parserTokenString:
      stringValue*: parserTokenStringValueT
    # of parserTokenKeyValuePair:
    #   keyValuePairValue*: parserTokenKeyValuePairValueT

  Parser* = ref object
    ## Parser state together with the tokens collected so far.
    state*: ParserState
    tokens*: seq[ParserToken]

  ParseErrorKind = enum
    choiceMismatchErr
    charMismatchErr
    endOfStringErr

  ParserError = ref object
    ## Failure description: what was expected, what was found, the stream
    ## index of the failure and the parser the failing function received.
    kind: ParseErrorKind
    unexpected: string
    expected: string
    index: int
    parser: Parser

  ParserResult* = Result[Parser, ParserError]

  Builder*[T] = tuple[
    parser: Parser,
    tree: seq[T]
  ]

  BuilderResult*[T] = Result[Builder[T], (Builder[T], string)]

proc indentKey(x: string, count: int): string =
  ## Indents every line of `x` by `count` columns and then removes that
  ## indent from the first line again, so a multi-line value can be placed
  ## right after a `key: ` prefix.
  var y = x.indent(count)
  y.delete(0 .. count - 1)
  y

proc tokenStringValue*(x: ParserToken): string =
  ## Returns the token's payload as a string, whatever its kind.
  case x.kind:
    of parserTokenChar: $x.charValue
    of parserTokenString: x.stringValue

proc `$`*(x: ParserToken): string =
  &"""ParserToken(
  value: {tokenStringValue(x)},
)"""

proc `$`*(x: ParserState): string =
  &"""ParserState(
  stream: "{x.stream}",
  position: {x.position},
  lastPosition: {x.lastPosition},
)"""

proc `$`*(x: Parser): string =
  &"""Parser(
  state: {indentKey($x.state, 2)},
  tokens: {indentKey($x.tokens, 2)},
)"""

proc prettyExpectedSet(x: set[char]): string =
  ## Human-readable name for the well-known `strutils` character sets;
  ## any other set falls back to its `$` representation.
  ##
  ## Raw string literals are used so the escape sequences are shown as text
  ## instead of embedding NUL / CR / LF / TAB bytes in the error message.
  if x == AllChars:
    r"AllChars {'\x00'..'\xFF'}"
  elif x == Digits:
    "Digits {'0'..'9'}"
  elif x == HexDigits:
    "HexDigits {'0'..'9', 'A'..'F', 'a'..'f'}"
  elif x == Letters:
    "Letters {'A'..'Z', 'a'..'z'}"
  elif x == Newlines:
    r"Newlines {'\r', '\n'}"
  elif x == Whitespace:
    r"Whitespace {' ', '\t', '\v', '\r', '\n', '\f'}"
  else:
    $x

proc errorContext(parser: Parser, index: int):
    tuple[original, errSpace: string] =
  ## Source excerpt shown in an error message and the padding that places
  ## the `^` marker under column `index`.
  # TODO: Only works for single line right now
  (
    original: parser.state.stream.deleteAfterNewline(parser.state.position),
    errSpace: " ".repeat(max(0, index)),
  )

proc renderMismatch(title: string, parser: Parser, index: int,
                    expected, unexpected: string): string =
  ## Shared renderer for the "expected X but got Y" error kinds.
  let (original, errSpace) = errorContext(parser, index)
  # NOTE(review): the `$index & $parser` prefix looks like leftover debug
  # output; it is kept so existing error text is unchanged.
  $index & $parser & &"""Parsing Error ({title}):
{original}
{errSpace}^
Expected '{expected}' but got '{unexpected}'"""

proc `$`*(x: ParserError): string =
  ## Renders the error with a caret pointing at the failing index.
  case x:
    of charMismatchErr(
      expected: @expected, parser: @parser,
      index: @index, unexpected: @unexpected
    ):
      renderMismatch(
        "Character Mismatch Error", parser, index, expected, unexpected)
    of choiceMismatchErr(
      expected: @expected, parser: @parser,
      index: @index, unexpected: @unexpected
    ):
      renderMismatch(
        "Choice Mismatch Error", parser, index, expected, unexpected)
    of endOfStringErr(parser: @parser, index: @index):
      let (original, errSpace) = errorContext(parser, index)
      $index & $parser & &"""Parsing Error (EndOfString Expected):
{original}
{errSpace}^
Expected 'EndOfString' at {index} but got {original.len - 1}"""
    else:
      "ParseError"

func initParserToken(x: char): ParserToken =
  ParserToken(kind: parserTokenChar, charValue: x)

func initParserToken(x: string): ParserToken =
  ParserToken(kind: parserTokenString, stringValue: x)

# func initParserToken(x: parserTokenKeyValuePairValueT): ParserToken =
#   ParserToken(kind: parserTokenKeyValuePair, keyValuePairValue: x)

proc initParser*(str: string): Parser =
  ## Creates a parser over `str` with the cursor before the first character
  ## and no tokens.
  Parser(
    state: ParserState(
      stream: str,
      position: -1,
      lastPosition: 0,
    ),
    tokens: newSeq[ParserToken](),
  )

proc initParserResult*(str: string): ParserResult =
  ## `initParser` wrapped in a successful result, ready for `parseSeq`.
  ParserResult.ok(initParser(str))

func consumed(parser: Parser, index: int, found: char): Parser =
  ## Parser advanced to `index` with `found` appended as a char token.
  Parser(
    state: ParserState(
      stream: parser.state.stream,
      position: index,
      lastPosition: parser.state.position,
    ),
    tokens: parser.tokens & initParserToken(found),
  )

func newEndOfStringError(parser: Parser, index: int,
                         expected: string): ParserError =
  ## Error for "input ended while `expected` was still required".
  ParserError(
    kind: endOfStringErr,
    expected: expected,
    index: index,
    parser: parser,
  )

func newCharMismatchError(parser: Parser, index: int, found: char,
                          expected: string): ParserError =
  ## Error for "found `found` at `index` where `expected` was required".
  ParserError(
    kind: charMismatchErr,
    unexpected: $found,
    expected: expected,
    index: index,
    parser: parser,
  )

func ch*(expectedChars: set[char]): (Parser -> ParserResult) {.inline.} =
  ## Parser function that consumes the next character if it is a member of
  ## `expectedChars` and appends it as a char token.
  ##
  ## Fails with `charMismatchErr` on any other character and with
  ## `endOfStringErr` when no input is left.
  # NOTE(review): at end of input the error kind is `endOfStringErr`, which
  # `$` renders as "Expected 'EndOfString'"; the `expected` field is unused
  # by that rendering.
  return func(parser: Parser): ParserResult =
    let newIndex = parser.state.position + 1
    if newIndex >= parser.state.stream.len:
      return ParserResult.err(newEndOfStringError(
        parser, newIndex, expectedChars.prettyExpectedSet()))
    let foundChar = parser.state.stream[newIndex]
    if foundChar in expectedChars:
      return ParserResult.ok(parser.consumed(newIndex, foundChar))
    return ParserResult.err(newCharMismatchError(
      parser, newIndex, foundChar, expectedChars.prettyExpectedSet()))

func ch*(expectedChar: char): (Parser -> ParserResult) {.inline.} =
  ## Parser function that consumes the next character if it equals
  ## `expectedChar` and appends it as a char token.
  ##
  ## Fails with `charMismatchErr` on any other character and with
  ## `endOfStringErr` when no input is left.
  return func(parser: Parser): ParserResult =
    let newIndex = parser.state.position + 1
    if newIndex >= parser.state.stream.len:
      return ParserResult.err(newEndOfStringError(
        parser, newIndex, $expectedChar))
    let foundChar = parser.state.stream[newIndex]
    if foundChar == expectedChar:
      return ParserResult.ok(parser.consumed(newIndex, foundChar))
    return ParserResult.err(newCharMismatchError(
      parser, newIndex, foundChar, $expectedChar))

func str*(s: string): (Parser -> ParserResult) {.inline.} =
  ## Parser function that matches every character of `s` in order,
  ## producing one char token per character. An empty `s` always succeeds.
  return func(parser: Parser): ParserResult =
    var p = ParserResult.ok(parser)
    for c in s.items:
      p = p.flatMap(ch(c))
    return p

proc endOfStream*(parser: Parser): ParserResult =
  ## Succeeds (consuming nothing) when the cursor is at the end of the
  ## stream; otherwise fails with `endOfStringErr`.
  let index = parser.state.position + 1
  if index == parser.state.stream.len:
    ParserResult.ok(parser)
  else:
    ParserResult.err(ParserError(
      kind: endOfStringErr,
      expected: "EndOfString",
      index: index,
      parser: parser,
    ))

func ignore*(parserFn: Parser -> ParserResult):
    (Parser -> ParserResult) {.inline.} =
  ## Runs `parserFn` but discards the tokens it produced: on success the
  ## cursor advances while the token list stays as it was before.
  return proc(parser: Parser): ParserResult =
    return parserFn(parser)
      .map((x: Parser) => Parser(
        state: x.state,
        tokens: parser.tokens,
      ))

func flattenParserTokens*(parser: Parser): ParserResult =
  ## Replaces all tokens with a single string token holding the
  ## concatenation of their string values.
  return ParserResult.ok(
    Parser(
      state: parser.state,
      tokens: @[
        ParserToken(
          kind: parserTokenString,
          stringValue: parser.tokens.mapIt(it.tokenStringValue()).join(""),
        )
      ],
    )
  )

func emptyTokens*(parser: Parser): Parser =
  ## Same state, empty token list.
  Parser(
    state: parser.state,
    tokens: newSeq[ParserToken](),
  )

func optional*(parserFn: Parser -> ParserResult):
    (Parser -> ParserResult) {.inline.} =
  ## Tries `parserFn`; when it fails, the original parser is returned
  ## unchanged as a success.
  return proc(parser: Parser): ParserResult =
    let attempt = parserFn(parser)
    if attempt.isOk():
      return attempt
    return ParserResult.ok(parser)

func manyUntil*(acceptFn: Parser -> ParserResult,
                stopFn: Parser -> ParserResult):
    (Parser -> ParserResult) {.inline.} =
  ## Applies `acceptFn` once, then keeps applying it for as long as `stopFn`
  ## fails on the current state.
  ##
  ## `stopFn` is only used as a look-ahead: its result is discarded, so
  ## whatever it matched is not consumed. If `acceptFn` fails before
  ## `stopFn` succeeds, that failure is returned.
  return proc(parser: Parser): ParserResult =
    var res: ParserResult = acceptFn(parser)
    while res.isOk() and res.flatMap(stopFn).isErr():
      res = res.flatMap(acceptFn)
    return res

func anyUntil*(stopFn: Parser -> ParserResult):
    (Parser -> ParserResult) {.inline.} =
  ## `manyUntil` accepting any character.
  manyUntil(ch(AllChars), stopFn)

func choice*(parsers: seq[Parser -> ParserResult]):
    (Parser -> ParserResult) {.inline.} =
  ## Tries each parser function in order against the same input and returns
  ## the first success.
  ##
  ## When all of them fail (or `parsers` is empty) the result is a
  ## `choiceMismatchErr` listing every alternative's `expected` text,
  ## positioned at the character that would have been consumed next.
  return proc(parser: Parser): ParserResult =
    var failures = newSeqOfCap[ParserError](parsers.len)
    for parserFn in parsers:
      let attempt = parserFn(parser)
      if attempt.isOk():
        return attempt
      failures.add attempt.error()
    let expectedList = failures.mapIt(it.expected)
    # Guard the empty-alternatives case instead of indexing failures[0].
    let firstUnexpected =
      if failures.len > 0: failures[0].unexpected
      else: ""
    return ParserResult.err(ParserError(
      kind: choiceMismatchErr,
      expected: &"Choice ({expectedList})",
      unexpected: firstUnexpected,
      index: parser.state.position + 1,
      parser: parser,
    ))

proc `+`*(parserFnA: Parser -> ParserResult,
          parserFnB: Parser -> ParserResult):
    (Parser -> ParserResult) {.inline.} =
  ## Sequences two parser functions: runs `parserFnA`, then `parserFnB` on
  ## its result. Fails with the first failure encountered.
  return proc(parser: Parser): ParserResult =
    parserFnA(parser).flatMap(parserFnB)

let newlineParser = choice(@[
  ch(NewLines),
  endOfStream,
])

proc newline*(parser: Parser): ParserResult =
  ## Matches a newline character or the end of the stream.
  newlineParser(parser)

proc parseSeq*(parser: ParserResult,
               xs: seq[Parser -> ParserResult]): ParserResult =
  ## Threads `parser` through every parser function in `xs`, in order,
  ## stopping at the first failure.
  xs.foldl(a.flatMap(b), parser)

proc foldTokens*[T](
  parserResult: ParserResult,
  onError: ParserError -> T,
  onSuccess: seq[ParserToken] -> T,
): T =
  ## Collapses a `ParserResult` into a `T`: `onSuccess` receives the
  ## collected tokens, `onError` the error.
  if parserResult.isOk():
    onSuccess(parserResult.unsafeGet().tokens)
  else:
    onError(parserResult.error())

proc merge[T](t: Builder[T], parser: Parser, tree: seq[T]): Builder[T] =
  ## Builds a new `Builder` from `parser` and `tree` (`t` itself is not read).
  Builder[T]((parser, tree))

proc mapTree[T](builder: BuilderResult[T],
                fn: seq[T] -> seq[T]): BuilderResult[T] =
  ## Applies `fn` to the tree of a successful builder result.
  builder.map(proc(b: Builder[T]): Builder[T] =
    Builder[T]((parser: b.parser, tree: fn(b.tree))))

proc applyParsers*[T](
  builder: Builder[T],
  parsers: seq[Parser -> ParserResult],
  tokenFoldFn: (seq[ParserToken], seq[T]) -> seq[T],
): BuilderResult[T] =
  ## Runs `parsers` from the builder's current position with an empty token
  ## list, then folds the newly produced tokens into the builder's tree via
  ## `tokenFoldFn`.
  ##
  ## On failure the original builder is returned together with the rendered
  ## parser error.
  let parsed = ParserResult.ok(Parser(
      state: builder.parser.state,
      tokens: newSeq[ParserToken](),
    ))
    .parseSeq(parsers)
  if parsed.isOk():
    let next = parsed.unsafeGet()
    BuilderResult[T].ok(
      builder.merge(next, tokenFoldFn(next.tokens, builder.tree)))
  else:
    BuilderResult[T].err((builder, $parsed.error()))

proc applyParsersSeq*[T](
  builder: BuilderResult[T],
  xs: seq[tuple[
    parsers: seq[Parser -> ParserResult],
    tokenFoldFn: (seq[ParserToken], seq[T]) -> seq[T],
  ]]): BuilderResult[T] =
  ## Applies each `(parsers, tokenFoldFn)` step in order via `applyParsers`,
  ## stopping at the first failure.
  xs.foldl(
    a.flatMap((x: Builder[T]) => x.applyParsers(b.parsers, b.tokenFoldFn)),
    builder,
  )

proc foldBuilder*[T, T2](
  builderResult: BuilderResult[T],
  onError: string -> T2,
  onSuccess: seq[T] -> T2,
): T2 =
  ## Collapses a `BuilderResult` into a `T2`: `onSuccess` receives the built
  ## tree, `onError` the error message.
  if builderResult.isOk():
    onSuccess(builderResult.unsafeGet().tree)
  else:
    onError(builderResult.error()[1])

when isMainModule:
  proc getTokens(x: ParserResult): seq[string] =
    ## Token string values on success; prints the error and returns an
    ## empty seq on failure.
    proc onError(err: ParserError): seq[string] =
      echo err
      @[]
    proc onSuccess(xs: seq[ParserToken]): seq[string] =
      xs.mapIt(it.tokenStringValue())
    foldTokens[seq[string]](x, onError, onSuccess)

  proc testParser(x: string,
                  ps: seq[Parser -> ParserResult]): seq[string] =
    initParserResult(x).parseSeq(ps).getTokens()

  let optionalPrefixParser = @[
    optional(ch('_')),
    str("ABC")
  ]
  doAssert "_ABC".testParser(optionalPrefixParser) == @["_", "A", "B", "C"]
  doAssert "ABC".testParser(optionalPrefixParser) == @["A", "B", "C"]

  let andParser = @[
    (ch('A') + ch('B')),
    ch('C'),
  ]
  doAssert "ABC".testParser(andParser) == @["A", "B", "C"]

  let newlineParserTest = @[
    str("ABC"),
    newline
  ]
  doAssert "ABC\n".testParser(newlineParserTest) == @["A", "B", "C", "\n"]