From 892eec10d5efacb58946eb2cee7da2d6974538fb Mon Sep 17 00:00:00 2001 From: Florian Schroedl Date: Tue, 9 Aug 2022 22:07:30 +0200 Subject: [PATCH] Started refactor --- TODO.org | 10 + src_v2/parser/builder_api.nim | 92 ++++++ src_v2/parser/parser_internals.nim | 242 ++++++++++++++ src_v2/parser/parser_types.nim | 489 +++++++++++++++++++++++++++++ src_v2/parser/utils.nim | 31 ++ src_v2/utils/fp.nim | 37 +++ src_v2/utils/parsec_test.nim | 15 + src_v2/utils/parser.nim | 28 ++ src_v2/utils/printers.nim | 44 +++ src_v2/utils/str.nim | 60 ++++ 10 files changed, 1048 insertions(+) create mode 100644 src_v2/parser/builder_api.nim create mode 100644 src_v2/parser/parser_internals.nim create mode 100644 src_v2/parser/parser_types.nim create mode 100644 src_v2/parser/utils.nim create mode 100644 src_v2/utils/fp.nim create mode 100644 src_v2/utils/parsec_test.nim create mode 100644 src_v2/utils/parser.nim create mode 100644 src_v2/utils/printers.nim create mode 100644 src_v2/utils/str.nim diff --git a/TODO.org b/TODO.org index 48e5c57..f9045ad 100644 --- a/TODO.org +++ b/TODO.org @@ -30,6 +30,16 @@ :LOGBOOK: CLOCK: [2022-08-09 Tue 17:51] :END: + +So I've had some heavy misconceptions while working on the content, +I would preparse all content tokens, just to work over them, but that doesn't make sense. +Now w + +Currently I have a big problem that newline is not pure and interferes with the other function builders + +[[file:src/org/org_block_heading.nim::let contentEndParser = choice(@\[][This]] doesn't work because newline is "impure" + +Gonna convert everything to funcs..... 
** TODO Store the line numbers :PARSER: ** TODO Tag parser :PARSER: ** TODO Creating hot reload :DEV_ENVIRONMENT: diff --git a/src_v2/parser/builder_api.nim b/src_v2/parser/builder_api.nim new file mode 100644 index 0000000..d6ad42e --- /dev/null +++ b/src_v2/parser/builder_api.nim @@ -0,0 +1,92 @@ +import std/sugar +import std/collections/sequtils +import std/strformat +import std/strutils +import results +import fusion/matching +import ./parser_internals +import ./parser_types + +# proc tryParseBuildOrg*[T]( +# builder: Builder[T], +# builderFns: seq[ +# Builder[T] -> BuilderResult[T] +# ], +# stopAtParserFn = endOfStream +# ): BuilderResult[T] = + +# # Mutating accumulators +# var builderAcc: BuilderResult[T] = BuilderResult.ok(builder) + +# while builderAcc.isOk() and builderAcc.tryParser(stopAtParserFn).isErr() +# for fn in builderFns: +# fn + +proc tryParseBuild*[T]( + builder: Builder[T], + builderFns: seq[tuple[ + parserFn: Parser -> ParserResult, + builderFn: seq[ParserToken] -> seq[T], + ]], + defaultBuilderFn: seq[ParserToken] -> seq[T], + stopAtParserFn = newline, + concatFn = concat[T], +): BuilderResult[T] = + ## Parse text in `builder` by checking the `builderFns` list for a successful `parserFn`. + ## The `ok` `parserFn` result will be merged into the `Builder[T].tree` by using the `concatFn`. + ## Otherwise continue taking any character until the `stopAtParserFn` condition is found. + ## Any non-matching tokens will be converted using the `defaultBuilderFn`. 
+ let (parser, tree) = builder + + # Mutating accumulators + var parserAcc: ParserResult = ParserResult.ok(parser) + var builderAcc: Builder[T] = builder + + while parserAcc.isOk() and parserAcc.flatMap(stopAtParserFn).isErr(): + # Empty the parser tokens as we want to separate them for the next parser in the sequence + let emptyParser = parserAcc.map(emptyTokens) + + # Find the first matching parser and convert its tokens + var found = false + for fn in builderFns: + let (parserFn, builderFn) = fn + + let parseResult = emptyParser.flatMap(parserFn) + if parseResult.isOk(): + let okParser = parseResult.unsafeGet() + + # Convert all previous unmatched tokens via the `defaultBuilderFn` + let defaultBuilderTokens = parserAcc + .foldTokens( + onError = _ => newSeq[T](), + onSuccess = defaultBuilderFn, + ) + + found = true + parserAcc = parseResult.map(emptyTokens) + builderAcc = builder.initBuilder( + okParser, + concatFn( + builderAcc.tree, + defaultBuilderTokens, + builderFn(okParser.tokens), + ) + ) + break + + if not found: + parserAcc = parserAcc.flatMap(anyCh) + + let defaultBuilderTokens = parserAcc + .foldTokens( + onError = _ => newSeq[T](), + onSuccess = defaultBuilderFn, + ) + + BuilderResult[T].ok(builder.initBuilder( + builderAcc.parser, + concatFn( + builderAcc.tree, + defaultBuilderTokens, + ), + )) diff --git a/src_v2/parser/parser_internals.nim b/src_v2/parser/parser_internals.nim new file mode 100644 index 0000000..91e58a7 --- /dev/null +++ b/src_v2/parser/parser_internals.nim @@ -0,0 +1,242 @@ +import std/[ + options, + strutils, + strformat, + collections/sequtils, + sugar, +] +import results +import fp/[ + maybe, +] +import ../utils/str +import ./parser_types + +# -- Types + +type parserFnT = proc(t0: Parser): ParserResult {.nosideeffect.} + +# -- Utilities + +func lookBack*(count: int): (parserFnT) = + return func(parser: Parser): ParserResult = + let state = parser.state + let newIndex = max(state.position - count, -1) + + Parser( + state: ParserState( 
stream: state.stream, + position: newIndex, + lastPosition: parser.state.position, + ), + tokens: parser.tokens, + ).ok() + +# -- Parsing Functions + +func ch*(expectedChars: set[char]): parserFnT {.inline.} = + return func(parser: Parser): ParserResult = + let state = parser.state + let newIndex = state.position + 1 + + if newIndex > (state.stream.len - 1): + return err(ParserError( + kind: endOfStringErr, + expected: &"{expectedChars.prettyExpectedSet()}", + index: newIndex, + parser: parser, + )) + else: + let foundChar = state.stream[newIndex] + if foundChar in expectedChars: + return Parser( + state: ParserState( + stream: state.stream, + position: newIndex, + lastPosition: parser.state.position, + ), + tokens: parser.tokens & initParserToken(foundChar) + ).ok() + else: + return err(ParserError( + kind: charMismatchErr, + unexpected: &"{foundChar}", + expected: &"{expectedChars.prettyExpectedSet()}", + index: newIndex, + parser: parser, + )) + +func ch*(expectedChar: char): parserFnT {.inline.} = + return func(parser: Parser): ParserResult = + let state = parser.state + let newIndex = state.position + 1 + + if newIndex > (state.stream.len - 1): + return err(ParserError( + kind: endOfStringErr, + expected: &"{expectedChar}", + index: newIndex, + parser: parser, + )) + else: + let foundChar = state.stream[newIndex] + if expectedChar == foundChar: + return Parser( + state: ParserState( + stream: state.stream, + position: newIndex, + lastPosition: parser.state.position, + ), + tokens: parser.tokens & initParserToken(foundChar) + ).ok() + else: + return err(ParserError( + kind: charMismatchErr, + unexpected: &"{foundChar}", + expected: &"{expectedChar}", + index: newIndex, + parser: parser, + )) + +const anyCh* = ch(AllChars) + +func str*(s: string): parserFnT {.inline.} = + return func(parser: Parser): ParserResult = + var p = parser.ok() + for c in s.items: + p = p.flatMap(ch(c)) + return p + +# -- Parsing API + +func optional*(parserFn: parserFnT): parserFnT 
{.inline.} = + ## Parse characters and ignore failure + return func(parser: Parser): ParserResult = + let newParser = parserFn(parser) + + if newParser.isOk(): + newParser + else: + parser.ok() + +func ignore*(parserFn: parserFnT): parserFnT {.inline.} = + ## Parse characters but throw success tokens away + return func(parser: Parser): ParserResult = + return parserFn(parser) + .map((x: Parser) => Parser( + state: x.state, + tokens: parser.tokens, + )) + +func manyUntilPerformant*(acceptFn: parserFnT, stopFn: parserFnT): parserFnT {.inline.} = + ## Apply `acceptFn` until `stopFn` would match and collapse the consumed span into one string token + return func(parser: Parser): ParserResult = + let startPosition = parser.state.position + var res: ParserResult = parser.ok() + + while res.isOk() and res.flatMap(stopFn).isErr(): + res = res.flatMap(acceptFn) + + return res.map((p: Parser) => Parser( + state: ParserState( + stream: p.state.stream, + position: p.state.position, + lastPosition: p.state.lastPosition, + ), + tokens: @[ + ParserToken( + kind: parserTokenString, + stringValue: p.state.stream[(startPosition + 1)..p.state.position], + ) + ] + )) + +func anyUntilPerformant*(stopFn: parserFnT): parserFnT {.inline.} = + manyUntilPerformant(ch(AllChars), stopFn) + +func manyUntil*(acceptFn: parserFnT, stopFn: parserFnT): parserFnT {.inline.} = + ## Apply `acceptFn` repeatedly until `stopFn` would match, keeping every accepted token + return func(parser: Parser): ParserResult = + var res: ParserResult = parser.ok() + while res.isOk() and res.flatMap(stopFn).isErr(): + res = res.flatMap(acceptFn) + return res + +func anyUntil*(stopFn: parserFnT): parserFnT {.inline.} = + manyUntil(anyCh, stopFn) + +func choice*(parsers: seq[parserFnT]): parserFnT {.inline} = + return func(parser: Parser): ParserResult {.closure, nosideeffect.} = + var errors: seq[ParserResult] = newSeq[ParserResult]() + var found = Nothing[ParserResult]() + + for fn in parsers: + let fnResult: ParserResult = fn(parser) + + if fnResult.isOk(): + found = fnResult.just + break + else: 
+ errors = errors & fnResult + + return found + .fold( + func(): ParserResult = + let prettyErrors = errors.map((x: ParserResult) => x.error().expected) + err(ParserError( + kind: choiceMismatchErr, + index: parser.state.position + 1, + expected: &"Choice ({prettyErrors})", + unexpected: (if errors.len > 0: errors[0].error().unexpected else: ""), + parser: parser, + )), + func(x: ParserResult): ParserResult = x, + ) + + +func `+`*(parserFnA: parserFnT, parserFnB: parserFnT): parserFnT {.inline.} = + ## Sequence two parsers: run `parserFnA`, then `parserFnB` on its result; fails if either fails + return func(parser: Parser): ParserResult = + parserFnA(parser).flatMap(parserFnB) + +func parseSeq*(parser: ParserResult, xs: seq[parserFnT]): ParserResult = + xs.foldl(a.flatMap(b), parser) + +# -- Parsing Aliases + +func endOfStream*(parser: Parser): ParserResult = + let index = parser.state.position + 1 + if index == parser.state.stream.len: + ok(parser) + else: + err(ParserError( + kind: endOfStringErr, + expected: &"EndOfString", + index: index, + parser: parser, + )) + +const newlineParser = choice(@[ + ch(NewLines), + endOfStream, +]) + +func newline*(parser: Parser): ParserResult = + newlineParser(parser) + .mapErr((x: ParserError) => x.setErrorExpectedField("Newline")) + +const whitespaceParser = choice(@[ + ch(Whitespace), + newlineParser, +]) +func whitespace*(parser: Parser): ParserResult = + whitespaceParser(parser) + .mapErr((x: ParserError) => x.setErrorExpectedField("Whitespace")) + +# -- Parsing Helpers + +const parseBetweenDelimiter* = func(start: parserFnT, stop: parserFnT): parserFnT {.closure.} = + ignore(start) + anyUntil(stop + whitespace) + ignore(stop) + +const parseBetweenPair* = func(delimiterParser: parserFnT): parserFnT {.closure.} = + parseBetweenDelimiter(delimiterParser, delimiterParser) diff --git a/src_v2/parser/parser_types.nim b/src_v2/parser/parser_types.nim new file mode 100644 index 0000000..4ea88c2 --- /dev/null +++ b/src_v2/parser/parser_types.nim @@ -0,0 +1,489 @@ +import std/strutils +import std/strformat +import
std/collections/sequtils +import std/sugar +import results +import fp/maybe +import fusion/matching +import ../utils/str +import ../utils/fp + +{.experimental: "caseStmtMacros".} + +type + ParserState* = ref object + stream*: string + position*, lastPosition*: int + + parserTokenCharValueT* = char + parserTokenStringValueT* = string + ParserTokenKind* = enum + parserTokenChar + parserTokenString + ParserToken* = ref object + case kind*: ParserTokenKind + of parserTokenChar: + charValue*: parserTokenCharValueT + of parserTokenString: + stringValue*: parserTokenStringValueT + + Parser* = ref object + state*: ParserState + tokens*: seq[ParserToken] + ParserErrorKind* = enum + choiceMismatchErr + charMismatchErr + endOfStringErr + ParserError* = ref object + kind*: ParserErrorKind + unexpected*: string + expected*: string + index*: int + parser*: Parser + ParserResult* = Result[Parser, ParserError] + + Builder*[T] = tuple[ + parser: Parser, + tree: seq[T] + ] + + BuilderResult*[T] = Result[Builder[T], (Builder[T], string)] + + # SingleBuilder*[T] = tuple[ + # parser: Parser, + # tree: T + # ] + # SingleBuilderResult*[T] = Result[SingleBuilder[T], (SingleBuilder[T], string)] + +# -- Initalizers + +func initParserToken*(x: char): ParserToken = ParserToken(kind: parserTokenChar, charValue: x) +func initParserToken*(x: string): ParserToken = ParserToken(kind: parserTokenString, stringValue: x) + +func initParser*(str: string): Parser = + Parser( + state: ParserState( + stream: str, + position: -1, + lastPosition: 0, + ), + tokens: newSeq[ParserToken](), + ) + +proc initParserResult*(str: string): ParserResult = + ParserResult.ok(initParser(str)) + +proc initBuilder*[T](t: Builder[T], parser: Parser, tree: seq[T]): Builder[T] = + Builder[T](( + parser, + tree + )) + +# -- Getters + +func tokenStringValue*(x: ParserToken): string = + ## Get the Token value `x` as a string. 
+ case x.kind: + of parserTokenChar: + $x.charValue + of parserTokenString: + x.stringValue + +func tokensToString*(tokens: seq[ParserToken]): string = + tokens.foldl(a & b.tokenStringValue(), "") + +# -- Modifiers + +func flattenParserTokens*(parser: Parser): ParserResult = + return ParserResult.ok( + Parser( + state: parser.state, + tokens: @[ + ParserToken( + kind: parserTokenString, + stringValue: parser.tokens.foldl(a & b.tokenStringValue(), "") + ) + ] + ) + ) + +func emptyTokens*(parser: Parser): Parser = + Parser( + state: parser.state, + tokens: newSeq[ParserToken](), + ) + +proc foldTokens*[T]( + parserResult: ParserResult, + onError: ParserError -> T, + onSuccess: seq[ParserToken] -> T, +): T = + if parserResult.isOk(): + onSuccess(parserResult.unsafeGet().tokens) + else: + let err = parserResult.error() + onError(err) + +func setErrorExpectedField*(err: ParserError, expected: string): ParserError = + ParserError( + kind: err.kind, + unexpected: err.unexpected, + expected: expected, + index: err.index, + parser: err.parser, + ) + +proc mapTree*[T](builder: BuilderResult[T], fn: seq[T] -> seq[T]): BuilderResult[T] = + builder.map(proc(b: Builder[T]): Builder[T] = Builder(( + parser: b[0], + tree: fn(b[1]), + ))) + +proc tryParser*[T]( + builder: Builder[T], + parser: Parser -> ParserResult, +): BuilderResult[T] = + ## Try out a `parser` on a `builder` + ## When successful return the original builder, otherwise return an error + ParserResult.ok(Parser( + state: builder[0].state, + tokens: @[] + )) + .flatMap(parser) + .foldTokens( + (err: ParserError) => BuilderResult[T].err((builder, "Error")), + (newTokens: seq[ParserToken]) => BuilderResult[T].ok(builder), + ) + +proc tryParser*[T]( + builder: BuilderResult[T], + parser: Parser -> ParserResult, +): BuilderResult[T] = + ## Try out a `parser` on a `builder` result + ## When successful return the ok builder, otherwise return an error + builder.flatMap((x: Builder[T]) => tryParser(x, parser)) + +proc
applyParsersToSingle*[T]( + builder: Builder[T], + parsers: seq[Parser -> ParserResult], + tokenFoldFn: (seq[ParserToken], T) -> T, + optional = false, + initT: T, + isFirst: bool, +): BuilderResult[T] = + # Apply the current parsing functions and convert to text tokens wrapped in ParserResult + let newParser = ParserResult.ok(Parser( + state: builder[0].state, + tokens: @[] + )) + .parseSeq(parsers) + + newParser + .foldTokens( + (err: ParserError) => BuilderResult[T].err((builder, "foo")), + (newTokens: seq[ParserToken]) => ( + if optional and newTokens.len == 0: + BuilderResult[T].ok(builder) + else: + BuilderResult[T].ok( + builder.initBuilder( + newParser.unsafeGet(), + builder.tree + .last() + .filter(x => not isFirst) + .orElse(just(initT)) + .map((x: T) => @[tokenFoldFn(newTokens, x)]) + .getOrElse(newSeq[T]()) + ) + ) + ) + ) + +## TODO Implement applyParsersSeqToSingle with this function by forming the concatFn +proc applyParsersToSeq*[T]( + builder: Builder[T], + parsers: seq[Parser -> ParserResult], + tokenFoldFn: (seq[ParserToken], T) -> T, + # concatFn = `&`, + optional = false, + isFirst: bool, +): BuilderResult[T] = + # Apply the current parsing functions and convert to text tokens wrapped in ParserResult + let newParser = ParserResult.ok(Parser( + state: builder[0].state, + tokens: @[] + )) + .parseSeq(parsers) + + newParser + .foldTokens( + (err: ParserError) => BuilderResult[T].err((builder, "foo")), + (newTokens: seq[ParserToken]) => ( + if optional and newTokens.len == 0: + BuilderResult[T].ok(builder) + else: + BuilderResult[T].ok( + builder.initBuilder( + newParser.unsafeGet(), + builder.tree & builder.tree + ) + ) + ) + ) + +proc applyParsersSeqToSingle*[T]( + builderResult: BuilderResult[T], + initT: T, + xs: seq[tuple[ + parsers: seq[Parser -> ParserResult], + tokenFoldFn: (seq[ParserToken], T) -> T, + ignoreEmpty: bool, + ]], +): BuilderResult[T] = + xs.foldl( + a.flatMap((builder: Builder[T]) => applyParsersToSingle( + builder = builder, 
+ parsers = b.parsers, + tokenFoldFn = b.tokenFoldFn, + optional = b.ignoreEmpty, + initT = initT, + isFirst = a == builderResult + )), + builderResult + ) + +proc applyParsersSeqToSeq*[T]( + builderResult: BuilderResult[T], + xs: seq[tuple[ + parsers: seq[Parser -> ParserResult], + tokenFoldFn: (seq[ParserToken], T) -> T, + ignoreEmpty: bool, + # concatFn: (seq[T], seq[T]) -> seq[T], + ]], +): BuilderResult[T] = + xs.foldl( + a.flatMap((builder: Builder[T]) => applyParsersToSeq( + builder = builder, + parsers = b.parsers, + tokenFoldFn = b.tokenFoldFn, + optional = b.ignoreEmpty, + isFirst = a == builderResult, + # concatFn = concatFn + )), + builderResult + ) + +proc applyParsers*[T]( + builder: Builder[T], + parsers: seq[Parser -> ParserResult], + tokenFoldFn: (seq[ParserToken], seq[T]) -> seq[T], +): BuilderResult[T] = + # proc nested(b: Builder[T]): BuilderResult[T] = + let newParser = ParserResult.ok(Parser( + state: builder[0].state, + tokens: @[] + )) + .parseSeq(parsers) + + newParser + .foldTokens( + (err: ParserError) => BuilderResult[T].err((builder, "foo")), + (newTokens: seq[ParserToken]) => BuilderResult[T].ok( + builder.initBuilder(newParser.unsafeGet(), tokenFoldFn(newTokens, builder[1])) + ), + ) + +proc applyParsersSeq*[T]( + builder: BuilderResult[T], + xs: seq[tuple[ + parsers: seq[Parser -> ParserResult], + tokenFoldFn: (seq[ParserToken], seq[T]) -> seq[T], + ]]): BuilderResult[T] = + xs.foldl(a.flatMap((x: Builder[T]) => x.applyParsers(b[0], b[1])), builder) + + +proc foldBuilder*[T, T2]( + builderResult: BuilderResult[T], + onError: string -> T2, + onSuccess: seq[T] -> T2, +): T2 = + if builderResult.isOk(): + onSuccess(builderResult.unsafeGet().tree) + else: + let err = builderResult.error() + onError(err[1]) + +# proc parseBuildOr*[T]( +# ## Checks +# builderResult: BuilderResult[T], +# builders: seq[tuple[ +# parsers: seq[Parser -> ParserResult], +# tokenFoldFn: (seq[ParserToken], T) -> T, +# ]], +# defaultBuilder: tuple[ +# defaultParser: 
Parser -> ParserResult, +# defaultFoldFn: (seq[ParserToken], T) -> T, +# ], +# until: +# ): BuilderResult[T] = + + +# -- Stringifiers + +func pprint*(x: ParserToken): string = + let str = tokenStringValue(x) + let escapedChar = if str == "\n": "\\n" + else: str + &"""ParserToken("{escapedChar}") +""" + +func `$`*(x: ParserToken): string = pprint(x) + +const LEFT_HIGHLIGHT_CHAR = ">>" +const RIGHT_HIGHLIGHT_CHAR = "<<" +func highlightStreamPosition(stream: string, position: int): string = + if position < 0: return stream + if position > stream.len - 1: return stream + + let aIndex = position - 1 + let a = if aIndex < 0: "" + else: stream[0..aIndex] + + let ch = stream[position] + + let bIndex = position + 1 + let b = if bIndex > stream.len - 1: "" + else: stream[bIndex..stream.len - 1] + + return a & LEFT_HIGHLIGHT_CHAR & ch & RIGHT_HIGHLIGHT_CHAR & b + +func highlightStreamPosition2(stream: string, position: int): string = + if position < 0: return stream + if position > stream.len - 1: return stream + + let ch = stream[position] + + let (lineStartPosition, lineEndPosition) = + case ch: + of '\n': + let lineStartPosition = (position - 1).max(0) + let lineEndPosition = (position + 1).min(stream.len - 1) + (lineStartPosition, lineEndPosition) + else: + (position, position) + + let startIndex = stream.rfind("\n", 0, lineStartPosition) + let lineStartIndex = + case startIndex: + of -1: 0 + else: startIndex + 1 + + let endIndex = stream.find("\n", lineEndPosition) + let lineEndIndex = + case endIndex: + of -1: stream.len + else: endIndex + + let spaceChars = " ".repeat((position - lineStartIndex).max(0)) + let lineChars = "_".repeat((lineEndIndex - position).max(0) + 10) + + let escapedChar = + case ch: + of '\n': "\\n" + of ' ': "\\s" + else: $ch + + let insertMessageAtIndex = + case ch: + # Print indicator for newlines on the previous line, which looks better for the reader + of '\n': (lineEndIndex - 1).max(1) + else: lineEndIndex + + let beforeNewline = + case 
(startIndex, ch): + # Always print newline for newline at the stream begin + of (-1, '\n'): "\n" + # Don't insert a newline when the character is a newline between newlines + of (_, '\n'): "" + else: "\n" + + # debugEcho "char: " & $ch + # debugEcho "position: " & $position + # debugEcho "startIndex: " & $lineStartIndex + # debugEcho "endIndex: " & $lineEndIndex + # debugEcho "insertMessageAtIndex: " & $insertMessageAtIndex + + stream.dup(insert(&"{beforeNewline}{spaceChars}^{lineChars} Char at \"{escapedChar}\"\n", insertMessageAtIndex)) + +proc `$`*(x: ParserState): string = + &"""ParserState( + stream: "{x.stream.highlightStreamPosition(x.position)}", + position: {x.position}, + lastPosition: {x.lastPosition}, +)""" + +proc `$`*(x: Parser): string = + &"""Parser( + state: {indentAfterNewline($x.state, 2)}, + tokens: {indentAfterNewline($x.tokens, 2)}, +)""" + +proc `$`*(x: ParserError): string = + case x: + of charMismatchErr(expected: @expected, parser: @parser, index: @index, unexpected: @unexpected): + # TODO: Only works for single line right now + let original = parser.state.stream + .deleteAfterNewline(parser.state.position) + let errSpace = " ".repeat(max(0, index)) + + &"""Parsing Error (Character Mismatch Error): +{original} +{errSpace}^ Expected '{expected}' but got '{unexpected}'""" + of choiceMismatchErr(expected: @expected, parser: @parser, index: @index, unexpected: @unexpected): + let original = parser.state.stream + .deleteAfterNewline(parser.state.position) + let errSpace = " ".repeat(max(0, index)) + + &"""Parsing Error (Choice Mismatch Error): +{original} +{errSpace}^ Expected '{expected}' but got '{unexpected}'""" + + of endOfStringErr(parser: @parser, index: @index): + let original = parser.state.stream + .deleteAfterNewline(parser.state.position) + let errSpace = " ".repeat(max(0, index)) + + &"""Parsing Error (EndOfString Expected): +{original} +{errSpace}^ Expected 'EndOfString' at {index} but got {original.len - 1}""" + + else: 
"ParseError" + +when isMainModule: + let test1 = """ABC + +D + +EFG""" + echo test1.highlightStreamPosition2(test1.find("B")) + echo "==============" + echo test1.highlightStreamPosition2(test1.find("C")) + echo "==============" + echo test1.highlightStreamPosition2(test1.find("D")) + echo "==============" + echo test1.highlightStreamPosition2(test1.find("\n")) + echo "==============" + echo test1.highlightStreamPosition2(test1.find("\n", test1.find("\n") + 1)) + + + # echo "\n1\n".rfind('\n', 0, 2) + + # block highlightStreamPosition: + # let s = "abc" + # # Out of bounds + # assert s.highlightStreamPosition(-1) == s + # assert s.highlightStreamPosition(s.len) == s + # # Regular highlighting + # assert s.highlightStreamPosition(0) == LEFT_HIGHLIGHT_CHAR & "a" & RIGHT_HIGHLIGHT_CHAR & "bc" + # assert s.highlightStreamPosition(1) == "a" & LEFT_HIGHLIGHT_CHAR & "b" & RIGHT_HIGHLIGHT_CHAR & "c" + # assert s.highlightStreamPosition(2) == "ab" & LEFT_HIGHLIGHT_CHAR & "c" & RIGHT_HIGHLIGHT_CHAR diff --git a/src_v2/parser/utils.nim b/src_v2/parser/utils.nim new file mode 100644 index 0000000..405d0ce --- /dev/null +++ b/src_v2/parser/utils.nim @@ -0,0 +1,31 @@ +import std/sugar +import std/collections/sequtils +import results +import ./parser_types + +type StringBuilderT* = string +type StringBuilder* = Builder[StringBuilderT] +type StringBuilderResult* = BuilderResult[StringBuilderT] + +proc stringConcat*(typeInfo: StringBuilderT, seperator = ""): + (seq[ParserToken], seq[StringBuilderT]) -> seq[StringBuilderT] = + + return proc(xs: seq[ParserToken], ys: seq[StringBuilderT]): seq[StringBuilderT] = + return ys & xs.foldl(a & b.tokenStringValue() & seperator, typeInfo) + +proc initStringBuilder*(str: string): StringBuilderResult = + StringBuilderResult + .ok(StringBuilder(( + parser: initParser(str), + tree: newSeq[StringBuilderT](), + ))) + +proc fold*[T, E, T2]( + self: Result[T, E], + onError: E -> T2, + onSuccess: T -> T2, +): T2 = + if self.isOk(): + 
+ onSuccess(self.unsafeGet()) + else: + onError(self.error()) diff --git a/src_v2/utils/fp.nim b/src_v2/utils/fp.nim new file mode 100644 index 0000000..aad5f52 --- /dev/null +++ b/src_v2/utils/fp.nim @@ -0,0 +1,37 @@ +import std/sugar +import fp/maybe +import results + +func last*[T](xs: seq[T]): Maybe[T] = + if xs.len == 0: + nothing(T) + else: + just(xs[^1]) + +template isSome*(self: Result): bool = self.isOk() +template isNone*(self: Result): bool = self.isErr() + +proc findMaybe*[T](xs: seq[T], fn: T -> bool): Maybe[T] = + for x in xs: + if fn(x): + return Just(x) + return Nothing[T]() + +proc findMaybeFn*[T, B](fns: seq[T {.nimcall.} -> Maybe[B]], val: T): Maybe[B] = + for fn in fns: + let res = fn(val) + if res.isDefined(): + return res + return Nothing[B]() + +proc notNegative*[T: SomeInteger](x: Maybe[T]): Maybe[T] = + ## Keeps the wrapped value only when it is >= 0; negative values become nothing + x.filter(i => i >= 0) + +when isMainModule: + echo @[ + (x: int) => (if x == 2: Just("foo") else: Nothing[string]()), + ].findMaybeFn(2) + + assert last(@[1,2,3]) == just(3) + assert last[int](@[]) == nothing(int) diff --git a/src_v2/utils/parsec_test.nim b/src_v2/utils/parsec_test.nim new file mode 100644 index 0000000..832629f --- /dev/null +++ b/src_v2/utils/parsec_test.nim @@ -0,0 +1,15 @@ +import sequtils +import std/sugar +import microparsec +import results + +echo toSeq(1..1000000) + +# let headlineParser = manyTill(ch '*', space) +# .flatMap((stars: seq[char]) => pure(stars)) +# # .flatMap((stars) => manyTill(anyChar, endOfLine) +# # .flatMap(headline => pure(stars, headline)) +# # ) + +# echo headlineParser.parse("*** Headline") +# echo headlineParser.parse("* Headline") diff --git a/src_v2/utils/parser.nim b/src_v2/utils/parser.nim new file mode 100644 index 0000000..7068ae1 --- /dev/null +++ b/src_v2/utils/parser.nim @@ -0,0 +1,28 @@ +import std/parseutils + +proc fastSubstr(s: string; token: var string; start, length: int) = + token.setLen length + for i in 0 ..< length: token[i] = s[i+start] + +proc
parseUntilBackwards*(s: string, token: var string, until: string, + start = 0): int {.inline.} = + ## Scans ``s`` from the end for the last occurrence of `until` at or after + ## `start` and stores everything between `start` and that match in ``token``. + ## Returns the token length, or 0 (and an empty token) when there is no match. + runnableExamples: + var myToken: string + doAssert parseUntilBackwards("Hello World", myToken, "o") == 7 + doAssert myToken == "Hello W" + doAssert parseUntilBackwards("Hello World", myToken, "o", 2) == 5 + doAssert myToken == "llo W" + var i = s.len - until.len + while i >= start: + if until.len > 0 and s[i] == until[0]: + var u = 1 + while i+u < s.len and u < until.len and s[i+u] == until[u]: + inc u + if u >= until.len: break + dec(i) + result = max(i - start, 0) + fastSubstr(s, token, start, result) + #token = substr(s, start, i-1) diff --git a/src_v2/utils/printers.nim b/src_v2/utils/printers.nim new file mode 100644 index 0000000..db9cb0e --- /dev/null +++ b/src_v2/utils/printers.nim @@ -0,0 +1,44 @@ +import std/[ + options, + sequtils, + strformat, + strutils, + sugar, +] + +const INDENT_SIZE* = 2; + +func stringifyFields*( + xs: seq[tuple[ + field: string, + value: string, + print: bool + ]], + indent = 0, +): string = + xs + .filter(x => x.print) + .map(x => x.field & ": " & $x.value) + .join(",\n") + +func stringifyBlock*(blockName: string, indent = 0, xs: varargs[string]): string = + let fieldIndent = indent + INDENT_SIZE + + concat( + @[&"{blockName}("], + @xs.mapIt(it.indent(fieldIndent)), + @[")"], + ) + .join("\n") + +func stringifySeq*[T](xs: seq[T], stringifyFn: (T) -> string, indent = 0): string = + let fieldIndent = indent + INDENT_SIZE + @[ + "@[", + xs + .mapIt(it.stringifyFn()) + .join(",\n") + .indent(fieldIndent), + "]", + ] + .join("\n") diff --git a/src_v2/utils/str.nim b/src_v2/utils/str.nim new file mode 100644 index 0000000..d9f242e --- /dev/null +++ b/src_v2/utils/str.nim @@ -0,0 +1,60 @@ +import std/[ + strutils, + math, +] +import fusion/matching + +{.experimental: 
"caseStmtMacros".} + +func prettyExpectedSet*(x: set[char]): string = + ## Pretty print value for a set `x` of characters + case x: + of AllChars: + "AllChars {'\x00'..'\xFF'}" + of Digits: + "Digits {'0'..'9'}" + of HexDigits: + "HexDigits {'0'..'9', 'A'..'F', 'a'..'f'}" + of Letters: + "Letters {'A'..'Z', 'a'..'z'}" + of Newlines: + "Newlines {'\r', '\n'}" + of Whitespace: + "Whitespace {' ', '\t', '\v', '\r', '\n', '\f'}" + else: + $x + +func indentAfterNewline*(str: string, count: int): string = + ## Indent lines following the first line of `str` by `count`. + ## Useful for indenting nested keys of stringify functions. + var strDup = str.indent(count) + strDup.delete(0..count - 1) + strDup + +proc safeDelete*(str: string, slice: Slice[int]): string = + ## Deletes the items `str[slice]`, ignoring elements out of range. + if slice.a > str.len - 1: + str + else: + let fromIndex = clamp(slice.a, 0..str.len) + let toIndex = clamp(slice.b, 0..str.len - 1) + + var strDup = str + strDup.delete(fromIndex..toIndex) + strDup + +func findAndDelete*(str: string, chars: set[char], start = 0, last = str.len - 1): string = + ## Find the next instance of `chars` from `start`. + ## When found delete characters until `last`. + # Prevent passing negative numbers (e.g.: initial parser) + if start >= 0 and last >= 0: + let startChar = str.find(chars, start, last) + if startChar == -1: + str + else: + str.safeDelete(startChar..last) + else: str + +func deleteAfterNewline*(str: string, start = 0): string = + ## Delete string after next Newline from `start`. + findAndDelete(str, Newlines, start)