From 51b34e6a27619804c742bcb62e775e2ef9e45276 Mon Sep 17 00:00:00 2001
From: Florian Schroedl
Date: Sun, 28 Aug 2022 13:48:41 +0200
Subject: [PATCH] Restore tryTokenize function

---
 src_v2/parser/builder_api.nim | 113 ++++++++++++++++++++++++++++++++++
 src_v2/parser/parser.nim      |   2 +
 2 files changed, 115 insertions(+)
 create mode 100644 src_v2/parser/builder_api.nim

diff --git a/src_v2/parser/builder_api.nim b/src_v2/parser/builder_api.nim
new file mode 100644
index 0000000..e213063
--- /dev/null
+++ b/src_v2/parser/builder_api.nim
@@ -0,0 +1,113 @@
+import std/[
+  collections/sequtils,
+  options,
+  strformat,
+  strutils,
+  sugar,
+]
+import fp/[
+  resultM
+]
+import ./builder_types
+import ./parser_types
+import ./parser_api
+
+# -- Builder API
+
+proc tryTokenize*[T](
+  builder: Builder[T],
+  builderFns: seq[tuple[
+    parserFn: parserFnT,
+    tokenizerFn: seq[ParserToken] -> seq[T],
+  ]],
+  defaultTokenizerFn: seq[ParserToken] -> seq[T],
+  stopAtParserFn = newlineOrEol,
+  concatTokensFn = concat[T],
+): BuilderResult[T] =
+  ## Try to tokenize text in `builder` by checking the `builderFns` seq for a successful `parserFn`.
+  ## When a `parserFn.ok` is found, tokenize the text in the `Parser` using the current `tokenizerFn`
+  ## and merge the result into `builder.tree` using the `concatTokensFn`.
+  ## When no parser matches, tokenize with the `defaultTokenizerFn` until `stopAtParserFn` is matched.
+
+  let (parser, tree) = builder
+  # Mutating accumulators
+  var parserAcc: ParserResult = ParserResult.ok(parser)
+  var builderAcc: Builder[T] = builder
+
+  while parserAcc.isOk() and parserAcc.flatMap(stopAtParserFn).isErr():
+    # Empty the parser tokens as we want to separate them for the next parser in the sequence
+    let emptyParser = parserAcc.map(emptyTokens)
+
+    # Find a parser and convert its tokens with the `tokenizerFn`
+    var found = false
+    for fn in builderFns:
+      let (parserFn, tokenizerFn) = fn
+
+      let parseResult = emptyParser.flatMap(parserFn)
+      if parseResult.isOk():
+        found = true
+
+        # Convert all previous unmatched tokens via the `defaultTokenizerFn`
+        let defaultBuilderTokens = parserAcc
+          .foldTokens(
+            onErrorFn = _ => newSeq[T](),
+            onSuccessFn = defaultTokenizerFn,
+          )
+
+        let okParser = parseResult.unsafeGet()
+        parserAcc = parseResult.map(emptyTokens)
+        builderAcc = initBuilder(
+          okParser,
+          concatTokensFn(
+            builderAcc.tree,
+            defaultBuilderTokens,
+            tokenizerFn(okParser.tokens),
+          )
+        )
+        break
+
+    if not found:
+      parserAcc = parserAcc.flatMap(anyCh)
+
+  let defaultBuilderTokens = parserAcc
+    .foldTokens(
+      onErrorFn = _ => newSeq[T](),
+      onSuccessFn = defaultTokenizerFn,
+    )
+
+  BuilderResult[T].ok(initBuilder(
+    builderAcc.parser,
+    concatTokensFn(
+      builderAcc.tree,
+      defaultBuilderTokens,
+    ),
+  ))
+
+when isMainModule:
+  type TestStringBuilderT* = string
+  type TestStringBuilder* = Builder[TestStringBuilderT]
+  type TestStringBuilderResult* = BuilderResult[TestStringBuilderT]
+
+  block testApi:
+    let testParensParser = anyBetween(str(" ("), str(") "))
+
+    func testDefaultTokenizer(tokens: seq[ParserToken]): seq[string] {.closure.} =
+      @[tokens.toString()]
+    func testParensTokenizer(tokens: seq[ParserToken]): seq[string] {.closure.} =
+      @[&"Parens({tokens.toString()})"]
+
+    let testBuilder = initBuilder(
+      initParser("""sentence (with parens) and more
+And ignore this part"""),
+      newSeq[TestStringBuilderT]()
+    )
+    .tryTokenize(
+      builderFns = @[(
+        testParensParser,
+        testParensTokenizer
+      )],
+      defaultTokenizerFn = testDefaultTokenizer,
+    )
+
+    assert testBuilder.isOk() == true
+    assert testBuilder.unsafeGet().tree == @["sentence", "Parens(with parens)", "and more"]
diff --git a/src_v2/parser/parser.nim b/src_v2/parser/parser.nim
index 5fb2662..6e22075 100644
--- a/src_v2/parser/parser.nim
+++ b/src_v2/parser/parser.nim
@@ -1,7 +1,9 @@
 import parser_types
 import parser_api
 import builder_types
+import builder_api

 export parser_types
 export parser_api
 export builder_types
+export builder_api