Restore tryTokenize function

This commit is contained in:
Florian Schroedl
2022-08-28 13:48:41 +02:00
parent 19d2cae392
commit 51b34e6a27
2 changed files with 115 additions and 0 deletions

View File

@@ -0,0 +1,113 @@
import std/[
collections/sequtils,
options,
strformat,
strutils,
sugar,
]
import fp/[
resultM
]
import ./builder_types
import ./parser_types
import ./parser_api
# -- Builder API
proc tryTokenize*[T](
  builder: Builder[T],
  builderFns: seq[tuple[
    parserFn: parserFnT,
    tokenizerFn: seq[ParserToken] -> seq[T],
  ]],
  defaultTokenizerFn: seq[ParserToken] -> seq[T],
  stopAtParserFn = newlineOrEol,
  concatTokensFn = concat[T],
): BuilderResult[T] =
  ## Try to tokenize the text in `builder` by checking the `builderFns` seq
  ## for a successful `parserFn`.
  ##
  ## When a `parserFn` succeeds, the matched tokens are converted with that
  ## entry's `tokenizerFn` and merged into `builder.tree` via
  ## `concatTokensFn`. When no parser matches at the current position, a
  ## single step is consumed via `anyCh` and the accumulated unmatched
  ## tokens are later converted with `defaultTokenizerFn`. The loop stops
  ## once `stopAtParserFn` matches (or the parser itself is in an error
  ## state).
  # Only the parser half of the builder tuple is needed here; `_` discards
  # the tree component instead of binding an unused `tree` variable.
  let (parser, _) = builder
  # Mutating accumulators
  var parserAcc: ParserResult = ParserResult.ok(parser)
  var builderAcc: Builder[T] = builder
  while parserAcc.isOk() and parserAcc.flatMap(stopAtParserFn).isErr():
    # Empty the parser tokens as we want to separate them for the next
    # parser in the sequence.
    let emptyParser = parserAcc.map(emptyTokens)
    # Find a matching parser and convert its tokens with its `tokenizerFn`.
    var found = false
    for fn in builderFns:
      let (parserFn, tokenizerFn) = fn
      let parseResult = emptyParser.flatMap(parserFn)
      if parseResult.isOk():
        found = true
        # Convert all previous unmatched tokens via the `defaultTokenizerFn`
        let defaultBuilderTokens = parserAcc
          .foldTokens(
            onErrorFn = _ => newSeq[T](),
            onSuccessFn = defaultTokenizerFn,
          )
        let okParser = parseResult.unsafeGet()
        parserAcc = parseResult.map(emptyTokens)
        builderAcc = initBuilder(
          okParser,
          concatTokensFn(
            builderAcc.tree,
            defaultBuilderTokens,
            tokenizerFn(okParser.tokens),
          )
        )
        break
    if not found:
      # No parser matched here: consume one step (presumably a single
      # character — confirm against `anyCh` in parser_api) so the default
      # tokenizer can pick it up later.
      parserAcc = parserAcc.flatMap(anyCh)
  # Flush any trailing unmatched tokens through the default tokenizer.
  let defaultBuilderTokens = parserAcc
    .foldTokens(
      onErrorFn = _ => newSeq[T](),
      onSuccessFn = defaultTokenizerFn,
    )
  BuilderResult[T].ok(initBuilder(
    builderAcc.parser,
    concatTokensFn(
      builderAcc.tree,
      defaultBuilderTokens,
    ),
  ))
when isMainModule:
  ## Smoke test for `tryTokenize` using a string-valued builder.
  type TestStringBuilderT* = string
  type TestStringBuilder* = Builder[TestStringBuilderT]
  type TestStringBuilderResult* = BuilderResult[TestStringBuilderT]
  block testApi:
    # Parser matching " (" ... ") " and capturing the tokens in between.
    let testParensParser = anyBetween(str(" ("), str(") "))
    # Fallback tokenizer: join the raw tokens into one string element.
    func testDefaultTokenizer(tokens: seq[ParserToken]): seq[string] {.closure.} =
      @[tokens.toString()]
    # Tokenizer for parenthesized matches: wrap the text in "Parens(...)".
    func testParensTokenizer(tokens: seq[ParserToken]): seq[string] {.closure.} =
      @[&"Parens({tokens.toString()})"]
    let testBuilder = initBuilder(
      initParser("""sentence (with parens) and more
And ignore this part"""),
      newSeq[TestStringBuilderT]()
    )
    .tryTokenize(
      builderFns = @[(
        testParensParser,
        testParensTokenizer
      )],
      defaultTokenizerFn = testDefaultTokenizer,
    )
    # `== true` on a bool is redundant; assert the result directly.
    assert testBuilder.isOk()
    # The default stop parser (newlineOrEol) halts tokenizing at the first
    # newline, so the second line of the input is not tokenized.
    assert testBuilder.unsafeGet().tree ==
      @["sentence", "Parens(with parens)", "and more"]

View File

@@ -1,7 +1,9 @@
import parser_types
import parser_api
import builder_types
import builder_api
export parser_types
export parser_api
export builder_types
export builder_api