# Files
# org-parser/src_v2/parser/builder_api.nim
# 2022-10-17 15:30:17 +02:00
#
# 120 lines
# 3.3 KiB
# Nim

import std/[
collections/sequtils,
options,
strformat,
strutils,
sugar,
]
import fp/[
resultM
]
import ./builder_types
import ./parser_types
import ./parser_api
# -- Builder API
proc tryTokenize*[T](
  builder: Builder[T],
  builderFns: seq[tuple[
    parserFn: parserFnT,
    tokenizerFn: seq[ParserToken] -> seq[T],
  ]],
  defaultTokenizerFn: seq[ParserToken] -> seq[T],
  stopAtParserFn = newlineOrEol,
  concatTokensFn = concat[T],
): BuilderResult[T] =
  ## Try to tokenize text in `builder` by checking the `builderFns` seq for a successful `parserFn`.
  ## When a `parserFn` succeeds, tokenize the text in the `Parser` using that entry's `tokenizerFn`
  ## and merge the result into `builder.tree` using the `concatTokensFn`.
  ## When no parser matches, advance one character at a time and tokenize the accumulated
  ## text with the `defaultTokenizerFn`, until `stopAtParserFn` is matched (or input runs out).
  # Only the parser half of the builder tuple is needed here; the tree is
  # carried forward through `builderAcc`.
  # Mutating accumulators
  var parserAcc: ParserResult = ParserResult.ok(builder.parser)
  var builderAcc: Builder[T] = builder
  while parserAcc.isOk() and parserAcc.flatMap(stopAtParserFn).isErr():
    # Empty the parser tokens as we want to separate them for the next parser in the sequence
    let emptyParser = parserAcc.map(emptyTokens)
    # Find a matching parser and convert its tokens with its `tokenizerFn`
    var found = false
    for fn in builderFns:
      let (parserFn, tokenizerFn) = fn
      let parseResult = emptyParser.flatMap(parserFn)
      if parseResult.isOk():
        found = true
        # Convert all previous unmatched tokens via the `defaultTokenizerFn`
        let defaultBuilderTokens = parserAcc
          .foldTokens(
            onErrorFn = _ => newSeq[T](),
            onSuccessFn = defaultTokenizerFn,
          )
        let okParser = parseResult.unsafeGet()
        parserAcc = parseResult.map(emptyTokens)
        builderAcc = initBuilder(
          okParser,
          concatTokensFn(
            builderAcc.tree,
            defaultBuilderTokens,
            tokenizerFn(okParser.tokens),
          )
        )
        break
    if not found:
      # No parser matched at this position: consume a single character and keep scanning.
      parserAcc = parserAcc.flatMap(anyCh)
  # Flush any trailing unmatched tokens with the default tokenizer before returning.
  let defaultBuilderTokens = parserAcc
    .foldTokens(
      onErrorFn = _ => newSeq[T](),
      onSuccessFn = defaultTokenizerFn,
    )
  BuilderResult[T].ok(initBuilder(
    builderAcc.parser,
    concatTokensFn(
      builderAcc.tree,
      defaultBuilderTokens,
    ),
  ))
# -- Stringifiers
func pprint[T](x: Builder): string =
  ## Debug stringifier for `Builder`.
  ## NOTE(review): placeholder — both `x` and the generic `T` are ignored and a
  ## fixed "Builder()" string (with trailing newline) is returned; fill in the
  ## builder's fields when a real pretty-printer is needed.
  &"""Builder()
"""
when isMainModule:
  type TestStringBuilderT* = string
  type TestStringBuilder* = Builder[TestStringBuilderT]
  type TestStringBuilderResult* = BuilderResult[TestStringBuilderT]

  block testApi:
    # Smoke test: spans wrapped in " (" … ") " go through the parens
    # tokenizer, everything else through the default tokenizer; scanning
    # stops at the first newline (the default `stopAtParserFn`).
    let parensParser = anyBetween(str(" ("), str(") "))
    proc defaultTokenizer(tokens: seq[ParserToken]): seq[string] {.closure.} =
      @[tokens.toString()]
    proc parensTokenizer(tokens: seq[ParserToken]): seq[string] {.closure.} =
      @[&"Parens({tokens.toString()})"]
    let built = initBuilder(
      initParser("""sentence (with parens) and more
And ignore this part"""),
      newSeq[TestStringBuilderT]()
    ).tryTokenize(
      builderFns = @[(parensParser, parensTokenizer)],
      defaultTokenizerFn = defaultTokenizer,
    )
    assert built.isOk()
    assert built.unsafeGet().tree == @["sentence", "Parens(with parens)", "and more"]