## Builder API: tokenize a parser's input into a typed token tree.
import std/[
|
|
collections/sequtils,
|
|
options,
|
|
strformat,
|
|
strutils,
|
|
sugar,
|
|
]
|
|
import fp/[
|
|
resultM
|
|
]
|
|
import ./builder_types
|
|
import ./parser_types
|
|
import ./parser_api
|
|
|
|
# -- Builder API
|
|
|
|
proc tryTokenize*[T](
  builder: Builder[T],
  builderFns: seq[tuple[
    parserFn: parserFnT,
    tokenizerFn: seq[ParserToken] -> seq[T],
  ]],
  defaultTokenizerFn: seq[ParserToken] -> seq[T],
  stopAtParserFn = newlineOrEol,
  concatTokensFn = concat[T],
): BuilderResult[T] =
  ## Try to tokenize text in `builder` by checking `builderFns` seq for a successful `parserFn`.
  ## When a `parserFn` is ok, tokenize the text in the `Parser` using the matching `tokenizerFn`
  ## and merge the result into `builder.tree` using the `concatTokensFn`.
  ## When no parser matches, tokenize with the `defaultTokenizerFn` until `stopAtParserFn` is matched.

  # Mutating accumulators: current parser state and the builder holding the tree so far.
  var parserAcc: ParserResult = ParserResult.ok(builder.parser)
  var builderAcc: Builder[T] = builder

  # Consume input until the parser fails or the stop parser matches.
  while parserAcc.isOk() and parserAcc.flatMap(stopAtParserFn).isErr():
    # Empty the parser tokens as we want to separate them for the next parser in the sequence
    let emptyParser = parserAcc.map(emptyTokens)

    # Find a matching parser and convert its tokens with the `tokenizerFn`
    var found = false
    for fn in builderFns:
      let (parserFn, tokenizerFn) = fn

      let parseResult = emptyParser.flatMap(parserFn)
      if parseResult.isOk():
        found = true

        # Convert all previous unmatched tokens via the `defaultTokenizerFn`
        let defaultBuilderTokens = parserAcc
          .foldTokens(
            onErrorFn = _ => newSeq[T](),
            onSuccessFn = defaultTokenizerFn,
          )

        let okParser = parseResult.unsafeGet()
        parserAcc = parseResult.map(emptyTokens)
        builderAcc = initBuilder(
          okParser,
          concatTokensFn(
            builderAcc.tree,
            defaultBuilderTokens,
            tokenizerFn(okParser.tokens),
          )
        )
        break

    if not found:
      # No registered parser matched here: consume a single character so the
      # unmatched text keeps accumulating for the `defaultTokenizerFn` below.
      parserAcc = parserAcc.flatMap(anyCh)

  # Flush any remaining unmatched tokens through the `defaultTokenizerFn`.
  let defaultBuilderTokens = parserAcc
    .foldTokens(
      onErrorFn = _ => newSeq[T](),
      onSuccessFn = defaultTokenizerFn,
    )

  BuilderResult[T].ok(initBuilder(
    builderAcc.parser,
    concatTokensFn(
      builderAcc.tree,
      defaultBuilderTokens,
    ),
  ))
|
|
|
|
# -- Stringifiers
|
|
|
|
func pprint[T](x: Builder[T]): string =
  ## Render a `Builder` for debugging.
  ## Binds the generic parameter to the argument (the original left `T` unused
  ## and ignored `x`, always returning an empty placeholder) and shows the
  ## accumulated token tree.
  &"""Builder(tree: {x.tree})
"""
|
|
|
|
when isMainModule:
  # Concrete instantiations used by the self-test below.
  type TestStringBuilderT* = string
  type TestStringBuilder* = Builder[TestStringBuilderT]
  type TestStringBuilderResult* = BuilderResult[TestStringBuilderT]

  block testApi:
    # Matches a " (...) " span; its tokens go through the parens tokenizer.
    let testParensParser = anyBetween(str(" ("), str(") "))

    proc testDefaultTokenizer(tokens: seq[ParserToken]): seq[string] {.closure.} =
      ## Fallback tokenizer: emit the raw token text as a single string.
      @[tokens.toString()]
    proc testParensTokenizer(tokens: seq[ParserToken]): seq[string] {.closure.} =
      ## Tokenizer for parenthesised spans: wrap the token text in `Parens(..)`.
      @[&"Parens({tokens.toString()})"]

    # The default `stopAtParserFn` (newlineOrEol) stops at the newline, so the
    # second input line is never tokenized.
    let testBuilder = initBuilder(
      initParser("""sentence (with parens) and more
And ignore this part"""),
      newSeq[TestStringBuilderT]()
    )
    .tryTokenize(
      builderFns = @[(
        testParensParser,
        testParensTokenizer
      )],
      defaultTokenizerFn = testDefaultTokenizer,
    )

    # doAssert (not assert) so the self-test still runs under -d:danger.
    doAssert testBuilder.isOk()
    doAssert testBuilder.unsafeGet().tree == @["sentence", "Parens(with parens)", "and more"]
|