From 7c16ece752768da9b402ad44f27eed563a130c74 Mon Sep 17 00:00:00 2001 From: Florian Schroedl Date: Thu, 20 Jan 2022 17:00:00 +0100 Subject: [PATCH] Build text parser --- src/org/org_builder.nim | 17 ++++++ src/org/org_text_delimiter.nim | 76 +++++++++++++++++++++++++- src/org/org_types.nim | 13 +++++ src/parser/parser_internals.nim | 6 +- src/parser/parser_types.nim | 1 + src/parser/utils.nim | 10 ++++ tests/parser/parser_internals.nim | 0 tests/parser/test_parser_internals.nim | 8 +++ 8 files changed, 126 insertions(+), 5 deletions(-) create mode 100644 src/org/org_builder.nim create mode 100644 src/org/org_types.nim create mode 100644 tests/parser/parser_internals.nim create mode 100644 tests/parser/test_parser_internals.nim diff --git a/src/org/org_builder.nim b/src/org/org_builder.nim new file mode 100644 index 0000000..39d9b2c --- /dev/null +++ b/src/org/org_builder.nim @@ -0,0 +1,17 @@ +import ./org_types +import ../parser/parser_types + +type OrgBuilderT* = seq[OrgElement] +type StringBuilder* = Builder[OrgBuilderT] +type OrgBuilderResult* = BuilderResult[OrgBuilderT] + +proc concat*(typeInfo: OrgBuilderT): (seq[ParserToken], seq[OrgBuilderT]) -> seq[OrgBuilderT] = + return proc(xs: seq[ParserToken], ys: seq[OrgBuilderT]): seq[OrgBuilderT] = + return ys & xs.foldl(a & b.tokenStringValue() & seperator, typeInfo) + +proc initStringBuilder*(str: string): StringBuilderResult = + StringBuilderResult + .ok(StringBuilder(( + parser: initParser(str), + tree: newSeq[OrgBuilderT](), + ))) diff --git a/src/org/org_text_delimiter.nim b/src/org/org_text_delimiter.nim index 75a0f3a..1dd67b7 100644 --- a/src/org/org_text_delimiter.nim +++ b/src/org/org_text_delimiter.nim @@ -1,12 +1,16 @@ import std/sugar +import std/collections/sequtils import results import fusion/matching +import ./org_types import ../utils/fp import ../parser/parser_internals import ../parser/parser_types let parseBetweenDelimiter* = proc(delimiterParser: (Parser -> ParserResult)): (Parser -> 
ParserResult) {.closure.} = - ignore(delimiterParser) + anyUntil(delimiterParser + whitespace) + ignore(delimiterParser) + ignore(delimiterParser) + + anyUntil(delimiterParser + whitespace) + + ignore(delimiterParser) let boldParser* = parseBetweenDelimiter(ch('*')) let italicParser* = parseBetweenDelimiter(ch('/')) @@ -15,5 +19,71 @@ let verbatimParser* = parseBetweenDelimiter(ch('=')) let codeParser* = parseBetweenDelimiter(ch('~')) let strikeThroughParser* = parseBetweenDelimiter(ch('+')) -echo initParser("""_foo bar *_ -""").underlinedParser() + +# until parser != delimiterparser +# takeChar and save to current parser +# else: +# saveTokens to new text block +# saveBlock delimiterparser + +# let delimiterParser* = anyUntil(choice(@[ +# boldParser, +# italicParser, +# underlinedParser, +# verbatimParser, +# codeParser, +# strikeThroughParser, +# ]), newline) + + +type OrgBuilderT* = OrgElement +type OrgBuilder* = Builder[OrgBuilderT] +type OrgBuilderResult* = BuilderResult[OrgBuilderT] + +proc makeBoldTokens*(content: string): OrgBuilderT = + OrgBuilderT( + kind: orgBoldText, + content: content, + ) + +proc makeOrgToken*(orgTokenFn: string -> OrgBuilderT): (seq[ParserToken], seq[OrgBuilderT]) -> seq[OrgBuilderT] = + return proc(parserTokens: seq[ParserToken], builderTokens: seq[OrgBuilderT]): seq[OrgBuilderT] = + return builderTokens & parserTokens.foldl(a & b.tokenStringValue(), "").orgTokenFn() + +proc textParser[T]( + builder: Builder[T], + builderFns: seq[tuple[ + parserFn: Parser -> ParserResult, + tokenFoldFn: (seq[ParserToken], seq[T]) -> seq[T], + ]], + stopFn = newline, +): BuilderResult[T] = + let (parser, tree) = builder + + var parserAcc: ParserResult = parser.ok() + var builderAcc: Builder = builder + + while parser.isOk() and parserAcc.flatMap(stopFn).isErr(): + # Empty the parser tokens as we want to separate them for the next parser in the sequence + let emptyParser = parserAcc.map(emptyTokens) + + # Find the first matching parser and convert 
its tokens + # Otherwise leave the raw tokens + var found = false + for builderFn in builderFns: + let (parserFn, tokenFoldFn) = builderFn + let parseResult = emptyParser.flatMap(parserFn) + + if parseResult.isOk(): + let okParser = parseResult.unsafeGet() + + found = true + parserAcc = parseResult + builderAcc = builder.initBuilder( + okParser, + tokenFoldFn(okParser.tokens, builderAcc[1]), + ) + break + + if not found: + parserAcc = parserAcc.flatMap(anyCh) diff --git a/src/org/org_types.nim b/src/org/org_types.nim new file mode 100644 index 0000000..e38ce6b --- /dev/null +++ b/src/org/org_types.nim @@ -0,0 +1,13 @@ +type + orgElementKind* = enum + orgRawText, + orgText, + orgBoldText, + OrgElement* = ref object + children*: seq[OrgElement] + content*: string + + case kind*: orgElementKind + of orgRawText: discard + of orgText: discard + of orgBoldText: discard diff --git a/src/parser/parser_internals.nim b/src/parser/parser_internals.nim index c3137a2..aa30d9b 100644 --- a/src/parser/parser_internals.nim +++ b/src/parser/parser_internals.nim @@ -78,6 +78,8 @@ func ch*(expectedChar: char): (Parser -> ParserResult) {.inline.} = parser: parser, )) +let anyCh* = ch(AllChars) + func str*(s: string): (Parser -> ParserResult) {.inline.} = return func(parser: Parser): ParserResult = var p = parser.ok() @@ -115,8 +117,8 @@ func manyUntil*(acceptFn: Parser -> ParserResult, stopFn: Parser -> ParserResult res = res.flatMap(acceptFn) return res -func anyUntil*(stopFn: Parser -> ParserResult): (Parser -> ParserResult) {.inline.} = - manyUntil(ch(AllChars), stopFn) +proc anyUntil*(stopFn: Parser -> ParserResult): (Parser -> ParserResult) {.inline.} = + manyUntil(anyCh, stopFn) func choice*(parsers: seq[Parser -> ParserResult]): (Parser -> ParserResult) {.inline.} = return proc(parser: Parser): ParserResult = diff --git a/src/parser/parser_types.nim b/src/parser/parser_types.nim index adcd5ee..8e38aca 100644 --- a/src/parser/parser_types.nim +++ b/src/parser/parser_types.nim 
@@ -155,6 +155,7 @@ proc applyParsersSeq*[T]( ]]): BuilderResult[T] = xs.foldl(a.flatMap((x: Builder[T]) => x.applyParsers(b[0], b[1])), builder) + proc foldBuilder*[T, T2]( builderResult: BuilderResult[T], onError: string -> T2, diff --git a/src/parser/utils.nim b/src/parser/utils.nim index 3964f62..405d0ce 100644 --- a/src/parser/utils.nim +++ b/src/parser/utils.nim @@ -19,3 +19,13 @@ proc initStringBuilder*(str: string): StringBuilderResult = parser: initParser(str), tree: newSeq[StringBuilderT](), ))) + +proc fold*[T, E, T2]( + self: Result[T, E], + onError: E -> T2, + onSuccess: T -> T2, +): T2 = + if self.isOk(): + onSuccess(self.unsafeGet()) + else: + onError(self.error()) diff --git a/tests/parser/parser_internals.nim b/tests/parser/parser_internals.nim new file mode 100644 index 0000000..e69de29 diff --git a/tests/parser/test_parser_internals.nim b/tests/parser/test_parser_internals.nim new file mode 100644 index 0000000..416b777 --- /dev/null +++ b/tests/parser/test_parser_internals.nim @@ -0,0 +1,8 @@ +import unittest +import parser/parser_internals +import parser/parser_types + +suite "parser/parser_internals": + test "whitespace": + + echo whitespace(initParser(" ")) == ParserResult.ok(Parser())