From aebd570a373cc8e00b22baf3fc9ee555d256daeb Mon Sep 17 00:00:00 2001 From: Florian Schroedl Date: Thu, 20 Jan 2022 17:00:00 +0100 Subject: [PATCH] Init --- src/nim_org_parse.nim | 69 ++++++++++++++++++++++--- src/test.nim | 103 ++++++++++++++++++++++++++++++++++++++ src/test_cascade.nim | 3 ++ src/types.nim | 22 ++++++++ src/utils/fp.nim | 4 ++ src/utils/parsec_test.nim | 15 ++++++ src/utils/parser.nim | 28 +++++++++++ 7 files changed, 237 insertions(+), 7 deletions(-) create mode 100644 src/test.nim create mode 100644 src/test_cascade.nim create mode 100644 src/types.nim create mode 100644 src/utils/parsec_test.nim create mode 100644 src/utils/parser.nim diff --git a/src/nim_org_parse.nim b/src/nim_org_parse.nim index 57f56b4..5baa356 100644 --- a/src/nim_org_parse.nim +++ b/src/nim_org_parse.nim @@ -3,6 +3,8 @@ import std/re import std/parseutils import std/strformat import std/sugar +import std/strutils +import std/strformat import fusion/matching import fp/maybe import fp/list @@ -12,6 +14,7 @@ import utils/fp type contentT = string type headlingLevelT = int +type tagsT = seq[string] type lineNumberT = int type lineCharT = int @@ -28,6 +31,7 @@ type of Heading: content*: contentT level*: headlingLevelT + tags*: Maybe[tagsT] else: str: string proc `$`*(x: OrgBlock): string = @@ -36,11 +40,16 @@ proc `$`*(x: OrgBlock): string = return &"""OrgBlock( kind: Document, )""" - of Heading(content: @content, level: @level): + of Heading( + content: @content, + level: @level, + tags: @tags, + ): return &"""OrgBlock( kind: Heading, content: {content}, level: {level}, + tags: {tags}, )""" of NotImplemented(str: @str): return &"""OrgBlock( @@ -48,17 +57,47 @@ proc `$`*(x: OrgBlock): string = str: {str}, )""" +proc findHeadlineTagIndex(line: string): Maybe[int] = + line + .just() + .filter(x => x.endsWith(":")) + .map(x => x.rfind({ '\t', ' ' })) + .notNegative() + # Step forward one character and check if it's ':' + .filter(x => line[x + 1] == ':') + proc parseHeadline(line: string): Maybe[OrgBlock] = if line.startsWith("*"): - var token: string - let idx = line.parseWhile(token, validChars = { '*' }) - # Eat the first space character - let content = line[idx + 1 .. ^1] + var stars: string + + let contentIndex = line + .parseWhile(stars, validChars = { '*' }) + .just() + .notNegative() + + let tagIndex = line.findHeadlineTagIndex() + + let res = + case (contentIndex, tagIndex): + of (Some(@contentIndex), Some(@tagIndex)): + ( + line[contentIndex + 1 .. tagIndex], + # Remove the first and last characters as they're so we don't have to remove them after the split + Just(line[tagIndex + 2 .. ^2].split(':')), + ) + else: + ( + line[contentIndex.get() + 1 .. ^1], + Nothing[tagsT]() + ) + + (@content, @tags) := res Just(OrgBlock( kind: Heading, - level: idx, + level: 0, content: content, + tags: tags, )) else: Nothing[OrgBlock]() @@ -76,4 +115,20 @@ proc parseLine(line: string): any = # ) -echo parseLine("**** foo :bar:") +let line = "**** foo: :sdfdsfd:fvf:sdfsd:" + +echo parseLine(line) + + + +# echo line +# .rfind({ '\t', ' ' }) +# .just() +# .notNegative() +# .map(x => x + 1) +# .filter(x => line[x] == ':') +# .map(x => line[x .. ^1]) + +# proc findHeadlineTags(line: string): any = +# if line.endsWith(":"): +# let idx = line.rfind({ '\t', ' ' }) diff --git a/src/test.nim b/src/test.nim new file mode 100644 index 0000000..f48266a --- /dev/null +++ b/src/test.nim @@ -0,0 +1,103 @@ +import std/options +import std/strutils +import std/strformat +import std/collections/sequtils +import results +import fusion/matching +import fp/maybe +import print + +{.experimental: "caseStmtMacros".} + +type + ParsePosition = tuple + column, line, currentLine: int + atNewLine: bool + + ParseError = tuple + unexpected: string + expected: seq[string] + state: ParserState + message: string + + ParserState* = ref object + stream: string + position, lastPosition: int + + Token* = ref object + value: char + + Parser* = ref object + state: ParserState + tokens: seq[Token] + + ParserResult* = Result[Parser, (Parser, string)] + +proc indentKey(x: string, count: int): string = + var y = x.indent(count) + y.delete(0, count - 1) + y + +proc `$`*(x: Token): string = + &"""Token( + value: {x.value}, +)""" + +proc `$`*(x: ParserState): string = + &"""ParserState( + stream: {x.stream}, + position: {x.position}, + lastPosition: {x.lastPosition}, +)""" + +proc `$`*(x: Parser): string = + &"""Parser( + state: {indentKey($x.state, 4)}, + tokens: {x.tokens} + )""" + +proc initParser(str: string): ParserResult = + Parser( + state: ParserState( + stream: str, + position: 0, + lastPosition: 0, + ), + tokens: newSeq[Token](), + ).ok() + +proc ch(parser: Parser, c: char): ParserResult = + let state = parser.state + + if state.stream[0] == c: + Parser( + state: ParserState( + stream: state.stream[1 .. ^1], + position: parser.state.position + 1, + lastPosition: parser.state.position, + ), + tokens: parser.tokens & Token(value: c) + ).ok() + else: + err((parser, "Foo")) + +# type R = Result[int, string] + +# echo R.ok 4 + +echo initParser("Foo") + +# proc parseStars + +# proc parseHeadline(parser: ParserResult[OrgElement]): ParserResult[OrgElement] = +# parser +# .flatMap(parseA) +# # flatMapOptionalToken( +# # content = +# # ) +# # flatMapToken( +# # content = +# # ) +# # flatMapToken( +# # tags +# # ) diff --git a/src/test_cascade.nim b/src/test_cascade.nim new file mode 100644 index 0000000..4e521ce --- /dev/null +++ b/src/test_cascade.nim @@ -0,0 +1,3 @@ +import cascade +import print + diff --git a/src/types.nim b/src/types.nim new file mode 100644 index 0000000..527b3be --- /dev/null +++ b/src/types.nim @@ -0,0 +1,22 @@ +import print + +type + orgElementContent = string + orgElementChildren = seq[OrgElement] + + OrgElementKind = enum + orgDocument, + orgHeadline, + orgText, + + OrgElement = ref object + children*: orgElementChildren + + case kind*: OrgElementKind + of orgHeadline: + level*: int + of orgText: + content: orgElementContent + of orgDocument: discard + +let emptyChildrenSeq: orgElementChildren = newSeq[OrgElement]() diff --git a/src/utils/fp.nim b/src/utils/fp.nim index 7962040..c66f995 100644 --- a/src/utils/fp.nim +++ b/src/utils/fp.nim @@ -14,6 +14,10 @@ proc findMaybeFn*[T, B](fns: seq[T {.nimcall.} -> Maybe[B]], val: T): Maybe[B] = return res return Nothing[B]() +proc notNegative*[int](x: Maybe[int]): Maybe[int] = + ## Maps nil object to nothing + x.filter(i => i >= 0) + when isMainModule: echo @[ (x: int) => (if x == 2: Just("foo") else: Nothing[string]()), diff --git a/src/utils/parsec_test.nim b/src/utils/parsec_test.nim new file mode 100644 index 0000000..832629f --- /dev/null +++ b/src/utils/parsec_test.nim @@ -0,0 +1,15 @@ +import sequtils +import std/sugar +import microparsec +import results + +echo toSeq(1..1000000) + +# let headlineParser = manyTill(ch '*', space) +# .flatMap((stars: seq[char]) => pure(stars)) +# # .flatMap((stars) => manyTill(anyChar, endOfLine) +# # .flatMap(headline => pure(stars, headline)) +# # ) + +# echo headlineParser.parse("*** Headline") +# echo headlineParser.parse("* Headline") diff --git a/src/utils/parser.nim b/src/utils/parser.nim new file mode 100644 index 0000000..7068ae1 --- /dev/null +++ b/src/utils/parser.nim @@ -0,0 +1,28 @@ +import std/parseutils + +proc fastSubstr(s: string; token: var string; start, length: int) = + token.setLen length + for i in 0 ..< length: token[i] = s[i+start] + +proc parseUntilBackwards*(s: string, token: var string, until: string, + start = 0): int {.inline.} = + ## Parses a token and stores it in ``token``. Returns + ## the number of the parsed characters or 0 in case of an error. A token + ## consists of any character that comes before the `until` token. + runnableExamples: + var myToken: string + doAssert parseUntil("Hello World", myToken, "Wor") == 6 + doAssert myToken == "Hello " + doAssert parseUntil("Hello World", myToken, "Wor", 2) == 4 + doAssert myToken == "llo " + var i = s.len + while i > 0: + if until.len > 0 and s[i] == until[until.len - 1]: + var u = 1 + while i+u < s.len and u < until.len and s[i+u] == until[u]: + inc u + if u >= until.len: break + dec(i) + result = i-start + fastSubstr(s, token, start, result) + #token = substr(s, start, i-1)