Files
org-parser/src/parser/parser.nim

436 lines
12 KiB
Nim

import std/options
import std/strutils
import std/strformat
import std/collections/sequtils
import std/sugar
import std/collections/tables
import results
import fusion/matching
import fp/maybe
import ../utils/str
{.experimental: "caseStmtMacros".}
type
ParserState* = ref object
stream: string
position, lastPosition: int
parserTokenCharValueT* = char
parserTokenStringValueT* = string
parserTokenKeyValuePairValueT* = tuple[k: string, v: string]
ParserTokenKind* = enum
parserTokenChar
parserTokenString
# parserTokenKeyValuePair
ParserToken* = ref object
case kind*: ParserTokenKind
of parserTokenChar:
charValue*: parserTokenCharValueT
of parserTokenString:
stringValue*: parserTokenStringValueT
# of parserTokenKeyValuePair:
# keyValuePairValue*: parserTokenKeyValuePairValueT
Parser* = ref object
state*: ParserState
tokens*: seq[ParserToken]
ParseErrorKind = enum
choiceMismatchErr
charMismatchErr
endOfStringErr
ParserError = ref object
kind: ParseErrorKind
unexpected: string
expected: string
index: int
parser: Parser
ParserResult* = Result[Parser, ParserError]
Builder*[T] = tuple[
parser: Parser,
tree: seq[T]
]
BuilderResult*[T] = Result[Builder[T], (Builder[T], string)]
proc indentKey(x: string, count: int): string =
var y = x.indent(count)
y.delete(0..count - 1)
y
proc tokenStringValue*(x: ParserToken): string =
case x.kind:
of parserTokenChar:
$x.charValue
of parserTokenString:
x.stringValue
proc `$`*(x: ParserToken): string =
&"""ParserToken(
value: {tokenStringValue(x)},
)"""
proc `$`*(x: ParserState): string =
&"""ParserState(
stream: "{x.stream}",
position: {x.position},
lastPosition: {x.lastPosition},
)"""
proc `$`*(x: Parser): string =
&"""Parser(
state: {indentKey($x.state, 2)},
tokens: {indentKey($x.tokens, 2)},
)"""
proc prettyExpectedSet(x: set[char]): string =
case x:
of AllChars:
"AllChars {'\x00'..'\xFF'}"
of Digits:
"Digits {'0'..'9'}"
of HexDigits:
"HexDigits {'0'..'9', 'A'..'F', 'a'..'f'}"
of Letters:
"Letters {'A'..'Z', 'a'..'z'}"
of Newlines:
"Newlines {'\r', '\n'}"
of Whitespace:
"Whitespace {' ', '\t', '\v', '\r', '\n', '\f'}"
else:
$x
proc `$`*(x: ParserError): string =
case x:
of charMismatchErr(expected: @expected, parser: @parser, index: @index, unexpected: @unexpected):
# TODO: Only works for single line right now
let original = parser.state.stream
.deleteAfterNewline(parser.state.position)
let errSpace = " ".repeat(max(0, index))
$index & $parser &
&"""Parsing Error (Character Mismatch Error):
{original}
{errSpace}^ Expected '{expected}' but got '{unexpected}'"""
of choiceMismatchErr(expected: @expected, parser: @parser, index: @index, unexpected: @unexpected):
let original = parser.state.stream
.deleteAfterNewline(parser.state.position)
let errSpace = " ".repeat(max(0, index))
$index & $parser &
&"""Parsing Error (Character Mismatch Error):
{original}
{errSpace}^ Expected '{expected}' but got '{unexpected}'"""
of endOfStringErr(parser: @parser, index: @index):
let original = parser.state.stream
.deleteAfterNewline(parser.state.position)
let errSpace = " ".repeat(max(0, index))
$index & $parser &
&"""Parsing Error (EndOfString Expected):
{original}
{errSpace}^ Expected 'EndOfString' at {index} but got {original.len - 1}"""
else: "ParseError"
func initParserToken(x: char): ParserToken = ParserToken(kind: parserTokenChar, charValue: x)
func initParserToken(x: string): ParserToken = ParserToken(kind: parserTokenString, stringValue: x)
# func initParserToken(x: parserTokenKeyValuePairValueT): ParserToken = ParserToken(kind: parserTokenKeyValuePair, keyValuePairValue: x)
proc initParser*(str: string): Parser =
Parser(
state: ParserState(
stream: str,
position: -1,
lastPosition: 0,
),
tokens: newSeq[ParserToken](),
)
proc initParserResult*(str: string): ParserResult =
ParserResult.ok(initParser(str))
func ch*(expectedChars: set[char]): (Parser -> ParserResult) {.inline.} =
return func(parser: Parser): ParserResult =
let state = parser.state
let newIndex = state.position + 1
if newIndex > (state.stream.len - 1):
return err(ParserError(
kind: endOfStringErr,
expected: &"{expectedChars.prettyExpectedSet()}",
index: newIndex,
parser: parser,
))
else:
let foundChar = state.stream[newIndex]
if foundChar in expectedChars:
return Parser(
state: ParserState(
stream: state.stream,
position: newIndex,
lastPosition: parser.state.position,
),
tokens: parser.tokens & initParserToken(foundChar)
).ok()
else:
return err(ParserError(
kind: charMismatchErr,
unexpected: &"{foundChar}",
expected: &"{expectedChars.prettyExpectedSet()}",
index: newIndex,
parser: parser,
))
func ch*(expectedChar: char): (Parser -> ParserResult) {.inline.} =
return func(parser: Parser): ParserResult =
let state = parser.state
let newIndex = state.position + 1
if newIndex > (state.stream.len - 1):
return err(ParserError(
kind: endOfStringErr,
expected: &"{expectedChar}",
index: newIndex,
parser: parser,
))
else:
let foundChar = state.stream[newIndex]
if expectedChar == foundChar:
return Parser(
state: ParserState(
stream: state.stream,
position: newIndex,
lastPosition: parser.state.position,
),
tokens: parser.tokens & initParserToken(foundChar)
).ok()
else:
return err(ParserError(
kind: charMismatchErr,
unexpected: &"{foundChar}",
expected: &"{expectedChar}",
index: newIndex,
parser: parser,
))
func str*(s: string): (Parser -> ParserResult) {.inline.} =
return func(parser: Parser): ParserResult =
var p = parser.ok()
for c in s.items:
p = p.flatMap(ch(c))
return p
proc endOfStream*(parser: Parser): ParserResult =
let index = parser.state.position + 1
if index == parser.state.stream.len:
ok(parser)
else:
err(ParserError(
kind: endOfStringErr,
expected: &"EndOfString",
index: index,
parser: parser,
))
func ignore*(parserFn: Parser -> ParserResult): (Parser -> ParserResult) {.inline.} =
## Parse characters but throw success tokens away
return proc(parser: Parser): ParserResult =
return parserFn(parser)
.map((x: Parser) => Parser(
state: x.state,
tokens: parser.tokens,
))
func flattenParserTokens*(parser: Parser): ParserResult =
return ParserResult.ok(
Parser(
state: parser.state,
tokens: @[
ParserToken(
kind: parserTokenString,
stringValue: parser.tokens.foldl(a & b.tokenStringValue(), "")
)
]
)
)
func emptyTokens*(parser: Parser): Parser =
Parser(
state: parser.state,
tokens: newSeq[ParserToken](),
)
func optional*(parserFn: Parser -> ParserResult): (Parser -> ParserResult) {.inline.} =
## Parse characters and ignore failure
return proc(parser: Parser): ParserResult =
let newParser = parserFn(parser)
if newParser.isOk():
newParser
else:
parser.ok()
func manyUntil*(acceptFn: Parser -> ParserResult, stopFn: Parser -> ParserResult): (Parser -> ParserResult) {.inline.} =
## Parse characters but throw success tokens away
return proc(parser: Parser): ParserResult =
var res: ParserResult = parser.ok()
while res.isOk() and res.flatMap(stopFn).isErr():
res = res.flatMap(acceptFn)
return res
func anyUntil*(stopFn: Parser -> ParserResult): (Parser -> ParserResult) {.inline.} =
manyUntil(ch(AllChars), stopFn)
func choice*(parsers: seq[Parser -> ParserResult]): (Parser -> ParserResult) {.inline.} =
return proc(parser: Parser): ParserResult =
var errors: seq[ParserResult] = newSeq[ParserResult]()
var found = Nothing[ParserResult]()
for fn in parsers:
let fnResult: ParserResult = fn(parser)
if fnResult.isOk():
found = fnResult.just
break
else:
errors = errors & fnResult
return found
.fold(
proc(): ParserResult =
let prettyErrors = errors.map((x: ParserResult) => x.error().expected)
err(ParserError(
kind: choiceMismatchErr,
index: parser.state.position + 1,
expected: &"Choice ({prettyErrors})",
unexpected: errors[0].error().unexpected,
parser: parser,
)),
proc(x: ParserResult): ParserResult = x,
)
proc `+`*(parserFnA: Parser -> ParserResult, parserFnB: Parser -> ParserResult): (Parser -> ParserResult) {.inline.} =
## Parse characters and ignore failure
return proc(parser: Parser): ParserResult =
parserFnA(parser).flatMap(parserFnB)
proc setErrorExpectedField(err: ParserError, expected: string): ParserError =
ParserError(
kind: err.kind,
unexpected: err.unexpected,
expected: expected,
index: err.index,
parser: err.parser,
)
let newlineParser = choice(@[
ch(NewLines),
endOfStream,
])
proc newline*(parser: Parser): ParserResult =
newlineParser(parser)
.mapErr((x: ParserError) => x.setErrorExpectedField("Newline"))
proc parseSeq*(parser: ParserResult, xs: seq[Parser -> ParserResult]): ParserResult =
xs.foldl(a.flatMap(b), parser)
proc foldTokens*[T](
parserResult: ParserResult,
onError: ParserError -> T,
onSuccess: seq[ParserToken] -> T,
): T =
if parserResult.isOk():
onSuccess(parserResult.unsafeGet().tokens)
else:
let err = parserResult.error()
onError(err)
proc merge[T](t: Builder[T], parser: Parser, tree: seq[T]): Builder[T] =
Builder[T]((
parser,
tree
))
proc mapTree[T](builder: BuilderResult[T], fn: seq[T] -> seq[T]): BuilderResult[T] =
builder.map(proc(b: Builder[T]): Builder[T] = Builder((
parser: b[0],
tree: fn(b[1]),
)))
proc applyParsers*[T](
builder: Builder[T],
parsers: seq[Parser -> ParserResult],
tokenFoldFn: (seq[ParserToken], seq[T]) -> seq[T],
): BuilderResult[T] =
# proc nested(b: Builder[T]): BuilderResult[T] =
let newParser = ParserResult.ok(Parser(
state: builder[0].state,
tokens: @[]
))
.parseSeq(parsers)
newParser
.foldTokens(
(err: ParserError) => BuilderResult[T].err((builder, "foo")),
(newTokens: seq[ParserToken]) => BuilderResult[T].ok(
builder.merge(newParser.unsafeGet(), tokenFoldFn(newTokens, builder[1]))
),
)
proc applyParsersSeq*[T](
builder: BuilderResult[T],
xs: seq[tuple[
parsers: seq[Parser -> ParserResult],
tokenFoldFn: (seq[ParserToken], seq[T]) -> seq[T],
]]): BuilderResult[T] =
xs.foldl(a.flatMap((x: Builder[T]) => x.applyParsers(b[0], b[1])), builder)
proc foldBuilder*[T, T2](
builderResult: BuilderResult[T],
onError: string -> T2,
onSuccess: seq[T] -> T2,
): T =
if builderResult.isOk():
onSuccess(builderResult.unsafeGet().tree)
else:
let err = builderResult.error()
onError(err[1])
when isMainModule:
proc getTokens(x: ParserResult): seq[string] =
x.foldTokens(
proc(err: ParserError): seq[string] =
echo err
@[],
proc(xs: seq[ParserToken]): seq[string] = xs.map((x: ParserToken) => x.tokenStringValue()),
)
proc testParser(x: string, ps: seq[Parser -> ParserResult]): seq[string] =
initParserResult(x).parseSeq(ps).getTokens()
let optionalPrefixParser = @[
optional(ch('_')),
str("ABC")
]
assert: "_ABC".testParser(optionalPrefixParser) == @["_", "A", "B", "C"]
assert: "ABC".testParser(optionalPrefixParser) == @["A", "B", "C"]
let andParser = @[
(ch('A') + ch('B')),
ch('C'),
]
assert: "ABC".testParser(andParser) == @["A", "B", "C"]
let newlineParserTest = @[
str("ABC"),
newline
]
assert "ABC\n".testParser(newlineParserTest) == @["A", "B", "C", "\n"]