Files
org-parser/src/parser/parser.nim
2022-02-07 11:45:43 +01:00

369 lines
9.9 KiB
Nim

import std/options
import std/strutils
import std/strformat
import std/collections/sequtils
import std/sugar
import std/collections/tables
import results
import fusion/matching
import fp/maybe
{.experimental: "caseStmtMacros".}
type
  ParserState* = ref object
    ## Cursor over the text being parsed.
    stream: string    ## complete input text
    position: int     ## index of the last consumed character
    lastPosition: int ## value of `position` before the latest step

  Token* = ref object
    ## One matched character.
    value*: char

  Parser* = ref object
    ## A cursor together with the tokens collected so far.
    state: ParserState
    tokens: seq[Token]

  ParseErrorKind = enum
    ## Discriminates the failures a parser step can report.
    choiceMismatchErr
    charMismatchErr
    endOfStringErr

  ParserError = ref object
    ## Details of a failed parser step.
    kind: ParseErrorKind
    unexpected: string ## text that was found instead
    expected: string   ## description of what would have been accepted
    index: int         ## position in the stream the error refers to
    parser: Parser     ## parser the failing step was applied to

  ParserResult* = Result[Parser, ParserError]

  Builder*[T] = tuple[parser: Parser, tree: seq[T]]
    ## A parser paired with the tree built from its tokens so far.

  BuilderResult*[T] = Result[Builder[T], (Builder[T], string)]
    ## Either a builder, or a builder plus an error message.
proc indentKey(x: string, count: int): string =
  ## Indents every line of `x` by `count` spaces and then removes the first
  ## `count` characters again, so only the continuation lines stay shifted.
  result = x.indent(count)
  result.delete(0 .. count - 1)
proc `$`*(x: Token): string =
  ## Renders the token as a multi-line `Token(...)` block showing its
  ## character value.
  result = "Token(\n"
  result.add &"value: {x.value},\n"
  result.add ")"
proc `$`*(x: ParserState): string =
  ## Renders the state as a multi-line `ParserState(...)` block listing the
  ## quoted stream and both cursor fields, one field per line.
  let fields = [
    &"stream: \"{x.stream}\",",
    &"position: {x.position},",
    &"lastPosition: {x.lastPosition},",
  ]
  "ParserState(\n" & fields.join("\n") & "\n)"
proc `$`*(x: Parser): string =
  ## Renders the parser as a multi-line `Parser(...)` block. The nested
  ## renderings of `state` and `tokens` are passed through `indentKey` so
  ## that their continuation lines are shifted by two spaces.
  let
    stateText = indentKey($x.state, 2)
    tokensText = indentKey($x.tokens, 2)
  "Parser(\nstate: " & stateText & ",\ntokens: " & tokensText & ",\n)"
proc prettyExpectedSet(x: set[char]): string =
  ## Returns a human-readable label for `x`, used in error messages.
  ##
  ## The well-known `strutils` character sets are shown by name followed by
  ## their members; any other set falls back to its `$` rendering.
  ##
  ## The labels are raw string literals on purpose: the escape sequences must
  ## appear as visible text (`\n`), not as real control characters, otherwise
  ## the message would contain line breaks, tabs and NUL bytes.
  if x == AllChars:
    r"AllChars {'\x00'..'\xFF'}"
  elif x == Digits:
    "Digits {'0'..'9'}"
  elif x == HexDigits:
    "HexDigits {'0'..'9', 'A'..'F', 'a'..'f'}"
  elif x == Letters:
    "Letters {'A'..'Z', 'a'..'z'}"
  elif x == Newlines:
    r"Newlines {'\r', '\n'}"
  elif x == Whitespace:
    r"Whitespace {' ', '\t', '\v', '\r', '\n', '\f'}"
  else:
    $x
proc `$`*(x: ParserError): string =
  ## Renders the error as a three-line report: a heading naming the error
  ## kind, the original input, and a caret placed under column `x.index`
  ## (clamped to 0) followed by what was expected.
  ##
  ## Reads `x.parser.state.stream`, so `x.parser` must be set.
  # TODO: Only works for single line right now
  let
    original = x.parser.state.stream
    errSpace = " ".repeat(max(0, x.index))
    context = original & "\n" & errSpace & "^ "
    mismatch = "Expected '" & x.expected & "' but got '" &
      x.unexpected & "'"
  # The enum is covered exhaustively, so a new kind becomes a compile error
  # here instead of silently rendering a generic message.
  case x.kind
  of charMismatchErr:
    "Parsing Error (Character Mismatch Error):\n" & context & mismatch
  of choiceMismatchErr:
    # Previously labelled "Character Mismatch Error" by copy-paste.
    "Parsing Error (Choice Mismatch Error):\n" & context & mismatch
  of endOfStringErr:
    "Parsing Error (EndOfString Expected):\n" & context &
      "Expected 'EndOfString'"
proc initParser*(str: string): Parser =
  ## Creates a parser over `str` with no tokens collected. The cursor starts
  ## at -1, i.e. before the first character.
  let start = ParserState(stream: str, position: -1, lastPosition: 0)
  Parser(state: start, tokens: @[])
proc initParserResult*(str: string): ParserResult =
  ## Same as `initParser`, but wrapped in a successful `ParserResult` so it
  ## can be used as the start of a `flatMap` chain.
  let fresh = initParser(str)
  ok(ParserResult, fresh)
func ch*(expectedChars: set[char]): (Parser -> ParserResult) {.inline.} =
  ## Builds a parser step that consumes the next character if it is a member
  ## of `expectedChars`.
  ##
  ## On success a new `Parser` is returned whose position is advanced by one
  ## and whose tokens have the matched character appended; the parser passed
  ## in is left untouched. The step fails with `endOfStringErr` when no
  ## character is left and with `charMismatchErr` when the character is not
  ## in the set.
  return func(parser: Parser): ParserResult =
    let
      current = parser.state
      nextPos = current.position + 1
    if nextPos >= current.stream.len:
      return err(ParserError(
        kind: endOfStringErr,
        expected: prettyExpectedSet(expectedChars),
        index: nextPos,
        parser: parser,
      ))
    let candidate = current.stream[nextPos]
    if candidate notin expectedChars:
      return err(ParserError(
        kind: charMismatchErr,
        unexpected: $candidate,
        expected: prettyExpectedSet(expectedChars),
        index: nextPos,
        parser: parser,
      ))
    let advanced = ParserState(
      stream: current.stream,
      position: nextPos,
      lastPosition: current.position,
    )
    return Parser(
      state: advanced,
      tokens: parser.tokens & Token(value: candidate),
    ).ok()
func ch*(expectedChar: char): (Parser -> ParserResult) {.inline.} =
  ## Builds a parser step that consumes the next character if it equals
  ## `expectedChar`.
  ##
  ## On success a new `Parser` is returned whose position is advanced by one
  ## and whose tokens have the matched character appended; the parser passed
  ## in is left untouched. The step fails with `endOfStringErr` when no
  ## character is left and with `charMismatchErr` when a different character
  ## is found.
  return func(parser: Parser): ParserResult =
    let
      current = parser.state
      nextPos = current.position + 1
    if nextPos >= current.stream.len:
      return err(ParserError(
        kind: endOfStringErr,
        expected: $expectedChar,
        index: nextPos,
        parser: parser,
      ))
    let candidate = current.stream[nextPos]
    if candidate != expectedChar:
      return err(ParserError(
        kind: charMismatchErr,
        unexpected: $candidate,
        expected: $expectedChar,
        index: nextPos,
        parser: parser,
      ))
    let advanced = ParserState(
      stream: current.stream,
      position: nextPos,
      lastPosition: current.position,
    )
    return Parser(
      state: advanced,
      tokens: parser.tokens & Token(value: candidate),
    ).ok()
func str*(s: string): (Parser -> ParserResult) {.inline.} =
  ## Builds a parser step that matches `s` character by character by chaining
  ## one `ch` step per character. The first failing character's error is
  ## the result; an empty `s` succeeds without consuming anything.
  return func(parser: Parser): ParserResult =
    result = ParserResult.ok(parser)
    for expected in s:
      result = result.flatMap(ch(expected))
proc endOfStream*(parser: Parser): ParserResult =
  ## Succeeds, returning `parser` unchanged, when the cursor sits on the last
  ## character of the stream; otherwise fails with `endOfStringErr` at the
  ## current position.
  let cursor = parser.state
  if cursor.position != cursor.stream.len - 1:
    ParserResult.err(ParserError(
      kind: endOfStringErr,
      expected: "EndOfString",
      index: cursor.position,
      parser: parser,
    ))
  else:
    ParserResult.ok(parser)
proc newline*(parser: Parser): ParserResult =
  ## Matches a line end: either the next character is in `Newlines` (it is
  ## consumed and appended as a token) or the input is already exhausted
  ## (the parser is returned unchanged).
  ##
  ## When neither holds, the error of the newline-character attempt is
  ## returned.
  ##
  ## NOTE(review): the previous body chained `endOfStream` *into*
  ## `ch(NewLines)`, which can never succeed - at end of input there is no
  ## character left for `ch`, and anywhere else `endOfStream` fails. The
  ## "newline or end of input" reading is inferred; confirm the intent.
  let lineBreak = ch(Newlines)(parser)
  if lineBreak.isOk():
    lineBreak
  elif parser.endOfStream().isOk():
    ParserResult.ok(parser)
  else:
    lineBreak
func ignore*(
    parserFn: Parser -> ParserResult
): (Parser -> ParserResult) {.inline.} =
  ## Wraps `parserFn` so that it still advances the cursor on success but
  ## contributes no tokens: the result keeps the tokens of the parser that
  ## was passed in. Failures of `parserFn` are returned as they are.
  return proc(parser: Parser): ParserResult =
    let outcome = parserFn(parser)
    if outcome.isErr():
      outcome
    else:
      ParserResult.ok(Parser(
        state: outcome.unsafeGet().state,
        tokens: parser.tokens,
      ))
func optional*(
    parserFn: Parser -> ParserResult
): (Parser -> ParserResult) {.inline.} =
  ## Wraps `parserFn` so that it cannot fail: when `parserFn` succeeds its
  ## result is used, otherwise the untouched input parser is returned as a
  ## success.
  return proc(parser: Parser): ParserResult =
    let attempt = parserFn(parser)
    if attempt.isErr():
      ParserResult.ok(parser)
    else:
      attempt
func manyUntil*(
    acceptFn: Parser -> ParserResult,
    stopFn: Parser -> ParserResult
): (Parser -> ParserResult) {.inline.} =
  ## Applies `acceptFn` once, then keeps applying it until `stopFn` would
  ## succeed at the current position or `acceptFn` fails.
  ##
  ## `stopFn` is only probed: its result is discarded, so whatever it matches
  ## is not consumed. The value returned is the last `acceptFn` result, which
  ## is an error when `acceptFn` failed before `stopFn` matched.
  return proc(parser: Parser): ParserResult =
    var current = acceptFn(parser)
    while true:
      if current.isErr():
        break
      # Look ahead only; the probe result is thrown away.
      if current.flatMap(stopFn).isOk():
        break
      current = current.flatMap(acceptFn)
    return current
func anyUntil*(
    stopFn: Parser -> ParserResult
): (Parser -> ParserResult) {.inline.} =
  ## `manyUntil` specialised to accept any character (`AllChars`) until
  ## `stopFn` would succeed.
  let acceptAnyChar = ch(AllChars)
  result = manyUntil(acceptAnyChar, stopFn)
func choice*(
    parsers: seq[Parser -> ParserResult]
): (Parser -> ParserResult) {.inline.} =
  ## Builds a parser step that tries each of `parsers` in order against the
  ## same input parser and returns the first success.
  ##
  ## When every alternative fails (or `parsers` is empty) the step fails with
  ## `choiceMismatchErr`. Its `expected` text lists the `expected` of every
  ## failed alternative; `unexpected` and `index` are taken from the first
  ## failure so the caret in the rendered error lines up with the reported
  ## character. With no alternatives at all, `unexpected` is empty and
  ## `index` is the position right after the cursor.
  return proc(parser: Parser): ParserResult =
    var failures: seq[ParserError] = @[]
    for alternative in parsers:
      let attempt = alternative(parser)
      if attempt.isOk():
        return attempt
      failures.add attempt.error()
    let expectedAll = failures.mapIt(it.expected)
    # Guard the empty case: indexing failures[0] would raise otherwise.
    let hasFailure = failures.len > 0
    return ParserResult.err(ParserError(
      kind: choiceMismatchErr,
      expected: &"Choice ({expectedAll})",
      unexpected: (if hasFailure: failures[0].unexpected else: ""),
      index: (if hasFailure: failures[0].index
              else: parser.state.position + 1),
      parser: parser,
    ))
proc `+`*(
    parserFnA: Parser -> ParserResult,
    parserFnB: Parser -> ParserResult
): (Parser -> ParserResult) {.inline.} =
  ## Sequences two parser steps: runs `parserFnA` and, if it succeeds, feeds
  ## its result into `parserFnB`. The first failure is the result.
  return proc(parser: Parser): ParserResult =
    let first = parserFnA(parser)
    first.flatMap(parserFnB)
proc parseSeq*(
    parser: ParserResult,
    xs: seq[Parser -> ParserResult]
): ParserResult =
  ## Threads `parser` through every step in `xs`, left to right, via
  ## `flatMap`. An empty `xs` returns `parser` as is.
  result = parser
  for step in xs:
    result = result.flatMap(step)
proc foldTokens*[T](
    parserResult: ParserResult,
    onError: ParserError -> T,
    onSuccess: seq[Token] -> T,
): T =
  ## Collapses a `ParserResult` into a `T`: `onSuccess` receives the collected
  ## tokens of a successful parse, `onError` receives the error of a failed
  ## one.
  if parserResult.isErr():
    onError(parserResult.error())
  else:
    onSuccess(parserResult.unsafeGet().tokens)
proc merge[T](t: Builder[T], parser: Parser, tree: seq[T]): Builder[T] =
  ## Builds a new `Builder` from `parser` and `tree`.
  ## Note: `t` is not read; it only makes the call read as a method on an
  ## existing builder.
  result = (parser: parser, tree: tree)
proc mapTree[T](
    builder: BuilderResult[T],
    fn: seq[T] -> seq[T]
): BuilderResult[T] =
  ## Applies `fn` to the tree of a successful builder result, keeping its
  ## parser; an error result is passed through by `map`.
  # Spell out Builder[T] (as `merge` does) rather than converting through the
  # bare generic name, and use the named tuple fields instead of indices.
  builder.map(proc(b: Builder[T]): Builder[T] =
    Builder[T]((parser: b.parser, tree: fn(b.tree))))
proc applyParsers*[T](
    builder: Builder[T],
    parsers: seq[Parser -> ParserResult],
    tokenFoldFn: (seq[Token], seq[T]) -> seq[T],
): BuilderResult[T] =
  ## Runs `parsers` in sequence from the builder's current parser state,
  ## starting with an empty token list, and folds the tokens they produce
  ## into the builder's tree with `tokenFoldFn`.
  ##
  ## On success the result holds the advanced parser (carrying only the newly
  ## produced tokens) and the new tree. On failure the result holds the
  ## original `builder` together with the rendered parse error.
  let parsed = ParserResult.ok(Parser(
    state: builder.parser.state,
    tokens: @[],
  )).parseSeq(parsers)
  if parsed.isErr():
    # Report the real parse error; this used to be the placeholder "foo".
    BuilderResult[T].err((builder, $parsed.error()))
  else:
    let advanced = parsed.unsafeGet()
    BuilderResult[T].ok(
      builder.merge(advanced, tokenFoldFn(advanced.tokens, builder.tree))
    )
proc applyParsersSeq*[T](
    builder: BuilderResult[T],
    xs: seq[tuple[
      parsers: seq[Parser -> ParserResult],
      tokenFoldFn: (seq[Token], seq[T]) -> seq[T],
    ]]): BuilderResult[T] =
  ## Applies each `(parsers, tokenFoldFn)` pair in `xs` to the builder in
  ## order through `applyParsers`. Once a step has failed, the remaining
  ## steps are not run and that error is the result.
  result = builder
  for step in xs:
    if result.isOk():
      result = result.unsafeGet().applyParsers(
        step.parsers, step.tokenFoldFn)
proc foldBuilder*[T, T2](
    builderResult: BuilderResult[T],
    onError: string -> T2,
    onSuccess: seq[T] -> T2,
): T2 =
  ## Collapses a `BuilderResult` into a `T2`: `onSuccess` receives the tree of
  ## a successful result, `onError` receives the message of a failed one.
  ##
  ## The return type is `T2`, the type both callbacks produce. It was
  ## declared as `T`, which only type-checks when `T` and `T2` coincide.
  if builderResult.isOk():
    onSuccess(builderResult.unsafeGet().tree)
  else:
    # The error is a (builder, message) pair; only the message is forwarded.
    onError(builderResult.error()[1])
when isMainModule:
  # Self-test, executed only when this module is the program entry point.

  proc getTokens(x: ParserResult): seq[char] =
    ## Extracts the matched characters; on failure echoes the error and
    ## yields an empty sequence.
    let report = proc(err: ParserError): seq[char] =
      echo err
      @[]
    let values = proc(xs: seq[Token]): seq[char] =
      xs.mapIt(it.value)
    x.foldTokens(report, values)

  proc testParser(x: string, ps: seq[Parser -> ParserResult]): seq[char] =
    ## Parses `x` with the steps in `ps` and returns the matched characters.
    let parsed = initParserResult(x).parseSeq(ps)
    parsed.getTokens()

  # An optional prefix is consumed when present and skipped when absent.
  let optionalPrefixParser = @[optional(ch('_')), str("ABC")]
  assert "_ABC".testParser(optionalPrefixParser) == @['_', 'A', 'B', 'C']
  assert "ABC".testParser(optionalPrefixParser) == @['A', 'B', 'C']

  # `+` sequences two steps into one.
  let andParser = @[ch('A') + ch('B'), ch('C')]
  assert "ABC".testParser(andParser) == @['A', 'B', 'C']

  # Not asserted: the outcome is only printed.
  let newlineParser = @[str("ABC"), newline]
  echo "ABC".testParser(newlineParser)