369 lines
9.9 KiB
Nim
369 lines
9.9 KiB
Nim
import std/options
|
|
import std/strutils
|
|
import std/strformat
|
|
import std/collections/sequtils
|
|
import std/sugar
|
|
import std/collections/tables
|
|
import results
|
|
import fusion/matching
|
|
import fp/maybe
|
|
|
|
{.experimental: "caseStmtMacros".}
|
|
|
|
type
  ParserState* = ref object
    ## Read cursor over the text being parsed.
    stream: string
      ## Complete input text.
    position: int
      ## Index of the most recently consumed character; `initParser`
      ## starts it at -1.
    lastPosition: int
      ## Value `position` had in the parser a `ch` step advanced from
      ## (0 for a parser made by `initParser`).

  Token* = ref object
    ## One matched character.
    value*: char

  Parser* = ref object
    ## Parse snapshot: the cursor plus the tokens collected so far.
    state: ParserState
    tokens: seq[Token]

  ParseErrorKind = enum
    ## Discriminator for `ParserError`.
    choiceMismatchErr
      ## Built by `choice` when no alternative succeeded.
    charMismatchErr
      ## Built by `ch` when the next character is not accepted.
    endOfStringErr
      ## Built by `ch` when no input is left, and by `endOfStream` when
      ## the cursor is not on the last character.

  ParserError = ref object
    ## Failure description carried in the error side of `ParserResult`.
    kind: ParseErrorKind
    unexpected: string
      ## Text that was actually found.
    expected: string
      ## Text describing what was wanted instead.
    index: int
      ## Stream index used to position the caret when rendering.
    parser: Parser
      ## Parser the failing step was applied to.

  ParserResult* = Result[Parser, ParserError]

  Builder*[T] = tuple[parser: Parser, tree: seq[T]]
    ## A parser paired with the tree assembled from its tokens.

  BuilderResult*[T] = Result[Builder[T], (Builder[T], string)]
    ## On failure: the builder the step started from plus a message.
|
|
|
|
proc indentKey(x: string, count: int): string =
  ## Indents every line of `x` by `count` spaces except the first one.
  ##
  ## Used when embedding a multi-line rendering after a `key: ` prefix: the
  ## first line continues the key's line, the following lines are shifted.
  ##
  ## A non-positive `count` returns `x` unchanged. Without this guard,
  ## `delete(0 .. -1)` raises `IndexDefect` for an empty `x`.
  if count <= 0:
    return x
  result = x.indent(count)
  # `indent` padded the first line too; strip that leading padding again.
  result.delete(0 ..< count)
|
|
|
|
proc `$`*(x: Token): string =
  ## Multi-line debug rendering of a token.
  # NOTE(review): the field line carries no leading indent here, matching
  # the text as visible in this view of the file - confirm intended layout.
  [
    "Token(",
    "value: " & $x.value & ",",
    ")",
  ].join("\n")
|
|
|
|
proc `$`*(x: ParserState): string =
  ## Multi-line debug rendering of the cursor state.
  # NOTE(review): field lines carry no leading indent here, matching the
  # text as visible in this view of the file - confirm intended layout.
  [
    "ParserState(",
    "stream: \"" & x.stream & "\",",
    "position: " & $x.position & ",",
    "lastPosition: " & $x.lastPosition & ",",
    ")",
  ].join("\n")
|
|
|
|
proc `$`*(x: Parser): string =
  ## Multi-line debug rendering of a parser. The nested state and token
  ## renderings are passed through `indentKey` with a width of 2.
  # NOTE(review): field lines carry no leading indent here, matching the
  # text as visible in this view of the file - confirm intended layout.
  let
    stateText = indentKey($x.state, 2)
    tokensText = indentKey($x.tokens, 2)
  [
    "Parser(",
    "state: " & stateText & ",",
    "tokens: " & tokensText & ",",
    ")",
  ].join("\n")
|
|
|
|
proc prettyExpectedSet(x: set[char]): string =
  ## Describes a character set for use in error messages.
  ##
  ## The well-known `strutils` sets are rendered as their name followed by
  ## their definition; any other set falls back to `$x`.
  ##
  ## Raw string literals are used on purpose: with ordinary literals the
  ## escapes (`\x00`, `\r`, `\n`, `\t`, ...) would be interpreted, putting
  ## real control characters - including line breaks - into the message
  ## instead of the readable escape text.
  if x == AllChars:
    r"AllChars {'\x00'..'\xFF'}"
  elif x == Digits:
    "Digits {'0'..'9'}"
  elif x == HexDigits:
    "HexDigits {'0'..'9', 'A'..'F', 'a'..'f'}"
  elif x == Letters:
    "Letters {'A'..'Z', 'a'..'z'}"
  elif x == Newlines:
    r"Newlines {'\r', '\n'}"
  elif x == Whitespace:
    r"Whitespace {' ', '\t', '\v', '\r', '\n', '\f'}"
  else:
    $x
|
|
|
|
proc `$`*(x: ParserError): string =
  ## Renders a parse error as a three-line report: a headline, the input
  ## stream, and a caret line positioned with `x.index` leading spaces.
  ##
  ## A nil error renders as the generic text "ParseError".
  ##
  ## The choice-mismatch headline now names the choice error; it previously
  ## repeated the character-mismatch headline.
  if x.isNil:
    return "ParseError"

  # TODO: Only works for single line right now
  let
    original = x.parser.state.stream
    errSpace = " ".repeat(max(0, x.index))

  # Exhaustive over ParseErrorKind on purpose: adding a kind becomes a
  # compile error here instead of silently hitting a fallback.
  case x.kind
  of charMismatchErr:
    "Parsing Error (Character Mismatch Error):\n" &
      original & "\n" &
      &"{errSpace}^ Expected '{x.expected}' but got '{x.unexpected}'"
  of choiceMismatchErr:
    "Parsing Error (Choice Mismatch Error):\n" &
      original & "\n" &
      &"{errSpace}^ Expected '{x.expected}' but got '{x.unexpected}'"
  of endOfStringErr:
    "Parsing Error (EndOfString Expected):\n" &
      original & "\n" &
      &"{errSpace}^ Expected 'EndOfString'"
|
|
|
|
proc initParser*(str: string): Parser =
  ## Creates a parser over `str` with nothing consumed yet: the cursor sits
  ## at -1 (just before the first character) and the token list is empty.
  let start = ParserState(stream: str, position: -1, lastPosition: 0)
  Parser(state: start, tokens: @[])
|
|
|
|
proc initParserResult*(str: string): ParserResult =
  ## Same as `initParser`, but already wrapped as a successful
  ## `ParserResult` so parser steps can be chained onto it directly.
  let fresh = initParser(str)
  ParserResult.ok(fresh)
|
|
|
|
func ch*(expectedChars: set[char]): (Parser -> ParserResult) {.inline.} =
  ## Builds a step that consumes one character if it is a member of
  ## `expectedChars`.
  ##
  ## The step inspects the character right after the current position:
  ## * no character left        -> `endOfStringErr`
  ## * character not in the set -> `charMismatchErr`
  ## * otherwise                -> a new `Parser` advanced by one, with the
  ##   matched character appended to the tokens
  ##
  ## The parser passed in is not modified.
  return func(parser: Parser): ParserResult =
    let
      current = parser.state
      nextIndex = current.position + 1

    # Guard: input exhausted.
    if nextIndex >= current.stream.len:
      return err(ParserError(
        kind: endOfStringErr,
        expected: prettyExpectedSet(expectedChars),
        index: nextIndex,
        parser: parser,
      ))

    let candidate = current.stream[nextIndex]

    # Guard: character present but not acceptable.
    if candidate notin expectedChars:
      return err(ParserError(
        kind: charMismatchErr,
        unexpected: $candidate,
        expected: prettyExpectedSet(expectedChars),
        index: nextIndex,
        parser: parser,
      ))

    let advanced = ParserState(
      stream: current.stream,
      position: nextIndex,
      lastPosition: current.position,
    )
    return Parser(
      state: advanced,
      tokens: parser.tokens & Token(value: candidate),
    ).ok()
|
|
|
|
func ch*(expectedChar: char): (Parser -> ParserResult) {.inline.} =
  ## Builds a step that consumes one character if it equals `expectedChar`.
  ##
  ## The step inspects the character right after the current position:
  ## * no character left   -> `endOfStringErr`
  ## * different character -> `charMismatchErr`
  ## * otherwise           -> a new `Parser` advanced by one, with the
  ##   matched character appended to the tokens
  ##
  ## The parser passed in is not modified.
  return func(parser: Parser): ParserResult =
    let
      current = parser.state
      nextIndex = current.position + 1

    # Guard: input exhausted.
    if nextIndex >= current.stream.len:
      return err(ParserError(
        kind: endOfStringErr,
        expected: $expectedChar,
        index: nextIndex,
        parser: parser,
      ))

    let candidate = current.stream[nextIndex]

    # Guard: character present but not the one wanted.
    if candidate != expectedChar:
      return err(ParserError(
        kind: charMismatchErr,
        unexpected: $candidate,
        expected: $expectedChar,
        index: nextIndex,
        parser: parser,
      ))

    let advanced = ParserState(
      stream: current.stream,
      position: nextIndex,
      lastPosition: current.position,
    )
    return Parser(
      state: advanced,
      tokens: parser.tokens & Token(value: candidate),
    ).ok()
|
|
|
|
func str*(s: string): (Parser -> ParserResult) {.inline.} =
  ## Builds a step that matches the characters of `s` one after another by
  ## chaining `ch` for each of them. The first failing character's error is
  ## the result; an empty `s` succeeds without consuming anything.
  return func(parser: Parser): ParserResult =
    result = ParserResult.ok(parser)
    for wanted in s:
      # Once `result` is an error, `flatMap` just passes it through.
      result = result.flatMap(ch(wanted))
|
|
|
|
proc endOfStream*(parser: Parser): ParserResult =
  ## Succeeds, returning `parser` unchanged, when the cursor is on the last
  ## character of the stream; otherwise fails with `endOfStringErr` located
  ## at the current position.
  let
    state = parser.state
    lastIndex = state.stream.len - 1

  if state.position != lastIndex:
    err(ParserError(
      kind: endOfStringErr,
      expected: "EndOfString",
      index: state.position,
      parser: parser,
    ))
  else:
    ok(parser)
|
|
|
|
proc newline*(parser: Parser): ParserResult =
  ## Matches a line terminator: one character from `Newlines`, or the end
  ## of the stream.
  ##
  ## * next character is a newline -> consumed, like `ch(Newlines)`
  ## * cursor already at the end   -> success, nothing consumed
  ## * anything else               -> the error produced by `ch(Newlines)`
  ##
  ## The previous body chained `endOfStream` *into* `ch(Newlines)`. That
  ## can never succeed: once the end of the stream is reached, `ch` has no
  ## character left to read and always fails.
  # NOTE(review): "newline or end of stream" is the inferred intent -
  # confirm against callers.
  let lineBreak = ch(Newlines)(parser)
  if lineBreak.isOk():
    return lineBreak
  if parser.endOfStream().isOk():
    return ParserResult.ok(parser)
  lineBreak
|
|
|
|
func ignore*(parserFn: Parser -> ParserResult): (Parser -> ParserResult) {.inline.} =
  ## Wraps `parserFn` so that input is still consumed on success but the
  ## tokens it produced are dropped: the result keeps the advanced state
  ## together with the token list of the parser that went in.
  ## Failures are passed through untouched.
  return proc(parser: Parser): ParserResult =
    let outcome = parserFn(parser)
    if outcome.isOk():
      ParserResult.ok(Parser(
        state: outcome.unsafeGet().state,
        tokens: parser.tokens,
      ))
    else:
      outcome
|
|
|
|
func optional*(parserFn: Parser -> ParserResult): (Parser -> ParserResult) {.inline.} =
  ## Wraps `parserFn` so that it cannot fail: its success is returned
  ## as is, while a failure is replaced by the unchanged input parser
  ## reported as a success.
  return proc(parser: Parser): ParserResult =
    result = parserFn(parser)
    if result.isErr():
      result = ParserResult.ok(parser)
|
|
|
|
func manyUntil*(acceptFn: Parser -> ParserResult, stopFn: Parser -> ParserResult): (Parser -> ParserResult) {.inline.} =
  ## Applies `acceptFn` once, then keeps applying it for as long as the
  ## running result is a success and `stopFn` fails when probed on it.
  ##
  ## `stopFn` is only probed: its own result is discarded, so the returned
  ## parser reflects `acceptFn` steps only. If `acceptFn` fails, that
  ## error is the result.
  return proc(parser: Parser): ParserResult =
    result = acceptFn(parser)
    while result.isOk():
      if result.flatMap(stopFn).isOk():
        break
      result = result.flatMap(acceptFn)
|
|
|
|
func anyUntil*(stopFn: Parser -> ParserResult): (Parser -> ParserResult) {.inline.} =
  ## `manyUntil` specialised to accept any character (`AllChars`) until
  ## `stopFn` would succeed.
  let anyChar = ch(AllChars)
  anyChar.manyUntil(stopFn)
|
|
|
|
func choice*(parsers: seq[Parser -> ParserResult]): (Parser -> ParserResult) {.inline.} =
  ## Builds a step that tries each alternative in `parsers` against the
  ## same input parser and returns the first success.
  ##
  ## When every alternative fails, the result is a `choiceMismatchErr`:
  ## * `expected` lists the `expected` text of every failed alternative
  ## * `unexpected` and `index` are taken from the first failure, so the
  ##   rendered caret points at the offending position (`index` used to be
  ##   left at its default of 0)
  ##
  ## An empty `parsers` list also yields a `choiceMismatchErr`, with an
  ## empty `unexpected` and `index` set to the position after the cursor.
  ## Previously it crashed on `errors[0]`.
  return proc(parser: Parser): ParserResult =
    var
      expectations = newSeqOfCap[string](parsers.len)
      unexpected = ""
      index = parser.state.position + 1

    for fn in parsers:
      let attempt = fn(parser)
      if attempt.isOk():
        return attempt

      let failure = attempt.error()
      if expectations.len == 0:
        # Remember where/what the first alternative tripped over.
        unexpected = failure.unexpected
        index = failure.index
      expectations.add(failure.expected)

    return err(ParserError(
      kind: choiceMismatchErr,
      expected: &"Choice ({expectations})",
      unexpected: unexpected,
      index: index,
      parser: parser,
    ))
|
|
|
|
proc `+`*(parserFnA: Parser -> ParserResult, parserFnB: Parser -> ParserResult): (Parser -> ParserResult) {.inline.} =
  ## Sequences two steps: runs `parserFnA`, and if it succeeds feeds its
  ## parser into `parserFnB`. A failure of `parserFnA` is returned without
  ## running `parserFnB`.
  return proc(parser: Parser): ParserResult =
    let first = parserFnA(parser)
    if first.isOk():
      parserFnB(first.unsafeGet())
    else:
      first
|
|
|
|
proc parseSeq*(parser: ParserResult, xs: seq[Parser -> ParserResult]): ParserResult =
  ## Threads `parser` through every step in `xs`, in order. Once a step
  ## has failed, the remaining steps are skipped by `flatMap` and the
  ## error is carried to the end.
  result = parser
  for step in xs:
    result = result.flatMap(step)
|
|
|
|
proc foldTokens*[T](
  parserResult: ParserResult,
  onError: ParserError -> T,
  onSuccess: seq[Token] -> T,
): T =
  ## Collapses a `ParserResult` into a `T`: a failure is handed to
  ## `onError`, a success hands the parser's tokens to `onSuccess`.
  if parserResult.isErr():
    return onError(parserResult.error())
  onSuccess(parserResult.unsafeGet().tokens)
|
|
|
|
proc merge[T](t: Builder[T], parser: Parser, tree: seq[T]): Builder[T] =
  ## Creates a builder from `parser` and `tree`.
  ## Note that `t` itself is not read; it only selects `T`.
  result = (parser: parser, tree: tree)
|
|
|
|
proc mapTree[T](builder: BuilderResult[T], fn: seq[T] -> seq[T]): BuilderResult[T] =
  ## Applies `fn` to the tree of a successful builder, keeping its parser.
  ## A failed `builder` is passed through by `map`.
  builder.map(proc(current: Builder[T]): Builder[T] =
    (parser: current.parser, tree: fn(current.tree)))
|
|
|
|
proc applyParsers*[T](
  builder: Builder[T],
  parsers: seq[Parser -> ParserResult],
  tokenFoldFn: (seq[Token], seq[T]) -> seq[T],
): BuilderResult[T] =
  ## Runs `parsers` from the builder's current state, starting with an
  ## empty token list, and folds the newly matched tokens into the tree.
  ##
  ## * success -> a builder holding the advanced parser and the tree
  ##   returned by `tokenFoldFn(newTokens, builder.tree)`
  ## * failure -> the original `builder` paired with the rendered parse
  ##   error (this used to be the placeholder text "foo", which discarded
  ##   the actual failure)
  let parsed = ParserResult.ok(Parser(
    state: builder.parser.state,
    tokens: @[],  # only tokens matched by this call reach `tokenFoldFn`
  )).parseSeq(parsers)

  if parsed.isErr():
    return BuilderResult[T].err((builder, $(parsed.error())))

  let advanced = parsed.unsafeGet()
  BuilderResult[T].ok(
    builder.merge(advanced, tokenFoldFn(advanced.tokens, builder.tree))
  )
|
|
|
|
proc applyParsersSeq*[T](
  builder: BuilderResult[T],
  xs: seq[tuple[
    parsers: seq[Parser -> ParserResult],
    tokenFoldFn: (seq[Token], seq[T]) -> seq[T],
  ]]): BuilderResult[T] =
  ## Applies each `(parsers, tokenFoldFn)` stage of `xs` in order via
  ## `applyParsers`, feeding every stage the builder produced by the
  ## previous one. The first failure ends the run and is returned.
  result = builder
  for stage in xs:
    if result.isErr():
      break
    result = result.unsafeGet().applyParsers(stage.parsers, stage.tokenFoldFn)
|
|
|
|
proc foldBuilder*[T, T2](
  builderResult: BuilderResult[T],
  onError: string -> T2,
  onSuccess: seq[T] -> T2,
): T2 =
  ## Collapses a `BuilderResult` into a `T2`: a success hands the built
  ## tree to `onSuccess`, a failure hands the error message to `onError`.
  ##
  ## The return type is `T2`, matching what both callbacks produce. It was
  ## declared as `T`, which only type-checks when `T` and `T2` coincide.
  if builderResult.isOk():
    onSuccess(builderResult.unsafeGet().tree)
  else:
    # The error side is `(Builder[T], string)`; only the message is used.
    onError(builderResult.error()[1])
|
|
|
|
|
|
when isMainModule:
  # Small self-checks that run only when this file is executed directly.

  proc getTokens(x: ParserResult): seq[char] =
    ## Characters matched by `x`; a failure is echoed and yields `@[]`.
    x.foldTokens(
      proc(failure: ParserError): seq[char] =
        echo failure
        @[],
      proc(matched: seq[Token]): seq[char] = matched.mapIt(it.value),
    )

  proc testParser(x: string, ps: seq[Parser -> ParserResult]): seq[char] =
    ## Parses `x` with the steps `ps` and returns the matched characters.
    let parsed = initParserResult(x).parseSeq(ps)
    parsed.getTokens()

  # An optional prefix may be present or absent.
  let optionalPrefixParser = @[optional(ch('_')), str("ABC")]
  assert "_ABC".testParser(optionalPrefixParser) == @['_', 'A', 'B', 'C']
  assert "ABC".testParser(optionalPrefixParser) == @['A', 'B', 'C']

  # `+` sequences two steps into one.
  let andParser = @[ch('A') + ch('B'), ch('C')]
  assert "ABC".testParser(andParser) == @['A', 'B', 'C']

  # Printed rather than asserted.
  let newlineParser = @[str("ABC"), newline]
  echo "ABC".testParser(newlineParser)
|