Structure
This commit is contained in:
338
src/parser/parser.nim
Normal file
338
src/parser/parser.nim
Normal file
@@ -0,0 +1,338 @@
|
||||
import std/options
|
||||
import std/strutils
|
||||
import std/strformat
|
||||
import std/collections/sequtils
|
||||
import std/sugar
|
||||
import std/collections/tables
|
||||
import results
|
||||
import fusion/matching
|
||||
import fp/maybe
|
||||
|
||||
{.experimental: "caseStmtMacros".}
|
||||
|
||||
type
  ParserState* = ref object
    ## Cursor over the text being parsed.
    stream: string     ## The complete input text.
    position: int      ## Index of the most recently consumed character
                       ## (`-1` for a freshly initialised parser).
    lastPosition: int  ## Value of `position` before the latest step.

  Token* = ref object
    ## One character accepted by a parser.
    value*: char

  Parser* = ref object
    ## Parsing context: the cursor plus the tokens collected so far.
    state: ParserState
    tokens: seq[Token]

  ParseErrorKind = enum
    ## Kind of failure carried by a `ParserError`.
    choiceMismatchErr
    charMismatchErr
    endOfStringErr

  ParserError = ref object
    ## Description of a failed parse step.
    kind: ParseErrorKind
    unexpected: string  ## Text that was found in the input.
    expected: string    ## Description of what the parser wanted.
    index: int          ## Position in the stream the error refers to.
    parser: Parser      ## Parser the failing step was applied to.

  ParserResult* = Result[Parser, ParserError]

  Builder*[T] = tuple[parser: Parser, tree: seq[T]]

  BuilderResult*[T] = Result[Builder[T], (Builder[T], string)]
|
||||
|
||||
proc indentKey(x: string, count: int): string =
  ## Indents every line of `x` except the first one by `count` spaces.
  ##
  ## Intended for multi-line values that are interpolated right after a
  ## `key: ` prefix, where the first line is already positioned by the prefix.
  # `indent` pads all lines, so the padding in front of the first line is
  # dropped again. `substr` is used instead of `delete` with an explicit
  # slice because it is also safe when `x` is empty and `count` is 0.
  x.indent(count).substr(count)
|
||||
|
||||
proc `$`*(x: Token): string =
  ## Renders a token as a multi-line `Token(...)` description containing its
  ## character value.
  result = "Token(\n"
  result.add "value: " & $x.value & ",\n"
  result.add ")"
|
||||
|
||||
proc `$`*(x: ParserState): string =
  ## Renders the cursor state as a multi-line `ParserState(...)` description
  ## listing the stream (in double quotes) and both positions.
  let fields = [
    "stream: \"" & x.stream & "\",",
    "position: " & $x.position & ",",
    "lastPosition: " & $x.lastPosition & ",",
  ]
  "ParserState(\n" & fields.join("\n") & "\n)"
|
||||
|
||||
proc `$`*(x: Parser): string =
  ## Renders a parser as a multi-line `Parser(...)` description.
  ##
  ## The nested state and token listings are passed through `indentKey` so
  ## their continuation lines are shifted two spaces to the right.
  let
    stateText = indentKey($x.state, 2)
    tokensText = indentKey($x.tokens, 2)
  result = "Parser(\n"
  result.add "state: " & stateText & ",\n"
  result.add "tokens: " & tokensText & ",\n"
  result.add ")"
|
||||
|
||||
proc prettyExpectedSet(x: set[char]): string =
  ## Describes a character set for use in error messages.
  ##
  ## The named `strutils` sets (`AllChars`, `Digits`, `HexDigits`, `Letters`,
  ## `Newlines`, `Whitespace`) are shown by name followed by their members;
  ## any other set falls back to its `$` representation.
  # The descriptions are raw literals on purpose: in an ordinary literal
  # `\n`, `\r`, `\x00`, ... would be turned into real control characters and
  # end up inside the (single-line) error message.
  if x == AllChars:
    r"AllChars {'\x00'..'\xFF'}"
  elif x == Digits:
    r"Digits {'0'..'9'}"
  elif x == HexDigits:
    r"HexDigits {'0'..'9', 'A'..'F', 'a'..'f'}"
  elif x == Letters:
    r"Letters {'A'..'Z', 'a'..'z'}"
  elif x == Newlines:
    r"Newlines {'\r', '\n'}"
  elif x == Whitespace:
    r"Whitespace {' ', '\t', '\v', '\r', '\n', '\f'}"
  else:
    $x
|
||||
|
||||
proc `$`*(x: ParserError): string =
  ## Renders a parse error as a human-readable report.
  ##
  ## Character and choice mismatches print the input stream, a caret placed
  ## `index` columns in, and the expected/unexpected texts. An
  ## `endOfStringErr` renders as the plain text `ParseError`.
  case x.kind
  of charMismatchErr, choiceMismatchErr:
    # TODO: Only works for single line right now
    let title =
      if x.kind == choiceMismatchErr: "Choice Mismatch Error"
      else: "Character Mismatch Error"
    let original = x.parser.state.stream
    # Clamp at zero so a negative index is never handed to `repeat`.
    let errSpace = " ".repeat(max(0, x.index))

    &"Parsing Error ({title}):\n" &
      &"{original}\n" &
      &"{errSpace}^ Expected '{x.expected}' but got '{x.unexpected}'"
  of endOfStringErr:
    "ParseError"
|
||||
|
||||
proc initParser*(str: string): Parser =
  ## Creates a parser over `str` with no tokens collected yet.
  ##
  ## The cursor starts at `-1`, i.e. just before the first character, and
  ## `lastPosition` starts at `0`.
  let start = ParserState(stream: str, position: -1, lastPosition: 0)
  Parser(state: start, tokens: @[])
|
||||
|
||||
func ch*(expectedChars: set[char]): (Parser -> ParserResult) {.inline.} =
  ## Builds a parser that consumes exactly one character that is a member of
  ## `expectedChars`.
  ##
  ## On success the returned parser yields a new `Parser` advanced by one
  ## position, with the matched character appended to its tokens. It fails
  ## with `endOfStringErr` when no character is left to read and with
  ## `charMismatchErr` when the next character is not in the set.
  return func(parser: Parser): ParserResult =
    let cursor = parser.state
    let nextPos = cursor.position + 1

    # Guard: the stream has no character at `nextPos`.
    if nextPos >= cursor.stream.len:
      return ParserResult.err(ParserError(
        kind: endOfStringErr,
        expected: expectedChars.prettyExpectedSet(),
        index: nextPos,
        parser: parser,
      ))

    let foundChar = cursor.stream[nextPos]

    # Guard: the character is present but not one of the accepted ones.
    if foundChar notin expectedChars:
      return ParserResult.err(ParserError(
        kind: charMismatchErr,
        unexpected: $foundChar,
        expected: expectedChars.prettyExpectedSet(),
        index: nextPos,
        parser: parser,
      ))

    let advanced = ParserState(
      stream: cursor.stream,
      position: nextPos,
      lastPosition: cursor.position,
    )
    return ParserResult.ok(Parser(
      state: advanced,
      tokens: parser.tokens & Token(value: foundChar),
    ))
|
||||
|
||||
func ch*(expectedChar: char): (Parser -> ParserResult) {.inline.} =
  ## Builds a parser that consumes exactly the character `expectedChar`.
  ##
  ## On success the returned parser yields a new `Parser` advanced by one
  ## position, with the matched character appended to its tokens. It fails
  ## with `endOfStringErr` when no character is left to read and with
  ## `charMismatchErr` when the next character differs from `expectedChar`.
  return func(parser: Parser): ParserResult =
    let cursor = parser.state
    let nextPos = cursor.position + 1

    # Guard: the stream has no character at `nextPos`.
    if nextPos >= cursor.stream.len:
      return ParserResult.err(ParserError(
        kind: endOfStringErr,
        expected: $expectedChar,
        index: nextPos,
        parser: parser,
      ))

    let foundChar = cursor.stream[nextPos]

    # Guard: a character is present but it is not the wanted one.
    if foundChar != expectedChar:
      return ParserResult.err(ParserError(
        kind: charMismatchErr,
        unexpected: $foundChar,
        expected: $expectedChar,
        index: nextPos,
        parser: parser,
      ))

    let advanced = ParserState(
      stream: cursor.stream,
      position: nextPos,
      lastPosition: cursor.position,
    )
    return ParserResult.ok(Parser(
      state: advanced,
      tokens: parser.tokens & Token(value: foundChar),
    ))
|
||||
|
||||
func str*(s: string): (Parser -> ParserResult) {.inline.} =
  ## Builds a parser that matches the characters of `s` one after another by
  ## chaining a `ch` parser per character with `flatMap`.
  ##
  ## For an empty `s` the returned parser succeeds with the parser it was
  ## given.
  return func(parser: Parser): ParserResult =
    result = ParserResult.ok(parser)
    for wanted in s:
      result = result.flatMap(ch(wanted))
|
||||
|
||||
proc endOfStream*(parser: Parser): ParserResult =
  ## Succeeds with `parser` unchanged when its position is the index of the
  ## last character of the stream; otherwise fails with an `endOfStringErr`
  ## that reports the current position.
  let cursor = parser.state
  if cursor.position != cursor.stream.len - 1:
    return ParserResult.err(ParserError(
      kind: endOfStringErr,
      expected: "EndOfString",
      index: cursor.position,
      parser: parser,
    ))
  return ParserResult.ok(parser)
|
||||
|
||||
func ignore*(parserFn: Parser -> ParserResult): (Parser -> ParserResult) {.inline.} =
  ## Wraps `parserFn` so that its input is consumed but the tokens it
  ## produced are discarded: a successful result keeps the new state and the
  ## token list of the parser that was passed in.
  return proc(parser: Parser): ParserResult =
    let outcome = parserFn(parser)
    if outcome.isErr():
      # Failures are handed back untouched.
      return outcome
    return ParserResult.ok(Parser(
      state: outcome.unsafeGet().state,
      tokens: parser.tokens,
    ))
|
||||
|
||||
func manyUntil*(acceptFn: Parser -> ParserResult, stopFn: Parser -> ParserResult): (Parser -> ParserResult) {.inline.} =
  ## Builds a parser that applies `acceptFn` once and then keeps applying it
  ## until `stopFn` would succeed on the current result.
  ##
  ## `stopFn` is only used as a look-ahead: its result is not kept. If
  ## `acceptFn` fails at any point, that failure is the overall result.
  return proc(parser: Parser): ParserResult =
    result = acceptFn(parser)
    while result.isOk():
      # Probe the stop condition without committing to its outcome.
      if result.flatMap(stopFn).isOk():
        break
      result = result.flatMap(acceptFn)
|
||||
|
||||
func anyUntil*(stopFn: Parser -> ParserResult): (Parser -> ParserResult) {.inline.} =
  ## Shorthand for `manyUntil` with an accept parser that takes any
  ## character (`ch(AllChars)`).
  let anyChar = ch(AllChars)
  result = manyUntil(anyChar, stopFn)
|
||||
|
||||
func choice*(parsers: seq[Parser -> ParserResult]): (Parser -> ParserResult) {.inline.} =
  ## Builds a parser that tries each of `parsers` in order against the same
  ## starting `Parser` and returns the first successful result.
  ##
  ## When every alternative fails, or `parsers` is empty, the result is a
  ## `choiceMismatchErr`. Its `expected` text lists what each alternative
  ## wanted; `unexpected` and `index` are taken from the first alternative's
  ## error so the reported character and its position belong together.
  return proc(parser: Parser): ParserResult =
    var failures = newSeqOfCap[ParserError](parsers.len)

    for fn in parsers:
      let attempt = fn(parser)
      if attempt.isOk():
        return attempt
      failures.add(attempt.error())

    let prettyErrors = failures.mapIt(it.expected)
    # With no alternatives there is no first error to borrow from, so point
    # at the position the next character would have been read from.
    let (unexpected, index) =
      if failures.len > 0: (failures[0].unexpected, failures[0].index)
      else: ("", parser.state.position + 1)

    return ParserResult.err(ParserError(
      kind: choiceMismatchErr,
      expected: &"Choice ({prettyErrors})",
      unexpected: unexpected,
      index: index,
      parser: parser,
    ))
|
||||
|
||||
proc parseSeq*(parser: ParserResult, xs: seq[Parser -> ParserResult]): ParserResult =
  ## Threads `parser` through the parsers in `xs`, in order, chaining each
  ## step onto the previous result with `flatMap`.
  result = parser
  for step in xs:
    result = result.flatMap(step)
|
||||
|
||||
proc foldTokens*[T](
  parserResult: ParserResult,
  onError: ParserError -> T,
  onSuccess: seq[Token] -> T,
): T =
  ## Collapses a parser result into a single `T`: `onError` receives the
  ## error of a failed result, `onSuccess` the tokens of a successful one.
  if parserResult.isErr():
    onError(parserResult.error())
  else:
    onSuccess(parserResult.unsafeGet().tokens)
|
||||
|
||||
proc merge[T](t: Builder[T], parser: Parser, tree: seq[T]): Builder[T] =
  ## Builds a `Builder[T]` out of `parser` and `tree`.
  ##
  ## `t` is not read; the result consists entirely of the other two
  ## arguments.
  result = (parser: parser, tree: tree)
|
||||
|
||||
proc mapTree[T](builder: BuilderResult[T], fn: seq[T] -> seq[T]): BuilderResult[T] =
  ## Transforms the tree held by `builder` with `fn` via the result's `map`,
  ## leaving the parser of the builder as it is.
  proc rebuild(b: Builder[T]): Builder[T] =
    # Same parser, tree replaced by whatever `fn` returns for it.
    Builder[T]((parser: b.parser, tree: fn(b.tree)))

  builder.map(rebuild)
|
||||
|
||||
proc applyParsers*[T](
  builder: Builder[T],
  parsers: seq[Parser -> ParserResult],
  tokenFoldFn: (seq[Token], seq[T]) -> seq[T],
): BuilderResult[T] =
  ## Runs `parsers` in sequence from the builder's current parser state and
  ## folds the tokens they produce into the builder's tree.
  ##
  ## The parsers start with an empty token list, so `tokenFoldFn` only sees
  ## the tokens produced by this call, together with the current tree. On
  ## success the result holds the advanced parser and the tree returned by
  ## `tokenFoldFn`. On failure it holds the unchanged `builder` paired with
  ## the rendered parse error.
  let parsed = ParserResult.ok(Parser(
    state: builder.parser.state,
    tokens: @[],
  )).parseSeq(parsers)

  if parsed.isErr():
    # Report the real parse error instead of a placeholder text.
    return BuilderResult[T].err((builder, $(parsed.error())))

  let advanced = parsed.unsafeGet()
  return BuilderResult[T].ok(
    builder.merge(advanced, tokenFoldFn(advanced.tokens, builder.tree))
  )
|
||||
|
||||
proc applyParsersSeq*[T](
  builder: BuilderResult[T],
  xs: seq[tuple[
    parsers: seq[Parser -> ParserResult],
    tokenFoldFn: (seq[Token], seq[T]) -> seq[T],
  ]]): BuilderResult[T] =
  ## Applies every `(parsers, tokenFoldFn)` pair of `xs` in order, feeding
  ## the builder produced by one `applyParsers` call into the next through
  ## `flatMap`.
  result = builder
  for entry in xs:
    # Copy into a local so the closure below captures a plain value.
    let step = entry
    result = result.flatMap((current: Builder[T]) =>
      current.applyParsers(step.parsers, step.tokenFoldFn))
|
||||
|
||||
proc foldBuilder*[T, T2](
  builderResult: BuilderResult[T],
  onError: string -> T2,
  onSuccess: seq[T] -> T2,
): T2 =
  ## Collapses a builder result into a single `T2`: `onSuccess` receives the
  ## tree of a successful result, `onError` the message of a failed one.
  ##
  ## The return type is `T2`, the type both callbacks produce, so the element
  ## type of the tree and the folded type may differ.
  if builderResult.isOk():
    onSuccess(builderResult.unsafeGet().tree)
  else:
    # The error payload is `(builder, message)`; only the message is used.
    onError(builderResult.error()[1])
|
||||
|
||||
when isMainModule:
  # Demo: parse the leading stars and the remaining text of a sample line,
  # collecting each part as one string in the builder's tree.
  type
    StringBuilderT = string
    StringBuilder = Builder[StringBuilderT]
    StringBuilderResult = BuilderResult[StringBuilderT]

  # One or more '*' up to (not including) a space, then the space itself,
  # which is consumed without being kept as a token.
  let parseHeadingStars = @[
    manyUntil(ch('*'), ch(' ')),
    ignore(ch(' ')),
  ]

  # Everything that is left, up to the end of the stream.
  let parseHeadingText = @[anyUntil(endOfStream)]

  proc stringConcat(typeInfo: StringBuilderT):
      (seq[Token], seq[StringBuilderT]) -> seq[StringBuilderT] =
    ## Returns a token folder that joins the token characters into one
    ## string prefixed with `typeInfo` and appends it to the tree.
    return proc(xs: seq[Token], ys: seq[StringBuilderT]): seq[StringBuilderT] =
      var line = typeInfo
      for token in xs:
        line.add(token.value)
      return ys & line

  let seed = StringBuilderResult.ok(StringBuilder((
    parser: initParser("**** Some stars"),
    tree: newSeq[StringBuilderT](),
  )))

  let parsed = seed.applyParsersSeq(@[
    (parseHeadingStars, stringConcat("Stars: ")),
    (parseHeadingText, stringConcat("Text: ")),
  ])

  let sampleBuilder = parsed.foldBuilder(
    err => &"Error Parsing: {err}",
    xs => "Parser Succesfull:\n" & xs.join("\n"),
  )

  echo sampleBuilder
|
||||
Reference in New Issue
Block a user