Files
org-parser/src_v2/parser/parser_api.nim
Florian Schroedl 0ba76a1203 Add peek functions
2022-08-28 18:19:53 +02:00

402 lines
14 KiB
Nim

import std/[
options,
strutils,
strformat,
collections/sequtils,
sugar,
]
import fp/[
maybe,
resultM,
]
import ../utils/str
import ./parser_types
# -- Utilities
proc isStreamCompleted*(parser: Parser): bool =
  ## Report whether the `parser` position has reached (or passed) the last
  ## index of its stream.
  let lastIndex = parser.state.stream.len - 1
  return parser.state.position >= lastIndex
proc isStreamCompleted*(parserResult: ParserResult): bool =
  ## Report whether the parser held by `parserResult` has reached (or passed)
  ## the last index of its stream.
  ## An error result always counts as "not completed".
  fold(
    parserResult,
    failure => false,
    isStreamCompleted,
  )
# -- Parsing functions
proc ch*(expectedChars: set[char]): parserFnT {.inline.} =
  ## Build a parser function accepting one character out of `expectedChars`.
  ##
  ## The returned parser inspects the character right after the current
  ## position:
  ## - no character left: `endOfStringErr`
  ## - character not in the set: `charMismatchErr`
  ## - otherwise: `ParserResult.ok` with the position advanced by one and
  ##   the character appended to the tokens.
  return proc(parser: Parser): ParserResult =
    let
      current = parser.state
      nextIndex = current.position + 1

    # Guard: there is nothing to read past the end of the stream.
    if nextIndex >= current.stream.len:
      return err(ParserError(
        kind: endOfStringErr,
        expected: expectedChars.prettyExpectedSet(),
        index: nextIndex,
        parser: parser,
      ))

    let candidate = current.stream[nextIndex]

    # Guard: the next character is not one of the accepted ones.
    if candidate notin expectedChars:
      return err(ParserError(
        kind: charMismatchErr,
        unexpected: $candidate,
        expected: expectedChars.prettyExpectedSet(),
        index: nextIndex,
        parser: parser,
      ))

    return Parser(
      state: ParserState(
        stream: current.stream,
        position: nextIndex,
        lastPosition: current.position,
      ),
      tokens: parser.tokens & initParserToken(candidate)
    ).ok()
proc ch*(expectedChar: char): parserFnT {.inline.} =
  ## Build a parser function accepting exactly `expectedChar`.
  ##
  ## The returned parser inspects the character right after the current
  ## position:
  ## - no character left: `endOfStringErr`
  ## - a different character: `charMismatchErr`
  ## - otherwise: `ParserResult.ok` with the position advanced by one and
  ##   the character appended to the tokens.
  return proc(parser: Parser): ParserResult =
    let
      current = parser.state
      nextIndex = current.position + 1

    # Guard: there is nothing to read past the end of the stream.
    if nextIndex >= current.stream.len:
      return err(ParserError(
        kind: endOfStringErr,
        expected: &"{expectedChar}",
        index: nextIndex,
        parser: parser,
      ))

    let foundChar = current.stream[nextIndex]

    # Guard: the next character differs from the expected one.
    if foundChar != expectedChar:
      return err(ParserError(
        kind: charMismatchErr,
        unexpected: &"{foundChar}",
        expected: &"{expectedChar}",
        index: nextIndex,
        parser: parser,
      ))

    return Parser(
      state: ParserState(
        stream: current.stream,
        position: nextIndex,
        lastPosition: current.position,
      ),
      tokens: parser.tokens & initParserToken(foundChar)
    ).ok()
let
  anyCh* = ch(AllChars)        ## Accepts any character of `AllChars`.
  digit* = ch(Digits)          ## Accepts one character of `Digits`.
  letter* = ch(Letters)        ## Accepts one character of `Letters`.
  space* = ch(' ')             ## Accepts a single space character.
  whitespace* = ch(Whitespace) ## Accepts one character of `Whitespace`.
  newline* = ch(Newlines)      ## Accepts one character of `Newlines`.
proc str*(expectedString: string): parserFnT {.inline.} =
  ## Build a parser function accepting `expectedString`.
  ##
  ## The characters are matched one after another with `ch`; the first
  ## failing character aborts the match and its error is returned.
  return proc(parser: Parser): ParserResult =
    var outcome: ParserResult = parser.ok()
    for expectedChar in expectedString:
      outcome = outcome.flatMap(ch(expectedChar))
      # Stop at the first failure, the remaining characters cannot match.
      if outcome.isErr:
        break
    return outcome
proc startOfStream*(parser: Parser): ParserResult =
  ## Succeed only while the parser is still at the start of the stream,
  ## i.e. while its position is `-1` or `0`.
  ## Any other position yields a `startOfStringErr` carrying that position.
  let position = parser.state.position
  if position != 0 and position != -1:
    return err(ParserError(
      kind: startOfStringErr,
      expected: "startOfStream",
      index: position,
      parser: parser,
    ))
  return ok(parser)
proc endOfStream*(parser: Parser): ParserResult =
  ## Succeed when the index following the current position is exactly the
  ## length of the stream, i.e. no character is left to read.
  ##
  ## - following index beyond the stream length: `endOfStringErr`
  ## - a character is still available: `charMismatchErr` reporting it
  let
    nextIndex = parser.state.position + 1
    streamLen = parser.state.stream.len

  if nextIndex == streamLen:
    return ok(parser)

  if nextIndex > streamLen:
    return err(ParserError(
      kind: endOfStringErr,
      expected: "endOfStream",
      index: nextIndex,
      parser: parser,
    ))

  # There is still input left: report the character found instead.
  let leftover = parser.state.stream[nextIndex]
  return err(ParserError(
    kind: charMismatchErr,
    unexpected: $leftover,
    expected: "endOfStream",
    index: nextIndex,
    parser: parser,
  ))
# -- Parsing API
proc plus*(parserFnA: parserFnT, parserFnB: parserFnT): parserFnT {.inline.} =
  ## Build a parser function that runs `parserFnA` and, when it succeeds,
  ## continues with `parserFnB` on its result.
  return proc(parser: Parser): ParserResult =
    let first = parserFnA(parser)
    return flatMap(first, parserFnB)
proc `+`*(parserFnA: parserFnT, parserFnB: parserFnT): parserFnT {.inline.} =
  ## Infix form of `plus`: runs `parserFnA` and, when it succeeds,
  ## continues with `parserFnB` on its result.
  plus(parserFnA, parserFnB)
proc optional*(parserFn: parserFnT): parserFnT {.inline.} =
  ## Build a parser function that tries `parserFn`:
  ## - on success the result of `parserFn` is returned
  ## - on failure the error is dropped and the untouched input parser is
  ##   returned as `ParserResult.ok`
  return proc(parser: Parser): ParserResult =
    let attempt = parserFn(parser)
    if not attempt.isOk():
      return parser.ok()
    return attempt
proc ignore*(parserFn: parserFnT): parserFnT {.inline.} =
  ## Build a parser function that runs `parserFn` but does not capture its
  ## tokens: the resulting state is kept, the tokens are reset to the ones
  ## the input parser already had.
  return proc(parser: Parser): ParserResult =
    let parsed = parserFn(parser)
    return parsed.map((x: Parser) => Parser(
      state: x.state,
      tokens: parser.tokens,
    ))
proc peek*(amount: int, parserFn: parserFnT): parserFnT {.inline.} =
  ## Build a parser function that checks `parserFn` without consuming input.
  ##
  ## `parserFn` is applied to a temporary parser whose position is shifted by
  ## `amount`. When it succeeds the original, unchanged `parser` is returned;
  ## when it fails its error is returned.
  return proc(parser: Parser): ParserResult =
    let shifted = initParser(
      stream = parser.state.stream,
      tokens = parser.tokens,
      position = parser.state.position + amount,
      lastPosition = parser.state.position,
    )
    # Discard whatever the lookahead produced and hand back the input parser.
    return parserFn(shifted).map(p => parser)
proc peek1*(parserFn: parserFnT): parserFnT =
  ## `peek` with the position shifted forward by one.
  peek(amount = 1, parserFn = parserFn)
proc peekCurrent*(parserFn: parserFnT): parserFnT =
  ## `peek` at the current position (no shift).
  peek(amount = 0, parserFn = parserFn)
proc peekBack1*(parserFn: parserFnT): parserFnT =
  ## `peek` with the position shifted back by one.
  peek(amount = -1, parserFn = parserFn)
proc manyUntil*(acceptFn: parserFnT, stopFn: parserFnT): parserFnT {.inline.} =
  ## Build a parser function that applies `acceptFn` repeatedly until
  ## `stopFn` would succeed on the current result, or until an error occurs.
  ##
  ## `stopFn` is only used as a check; its result is not kept.
  return proc(parser: Parser): ParserResult =
    var progress: ParserResult = parser.ok()
    while true:
      # Stop on the first error, or as soon as `stopFn` would match next.
      if not progress.isOk() or not progress.flatMap(stopFn).isErr():
        break
      progress = progress.flatMap(acceptFn)
    return progress
proc anyUntil*(stopFn: parserFnT): parserFnT {.inline.} =
  ## Build a parser function that accepts any character (`anyCh`) until
  ## `stopFn` would succeed, or until an error occurs.
  manyUntil(acceptFn = anyCh, stopFn = stopFn)
proc choice*(parserFns: seq[parserFnT]): parserFnT {.inline.} =
  ## Build a parser function that tries each of `parserFns` in order on the
  ## same input parser and returns the first successful result.
  ##
  ## When no parser succeeds a `choiceMismatchErr` is returned:
  ## - `expected` lists the `expected` fields of all collected errors
  ## - `unexpected` is taken from the first collected error, or is empty
  ##   when `parserFns` itself is empty
  ## - `index` is the position following the current one
  return proc(parser: Parser): ParserResult {.closure.} =
    var errors: seq[ParserResult] = @[]
    for fn in parserFns:
      let fnResult: ParserResult = fn(parser)
      if fnResult.isOk():
        # First match wins, the remaining alternatives are not tried.
        return fnResult
      errors.add(fnResult)

    let prettyErrors = errors.map((x: ParserResult) => x.error().expected)
    # An empty `parserFns` produces no errors at all; indexing `errors[0]`
    # unconditionally would raise an index defect in that case.
    let firstUnexpected =
      if errors.len > 0: errors[0].error().unexpected
      else: ""
    return err(ParserError(
      kind: choiceMismatchErr,
      index: parser.state.position + 1,
      expected: &"Choice ({prettyErrors})",
      unexpected: firstUnexpected,
      parser: parser,
    ))
proc following*(parserFns: seq[parserFnT]): parserFnT {.inline.} =
  ## Build a parser function that applies all `parserFns` one after another.
  ## Every one of them has to succeed for the result to be
  ## `ParserResult.ok`.
  return proc(parser: Parser): ParserResult {.closure.} =
    var chained: ParserResult = parser.ok()
    for fn in parserFns:
      chained = chained.flatMap(fn)
    return chained
proc between*(startParserFn: parserFnT, stopParserFn: parserFnT): parserFnT -> parserFnT {.inline.} =
  ## Build a wrapper that matches a given `parserFn` enclosed by
  ## `startParserFn` and `stopParserFn`.
  ## The delimiters are parsed but not kept in the tokens.
  ## Example:
  ## between(ch('('), ch(')'))(str("abc")) => Matches (abc)
  return proc(parserFn: parserFnT): parserFnT {.closure.} =
    ignore(startParserFn)
      .plus(parserFn)
      .plus(ignore(stopParserFn))
let betweenPair* = proc(delimiterParserFn: parserFnT): parserFnT -> parserFnT {.inline.} =
  ## Build a wrapper that matches a given `parserFn` enclosed by the same
  ## `delimiterParserFn` on both sides.
  ## The delimiters are parsed but not kept in the tokens.
  ## Example:
  ## betweenPair(ch('"'))(str("abc")) => Matches "abc"
  return proc(parserFn: parserFnT): parserFnT {.closure.} =
    ignore(delimiterParserFn)
      .plus(parserFn)
      .plus(ignore(delimiterParserFn))
proc anyBetween*(startParserFn: parserFnT, stopParserFn: parserFnT): parserFnT {.inline.} =
  ## Build a parser function matching any characters enclosed by
  ## `startParserFn` and `stopParserFn`.
  ## The delimiters are parsed but not kept in the tokens.
  ## Example:
  ## anyBetween(ch('('), ch(')')) => Matches (abc)
  let enclose = between(startParserFn, stopParserFn)
  enclose(anyUntil(stopParserFn))
let anyBetweenPair* = proc(parserFn: parserFnT): parserFnT {.closure.} =
  ## Build a parser function matching any characters enclosed by the same
  ## delimiter `parserFn` on both sides.
  ## The delimiters are parsed but not kept in the tokens.
  ## Example:
  ## anyBetweenPair(ch('"')) => Matches "abc"
  anyBetween(startParserFn = parserFn, stopParserFn = parserFn)
# -- Parsing Aliases
const newlineEolExpectedErr = "NewlineEol"

# Accepts either one newline character or the end of the stream.
let newlineOrEolParser = choice(@[ch(Newlines), endOfStream])

proc newlineOrEol*(parser: Parser): ParserResult =
  ## Match a newline character or the end of the stream.
  ## On failure the error is passed through `setErrorExpectedField` with
  ## `newlineEolExpectedErr`.
  let outcome = newlineOrEolParser(parser)
  return outcome.mapErr(
    (x: ParserError) => x.setErrorExpectedField(newlineEolExpectedErr)
  )
const whitespaceEolExpectedErr = "WhitespaceEol"

# Accepts one whitespace character, one newline, or the end of the stream.
let whitespaceOrEolParser = choice(@[ch(Whitespace), newlineOrEolParser])

proc whitespaceOrEol*(parser: Parser): ParserResult =
  ## Match a whitespace character, a newline or the end of the stream.
  ## On failure the error is passed through `setErrorExpectedField` with
  ## `whitespaceEolExpectedErr`.
  let outcome = whitespaceOrEolParser(parser)
  return outcome.mapErr(
    (x: ParserError) => x.setErrorExpectedField(whitespaceEolExpectedErr)
  )
# -- Tests
when isMainModule:
  # Shared fixtures: parser results over two small input streams.
  let
    testParser123 = initParserResult("123")
    testAbc1Parser = initParserResult("abc1")

  block testParsingFunctions:
    let ch1 = ch('1')

    # Success
    assert testParser123.flatMap(ch1).tokensToString() == "1"
    assert testParser123.flatMap(anyCh).tokensToString() == "1"
    assert testParser123.flatMap(str("123")).tokensToString() == "123"

    # Mismatch
    assert testParser123.flatMap(ch('2')).error().kind == charMismatchErr
    assert testParser123.flatMap(ch(Letters)).error().kind == charMismatchErr
    assert testParser123.flatMap(str("1234")).error().kind == endOfStringErr
    assert testParser123.flatMap(str("456")).error().kind == charMismatchErr

    # Out of bounds
    # assert initParserResult("").flatMap(ch1).error().kind == endOfStringErr
    assert initParserResult("1").flatMap(ch1).flatMap(ch1).error().kind == endOfStringErr

    # Stream end reached
    assert initParserResult("1").flatMap(ch1).isStreamCompleted()
    assert not (initParserResult("12").flatMap(ch1).isStreamCompleted())
    assert not (initParserResult("").flatMap(ch1).isStreamCompleted())
    assert testParser123.flatMap(str("123")).isStreamCompleted()

    # endOfStream
    assert testParser123.flatMap(str("123") + endOfStream).tokensToString() == "123"

  block testParsingApi:
    # plus, +
    assert testAbc1Parser.flatMap(str("abc") + ch('1')).tokensToString() == "abc1"
    assert testParser123.flatMap(str("12").plus(digit)).tokensToString() == "123"

    # optional
    assert testParser123.flatMap(optional(ch('1'))).tokensToString() == "1"
    assert testParser123.flatMap(optional(ch('2'))).tokensToString() == ""

    # ignore
    assert testParser123.flatMap(ignore(ch('1'))).tokensToString() == ""

    # peek
    assert testParser123.flatMap(ch('1') + peekCurrent(ch('2')) + ch('2')).tokensToString() == "12"
    assert testParser123.flatMap(ch('1') + peekCurrent(ch('1'))).isErr()
    assert testParser123.flatMap(peekCurrent(startOfStream)).isOk()
    assert testParser123.flatMap(ch('1') + peekBack1(startOfStream)).isOk()

    # manyUntil
    assert testAbc1Parser.flatMap(manyUntil(anyCh, digit)).tokensToString() == "abc"

    # anyUntil
    assert testAbc1Parser.flatMap(anyUntil(digit)).tokensToString() == "abc"

    # choice
    assert testAbc1Parser.flatMap(choice(@[digit, ch('a')])).tokensToString() == "a"
    assert testAbc1Parser.flatMap(choice(@[digit])).error().kind == choiceMismatchErr

    # following
    assert testAbc1Parser.flatMap(following(@[ch('a'), str("bc"), digit])).tokensToString() == "abc1"

  block testParsingHelpers:
    let
      testParenParser = initParserResult("(123)")
      testQuoteParser = initParserResult("\"123\"")
      testQuote = ch('"')
      testBetweenParen = between(ch('('), ch(')'))
      testBetweenQuotes = betweenPair(ch('"'))

    # between
    assert testParenParser.flatMap(testBetweenParen(str("123"))).tokensToString() == "123"
    assert testQuoteParser.flatMap(testBetweenQuotes(str("123"))).tokensToString() == "123"

    # anyBetween
    assert testParenParser.flatMap(anyBetween(ch('('), ch(')'))).tokensToString() == "123"
    assert testQuoteParser.flatMap(anyBetweenPair(testQuote)).tokensToString() == "123"

  block testParsingAliases:
    assert initParserResult("").flatMap(newlineOrEol).isOk()
    assert initParserResult("abc ").flatMap(str("abc") + newlineOrEol).error().expected == newlineEolExpectedErr
    assert initParserResult("").flatMap(whitespaceOrEol).isOk()
    assert initParserResult("abc ").flatMap(str("abc") + whitespaceOrEol + whitespaceOrEol).tokensToString() == "abc "

  block testImplementations:
    let testSentenceStr = "This is a sentence\n\nFollowing another sentence."
    let testSentenceParser = initParserResult(testSentenceStr)

    # First sentence: everything up to the newline, then the blank line.
    let firstSentence = following(@[anyUntil(newline), newline, newlineOrEol])
    # Second sentence: letters/whitespace up to the dot, then the stream end.
    let secondSentence =
      manyUntil(choice(@[letter, whitespace]), ch('.')) + ch('.') + newlineOrEol
    let testSentence1 = firstSentence.plus(secondSentence)

    assert testSentenceParser.flatMap(testSentence1).tokensToString() == testSentenceStr