gigaparsec/gigaparsec.go

325 lines
8.3 KiB
Go
Raw Normal View History

package gigaparsec
2024-09-14 02:48:33 +00:00
//go:generate go run ./internal/bindgen -bindpath bind.go -seqpath seq.go -max 5 -pkg gigaparsec
import (
"errors"
"fmt"
"io"
2024-09-03 03:14:46 +00:00
"slices"
2024-09-18 02:10:19 +00:00
"strings"
"git.codemonkeysoftware.net/b/gigaparsec/cursor"
)
type Result[In, Out any] struct {
2024-09-13 16:44:30 +00:00
consumed, succeeded bool
value Out
next State[In]
message Message
}
func Fail[In, Out any](consumed bool, msg Message) Result[In, Out] {
return Result[In, Out]{
2024-09-13 16:44:30 +00:00
consumed: consumed,
succeeded: false,
message: msg,
}
}
2024-09-13 16:44:30 +00:00
func (r Result[In, Out]) Failed() (failed, consumed bool, msg Message) {
failed = !r.succeeded
if failed {
consumed = r.consumed
2024-09-11 17:09:16 +00:00
msg = r.message
}
return
}
func Succeed[In, Out any](consumed bool, value Out, next State[In], msg Message) Result[In, Out] {
return Result[In, Out]{
2024-09-13 16:44:30 +00:00
succeeded: true,
value: value,
consumed: consumed,
next: next,
message: msg,
}
}
2024-09-13 16:44:30 +00:00
func (r Result[In, Out]) Succeeded() (succeeded, consumed bool, value Out, next State[In], msg Message) {
succeeded = r.succeeded
if succeeded {
consumed = r.consumed
2024-09-11 17:09:16 +00:00
value = r.value
next = r.next
2024-09-11 17:09:16 +00:00
msg = r.message
}
return
}
func (r Result[In, Out]) Consumed() bool {
return r.consumed
}
func (r Result[In, Out]) Consume(consumed bool) Result[In, Out] {
r.consumed = consumed
return r
}
2024-09-11 21:27:10 +00:00
// Message describes the outcome of a parse at a position in the input:
// what was found there and what would have been accepted.
type Message struct {
	// pos is the input position the message refers to.
	pos uint64

	// got describes the input that was found; it is empty when there is
	// nothing to report.
	got string

	// expected lists descriptions of the inputs that would have been accepted.
	expected []string
}

// MakeMessage creates a Message for position pos with the given got
// description and expected alternatives. The expected slice is stored as
// passed, without copying.
func MakeMessage(pos uint64, got string, expected ...string) Message {
	return Message{
		pos:      pos,
		got:      got,
		expected: expected,
	}
}

// PosMethod returns the position stored in m.
//
// TODO rename
func (m Message) PosMethod() uint64 {
	return m.pos
}

// GotMethod returns the got description stored in m.
//
// TODO rename
func (m Message) GotMethod() string {
	return m.got
}

// ExpectedMethod returns the expected alternatives stored in m. The returned
// slice is the one held by m, not a copy.
//
// TODO rename
func (m Message) ExpectedMethod() []string {
	return m.expected
}

// expect returns a copy of m whose expected alternatives are replaced by the
// single entry s.
func (m Message) expect(s string) Message {
	m.expected = []string{s}
	return m
}

// String formats m as "bad parse at <pos>", followed, when present, by
// ": got <got>" and/or "expected <a> or <b> ...", separated by a comma when
// both parts appear.
func (m Message) String() string {
	var b strings.Builder
	fmt.Fprintf(&b, "bad parse at %d", m.pos)

	hasGot := m.got != ""
	hasExpected := len(m.expected) > 0
	if !hasGot && !hasExpected {
		return b.String()
	}

	b.WriteString(":")
	if hasGot {
		fmt.Fprintf(&b, " got %v", m.got)
		if hasExpected {
			b.WriteString(",")
		}
	}
	if hasExpected {
		fmt.Fprintf(&b, " expected %v", strings.Join(m.expected, " or "))
	}
	return b.String()
}

// MessageOK creates a Message for position pos with no got description and
// no expected alternatives.
func MessageOK(pos uint64) Message { return Message{pos: pos} }

// MessageEnd creates a Message for position pos reporting that the end of the
// input was found, with the given expected alternatives.
func MessageEnd(pos uint64, expected ...string) Message {
	return Message{pos: pos, got: "end of input", expected: expected}
}
func MakeState[In any](c cursor.Cursor[In]) State[In] {
return State[In]{cursor: c}
2024-09-08 18:09:20 +00:00
}
type State[In any] struct {
cursor cursor.Cursor[In]
}
func (s State[In]) Cursor() cursor.Cursor[In] {
2024-09-08 18:09:20 +00:00
return s.cursor
}
func (s State[In]) Read(dst []In) (n uint64, next State[In], err error) {
2024-09-08 18:09:20 +00:00
n, c, err := s.cursor.Read(dst)
return n, State[In]{cursor: c}, err
2024-09-08 18:09:20 +00:00
}
func (s State[In]) Pos() uint64 {
2024-09-08 18:09:20 +00:00
return s.cursor.Pos()
}
2024-09-10 22:46:31 +00:00
func (s State[In]) At(pos uint64) State[In] {
return State[In]{cursor: s.cursor.At(pos)}
}
type Parser[In, Out any] func(State[In]) (Result[In, Out], error)
// Label creates a parser identical to p, except that a failed result will
// include label as an expected parse.
2024-09-13 17:35:13 +00:00
func (p Parser[In, Out]) Label(label string) Parser[In, Out] {
return func(input State[In]) (Result[In, Out], error) {
result, err := p(input)
if err != nil || result.Consumed() {
return result, err
}
if succeeded, _, value, next, msg := result.Succeeded(); succeeded {
return Succeed(false, value, next, msg.expect(label)), nil
}
_, _, msg := result.Failed()
return Fail[In, Out](false, msg.expect(label)), nil
}
}
// Return creates a parser that consumes no input and always succeeds,
// yielding value and handing back the state it was given.
func Return[In, Out any](value Out) Parser[In, Out] {
	return func(input State[In]) (Result[In, Out], error) {
		ok := Succeed(false, value, input, MessageOK(input.Pos()))
		return ok, nil
	}
}
// Satisfy creates a parser that attempts to read an input value for which pred returns true.
// If Satisfy succeeds, it returns the matched input value.
func Satisfy[T any](pred func(T) bool) Parser[T, T] {
return func(state State[T]) (Result[T, T], error) {
token := make([]T, 1)
n, next, err := state.Read(token)
if errors.Is(err, io.EOF) {
return Fail[T, T](false, MessageEnd(state.Pos())), nil
}
2024-09-08 18:09:20 +00:00
if err != nil {
return Result[T, T]{}, err
2024-09-08 18:09:20 +00:00
}
if n != 1 {
panic(fmt.Sprintf("expected 1 element from Read, but got %d", n))
}
if pred(token[0]) {
return Succeed(true, token[0], next, MessageOK(state.Pos())), nil
}
2024-09-11 21:27:10 +00:00
return Fail[T, T](false, MakeMessage(state.Pos(), fmt.Sprint(token))), nil
}
}
2024-09-18 02:10:19 +00:00
// Match creates a parser that attempts to read an input value equal to x.
// If Match succeeds, it returns the matched input value and reports the
// input as consumed. At the end of the input, or when the value read differs
// from x, it fails without consuming and its message lists x as expected.
// Any other read error is returned as the parser's error.
//
// The parser panics if the read reports success but did not return exactly
// one value.
func Match[T comparable](x T) Parser[T, T] {
	expected := fmt.Sprint(x)
	return func(state State[T]) (Result[T, T], error) {
		token := make([]T, 1)
		n, next, err := state.Read(token)
		if errors.Is(err, io.EOF) {
			return Fail[T, T](false, MessageEnd(state.Pos(), expected)), nil
		}
		if err != nil {
			return Result[T, T]{}, err
		}
		// Without this check a short read would compare the zero value of T
		// against x and could report a match that never happened.
		if n != 1 {
			panic(fmt.Sprintf("expected 1 element from Read, but got %d", n))
		}
		if token[0] == x {
			return Succeed(true, token[0], next, MessageOK(state.Pos())), nil
		}
		// Format the token the same way as expected (a single value, not a
		// one-element slice) so the two halves of the message are comparable.
		return Fail[T, T](false, MakeMessage(state.Pos(), fmt.Sprint(token[0]), expected)), nil
	}
}
// MatchSlice creates a parser that attempts to read the contents of s from the input.
// If MatchSlice succeeds, it returns a copy of the matched input values.
func MatchSlice[T comparable](s []T) Parser[T, []T] {
2024-09-08 18:09:20 +00:00
expected := fmt.Sprint(s)
2024-09-11 17:44:40 +00:00
return func(state State[T]) (Result[T, []T], error) {
2024-09-08 18:09:20 +00:00
token := make([]T, len(s))
_, next, err := state.Read(token)
if errors.Is(err, io.EOF) {
return Fail[T, []T](false, MessageEnd(state.Pos())), nil
2024-09-08 18:09:20 +00:00
}
if err != nil {
return Result[T, []T]{}, err
2024-09-08 18:09:20 +00:00
}
if !slices.Equal(s, token) {
2024-09-11 21:27:10 +00:00
return Fail[T, []T](false, MakeMessage(state.Pos(), fmt.Sprint(token), expected)), nil
2024-09-08 18:09:20 +00:00
}
return Succeed(true, token, next, MessageOK(state.Pos())), nil
2024-09-08 18:09:20 +00:00
}
}
2024-09-03 03:20:13 +00:00
func Choose[In, Out any](p Parser[In, Out], ps ...Parser[In, Out]) Parser[In, Out] {
// TODO Check this against the Parsec paper again, and simplify it.
2024-09-03 03:14:46 +00:00
all := append([]Parser[In, Out]{p}, ps...)
return func(input State[In]) (Result[In, Out], error) {
2024-09-03 03:14:46 +00:00
expecteds := make([][]string, 0, len(all))
var value Out
var got string
var failed bool
for _, q := range all {
result, err := q(input)
if err != nil {
return Result[In, Out]{}, err
}
if result.Consumed() {
return result, nil
2024-09-03 03:14:46 +00:00
}
var qMsg Message
if isFailure, _, msg := result.Failed(); isFailure {
qMsg = msg
failed = true
2024-09-03 03:14:46 +00:00
} else {
_, _, qValue, _, msg := result.Succeeded()
2024-09-03 03:14:46 +00:00
if failed {
value = qValue
2024-09-03 03:14:46 +00:00
failed = false
}
qMsg = msg
2024-09-03 03:14:46 +00:00
}
if got == "" {
2024-09-11 21:27:10 +00:00
got = qMsg.got
2024-09-03 03:14:46 +00:00
}
}
2024-09-11 21:27:10 +00:00
msg := MakeMessage(input.Pos(), got, slices.Concat(expecteds...)...)
2024-09-03 03:14:46 +00:00
if failed {
return Fail[In, Out](false, msg), nil
2024-09-03 03:14:46 +00:00
}
return Succeed(false, value, input, msg), nil
2024-09-03 03:14:46 +00:00
}
}
2024-09-02 18:48:48 +00:00
// Try behaves identically to p, except that if p returns an error,
// Try will pretend that no input was consumed. This allows infinite
// lookahead: Since Choose only calls another parser when the previous
// parser consumed nothing, Try will allow backing out of a complex
2024-09-11 17:43:19 +00:00
// parser that partially succeeded.
func Try[In, Out any](p Parser[In, Out]) Parser[In, Out] {
return func(input State[In]) (Result[In, Out], error) {
result, err := p(input)
if err != nil {
return result, err
}
if failed, _, msg := result.Failed(); failed {
return Fail[In, Out](false, msg), nil
}
return result, nil
}
}
2024-09-09 16:33:46 +00:00
// Map creates a parser that converts the output of p from Out1 to Out2.
2024-09-09 16:33:46 +00:00
func Map[In, Out1, Out2 any](p Parser[In, Out1], f func(Out1) Out2) Parser[In, Out2] {
return Bind(p, func(out Out1) Parser[In, Out2] {
return Return[In](f(out))
})
2024-09-09 16:33:46 +00:00
}
2024-09-11 01:00:44 +00:00
func end[In any](s State[In]) (Result[In, struct{}], error) {
2024-09-11 17:44:40 +00:00
_, _, err := s.cursor.Read([]In{})
2024-09-11 01:00:44 +00:00
if errors.Is(err, io.EOF) {
return Succeed(true, struct{}{}, s, MessageOK(s.Pos())), nil
2024-09-11 01:00:44 +00:00
}
if err != nil {
return Result[In, struct{}]{}, fmt.Errorf("End: unexpected error: %w", err)
2024-09-11 01:00:44 +00:00
}
2024-09-11 21:27:10 +00:00
return Fail[In, struct{}](false, MakeMessage(s.Pos(), "", "end of input")), nil
2024-09-11 01:00:44 +00:00
}
// End creates a parser that succeeds when the input is exhausted and fails
// otherwise. The parser produces an empty struct value.
func End[In any]() Parser[In, struct{}] {
	return end[In]
}
2024-09-18 02:10:19 +00:00
// Pipe returns a function that, given a value t, creates a parser which binds
// p to a continuation that ignores p's output and returns t instead.
func Pipe[In, Ignore, Through any](p Parser[In, Ignore]) func(Through) Parser[In, Through] {
	return func(through Through) Parser[In, Through] {
		discard := func(Ignore) Parser[In, Through] {
			return Return[In](through)
		}
		return Bind(p, discard)
	}
}