gigaparsec/gigaparsec.go

405 lines
10 KiB
Go
Raw Normal View History

// SPDX-License-Identifier: Unlicense
package gigaparsec
2024-09-14 02:48:33 +00:00
//go:generate go run ./internal/bindgen -bindpath bind.go -seqpath seq.go -max 5 -pkg gigaparsec
import (
"errors"
"fmt"
"io"
2024-09-30 21:33:24 +00:00
"math"
2024-09-03 03:14:46 +00:00
"slices"
2024-09-18 02:10:19 +00:00
"strings"
)
// Result is what a Parser returns after running against a State.
type Result[In, Out any] struct {
	// consumed is the flag reported by Consumed.
	consumed bool
	// success tells Status whether value and next are meaningful.
	success bool
	// value is the parsed output; Status only exposes it on success.
	value Out
	// next is the State to continue from; Status only exposes it on success.
	next State[In]
	// message carries the position, "got" text and expectations for reporting.
	message Message
}
// Fail builds an unsuccessful Result that carries msg and the given consumed
// flag. Its value and next State are left as zero values.
func Fail[In, Out any](consumed bool, msg Message) Result[In, Out] {
	var r Result[In, Out] // success stays false in the zero value
	r.consumed = consumed
	r.message = msg
	return r
}
// Succeed builds a successful Result holding value, the State to continue
// from (next), the consumed flag, and msg.
func Succeed[In, Out any](consumed bool, value Out, next State[In], msg Message) Result[In, Out] {
	var r Result[In, Out]
	r.success = true
	r.consumed = consumed
	r.value = value
	r.next = next
	r.message = msg
	return r
}
func (r Result[In, Out]) Status() (success bool, value Out, next State[In]) {
success = r.success
if success {
2024-09-11 17:09:16 +00:00
value = r.value
next = r.next
}
return
}
// Consumed returns the consumed flag stored in the Result.
func (r Result[In, Out]) Consumed() bool {
	return r.consumed
}
// Consume returns a copy of the Result with its consumed flag replaced by
// consumed. The receiver is not modified.
func (r Result[In, Out]) Consume(consumed bool) Result[In, Out] {
	updated := r
	updated.consumed = consumed
	return updated
}
2024-09-24 18:56:10 +00:00
// Message returns the Message attached to the Result.
func (r Result[In, Out]) Message() Message {
	return r.message
}
2024-09-11 21:27:10 +00:00
// MakeMessage creates a Message for position pos that records what was found
// (got) and the alternatives that were expected. The expected slice is stored
// as given, without copying.
func MakeMessage(pos uint64, got string, expected ...string) Message {
	var m Message
	m.pos, m.got, m.expected = pos, got, expected
	return m
}
// Message describes a parse outcome at a position: what was found there and
// which alternatives were expected.
type Message struct {
	pos      uint64
	got      string
	expected []string
}

// Pos returns the position the message refers to.
func (m Message) Pos() uint64 { return m.pos }

// Got returns the description of what was found, or "" if none was recorded.
func (m Message) Got() string { return m.got }

// Expected returns the recorded expectations. The returned slice is the
// Message's own slice, not a copy.
func (m Message) Expected() []string { return m.expected }

// expect returns a copy of m whose expectations are replaced by the single
// entry s.
func (m Message) expect(s string) Message {
	m.expected = []string{s}
	return m
}

// String renders the message as "bad parse at POS", followed by optional
// ": got X" and "expected A or B" parts when those are present.
func (m Message) String() string {
	var b strings.Builder
	fmt.Fprintf(&b, "bad parse at %d", m.pos)

	hasGot := m.got != ""
	hasExpected := len(m.expected) > 0
	if !hasGot && !hasExpected {
		return b.String()
	}

	b.WriteString(":")
	if hasGot {
		fmt.Fprintf(&b, " got %v", m.got)
		if hasExpected {
			// Separate the "got" part from the "expected" part.
			b.WriteString(",")
		}
	}
	if hasExpected {
		fmt.Fprintf(&b, " expected %v", strings.Join(m.expected, " or "))
	}
	return b.String()
}
2024-09-11 21:27:10 +00:00
// MessageOK creates a Message at pos with no "got" text and no expectations.
func MessageOK(pos uint64) Message {
	var m Message
	m.pos = pos
	return m
}
2024-09-18 03:37:07 +00:00
// MessageEnd creates a Message at pos whose "got" text is "end of input",
// together with the given expectations.
func MessageEnd(pos uint64, expected ...string) Message {
	return MakeMessage(pos, "end of input", expected...)
}
2024-09-27 15:29:27 +00:00
// ReaderAt is a source of values of type T that can be read at an offset.
// Its method has the same shape as io.ReaderAt's, with []T in place of []byte.
// State.Read treats an io.EOF error from ReadAt as the end of the source.
type ReaderAt[T any] interface {
	// ReadAt reads into p starting at offset off, returning the number of
	// elements read and any error.
	ReadAt(p []T, off int64) (n int, err error)
}
2024-09-30 19:02:57 +00:00
// SliceReaderAt is a slice that can be read at an offset via ReadAt.
type SliceReaderAt[T any] []T

// ReadAt copies elements of s starting at index off into dst and returns how
// many were copied. It returns a non-EOF error for a negative offset, and
// io.EOF when off is at or past the end of s or when fewer than len(dst)
// elements were available.
func (s SliceReaderAt[T]) ReadAt(dst []T, off int64) (n int, err error) {
	switch {
	case off < 0:
		return 0, errors.New("SliceReaderAt.ReadAt: negative offset")
	case off >= int64(len(s)):
		return 0, io.EOF
	}
	n = copy(dst, s[off:])
	if n < len(dst) {
		// Short read: the slice ran out before dst was full.
		return n, io.EOF
	}
	return n, nil
}
2024-09-27 15:29:27 +00:00
// MakeState creates a State that reads from r, positioned at offset 0.
func MakeState[In any](r ReaderAt[In]) State[In] {
	var s State[In] // pos starts at zero
	s.r = r
	return s
}
2024-09-27 15:29:27 +00:00
type State[In any] struct {
r ReaderAt[In]
pos uint64
2024-09-08 18:09:20 +00:00
}
2024-09-30 21:42:26 +00:00
// Read fills dst with data from this State's position in the underlying source.
// It returns the number of data it read and a new State for the position at which
// the read ended, and an error if the read either (1) failed or (2) reached the
// end of the source before filling dst. All reads from a given State will return
// data from the same position the source.
// If the source had too few data left to fill dst, or if the State's position is
// at or past the end of the source, err will be io.EOF.
func (s State[In]) Read(dst []In) (n uint64, next State[In], err error) {
2024-09-30 21:33:24 +00:00
if s.pos > math.MaxInt64 {
return 0, s, io.EOF
}
2024-09-27 15:29:27 +00:00
nread, err := s.r.ReadAt(dst, int64(s.pos))
2024-09-27 15:39:20 +00:00
if nread > 0 {
s.pos += uint64(nread)
2024-09-27 15:29:27 +00:00
}
2024-09-30 21:33:24 +00:00
if nread == len(dst) && err == io.EOF {
if nread == 0 {
return 0, s, io.EOF
}
return uint64(nread), s, nil
}
2024-09-27 15:29:27 +00:00
return uint64(nread), s, err
2024-09-08 18:09:20 +00:00
}
2024-09-30 21:42:26 +00:00
// Pos returns this State's position.
func (s State[In]) Pos() uint64 {
2024-09-27 15:29:27 +00:00
return s.pos
2024-09-08 18:09:20 +00:00
}
2024-09-30 21:42:26 +00:00
// At returns a State pointing at pos in the same data source.
2024-09-10 22:46:31 +00:00
func (s State[In]) At(pos uint64) State[In] {
2024-09-27 15:29:27 +00:00
return State[In]{r: s.r, pos: pos}
2024-09-10 22:46:31 +00:00
}
// Parser is a function that attempts to parse a value of type Out from a
// State over input of type In. A parse failure is reported through the
// Result; the error return is used for other errors, such as failures reading
// the source.
type Parser[In, Out any] func(State[In]) (Result[In, Out], error)
// Label creates a parser identical to p, except that a failed result will
// include label as an expected parse.
2024-09-13 17:35:13 +00:00
func (p Parser[In, Out]) Label(label string) Parser[In, Out] {
return func(input State[In]) (Result[In, Out], error) {
result, err := p(input)
if err != nil || result.Consumed() {
return result, err
}
2024-09-24 18:56:10 +00:00
msg := result.Message()
if success, value, next := result.Status(); success {
2024-09-13 17:35:13 +00:00
return Succeed(false, value, next, msg.expect(label)), nil
}
return Fail[In, Out](false, msg.expect(label)), nil
}
}
2024-09-18 19:01:44 +00:00
// ParseError is a Message used as an error value; Run returns one when the
// parser reports failure.
type ParseError Message
// Error implements the error interface by rendering the underlying Message
// with Message.String.
func (pe ParseError) Error() string {
	msg := Message(pe)
	return msg.String()
}
2024-09-27 15:29:27 +00:00
func Run[In, Out any](p Parser[In, Out], r ReaderAt[In]) (out Out, err error) {
start := MakeState(r)
2024-09-18 19:01:44 +00:00
result, err := p(start)
if err != nil {
err = fmt.Errorf("Run: %w", err)
return
}
success, out, _ := result.Status()
if !success {
2024-09-24 18:56:10 +00:00
err = ParseError(result.Message())
2024-09-18 19:01:44 +00:00
return
}
return
}
// Return creates a parser that always succeeds with value. It reads nothing:
// the result is marked as not consumed and continues from the input State.
func Return[In, Out any](value Out) Parser[In, Out] {
	return func(state State[In]) (Result[In, Out], error) {
		ok := MessageOK(state.Pos())
		return Succeed(false, value, state, ok), nil
	}
}
// Satisfy creates a parser that attempts to read an input value for which pred returns true.
// If Satisfy succeeds, it returns the matched input value.
func Satisfy[T any](pred func(T) bool) Parser[T, T] {
return func(state State[T]) (Result[T, T], error) {
token := make([]T, 1)
n, next, err := state.Read(token)
if errors.Is(err, io.EOF) {
return Fail[T, T](false, MessageEnd(state.Pos())), nil
}
2024-09-08 18:09:20 +00:00
if err != nil {
return Result[T, T]{}, err
2024-09-08 18:09:20 +00:00
}
if n != 1 {
panic(fmt.Sprintf("expected 1 element from Read, but got %d", n))
}
if pred(token[0]) {
return Succeed(true, token[0], next, MessageOK(state.Pos())), nil
}
2024-09-11 21:27:10 +00:00
return Fail[T, T](false, MakeMessage(state.Pos(), fmt.Sprint(token))), nil
}
}
2024-09-18 02:10:19 +00:00
// Match creates a parser that reads one input value and succeeds if it equals
// x, yielding that value and marking the result consumed. At end of input, or
// on a mismatch, it fails without consuming; a mismatch message lists x as
// the expected value. Errors from the source other than io.EOF are returned
// as errors.
func Match[T comparable](x T) Parser[T, T] {
	expected := fmt.Sprint(x)
	return func(state State[T]) (Result[T, T], error) {
		pos := state.Pos()
		token := make([]T, 1)
		_, next, err := state.Read(token)
		switch {
		case errors.Is(err, io.EOF):
			return Fail[T, T](false, MessageEnd(pos)), nil
		case err != nil:
			return Result[T, T]{}, err
		}
		if token[0] != x {
			return Fail[T, T](false, MakeMessage(pos, fmt.Sprint(token), expected)), nil
		}
		return Succeed(true, token[0], next, MessageOK(pos)), nil
	}
}
// MatchSlice creates a parser that attempts to read the contents of s from the input.
// If MatchSlice succeeds, it returns a copy of the matched input values.
func MatchSlice[T comparable](s []T) Parser[T, []T] {
2024-09-08 18:09:20 +00:00
expected := fmt.Sprint(s)
2024-09-11 17:44:40 +00:00
return func(state State[T]) (Result[T, []T], error) {
2024-09-08 18:09:20 +00:00
token := make([]T, len(s))
_, next, err := state.Read(token)
if errors.Is(err, io.EOF) {
return Fail[T, []T](false, MessageEnd(state.Pos())), nil
2024-09-08 18:09:20 +00:00
}
if err != nil {
return Result[T, []T]{}, err
2024-09-08 18:09:20 +00:00
}
if !slices.Equal(s, token) {
2024-09-11 21:27:10 +00:00
return Fail[T, []T](false, MakeMessage(state.Pos(), fmt.Sprint(token), expected)), nil
2024-09-08 18:09:20 +00:00
}
return Succeed(true, token, next, MessageOK(state.Pos())), nil
2024-09-08 18:09:20 +00:00
}
}
2024-09-03 03:20:13 +00:00
func Choose[In, Out any](p Parser[In, Out], ps ...Parser[In, Out]) Parser[In, Out] {
// TODO Check this against the Parsec paper again, and simplify it.
2024-09-03 03:14:46 +00:00
all := append([]Parser[In, Out]{p}, ps...)
return func(input State[In]) (Result[In, Out], error) {
2024-09-03 03:14:46 +00:00
expecteds := make([][]string, 0, len(all))
var value Out
var got string
var failed bool
for _, q := range all {
result, err := q(input)
if err != nil {
return Result[In, Out]{}, err
}
if result.Consumed() {
return result, nil
2024-09-03 03:14:46 +00:00
}
var qMsg Message
2024-09-24 18:56:10 +00:00
msg := result.Message()
success, qValue, _ := result.Status()
if !success {
qMsg = msg
failed = true
2024-09-03 03:14:46 +00:00
} else {
if failed {
value = qValue
2024-09-03 03:14:46 +00:00
failed = false
}
qMsg = msg
2024-09-03 03:14:46 +00:00
}
if got == "" {
2024-09-11 21:27:10 +00:00
got = qMsg.got
2024-09-03 03:14:46 +00:00
}
}
2024-09-11 21:27:10 +00:00
msg := MakeMessage(input.Pos(), got, slices.Concat(expecteds...)...)
2024-09-03 03:14:46 +00:00
if failed {
return Fail[In, Out](false, msg), nil
2024-09-03 03:14:46 +00:00
}
return Succeed(false, value, input, msg), nil
2024-09-03 03:14:46 +00:00
}
}
2024-09-02 18:48:48 +00:00
// Try behaves identically to p, except that if p returns an error,
// Try will pretend that no input was consumed. This allows infinite
// lookahead: Since Choose only calls another parser when the previous
// parser consumed nothing, Try will allow backing out of a complex
2024-09-11 17:43:19 +00:00
// parser that partially succeeded.
func Try[In, Out any](p Parser[In, Out]) Parser[In, Out] {
return func(input State[In]) (Result[In, Out], error) {
result, err := p(input)
if err != nil {
return result, err
}
success, _, _ := result.Status()
if !success {
2024-09-24 18:56:10 +00:00
return Fail[In, Out](false, result.Message()), nil
}
return result, nil
}
}
2024-09-09 16:33:46 +00:00
// Map creates a parser that converts the output of p from Out1 to Out2.
2024-09-09 16:33:46 +00:00
func Map[In, Out1, Out2 any](p Parser[In, Out1], f func(Out1) Out2) Parser[In, Out2] {
return Bind(p, func(out Out1) Parser[In, Out2] {
return Return[In](f(out))
})
2024-09-09 16:33:46 +00:00
}
2024-09-11 01:00:44 +00:00
func end[In any](s State[In]) (Result[In, struct{}], error) {
2024-09-27 15:29:27 +00:00
_, _, err := s.Read([]In{})
2024-09-11 01:00:44 +00:00
if errors.Is(err, io.EOF) {
return Succeed(true, struct{}{}, s, MessageOK(s.Pos())), nil
2024-09-11 01:00:44 +00:00
}
if err != nil {
return Result[In, struct{}]{}, fmt.Errorf("End: unexpected error: %w", err)
2024-09-11 01:00:44 +00:00
}
2024-09-11 21:27:10 +00:00
return Fail[In, struct{}](false, MakeMessage(s.Pos(), "", "end of input")), nil
2024-09-11 01:00:44 +00:00
}
// End creates a parser that succeeds when there is no input left and fails
// otherwise.
func End[In any]() Parser[In, struct{}] {
	return Parser[In, struct{}](end[In])
}
2024-09-18 02:10:19 +00:00
// Pipe returns a function that, given a value t, creates a parser which runs
// p, discards p's output, and yields t instead. It is built from Bind and
// Return.
func Pipe[In, Ignore, Through any](p Parser[In, Ignore]) func(Through) Parser[In, Through] {
	return func(t Through) Parser[In, Through] {
		yield := func(Ignore) Parser[In, Through] {
			return Return[In](t)
		}
		return Bind(p, yield)
	}
}
// Repeat applies p until p fails, and returns the collected outputs.
// It succeeds if and only if p succeeds at least minCount times.
// It consumes if and only if at least one of the applications of p consumes.
func Repeat[In, Out any](minCount int, p Parser[In, Out]) Parser[In, []Out] {
return func(s State[In]) (Result[In, []Out], error) {
var values []Out
var consumed bool
next := s
for {
result, err := p(next)
if err != nil {
return Result[In, []Out]{}, fmt.Errorf("AtLeastN: %w", err)
}
consumed = consumed || result.Consumed()
var value Out
var success bool
success, value, next = result.Status()
if !success {
if len(values) >= minCount {
return Succeed(consumed, values, next, MessageOK(s.Pos())), nil
}
2024-09-24 18:56:10 +00:00
return Fail[In, []Out](consumed, result.Message()), nil
}
values = append(values, value)
}
}
}