// SPDX-License-Identifier: Unlicense package gigaparsec //go:generate go run ./internal/bindgen -bindpath bind.go -seqpath seq.go -max 5 -pkg gigaparsec import ( "errors" "fmt" "io" "math" "slices" "strings" ) type Result[In, Out any] struct { consumed, success bool value Out next State[In] message Message } func Fail[In, Out any](consumed bool, msg Message) Result[In, Out] { return Result[In, Out]{ consumed: consumed, success: false, message: msg, } } func Succeed[In, Out any](consumed bool, value Out, next State[In], msg Message) Result[In, Out] { return Result[In, Out]{ success: true, value: value, consumed: consumed, next: next, message: msg, } } func (r Result[In, Out]) Status() (success bool, value Out, next State[In]) { success = r.success if success { value = r.value next = r.next } return } func (r Result[In, Out]) Consumed() bool { return r.consumed } func (r Result[In, Out]) Consume(consumed bool) Result[In, Out] { r.consumed = consumed return r } func (r Result[In, Out]) Message() Message { return r.message } func MakeMessage(pos uint64, got string, expected ...string) Message { return Message{ pos: pos, got: got, expected: expected, } } type Message struct { pos uint64 got string expected []string } func (m Message) Pos() uint64 { return m.pos } func (m Message) Got() string { return m.got } func (m Message) Expected() []string { return m.expected } func (m Message) expect(s string) Message { m.expected = []string{s} return m } func (m Message) String() string { s := fmt.Sprintf("bad parse at %d", m.pos) if m.got != "" || len(m.expected) > 0 { s += ":" if m.got != "" { s += fmt.Sprintf(" got %v", m.got) if len(m.expected) > 0 { s += "," } } if len(m.expected) > 0 { s += fmt.Sprintf(" expected %v", strings.Join(m.expected, " or ")) } } return s } func MessageOK(pos uint64) Message { return Message{pos: pos} } func MessageEnd(pos uint64, expected ...string) Message { return Message{pos: pos, got: "end of input", expected: expected} } type ReaderAt[T any] interface { ReadAt(p []T, off int64) (n int, err error) } type SliceReaderAt[T any] []T func (s SliceReaderAt[T]) ReadAt(dst []T, off int64) (n int, err error) { if off < 0 { return 0, errors.New("SliceReaderAt.ReadAt: negative offset") } if off >= int64(len(s)) { return 0, io.EOF } n = copy(dst, s[off:]) if n < len(dst) { err = io.EOF } return n, err } func MakeState[In any](r ReaderAt[In]) State[In] { return State[In]{r: r} } type State[In any] struct { r ReaderAt[In] pos uint64 } // Read fills dst with data from this State's position in the underlying source. // It returns the number of data it read and a new State for the position at which // the read ended, and an error if the read either (1) failed or (2) reached the // end of the source before filling dst. All reads from a given State will return // data from the same position the source. // If the source had too few data left to fill dst, or if the State's position is // at or past the end of the source, err will be io.EOF. func (s State[In]) Read(dst []In) (n uint64, next State[In], err error) { if s.pos > math.MaxInt64 { return 0, s, io.EOF } nread, err := s.r.ReadAt(dst, int64(s.pos)) if nread > 0 { s.pos += uint64(nread) } if nread == len(dst) && err == io.EOF { if nread == 0 { return 0, s, io.EOF } return uint64(nread), s, nil } return uint64(nread), s, err } // Pos returns this State's position. func (s State[In]) Pos() uint64 { return s.pos } // At returns a State pointing at pos in the same data source. func (s State[In]) At(pos uint64) State[In] { return State[In]{r: s.r, pos: pos} } type Parser[In, Out any] func(State[In]) (Result[In, Out], error) // Label creates a parser identical to p, except that a failed result will // include label as an expected parse. func (p Parser[In, Out]) Label(label string) Parser[In, Out] { return func(input State[In]) (Result[In, Out], error) { result, err := p(input) if err != nil || result.Consumed() { return result, err } msg := result.Message() if success, value, next := result.Status(); success { return Succeed(false, value, next, msg.expect(label)), nil } return Fail[In, Out](false, msg.expect(label)), nil } } type ParseError Message func (pe ParseError) Error() string { return Message(pe).String() } func Run[In, Out any](p Parser[In, Out], r ReaderAt[In]) (out Out, err error) { start := MakeState(r) result, err := p(start) if err != nil { err = fmt.Errorf("Run: %w", err) return } success, out, _ := result.Status() if !success { err = ParseError(result.Message()) return } return } // Return creates a parser that always succeeds and returns value without consuming any input. func Return[In, Out any](value Out) Parser[In, Out] { return func(state State[In]) (Result[In, Out], error) { return Succeed(false, value, state, MessageOK(state.Pos())), nil } } // Satisfy creates a parser that attempts to read an input value for which pred returns true. // If Satisfy succeeds, it returns the matched input value. func Satisfy[T any](pred func(T) bool) Parser[T, T] { return func(state State[T]) (Result[T, T], error) { token := make([]T, 1) n, next, err := state.Read(token) if errors.Is(err, io.EOF) { return Fail[T, T](false, MessageEnd(state.Pos())), nil } if err != nil { return Result[T, T]{}, err } if n != 1 { panic(fmt.Sprintf("expected 1 element from Read, but got %d", n)) } if pred(token[0]) { return Succeed(true, token[0], next, MessageOK(state.Pos())), nil } return Fail[T, T](false, MakeMessage(state.Pos(), fmt.Sprint(token))), nil } } // Match creates a parser that attempts to read an input value equal to x. // If Match succeeds, it returns the matched input value. func Match[T comparable](x T) Parser[T, T] { expected := fmt.Sprint(x) return func(state State[T]) (Result[T, T], error) { token := make([]T, 1) _, next, err := state.Read(token) if errors.Is(err, io.EOF) { return Fail[T, T](false, MessageEnd(state.Pos())), nil } if err != nil { return Result[T, T]{}, err } if token[0] == x { return Succeed(true, token[0], next, MessageOK(state.Pos())), nil } return Fail[T, T](false, MakeMessage(state.Pos(), fmt.Sprint(token), expected)), nil } } // MatchSlice creates a parser that attempts to read the contents of s from the input. // If MatchSlice succeeds, it returns a copy of the matched input values. func MatchSlice[T comparable](s []T) Parser[T, []T] { expected := fmt.Sprint(s) return func(state State[T]) (Result[T, []T], error) { token := make([]T, len(s)) _, next, err := state.Read(token) if errors.Is(err, io.EOF) { return Fail[T, []T](false, MessageEnd(state.Pos())), nil } if err != nil { return Result[T, []T]{}, err } if !slices.Equal(s, token) { return Fail[T, []T](false, MakeMessage(state.Pos(), fmt.Sprint(token), expected)), nil } return Succeed(true, token, next, MessageOK(state.Pos())), nil } } func Choose[In, Out any](p Parser[In, Out], ps ...Parser[In, Out]) Parser[In, Out] { // TODO Check this against the Parsec paper again, and simplify it. all := append([]Parser[In, Out]{p}, ps...) return func(input State[In]) (Result[In, Out], error) { expecteds := make([][]string, 0, len(all)) var value Out var got string var failed bool for _, q := range all { result, err := q(input) if err != nil { return Result[In, Out]{}, err } if result.Consumed() { return result, nil } var qMsg Message msg := result.Message() success, qValue, _ := result.Status() if !success { qMsg = msg failed = true } else { if failed { value = qValue failed = false } qMsg = msg } if got == "" { got = qMsg.got } } msg := MakeMessage(input.Pos(), got, slices.Concat(expecteds...)...) if failed { return Fail[In, Out](false, msg), nil } return Succeed(false, value, input, msg), nil } } // Try behaves identically to p, except that if p returns an error, // Try will pretend that no input was consumed. This allows infinite // lookahead: Since Choose only calls another parser when the previous // parser consumed nothing, Try will allow backing out of a complex // parser that partially succeeded. func Try[In, Out any](p Parser[In, Out]) Parser[In, Out] { return func(input State[In]) (Result[In, Out], error) { result, err := p(input) if err != nil { return result, err } success, _, _ := result.Status() if !success { return Fail[In, Out](false, result.Message()), nil } return result, nil } } // Map creates a parser that converts the output of p from Out1 to Out2. func Map[In, Out1, Out2 any](p Parser[In, Out1], f func(Out1) Out2) Parser[In, Out2] { return Bind(p, func(out Out1) Parser[In, Out2] { return Return[In](f(out)) }) } func end[In any](s State[In]) (Result[In, struct{}], error) { _, _, err := s.Read([]In{}) if errors.Is(err, io.EOF) { return Succeed(true, struct{}{}, s, MessageOK(s.Pos())), nil } if err != nil { return Result[In, struct{}]{}, fmt.Errorf("End: unexpected error: %w", err) } return Fail[In, struct{}](false, MakeMessage(s.Pos(), "", "end of input")), nil } // End creates a parser that succeeds at the end of the input and fails otherwise. func End[In any]() Parser[In, struct{}] { return end } func Pipe[In, Ignore, Through any](p Parser[In, Ignore]) func(Through) Parser[In, Through] { return func(t Through) Parser[In, Through] { return Bind(p, func(Ignore) Parser[In, Through] { return Return[In](t) }) } } // Repeat applies p until p fails, and returns the collected outputs. // It succeeds if and only if p succeeds at least minCount times. // It consumes if and only if at least one of the applications of p consumes. func Repeat[In, Out any](minCount int, p Parser[In, Out]) Parser[In, []Out] { return func(s State[In]) (Result[In, []Out], error) { var values []Out var consumed bool next := s for { result, err := p(next) if err != nil { return Result[In, []Out]{}, fmt.Errorf("AtLeastN: %w", err) } consumed = consumed || result.Consumed() var value Out var success bool success, value, next = result.Status() if !success { if len(values) >= minCount { return Succeed(consumed, values, next, MessageOK(s.Pos())), nil } return Fail[In, []Out](consumed, result.Message()), nil } values = append(values, value) } } }