Separate parsing status from actual errors

This commit is contained in:
Brandon Dyck 2024-09-11 10:25:45 -06:00
parent c1eae9fa34
commit d0460e71e3
3 changed files with 140 additions and 109 deletions

View File

@ -46,18 +46,18 @@ func Regexp(str string) gigaparsec.Parser[byte, []byte] {
} }
re := regexp.MustCompile(str) re := regexp.MustCompile(str)
expected := fmt.Sprintf("match `%s`", str) expected := fmt.Sprintf("match `%s`", str)
return func(input gigaparsec.State[byte]) (consumed bool, result gigaparsec.Result[byte, []byte], err error) { return func(input gigaparsec.State[byte]) (gigaparsec.Result[byte, []byte], error) {
r := NewRuneReader(input.Cursor()) r := NewRuneReader(input.Cursor())
idx := re.FindReaderIndex(r) idx := re.FindReaderIndex(r)
// TODO Check error from r; this requires an Error() method on cursor.RuneReader. // TODO Check error from r; this requires an Error() method on cursor.RuneReader.
if idx == nil { if idx == nil {
return false, gigaparsec.Result[byte, []byte]{}, gigaparsec.ParseError{ return gigaparsec.Fail[byte, []byte](false, gigaparsec.Message{
Pos: input.Pos(), Pos: input.Pos(),
Expected: []string{expected}, Expected: []string{expected},
// TODO Not having a Got is unsatisfactory, but how do I extract useful information? // TODO Not having a Got is unsatisfactory, but how do I extract useful information?
// Maybe just read a fixed number of bytes or to the end, whichever comes first? // Maybe just read a fixed number of bytes or to the end, whichever comes first?
// I could add extra methods to cursor.RuneReader to figure out how much it had read. // I could add extra methods to cursor.RuneReader to figure out how much it had read.
} }), nil
} }
// Alas, this is a little wasteful because a Regexp can only return indices // Alas, this is a little wasteful because a Regexp can only return indices
// when searching a RuneReader. // when searching a RuneReader.
@ -65,13 +65,9 @@ func Regexp(str string) gigaparsec.Parser[byte, []byte] {
n, _, err := input.Cursor().Read(dst) n, _, err := input.Cursor().Read(dst)
if err != nil { if err != nil {
// If we can't access those same bytes again, something is wrong. // If we can't access those same bytes again, something is wrong.
return false, gigaparsec.Result[byte, []byte]{}, fmt.Errorf("Regex: unexpected error: %w", err) return gigaparsec.Result[byte, []byte]{}, fmt.Errorf("Regex: unexpected error: %w", err)
} }
result = gigaparsec.Result[byte, []byte]{ next := input.At(input.Pos() + n)
State: input.At(input.Pos() + n), return gigaparsec.Succeed(true, dst, next, gigaparsec.MessageOK(input.Pos())), nil
Value: dst,
Message: gigaparsec.MessageOK(input.Pos()),
}
return true, result, nil
} }
} }

View File

@ -10,9 +10,57 @@ import (
) )
type Result[In, Out any] struct { type Result[In, Out any] struct {
Value Out consumed, failed bool
State[In] Value Out
Message next State[In]
Message Message
}
func Fail[In, Out any](consumed bool, msg Message) Result[In, Out] {
return Result[In, Out]{
consumed: consumed,
failed: true,
Message: msg,
}
}
func (r Result[In, Out]) Failed() (ok, consumed bool, msg Message) {
ok = r.failed
if ok {
consumed = r.consumed
msg = r.Message
}
return
}
func Succeed[In, Out any](consumed bool, value Out, next State[In], msg Message) Result[In, Out] {
return Result[In, Out]{
failed: false,
Value: value,
consumed: consumed,
next: next,
Message: msg,
}
}
func (r Result[In, Out]) Succeeded() (ok, consumed bool, value Out, next State[In], msg Message) {
ok = !r.failed
if ok {
consumed = r.consumed
value = r.Value
next = r.next
msg = r.Message
}
return
}
func (r Result[In, Out]) Consumed() bool {
return r.consumed
}
func (r Result[In, Out]) Consume(consumed bool) Result[In, Out] {
r.consumed = consumed
return r
} }
type Message struct { type Message struct {
@ -30,11 +78,11 @@ func MessageOK(pos uint64) Message { return Message{Pos: pos} }
func MessageEnd(pos uint64) Message { return Message{Pos: pos, Got: "end of input"} } func MessageEnd(pos uint64) Message { return Message{Pos: pos, Got: "end of input"} }
type ParseError Message // type ParseError Message
func (pe ParseError) Error() string { // func (pe ParseError) Error() string {
return fmt.Sprintf("parse error: %d: %s", pe.Pos, pe.Got) // return fmt.Sprintf("parse error: %d: %s", pe.Pos, pe.Got)
} // }
func MakeState[In any](c cursor.Cursor[In]) State[In] { func MakeState[In any](c cursor.Cursor[In]) State[In] {
return State[In]{cursor: c} return State[In]{cursor: c}
@ -61,125 +109,115 @@ func (s State[In]) At(pos uint64) State[In] {
return State[In]{cursor: s.cursor.At(pos)} return State[In]{cursor: s.cursor.At(pos)}
} }
type Parser[In, Out any] func(State[In]) (consumed bool, reply Result[In, Out], err error) type Parser[In, Out any] func(State[In]) (Result[In, Out], error)
func Return[In, Out any](value Out) Parser[In, Out] { func Return[In, Out any](value Out) Parser[In, Out] {
return func(state State[In]) (bool, Result[In, Out], error) { return func(state State[In]) (Result[In, Out], error) {
return false, Result[In, Out]{ return Succeed(false, value, state, MessageOK(state.Pos())), nil
Value: value,
State: state,
Message: MessageOK(state.Pos()),
}, nil
} }
} }
func Satisfy[T any](pred func(T) bool) Parser[T, T] { func Satisfy[T any](pred func(T) bool) Parser[T, T] {
return func(state State[T]) (bool, Result[T, T], error) { return func(state State[T]) (Result[T, T], error) {
token := make([]T, 1) token := make([]T, 1)
n, next, err := state.Read(token) n, next, err := state.Read(token)
if errors.Is(err, io.EOF) { if errors.Is(err, io.EOF) {
return false, Result[T, T]{}, ParseError(MessageEnd(state.Pos())) return Fail[T, T](false, MessageEnd(state.Pos())), nil
} }
if err != nil { if err != nil {
return false, Result[T, T]{}, err return Result[T, T]{}, err
} }
if n != 1 { if n != 1 {
panic(fmt.Sprintf("expected 1 element from Read, but got %d", n)) panic(fmt.Sprintf("expected 1 element from Read, but got %d", n))
} }
if pred(token[0]) { if pred(token[0]) {
return true, Result[T, T]{ return Succeed(true, token[0], next, MessageOK(state.Pos())), nil
Value: token[0],
State: next,
Message: MessageOK(state.Pos()),
}, nil
} }
return false, Result[T, T]{}, ParseError{ return Fail[T, T](false, Message{
Pos: state.Pos(), Pos: state.Pos(),
Got: fmt.Sprint(token), Got: fmt.Sprint(token),
} }), nil
} }
} }
func Slice[T comparable](s []T) Parser[T, []T] { func Slice[T comparable](s []T) Parser[T, []T] {
expected := fmt.Sprint(s) expected := fmt.Sprint(s)
return func(state State[T]) (consumed bool, reply Result[T, []T], err error) { return func(state State[T]) (reply Result[T, []T], err error) {
token := make([]T, len(s)) token := make([]T, len(s))
_, next, err := state.Read(token) _, next, err := state.Read(token)
if errors.Is(err, io.EOF) { if errors.Is(err, io.EOF) {
return false, Result[T, []T]{}, ParseError(MessageEnd(state.Pos())) return Fail[T, []T](false, MessageEnd(state.Pos())), nil
} }
if err != nil { if err != nil {
return false, Result[T, []T]{}, err return Result[T, []T]{}, err
} }
if !slices.Equal(s, token) { if !slices.Equal(s, token) {
return false, Result[T, []T]{}, ParseError{ return Fail[T, []T](false, Message{
Pos: state.Pos(), Pos: state.Pos(),
Got: fmt.Sprint(token), Got: fmt.Sprint(token),
Expected: []string{expected}, Expected: []string{expected},
} }), nil
} }
return true, Result[T, []T]{ return Succeed(true, token, next, MessageOK(state.Pos())), nil
Value: token,
State: next,
Message: MessageOK(state.Pos()),
}, nil
} }
} }
func Bind[In, A, B any](p Parser[In, A], f func(A) Parser[In, B]) Parser[In, B] { func Bind[In, A, B any](p Parser[In, A], f func(A) Parser[In, B]) Parser[In, B] {
return func(input State[In]) (bool, Result[In, B], error) { return func(input State[In]) (Result[In, B], error) {
consumed, resultA, err := p(input) resultA, err := p(input)
if err != nil { if err != nil {
return false, Result[In, B]{}, err return Result[In, B]{}, err
} }
consumed2, replyB, err := f(resultA.Value)(resultA.State) if ok, consumed, msg := resultA.Failed(); ok {
return consumed || consumed2, replyB, err return Fail[In, B](consumed, msg), nil
}
_, consumedA, valueA, next, _ := resultA.Succeeded()
resultB, err := f(valueA)(next)
if err != nil {
return Result[In, B]{}, err
}
return resultB.Consume(consumedA || resultB.Consumed()), nil
} }
} }
func Choose[In, Out any](p Parser[In, Out], ps ...Parser[In, Out]) Parser[In, Out] { func Choose[In, Out any](p Parser[In, Out], ps ...Parser[In, Out]) Parser[In, Out] {
// TODO Check this against the Parsec paper again, and simplify it.
all := append([]Parser[In, Out]{p}, ps...) all := append([]Parser[In, Out]{p}, ps...)
return func(input State[In]) (bool, Result[In, Out], error) { return func(input State[In]) (Result[In, Out], error) {
expecteds := make([][]string, 0, len(all)) expecteds := make([][]string, 0, len(all))
var value Out var value Out
var got string var got string
var gotGot bool
var failed bool var failed bool
for _, q := range all { for _, q := range all {
consumed, result, err := q(input) result, err := q(input)
if consumed { if err != nil {
return consumed, result, err return Result[In, Out]{}, err
}
if result.Consumed() {
return result, nil
} }
var qMsg Message var qMsg Message
if err != nil { if isFailure, _, msg := result.Failed(); isFailure {
var parseErr ParseError qMsg = msg
if !errors.As(err, &parseErr) { failed = true
// It broke. Give up.
return consumed, result, err
}
failed = failed && true
qMsg = Message(parseErr)
} else { } else {
_, _, qValue, _, msg := result.Succeeded()
if failed { if failed {
value = result.Value value = qValue
failed = false failed = false
} }
qMsg = result.Message qMsg = msg
} }
if !gotGot { if got == "" {
got = qMsg.Got got = qMsg.Got
gotGot = true
} }
} }
msg := Message{Pos: input.Pos(), Got: got, Expected: slices.Concat(expecteds...)} msg := Message{Pos: input.Pos(), Got: got, Expected: slices.Concat(expecteds...)}
if failed { if failed {
return false, Result[In, Out]{}, ParseError(msg) return Fail[In, Out](false, msg), nil
} }
return false, Result[In, Out]{ return Succeed(false, value, input, msg), nil
Value: value,
State: input,
Message: msg,
}, nil
} }
} }
@ -189,31 +227,29 @@ func Choose[In, Out any](p Parser[In, Out], ps ...Parser[In, Out]) Parser[In, Ou
// parser consumed nothing, Try will allow backing out of a complex // parser consumed nothing, Try will allow backing out of a complex
// parser that partially succeeded. // parser that partially succeeded.
func Try[In, Out any](p Parser[In, Out]) Parser[In, Out] { func Try[In, Out any](p Parser[In, Out]) Parser[In, Out] {
return func(input State[In]) (bool, Result[In, Out], error) { return func(input State[In]) (Result[In, Out], error) {
consumed, reply, err := p(input) result, err := p(input)
if err != nil { if err != nil {
return false, Result[In, Out]{}, err return result, err
} }
return consumed, reply, err if failed, _, msg := result.Failed(); failed {
return Fail[In, Out](false, msg), nil
}
return result, nil
} }
} }
func Label[In, Out any](p Parser[In, Out], l string) Parser[In, Out] { func Label[In, Out any](p Parser[In, Out], l string) Parser[In, Out] {
return func(input State[In]) (consumed bool, reply Result[In, Out], err error) { return func(input State[In]) (reply Result[In, Out], err error) {
consumed, reply, err = p(input) result, err := p(input)
if consumed { if err != nil || result.Consumed() {
return return result, err
} }
if err == nil { if succeeded, _, value, next, msg := result.Succeeded(); succeeded {
reply.Message = reply.Message.expect(l) return Succeed(false, value, next, msg.expect(l)), nil
return
} }
var parseErr ParseError _, _, msg := result.Failed()
if errors.As(err, &parseErr) { return Fail[In, Out](false, msg.expect(l)), nil
err = ParseError(Message(parseErr).expect(l))
return
}
return
} }
} }
@ -223,21 +259,17 @@ func Map[In, Out1, Out2 any](p Parser[In, Out1], f func(Out1) Out2) Parser[In, O
}) })
} }
func End[In any](s State[In]) (consumed bool, reply Result[In, struct{}], err error) { func End[In any](s State[In]) (reply Result[In, struct{}], err error) {
_, _, err = s.cursor.Read([]In{}) _, _, err = s.cursor.Read([]In{})
if errors.Is(err, io.EOF) { if errors.Is(err, io.EOF) {
reply := Result[In, struct{}]{ return Succeed(true, struct{}{}, s, MessageOK(s.Pos())), nil
State: s,
Message: MessageOK(s.Pos()),
}
return true, reply, nil
} }
if err != nil { if err != nil {
return false, Result[In, struct{}]{}, fmt.Errorf("End: unexpected error: %w", err) return Result[In, struct{}]{}, fmt.Errorf("End: unexpected error: %w", err)
} }
return false, Result[In, struct{}]{}, ParseError{ return Fail[In, struct{}](false, Message{
Pos: s.Pos(), Pos: s.Pos(),
Got: "", Got: "",
Expected: []string{"end of input"}, Expected: []string{"end of input"},
} }), nil
} }

View File

@ -7,6 +7,7 @@ import (
"git.codemonkeysoftware.net/b/gigaparsec" "git.codemonkeysoftware.net/b/gigaparsec"
"git.codemonkeysoftware.net/b/gigaparsec/cursor" "git.codemonkeysoftware.net/b/gigaparsec/cursor"
"github.com/shoenig/test" "github.com/shoenig/test"
"github.com/shoenig/test/must"
"pgregory.net/rapid" "pgregory.net/rapid"
) )
@ -23,17 +24,17 @@ func hasPrefix(prefix []byte) func([]byte) bool {
} }
func TestSlice(t *testing.T) { func TestSlice(t *testing.T) {
assertParseFails := func(t rapid.TB, input []byte, p gigaparsec.Parser[byte, []byte]) (parseErr gigaparsec.ParseError) { assertParseFails := func(t rapid.TB, input []byte, p gigaparsec.Parser[byte, []byte]) {
t.Helper() t.Helper()
start := gigaparsec.MakeState(cursor.NewSlice(input)) start := gigaparsec.MakeState(cursor.NewSlice(input))
consumed, result, err := p(start) result, err := p(start)
test.ErrorAs(t, err, &parseErr, test.Sprint("expected ParseError")) must.NoError(t, err)
test.False(t, consumed, test.Sprint("expected consumed to be false")) failed, consumed, _ := result.Failed()
test.SliceEmpty(t, result.Value, test.Sprint("expected result value to be empty")) test.True(t, failed)
test.False(t, consumed)
if t.Failed() { if t.Failed() {
t.FailNow() t.FailNow()
} }
return parseErr
} }
t.Run("fails with wrong contents", rapid.MakeCheck(func(t *rapid.T) { t.Run("fails with wrong contents", rapid.MakeCheck(func(t *rapid.T) {
@ -55,11 +56,13 @@ func TestSlice(t *testing.T) {
s := input[:sLen] s := input[:sLen]
start := gigaparsec.MakeState(cursor.NewSlice(input)) start := gigaparsec.MakeState(cursor.NewSlice(input))
consumed, result, err := gigaparsec.Slice(s)(start) result, err := gigaparsec.Slice(s)(start)
test.NoError(t, err) must.NoError(t, err)
test.True(t, consumed, test.Sprint("expected consumed to be true")) succeeded, consumed, value, next, _ := result.Succeeded()
test.SliceEqOp(t, s, result.Value) test.True(t, succeeded)
test.EqOp(t, uint64(len(s)), result.State.Pos()) test.True(t, consumed)
test.SliceEqOp(t, s, value)
test.EqOp(t, uint64(len(s)), next.Pos())
})) }))
} }