Return a useful Got when Regexp fails

This commit is contained in:
Brandon Dyck 2024-09-11 14:43:33 -06:00
parent ee544cd121
commit 1f746ae6de
2 changed files with 13 additions and 5 deletions

View File

@ -16,11 +16,12 @@ import (
// with the regexp package. // with the regexp package.
type RuneReader struct { type RuneReader struct {
cursor cursor.Cursor[byte] cursor cursor.Cursor[byte]
start uint64
err error err error
} }
func NewRuneReader(c cursor.Cursor[byte]) *RuneReader { func NewRuneReader(c cursor.Cursor[byte]) *RuneReader {
return &RuneReader{cursor: c} return &RuneReader{cursor: c, start: c.Pos()}
} }
func (rr *RuneReader) ReadRune() (r rune, size int, err error) { func (rr *RuneReader) ReadRune() (r rune, size int, err error) {
@ -48,6 +49,10 @@ func (rr *RuneReader) Error() error {
return rr.err return rr.err
} }
// Count returns the difference between the wrapped cursor's current position
// and the position recorded in start when the reader was constructed.
//
// NOTE(review): presumably this is the number of bytes consumed through
// ReadRune so far; confirm that ReadRune is the only thing advancing the cursor.
func (rr *RuneReader) Count() uint64 {
	current := rr.cursor.Pos()
	consumed := current - rr.start
	return consumed
}
func Regexp(str string) gigaparsec.Parser[byte, string] { func Regexp(str string) gigaparsec.Parser[byte, string] {
if !strings.HasPrefix(str, "^") && !strings.HasPrefix(str, `\A`) { if !strings.HasPrefix(str, "^") && !strings.HasPrefix(str, `\A`) {
str = "^" + str str = "^" + str
@ -62,12 +67,15 @@ func Regexp(str string) gigaparsec.Parser[byte, string] {
return gigaparsec.Result[byte, string]{}, fmt.Errorf("Regexp: reader error: %w", err) return gigaparsec.Result[byte, string]{}, fmt.Errorf("Regexp: reader error: %w", err)
} }
if idx == nil { if idx == nil {
got := make([]byte, r.Count())
_, _, err = input.Cursor().Read(got)
if err != nil {
return gigaparsec.Result[byte, string]{}, fmt.Errorf("Regexp: unexpected error: %w", err)
}
return gigaparsec.Fail[byte, string](false, gigaparsec.Message{ return gigaparsec.Fail[byte, string](false, gigaparsec.Message{
Pos: input.Pos(), Pos: input.Pos(),
Got: string(got),
Expected: []string{expected}, Expected: []string{expected},
// TODO Not having a Got is unsatisfactory, but how do I extract useful information?
// Maybe just read a fixed number of bytes or to the end, whichever comes first?
// I could add extra methods to cursor.RuneReader to figure out how much it had read.
}), nil }), nil
} }
// Alas, this is a little wasteful because a Regexp can only return indices // Alas, this is a little wasteful because a Regexp can only return indices
@ -75,7 +83,6 @@ func Regexp(str string) gigaparsec.Parser[byte, string] {
dst := make([]byte, idx[1]-idx[0]) dst := make([]byte, idx[1]-idx[0])
n, _, err := input.Cursor().Read(dst) n, _, err := input.Cursor().Read(dst)
if err != nil { if err != nil {
// If we can't access those same bytes again, something is wrong.
return gigaparsec.Result[byte, string]{}, fmt.Errorf("Regexp: unexpected error: %w", err) return gigaparsec.Result[byte, string]{}, fmt.Errorf("Regexp: unexpected error: %w", err)
} }
next := input.At(input.Pos() + n) next := input.At(input.Pos() + n)

View File

@ -10,6 +10,7 @@ func TestRegexp(t *testing.T) {
t.Run("only searches the beginning of input", Todo) t.Run("only searches the beginning of input", Todo)
t.Run("position is correct after match", Todo) t.Run("position is correct after match", Todo)
t.Run("fails on unexpected error", Todo) t.Run("fails on unexpected error", Todo)
t.Run("returns a useful Got value", Todo)
} }
func TestRuneReader(t *testing.T) { func TestRuneReader(t *testing.T) {