From 1f746ae6de2043396bbbc763ced69b4ed1c429b6 Mon Sep 17 00:00:00 2001 From: Brandon Dyck Date: Wed, 11 Sep 2024 14:43:33 -0600 Subject: [PATCH] Return a useful Got when Regexp fails --- bytes/regexp.go | 17 ++++++++++++----- bytes/regexp_test.go | 1 + 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/bytes/regexp.go b/bytes/regexp.go index 99ad5cc..c371aa0 100644 --- a/bytes/regexp.go +++ b/bytes/regexp.go @@ -16,11 +16,12 @@ import ( // with the regexp package. type RuneReader struct { cursor cursor.Cursor[byte] + start uint64 err error } func NewRuneReader(c cursor.Cursor[byte]) *RuneReader { - return &RuneReader{cursor: c} + return &RuneReader{cursor: c, start: c.Pos()} } func (rr *RuneReader) ReadRune() (r rune, size int, err error) { @@ -48,6 +49,10 @@ func (rr *RuneReader) Error() error { return rr.err } +func (rr *RuneReader) Count() uint64 { + return rr.cursor.Pos() - rr.start +} + func Regexp(str string) gigaparsec.Parser[byte, string] { if !strings.HasPrefix(str, "^") && !strings.HasPrefix(str, `\A`) { str = "^" + str @@ -62,12 +67,15 @@ func Regexp(str string) gigaparsec.Parser[byte, string] { return gigaparsec.Result[byte, string]{}, fmt.Errorf("Regexp: reader error: %w", err) } if idx == nil { + got := make([]byte, r.Count()) + _, _, err = input.Cursor().Read(got) + if err != nil { + return gigaparsec.Result[byte, string]{}, fmt.Errorf("Regexp: unexpected error: %w", err) + } return gigaparsec.Fail[byte, string](false, gigaparsec.Message{ Pos: input.Pos(), + Got: string(got), Expected: []string{expected}, - // TODO Not having a Got is unsatisfactory, but how do I extract useful information? - // Maybe just read a fixed number of bytes or to the end, whichever comes first? - // I could add extra methods to cursor.RuneReader to figure out how much it had read. }), nil } // Alas, this is a little wasteful because a Regexp can only return indices @@ -75,7 +83,6 @@ func Regexp(str string) gigaparsec.Parser[byte, string] { dst := make([]byte, idx[1]-idx[0]) n, _, err := input.Cursor().Read(dst) if err != nil { - // If we can't access those same bytes again, something is wrong. return gigaparsec.Result[byte, string]{}, fmt.Errorf("Regexp: unexpected error: %w", err) } next := input.At(input.Pos() + n) diff --git a/bytes/regexp_test.go b/bytes/regexp_test.go index a694586..377a31d 100644 --- a/bytes/regexp_test.go +++ b/bytes/regexp_test.go @@ -10,6 +10,7 @@ func TestRegexp(t *testing.T) { t.Run("only searches the beginning of input", Todo) t.Run("position is correct after match", Todo) t.Run("fails on unexpected error", Todo) + t.Run("returns a useful Got value", Todo) } func TestRuneReader(t *testing.T) {