diff --git a/bytes/regexp.go b/bytes/regexp.go
index e810345..45a558a 100644
--- a/bytes/regexp.go
+++ b/bytes/regexp.go
@@ -1,14 +1,59 @@
 package bytes
 
 import (
+	"errors"
 	"fmt"
+	"io"
 	"regexp"
 	"strings"
+	"unicode/utf8"
 
 	"git.codemonkeysoftware.net/b/gigaparsec"
 	"git.codemonkeysoftware.net/b/gigaparsec/cursor"
 )
 
+// RuneReader is an io.RuneReader backed by a Cursor, for compatibility
+// with the regexp package.
+type RuneReader struct {
+	cursor cursor.Cursor[byte]
+}
+
+// NewRuneReader returns a RuneReader that reads runes starting at c.
+func NewRuneReader(c cursor.Cursor[byte]) *RuneReader {
+	return &RuneReader{cursor: c}
+}
+
+// ReadRune decodes one UTF-8 encoded rune at the reader's cursor and moves
+// the cursor past it. Invalid UTF-8 yields utf8.RuneError with size 1.
+// A read error other than io.EOF is returned wrapped, with no rune.
+// io.EOF received together with data is held back, because io.RuneReader
+// callers such as regexp drop the rune whenever err is non-nil; the end of
+// input is reported by a later call that reads no bytes.
+func (rr *RuneReader) ReadRune() (r rune, size int, err error) {
+	var b [utf8.UTFMax]byte
+	n, next, err := rr.cursor.Read(b[:])
+	if err != nil && !errors.Is(err, io.EOF) {
+		rr.cursor = next
+		return 0, 0, fmt.Errorf("ReadRune: %w", err)
+	}
+	if n == 0 {
+		// Nothing to decode: never hand back a zero-width rune with a nil error.
+		if err == nil {
+			err = io.ErrNoProgress
+		}
+		return 0, 0, err
+	}
+	r, size = utf8.DecodeRune(b[:n])
+	// Advance by the decoded width only; the read may have covered more bytes than the rune uses.
+	rr.cursor = rr.cursor.At(rr.cursor.Pos() + uint64(size))
+	return r, size, nil
+}
+
+// Cursor returns the reader's current cursor, from which the next ReadRune will read.
+func (rr *RuneReader) Cursor() cursor.Cursor[byte] {
+	return rr.cursor
+}
+
 func Regexp(str string) gigaparsec.Parser[byte, []byte] {
 	if !strings.HasPrefix(str, "^") {
 		str = "^" + str
@@ -16,7 +61,7 @@ func Regexp(str string) gigaparsec.Parser[byte, []byte] {
 	re := regexp.MustCompile(str)
 	expected := fmt.Sprintf("match `%s`", str)
 	return func(input gigaparsec.State[byte]) (consumed bool, result gigaparsec.Result[byte, []byte], err error) {
-		r := cursor.NewRuneReader(input.Cursor())
+		r := NewRuneReader(input.Cursor())
 		idx := re.FindReaderIndex(r)
-		// TODO Check error from r; this requires an Error() method on cursor.RuneReader.
+		// TODO Check error from r; this requires an Error() method on RuneReader.
 		if idx == nil {
diff --git a/bytes/regexp_test.go b/bytes/regexp_test.go
index fe1b656..d0d4583 100644
--- a/bytes/regexp_test.go
+++ b/bytes/regexp_test.go
@@ -9,3 +9,7 @@ func Todo(t *testing.T) {
 func TestRegexp(t *testing.T) {
 	Todo(t)
 }
+
+func TestRuneReader(t *testing.T) {
+	Todo(t)
+}
diff --git a/cursor/helper.go b/cursor/helper.go
index 1f0547d..715317c 100644
--- a/cursor/helper.go
+++ b/cursor/helper.go
@@ -1,10 +1,7 @@
 package cursor
 
 import (
-	"errors"
-	"fmt"
 	"io"
-	"unicode/utf8"
 )
 
 // BufferedReaderAt uses a buffer to supplement an io.Reader
@@ -25,31 +22,3 @@ func NewBufferedReaderAt(r io.Reader, minBuffer uint64) *BufferedReaderAt {
 func (b *BufferedReaderAt) ReadAt(dst []byte, offset int64) (int, error) {
 	return 0, nil
 }
-
-// RuneReader is an io.RuneReader backed by a Cursor, for compatibility
-// with the regexp package.
-type RuneReader struct {
-	cursor Cursor[byte]
-}
-
-func NewRuneReader(c Cursor[byte]) *RuneReader {
-	return &RuneReader{cursor: c}
-}
-
-func (rr *RuneReader) ReadRune() (r rune, size int, err error) {
-	var b [4]byte
-	s := b[:]
-	n, next, err := rr.cursor.Read(s)
-	if err != nil && !errors.Is(err, io.EOF) {
-		rr.cursor = next
-		return 0, 0, fmt.Errorf("ReadRune: %w", err)
-	}
-	s = s[:n]
-	r, size = utf8.DecodeRune(s)
-	rr.cursor = rr.cursor.At(rr.cursor.Pos() + uint64(size))
-	return r, size, err
-}
-
-func (rr *RuneReader) Cursor() Cursor[byte] {
-	return rr.cursor
-}