Move RuneReader into bytes package

This commit is contained in:
Brandon Dyck 2024-09-10 16:52:08 -06:00
parent 2a4e499be2
commit 0aa8a89014
3 changed files with 36 additions and 32 deletions

View File

@ -1,14 +1,45 @@
package bytes package bytes
import ( import (
"errors"
"fmt" "fmt"
"io"
"regexp" "regexp"
"strings" "strings"
"unicode/utf8"
"git.codemonkeysoftware.net/b/gigaparsec" "git.codemonkeysoftware.net/b/gigaparsec"
"git.codemonkeysoftware.net/b/gigaparsec/cursor" "git.codemonkeysoftware.net/b/gigaparsec/cursor"
) )
// RuneReader is an io.RuneReader backed by a Cursor, for compatibility
// with the regexp package. It holds the cursor from which the next rune
// will be read; ReadRune replaces that cursor as input is consumed.
type RuneReader struct {
// cursor is the position in the byte input at which the next ReadRune starts.
cursor cursor.Cursor[byte]
}
// NewRuneReader returns a RuneReader whose first ReadRune starts reading
// at c.
func NewRuneReader(c cursor.Cursor[byte]) *RuneReader {
	rr := new(RuneReader)
	rr.cursor = c
	return rr
}
// ReadRune decodes the UTF-8 encoded rune at the reader's cursor and moves
// the cursor past it, implementing io.RuneReader.
//
// It returns the rune and its width in bytes. Malformed input yields
// utf8.RuneError with a size of 1, as utf8.DecodeRune does. When no bytes
// remain it returns a zero rune, a zero size and io.EOF. Any other error
// from the underlying cursor is wrapped and returned with a zero rune and
// size.
func (rr *RuneReader) ReadRune() (r rune, size int, err error) {
	var buf [utf8.UTFMax]byte
	n, next, err := rr.cursor.Read(buf[:])
	if err != nil && !errors.Is(err, io.EOF) {
		// Keep the cursor handed back by the failed read, as before.
		rr.cursor = next
		return 0, 0, fmt.Errorf("ReadRune: %w", err)
	}
	if n == 0 {
		// Nothing left to decode; report a plain io.EOF so callers that
		// compare with == still recognize the end of input.
		return 0, 0, io.EOF
	}
	// The read asks for up to utf8.UTFMax bytes, so near the end of the
	// input it may deliver fewer bytes than requested and presumably
	// reports io.EOF alongside them (TODO confirm against Cursor.Read).
	// Those bytes are still valid input: consumers such as regexp treat
	// any non-nil error as end of text and would drop the rune, so EOF is
	// only surfaced once no bytes are read at all.
	r, size = utf8.DecodeRune(buf[:n])
	// Advance by the decoded width only, not by the number of bytes read,
	// so the look-ahead bytes are read again by the next call.
	rr.cursor = rr.cursor.At(rr.cursor.Pos() + uint64(size))
	return r, size, nil
}
// Cursor returns the cursor currently held by the reader, i.e. the
// position from which the next ReadRune will read.
func (rr *RuneReader) Cursor() cursor.Cursor[byte] {
return rr.cursor
}
func Regexp(str string) gigaparsec.Parser[byte, []byte] { func Regexp(str string) gigaparsec.Parser[byte, []byte] {
if !strings.HasPrefix(str, "^") { if !strings.HasPrefix(str, "^") {
str = "^" + str str = "^" + str
@ -16,7 +47,7 @@ func Regexp(str string) gigaparsec.Parser[byte, []byte] {
re := regexp.MustCompile(str) re := regexp.MustCompile(str)
expected := fmt.Sprintf("match `%s`", str) expected := fmt.Sprintf("match `%s`", str)
return func(input gigaparsec.State[byte]) (consumed bool, result gigaparsec.Result[byte, []byte], err error) { return func(input gigaparsec.State[byte]) (consumed bool, result gigaparsec.Result[byte, []byte], err error) {
r := cursor.NewRuneReader(input.Cursor()) r := NewRuneReader(input.Cursor())
idx := re.FindReaderIndex(r) idx := re.FindReaderIndex(r)
// TODO Check error from r; this requires an Error() method on cursor.RuneReader. // TODO Check error from r; this requires an Error() method on RuneReader.
if idx == nil { if idx == nil {

View File

@ -9,3 +9,7 @@ func Todo(t *testing.T) {
func TestRegexp(t *testing.T) { func TestRegexp(t *testing.T) {
Todo(t) Todo(t)
} }
// TestRuneReader is a placeholder for RuneReader tests; for now it only
// calls Todo.
func TestRuneReader(t *testing.T) {
Todo(t)
}

View File

@ -1,10 +1,7 @@
package cursor package cursor
import ( import (
"errors"
"fmt"
"io" "io"
"unicode/utf8"
) )
// BufferedReaderAt uses a buffer to supplement an io.Reader // BufferedReaderAt uses a buffer to supplement an io.Reader
@ -25,31 +22,3 @@ func NewBufferedReaderAt(r io.Reader, minBuffer uint64) *BufferedReaderAt {
func (b *BufferedReaderAt) ReadAt(dst []byte, offset int64) (int, error) { func (b *BufferedReaderAt) ReadAt(dst []byte, offset int64) (int, error) {
return 0, nil return 0, nil
} }
// RuneReader is an io.RuneReader backed by a Cursor, for compatibility
// with the regexp package. It holds the cursor from which the next rune
// will be read; ReadRune replaces that cursor as input is consumed.
type RuneReader struct {
// cursor is the position in the byte input at which the next ReadRune starts.
cursor Cursor[byte]
}
// NewRuneReader returns a RuneReader whose first ReadRune starts reading
// at c.
func NewRuneReader(c Cursor[byte]) *RuneReader {
	rr := new(RuneReader)
	rr.cursor = c
	return rr
}
// ReadRune decodes the UTF-8 encoded rune at the reader's cursor and moves
// the cursor past it, implementing io.RuneReader.
//
// It returns the rune and its width in bytes. Malformed input yields
// utf8.RuneError with a size of 1, as utf8.DecodeRune does. When no bytes
// remain it returns a zero rune, a zero size and io.EOF. Any other error
// from the underlying cursor is wrapped and returned with a zero rune and
// size.
func (rr *RuneReader) ReadRune() (r rune, size int, err error) {
	var buf [utf8.UTFMax]byte
	n, next, err := rr.cursor.Read(buf[:])
	if err != nil && !errors.Is(err, io.EOF) {
		// Keep the cursor handed back by the failed read, as before.
		rr.cursor = next
		return 0, 0, fmt.Errorf("ReadRune: %w", err)
	}
	if n == 0 {
		// Nothing left to decode; report a plain io.EOF so callers that
		// compare with == still recognize the end of input.
		return 0, 0, io.EOF
	}
	// The read asks for up to utf8.UTFMax bytes, so near the end of the
	// input it may deliver fewer bytes than requested and presumably
	// reports io.EOF alongside them (TODO confirm against Cursor.Read).
	// Those bytes are still valid input: consumers such as regexp treat
	// any non-nil error as end of text and would drop the rune, so EOF is
	// only surfaced once no bytes are read at all.
	r, size = utf8.DecodeRune(buf[:n])
	// Advance by the decoded width only, not by the number of bytes read,
	// so the look-ahead bytes are read again by the next call.
	rr.cursor = rr.cursor.At(rr.cursor.Pos() + uint64(size))
	return r, size, nil
}
// Cursor returns the cursor currently held by the reader, i.e. the
// position from which the next ReadRune will read.
func (rr *RuneReader) Cursor() Cursor[byte] {
return rr.cursor
}