Combine Cursor with State

This commit is contained in:
Brandon Dyck 2024-09-27 09:29:27 -06:00
parent 5c779c4a33
commit 82ade62274
8 changed files with 68 additions and 187 deletions

View File

@ -1,6 +1,5 @@
Fix State test failures
Write Repeat tests
Clean up cursor tests
Combine Cursor with State
Think about not requiring so much Pos() when making messages
Rename Seq2 to Seq
Document Seq

View File

@ -11,19 +11,18 @@ import (
"unicode/utf8"
"git.codemonkeysoftware.net/b/gigaparsec"
"git.codemonkeysoftware.net/b/gigaparsec/cursor"
)
// RuneReader is an io.RuneReader backed by a Cursor, for compatibility
// with the regexp package.
type RuneReader struct {
cursor cursor.Cursor[byte]
state gigaparsec.State[byte]
start uint64
err error
}
func NewRuneReader(c cursor.Cursor[byte]) *RuneReader {
return &RuneReader{cursor: c, start: c.Pos()}
func NewRuneReader(state gigaparsec.State[byte]) *RuneReader {
return &RuneReader{state: state, start: state.Pos()}
}
func (rr *RuneReader) ReadRune() (r rune, size int, err error) {
@ -32,9 +31,9 @@ func (rr *RuneReader) ReadRune() (r rune, size int, err error) {
}()
var b [4]byte
s := b[:]
n, next, err := rr.cursor.Read(s)
n, next, err := rr.state.Read(s)
if err != nil && !errors.Is(err, io.EOF) {
rr.cursor = next
rr.state = next
return 0, 0, fmt.Errorf("ReadRune: %w", err)
}
if n == 0 {
@ -42,12 +41,12 @@ func (rr *RuneReader) ReadRune() (r rune, size int, err error) {
}
s = s[:n]
r, size = utf8.DecodeRune(s)
rr.cursor = rr.cursor.At(rr.cursor.Pos() + uint64(size))
rr.state = rr.state.At(rr.state.Pos() + uint64(size))
return r, size, nil
}
func (rr *RuneReader) Cursor() cursor.Cursor[byte] {
return rr.cursor
func (rr *RuneReader) State() gigaparsec.State[byte] {
return rr.state
}
func (rr *RuneReader) Error() error {
@ -55,7 +54,7 @@ func (rr *RuneReader) Error() error {
}
func (rr *RuneReader) Count() uint64 {
return rr.cursor.Pos() - rr.start
return rr.state.Pos() - rr.start
}
func Regexp(pattern string) gigaparsec.Parser[byte, string] {
@ -63,7 +62,7 @@ func Regexp(pattern string) gigaparsec.Parser[byte, string] {
re := regexp.MustCompile(pattern)
expected := fmt.Sprintf("match `%s`", pattern)
return func(input gigaparsec.State[byte]) (gigaparsec.Result[byte, string], error) {
r := NewRuneReader(input.Cursor())
r := NewRuneReader(input)
idx := re.FindReaderIndex(r)
err := r.Error()
if err != nil && !errors.Is(err, io.EOF) {
@ -71,7 +70,7 @@ func Regexp(pattern string) gigaparsec.Parser[byte, string] {
}
if idx == nil {
got := make([]byte, r.Count())
_, _, err = input.Cursor().Read(got)
_, _, err = input.Read(got)
if err != nil {
return gigaparsec.Result[byte, string]{}, fmt.Errorf("Regexp: unexpected error: %w", err)
}
@ -80,7 +79,7 @@ func Regexp(pattern string) gigaparsec.Parser[byte, string] {
// Alas, this is a little wasteful because a Regexp can only return indices
// when searching a RuneReader.
dst := make([]byte, idx[1]-idx[0])
n, _, err := input.Cursor().Read(dst)
n, _, err := input.Read(dst)
if err != nil {
return gigaparsec.Result[byte, string]{}, fmt.Errorf("Regexp: unexpected error: %w", err)
}

View File

@ -9,7 +9,6 @@ import (
"git.codemonkeysoftware.net/b/gigaparsec"
pbytes "git.codemonkeysoftware.net/b/gigaparsec/bytes"
"git.codemonkeysoftware.net/b/gigaparsec/cursor"
ptest "git.codemonkeysoftware.net/b/gigaparsec/test"
pgen "git.codemonkeysoftware.net/b/gigaparsec/test/generator"
"github.com/shoenig/test"
@ -29,7 +28,7 @@ func TestRegexp(t *testing.T) {
}))
t.Run("basically works", func(t *testing.T) {
result, err := pbytes.Regexp("a")(gigaparsec.MakeState(cursor.NewReaderAt(strings.NewReader("a"))))
result, err := pbytes.Regexp("a")(gigaparsec.MakeState(strings.NewReader("a")))
must.NoError(t, err)
success, value, _ := result.Status()
test.True(t, success, test.Sprint(result.Message()))
@ -40,7 +39,7 @@ func TestRegexp(t *testing.T) {
func TestRuneReader(t *testing.T) {
var s = "abcdefghijklmnopqrstuvwxyz"
rr := pbytes.NewRuneReader(cursor.NewReaderAt(strings.NewReader(s)))
rr := pbytes.NewRuneReader(gigaparsec.MakeState(strings.NewReader(s)))
for i, b := range s {
r, n, err := rr.ReadRune()
test.NoError(t, err)
@ -56,7 +55,7 @@ func TestMatchString(t *testing.T) {
t.Run("fails on unexpected error", rapid.MakeCheck(func(t *rapid.T) {
s := rapid.StringN(-1, -1, 100).Draw(t, "s")
readErr := pgen.Error().Draw(t, "readErr")
result, err := pbytes.MatchString(s)(gigaparsec.MakeState(cursor.NewReaderAt(ptest.ErrReaderAt(readErr))))
result, err := pbytes.MatchString(s)(gigaparsec.MakeState(ptest.ErrReaderAt(readErr)))
test.ErrorIs(t, err, readErr)
success, _, _ := result.Status()
test.False(t, success)
@ -68,7 +67,7 @@ func TestMatchString(t *testing.T) {
notPrefix := func(b []byte) bool { return !bytes.HasPrefix(input, b) }
s := string(bgen.Filter(notPrefix).Draw(t, "s"))
result, err := pbytes.MatchString(s)(gigaparsec.MakeState(cursor.NewReaderAt(bytes.NewReader(input))))
result, err := pbytes.MatchString(s)(gigaparsec.MakeState(bytes.NewReader(input)))
test.NoError(t, err)
success, _, _ := result.Status()
test.False(t, success)
@ -78,7 +77,7 @@ func TestMatchString(t *testing.T) {
input := rapid.SliceOfN(rapid.Byte(), 1, 100).Draw(t, "input")
slen := rapid.IntRange(0, len(input)).Draw(t, "slen")
s := string(input[:slen])
result, err := pbytes.MatchString(s)(gigaparsec.MakeState(cursor.NewReaderAt(bytes.NewReader(input))))
result, err := pbytes.MatchString(s)(gigaparsec.MakeState(bytes.NewReader(input)))
must.NoError(t, err)
success, value, next := result.Status()
must.True(t, success)

View File

@ -1,80 +0,0 @@
// SPDX-License-Identifier: Unlicense
package cursor
import (
"errors"
"io"
)
// Cursor reads data from a specific spot in a data source.
// Read and At hand back new Cursor values; per the Read contract below,
// repeated reads on one Cursor observe the same position.
type Cursor[Datum any] interface {
// Design note:
// I almost parameterized Cursor by its implementation (i.e. the Curiously
// Recurring Template Pattern), but then each parser would need that parameter.
// That might work well in a language with much stronger type inference, but
// not in Go. The upside would have been that for each implementation Impl,
// Impl.Read could have returned an unboxed Impl, which would have slightly
// simplified testing and maybe slightly reduced allocs.
//
// Read fills dst with data from this Cursor's position in the underlying
// source. It returns the number of data items it read and a new Cursor for
// the position at which the read ended, or an error if the read failed.
// All calls to Read on a given Cursor will return data from the same position.
// If n < len(dst) or if the cursor's position is at the end of the data source,
// Read will return an error explaining why it read fewer data than requested.
// If the error was due to the cursor reaching the end of the data source,
// err will be io.EOF.
Read(dst []Datum) (n uint64, next Cursor[Datum], err error)
// Pos returns the Cursor's position within the source.
Pos() uint64
// At returns a new cursor at the position pos.
At(pos uint64) Cursor[Datum]
}
// ReaderAt is a random-access data source with elements of type T. Its single
// method has the same shape as io.ReaderAt's, generalized from bytes to T:
// ReadAt copies data starting at offset off into p and reports how many
// elements it copied.
type ReaderAt[T any] interface {
ReadAt(p []T, off int64) (n int, err error)
}
// SliceReaderAt exposes an in-memory slice through the ReadAt method, so a
// plain []T can serve as a random-access data source.
type SliceReaderAt[T any] []T

// ReadAt copies elements of s, starting at index off, into dst and returns
// the number of elements copied.
//
// A negative off is rejected with an error. An off at or beyond the end of
// the slice yields (0, io.EOF). When fewer than len(dst) elements are
// available, the elements that exist are copied and err is io.EOF.
func (s SliceReaderAt[T]) ReadAt(dst []T, off int64) (n int, err error) {
	switch {
	case off < 0:
		return 0, errors.New("SliceReaderAt.ReadAt: negative offset")
	case off >= int64(len(s)):
		return 0, io.EOF
	}

	copied := copy(dst, s[off:])
	if copied == len(dst) {
		// dst was filled completely; nothing to report.
		return copied, nil
	}
	// The slice ran out before dst was full.
	return copied, io.EOF
}
// ReaderAtCursor pairs a ReaderAt data source with a position in it.
type ReaderAtCursor[T any] struct {
// r is the data source that reads are issued against.
r ReaderAt[T]
// pos is the offset passed to r.ReadAt on the next read.
pos uint64
}
// NewReaderAt wraps r in a ReaderAtCursor whose position is the zero offset.
func NewReaderAt[T any](r ReaderAt[T]) ReaderAtCursor[T] {
	var start ReaderAtCursor[T] // pos is left at its zero value
	start.r = r
	return start
}
// Read asks the underlying ReaderAt for len(dst) elements at this cursor's
// position. It returns the count the ReaderAt reported, a cursor advanced by
// that count (unchanged when nothing was read), and the ReaderAt's error
// verbatim. The receiver is a value, so the cursor Read was called on keeps
// its position.
func (rac ReaderAtCursor[T]) Read(dst []T) (uint64, Cursor[T], error) {
	count, readErr := rac.r.ReadAt(dst, int64(rac.pos))

	next := rac
	if count > 0 {
		next.pos = rac.pos + uint64(count)
	}
	return uint64(count), next, readErr
}
// Pos returns the offset at which this cursor's next Read will start.
func (rac ReaderAtCursor[T]) Pos() uint64 {
return rac.pos
}
// At returns a cursor over the same ReaderAt positioned at pos. The receiver
// is not modified.
func (rac ReaderAtCursor[T]) At(pos uint64) Cursor[T] {
	return ReaderAtCursor[T]{r: rac.r, pos: pos}
}

View File

@ -9,8 +9,6 @@ import (
"io"
"slices"
"strings"
"git.codemonkeysoftware.net/b/gigaparsec/cursor"
)
type Result[In, Out any] struct {
@ -114,29 +112,33 @@ func MessageEnd(pos uint64, expected ...string) Message {
return Message{pos: pos, got: "end of input", expected: expected}
}
// ReaderAt is the random-access data source behind a State. Its method has
// the same shape as io.ReaderAt's, generalized from bytes to T: ReadAt copies
// data starting at offset off into p and reports how many elements it copied.
type ReaderAt[T any] interface {
	ReadAt(p []T, off int64) (n int, err error)
}

// MakeState returns a State that reads from r, positioned at offset 0.
func MakeState[In any](r ReaderAt[In]) State[In] {
	return State[In]{r: r}
}

// State is a position within a ReaderAt data source. All of its methods use
// value receivers and return new States, so a given State always refers to
// the same position.
type State[In any] struct {
	// r is the data source that reads are issued against.
	r ReaderAt[In]
	// pos is the offset passed to r.ReadAt on the next Read.
	pos uint64
}

// Read fills dst with data starting at s's position.
//
// It returns the number of elements read, a State positioned just past the
// data that was read, and the error reported by the underlying ReaderAt
// (unwrapped). The receiver is left untouched, so repeated reads on the same
// State observe the same position. When nothing was read, next is at the
// same position as s.
//
// NOTE(review): s.pos is converted to int64 for ReadAt, so positions above
// math.MaxInt64 reach the ReaderAt as negative offsets; handling of those is
// left to the ReaderAt.
func (s State[In]) Read(dst []In) (n uint64, next State[In], err error) {
	count, err := s.r.ReadAt(dst, int64(s.pos))

	next = s
	// Advance by the count the ReaderAt reported. The named result n must not
	// be used for this: it is still zero here, which previously left next
	// stuck at s's position after every successful read.
	if count > 0 {
		n = uint64(count)
		next.pos += n
	}
	return n, next, err
}

// Pos returns the offset at which the next Read on s will start.
func (s State[In]) Pos() uint64 {
	return s.pos
}

// At returns a State over the same data source positioned at pos.
func (s State[In]) At(pos uint64) State[In] {
	return State[In]{r: s.r, pos: pos}
}
type Parser[In, Out any] func(State[In]) (Result[In, Out], error)
@ -163,8 +165,8 @@ func (pe ParseError) Error() string {
return Message(pe).String()
}
func Run[In, Out any](p Parser[In, Out], c cursor.Cursor[In]) (out Out, err error) {
start := MakeState(c)
func Run[In, Out any](p Parser[In, Out], r ReaderAt[In]) (out Out, err error) {
start := MakeState(r)
result, err := p(start)
if err != nil {
err = fmt.Errorf("Run: %w", err)
@ -315,7 +317,7 @@ func Map[In, Out1, Out2 any](p Parser[In, Out1], f func(Out1) Out2) Parser[In, O
}
func end[In any](s State[In]) (Result[In, struct{}], error) {
_, _, err := s.cursor.Read([]In{})
_, _, err := s.Read([]In{})
if errors.Is(err, io.EOF) {
return Succeed(true, struct{}{}, s, MessageOK(s.Pos())), nil
}

View File

@ -7,7 +7,6 @@ import (
"testing"
"git.codemonkeysoftware.net/b/gigaparsec"
"git.codemonkeysoftware.net/b/gigaparsec/cursor"
ptest "git.codemonkeysoftware.net/b/gigaparsec/test"
"git.codemonkeysoftware.net/b/gigaparsec/test/generator"
"github.com/shoenig/test"
@ -30,7 +29,7 @@ func hasPrefix(prefix []byte) func([]byte) bool {
func TestSlice(t *testing.T) {
assertParseFails := func(t rapid.TB, input []byte, p gigaparsec.Parser[byte, []byte]) {
t.Helper()
start := gigaparsec.MakeState(cursor.NewReaderAt(bytes.NewReader(input)))
start := gigaparsec.MakeState(bytes.NewReader(input))
result, err := p(start)
must.NoError(t, err)
success, _, _ := result.Status()
@ -55,9 +54,9 @@ func TestSlice(t *testing.T) {
}))
t.Run("fails when read fails", rapid.MakeCheck(func(t *rapid.T) {
expectedErr := generator.Error().Draw(t, "expectedErr")
c := ptest.ErrCursor[byte](expectedErr)
r := ptest.ErrReaderAt(expectedErr)
s := rapid.SliceOfN(rapid.Byte(), 0, 100).Draw(t, "s")
result, err := gigaparsec.MatchSlice(s)(gigaparsec.MakeState(c))
result, err := gigaparsec.MatchSlice(s)(gigaparsec.MakeState(r))
test.ErrorIs(t, err, expectedErr)
success, _, _ := result.Status()
test.False(t, success)
@ -66,7 +65,7 @@ func TestSlice(t *testing.T) {
input := rapid.SliceOfN(rapid.Byte(), 1, -1).Draw(t, "input")
sLen := rapid.IntRange(0, len(input)).Draw(t, "sLen")
s := input[:sLen]
start := gigaparsec.MakeState(cursor.NewReaderAt(bytes.NewReader(input)))
start := gigaparsec.MakeState(bytes.NewReader(input))
result, err := gigaparsec.MatchSlice(s)(start)
must.NoError(t, err)
@ -108,7 +107,7 @@ func TestBind(t *testing.T) {
p := makeParser(pConsume)
q := func(struct{}) gigaparsec.Parser[byte, struct{}] { return makeParser(qConsume) }
result, err := gigaparsec.Bind(p, q)(gigaparsec.MakeState(cursor.NewReaderAt(bytes.NewReader(nil))))
result, err := gigaparsec.Bind(p, q)(gigaparsec.MakeState(bytes.NewReader(nil)))
must.NoError(t, err)
must.EqOp(t, pConsume || qConsume, result.Consumed())
}))

View File

@ -1,13 +1,11 @@
// SPDX-License-Identifier: Unlicense
package cursor_test
package gigaparsec_test
import (
"bytes"
"io"
"testing"
"git.codemonkeysoftware.net/b/gigaparsec/cursor"
"git.codemonkeysoftware.net/b/gigaparsec"
ptest "git.codemonkeysoftware.net/b/gigaparsec/test"
pgen "git.codemonkeysoftware.net/b/gigaparsec/test/generator"
"github.com/shoenig/test"
@ -15,69 +13,64 @@ import (
"pgregory.net/rapid"
)
func Todo(t *testing.T) {
t.Errorf("TODO")
}
func testCursor[C cursor.Cursor[byte]](t *testing.T, makeCursor func([]byte) C) {
t.Helper()
t.Run("cursor reads the same position every time", rapid.MakeCheck(func(t *rapid.T) {
func TestState(t *testing.T) {
t.Run("state reads the same position every time", rapid.MakeCheck(func(t *rapid.T) {
data := rapid.SliceOfN(rapid.Byte(), 1, 100).Draw(t, "data")
dst := pgen.SliceOfNZero[byte](0, len(data)-1).Draw(t, "dst")
expected := data[:len(dst)]
c := makeCursor(data)
st := gigaparsec.MakeState(bytes.NewReader(data))
_, next, err := c.Read(dst)
_, next, err := st.Read(dst)
must.NoError(t, err)
must.SliceEqOp(t, expected, dst)
next.Read(dst)
_, _, err = c.Read(dst)
_, _, err = st.Read(dst)
must.NoError(t, err)
must.SliceEqOp(t, expected, dst)
}))
t.Run("Read returns io.EOF iff it overruns source", rapid.MakeCheck(func(t *rapid.T) {
data := rapid.SliceOfN(rapid.Byte(), 0, 100).Draw(t, "data")
dst := pgen.SliceOfNZero[byte](0, 200).Draw(t, "dst")
c := makeCursor(data)
st := gigaparsec.MakeState(bytes.NewReader(data))
n, _, err := c.Read(dst)
n, _, err := st.Read(dst)
t.Logf("n=%d", n)
must.EqOp(t, min(len(data), len(dst)), int(n))
if len(dst) > len(data) || c.Pos() == uint64(len(data)) {
if len(dst) > len(data) || st.Pos() == uint64(len(data)) {
must.ErrorIs(t, err, io.EOF)
} else {
must.NoError(t, err)
}
}))
t.Run("next cursor reads next input", rapid.MakeCheck(func(t *rapid.T) {
t.Run("next state reads next input", rapid.MakeCheck(func(t *rapid.T) {
const maxLen = 100
data := rapid.SliceOfN(rapid.Byte(), 1, maxLen).Draw(t, "data")
skip := rapid.IntRange(0, len(data)-1).Draw(t, "skip")
c := makeCursor(data)
st := gigaparsec.MakeState(bytes.NewReader(data))
_, next, err := c.Read(make([]byte, skip))
_, next, err := st.Read(make([]byte, skip))
must.NoError(t, err)
must.EqOp(t, skip, int(next.Pos()))
dst := make([]byte, maxLen)
n, _, _ := next.Read(dst)
must.SliceEqOp(t, data[skip:skip+int(n)], dst[:n])
}))
t.Run("Read returns an error if n is less than requested", rapid.MakeCheck(func(t *rapid.T) {
t.Run("Read returns io.EOF if n is less than requested", rapid.MakeCheck(func(t *rapid.T) {
data := rapid.SliceOfN(rapid.Byte(), 0, 100).Draw(t, "data")
c := makeCursor(data)
st := gigaparsec.MakeState(bytes.NewReader(data))
n, _, err := c.Read(make([]byte, len(data)+1))
n, _, err := st.Read(make([]byte, len(data)+1))
test.ErrorIs(t, err, io.EOF)
test.EqOp(t, len(data), int(n))
}))
t.Run("At sets cursor position", rapid.MakeCheck(func(t *rapid.T) {
t.Run("At sets state position", rapid.MakeCheck(func(t *rapid.T) {
data := rapid.SliceOfN(rapid.Byte(), 1, 100).Draw(t, "data")
pos := rapid.Uint64Range(0, uint64(len(data)-1)).Draw(t, "pos")
c := makeCursor(data).At(pos)
st := gigaparsec.MakeState(bytes.NewReader(data)).At(pos)
dst := make([]byte, 1)
n, _, err := c.Read(dst)
n, _, err := st.Read(dst)
test.EqOp(t, 1, n)
test.NoError(t, err)
test.EqOp(t, data[pos], dst[0])
@ -85,31 +78,25 @@ func testCursor[C cursor.Cursor[byte]](t *testing.T, makeCursor func([]byte) C)
t.Run("Pos returns correct position after At", rapid.MakeCheck(func(t *rapid.T) {
var data []byte
pos := rapid.Uint64().Draw(t, "pos")
c := makeCursor(data).At(pos)
test.EqOp(t, pos, c.Pos())
st := gigaparsec.MakeState(bytes.NewReader(data)).At(pos)
test.EqOp(t, pos, st.Pos())
}))
t.Run("Pos returns correct position after Read", rapid.MakeCheck(func(t *rapid.T) {
const maxLen = 100
data := rapid.SliceOfN(rapid.Byte(), 1, maxLen).Draw(t, "data")
skip := rapid.Uint64Range(0, uint64(len(data)-1)).Draw(t, "skip")
c := makeCursor(data)
st := gigaparsec.MakeState(bytes.NewReader(data))
_, next, err := c.Read(make([]byte, skip))
_, next, err := st.Read(make([]byte, skip))
must.NoError(t, err)
test.EqOp(t, skip, next.Pos())
}))
}
func TestReaderAtCursor(t *testing.T) {
testCursor(t, func(b []byte) cursor.ReaderAtCursor[byte] {
return cursor.NewReaderAt(bytes.NewReader(b))
})
t.Run("Read returns an error if the ReaderAt fails", rapid.MakeCheck(func(t *rapid.T) {
expectedErr := pgen.Error().Draw(t, "expectedErr")
startPos := rapid.Uint64().Draw(t, "startPos")
dst := pgen.SliceOfNZero[byte](0, 100).Draw(t, "dst")
c := cursor.NewReaderAt(ptest.ErrReaderAt(expectedErr)).At(startPos)
n, next, err := c.Read(dst)
st := gigaparsec.MakeState(ptest.ErrReaderAt(expectedErr)).At(startPos)
n, next, err := st.Read(dst)
test.ErrorIs(t, err, expectedErr)
test.EqOp(t, startPos, next.Pos())
test.Zero(t, n)

View File

@ -7,7 +7,6 @@ import (
"io"
"git.codemonkeysoftware.net/b/gigaparsec"
"git.codemonkeysoftware.net/b/gigaparsec/cursor"
"github.com/shoenig/test"
)
@ -24,29 +23,6 @@ func ErrReaderAt(err error) io.ReaderAt {
return errReaderAt{err: err}
}
// errCursor is a test double for cursor.Cursor whose Read always fails.
type errCursor[T any] struct {
// err is the error returned by every call to Read.
err error
// pos is the position reported by Pos; At produces copies with a new pos.
pos uint64
}
// Read ignores its destination slice. It reports zero elements read, returns
// the receiver unchanged as the next cursor, and fails with the stored error.
func (c errCursor[T]) Read([]T) (uint64, cursor.Cursor[T], error) {
return 0, c, c.err
}
// At returns a cursor carrying the same error, positioned at pos. The
// receiver is not modified.
func (c errCursor[T]) At(pos uint64) cursor.Cursor[T] {
	return errCursor[T]{err: c.err, pos: pos}
}
// Pos returns the position this cursor was placed at (zero unless set by At).
func (c errCursor[T]) Pos() uint64 {
return c.pos
}
// ErrCursor returns a [cursor.Cursor] whose Read method always returns err.
// The returned cursor starts at position zero.
func ErrCursor[T any](err error) cursor.Cursor[T] {
return errCursor[T]{err: err}
}
// StateIsAt is a test assertion that s is positioned at pos. On mismatch the
// failure is reported through t, with a message naming the expected and
// actual positions.
func StateIsAt[Input any](t test.T, s gigaparsec.State[Input], pos uint64) {
	actual := s.Pos()
	failureNote := test.Sprintf("expected parser state to be at position %d, got %d", pos, actual)
	test.EqOp(t, pos, actual, failureNote)
}