diff --git a/TODO.txt b/TODO.txt index c7a1a0f..ca99171 100644 --- a/TODO.txt +++ b/TODO.txt @@ -1,6 +1,5 @@ +Fix State test failures Write Repeat tests -Clean up cursor tests -Combine Cursor with State Think about not requiring so much Pos() when making messages Rename Seq2 to Seq Document Seq diff --git a/bytes/regexp.go b/bytes/regexp.go index 7155fca..66f6b65 100644 --- a/bytes/regexp.go +++ b/bytes/regexp.go @@ -11,19 +11,18 @@ import ( "unicode/utf8" "git.codemonkeysoftware.net/b/gigaparsec" - "git.codemonkeysoftware.net/b/gigaparsec/cursor" ) // RuneReader is an io.RuneReader backed by a Cursor, for compatibility // with the regexp package. type RuneReader struct { - cursor cursor.Cursor[byte] - start uint64 - err error + state gigaparsec.State[byte] + start uint64 + err error } -func NewRuneReader(c cursor.Cursor[byte]) *RuneReader { - return &RuneReader{cursor: c, start: c.Pos()} +func NewRuneReader(state gigaparsec.State[byte]) *RuneReader { + return &RuneReader{state: state, start: state.Pos()} } func (rr *RuneReader) ReadRune() (r rune, size int, err error) { @@ -32,9 +31,9 @@ func (rr *RuneReader) ReadRune() (r rune, size int, err error) { }() var b [4]byte s := b[:] - n, next, err := rr.cursor.Read(s) + n, next, err := rr.state.Read(s) if err != nil && !errors.Is(err, io.EOF) { - rr.cursor = next + rr.state = next return 0, 0, fmt.Errorf("ReadRune: %w", err) } if n == 0 { @@ -42,12 +41,12 @@ func (rr *RuneReader) ReadRune() (r rune, size int, err error) { } s = s[:n] r, size = utf8.DecodeRune(s) - rr.cursor = rr.cursor.At(rr.cursor.Pos() + uint64(size)) + rr.state = rr.state.At(rr.state.Pos() + uint64(size)) return r, size, nil } -func (rr *RuneReader) Cursor() cursor.Cursor[byte] { - return rr.cursor +func (rr *RuneReader) State() gigaparsec.State[byte] { + return rr.state } func (rr *RuneReader) Error() error { @@ -55,7 +54,7 @@ func (rr *RuneReader) Error() error { } func (rr *RuneReader) Count() uint64 { - return rr.cursor.Pos() - rr.start + return rr.state.Pos() - rr.start } func Regexp(pattern string) gigaparsec.Parser[byte, string] { @@ -63,7 +62,7 @@ func Regexp(pattern string) gigaparsec.Parser[byte, string] { re := regexp.MustCompile(pattern) expected := fmt.Sprintf("match `%s`", pattern) return func(input gigaparsec.State[byte]) (gigaparsec.Result[byte, string], error) { - r := NewRuneReader(input.Cursor()) + r := NewRuneReader(input) idx := re.FindReaderIndex(r) err := r.Error() if err != nil && !errors.Is(err, io.EOF) { @@ -71,7 +70,7 @@ func Regexp(pattern string) gigaparsec.Parser[byte, string] { } if idx == nil { got := make([]byte, r.Count()) - _, _, err = input.Cursor().Read(got) + _, _, err = input.Read(got) if err != nil { return gigaparsec.Result[byte, string]{}, fmt.Errorf("Regexp: unexpected error: %w", err) } @@ -80,7 +79,7 @@ func Regexp(pattern string) gigaparsec.Parser[byte, string] { // Alas, this is a little wasteful because a Regexp can only return indices // when searching a RuneReader. dst := make([]byte, idx[1]-idx[0]) - n, _, err := input.Cursor().Read(dst) + n, _, err := input.Read(dst) if err != nil { return gigaparsec.Result[byte, string]{}, fmt.Errorf("Regexp: unexpected error: %w", err) } diff --git a/bytes/regexp_test.go b/bytes/regexp_test.go index 8ce81fc..11f3953 100644 --- a/bytes/regexp_test.go +++ b/bytes/regexp_test.go @@ -9,7 +9,6 @@ import ( "git.codemonkeysoftware.net/b/gigaparsec" pbytes "git.codemonkeysoftware.net/b/gigaparsec/bytes" - "git.codemonkeysoftware.net/b/gigaparsec/cursor" ptest "git.codemonkeysoftware.net/b/gigaparsec/test" pgen "git.codemonkeysoftware.net/b/gigaparsec/test/generator" "github.com/shoenig/test" @@ -29,7 +28,7 @@ func TestRegexp(t *testing.T) { })) t.Run("basically works", func(t *testing.T) { - result, err := pbytes.Regexp("a")(gigaparsec.MakeState(cursor.NewReaderAt(strings.NewReader("a")))) + result, err := pbytes.Regexp("a")(gigaparsec.MakeState(strings.NewReader("a"))) must.NoError(t, err) success, value, _ := result.Status() test.True(t, success, test.Sprint(result.Message())) @@ -40,7 +39,7 @@ func TestRegexp(t *testing.T) { func TestRuneReader(t *testing.T) { var s = "abcdefghijklmnopqrstuvwxyz" - rr := pbytes.NewRuneReader(cursor.NewReaderAt(strings.NewReader(s))) + rr := pbytes.NewRuneReader(gigaparsec.MakeState(strings.NewReader(s))) for i, b := range s { r, n, err := rr.ReadRune() test.NoError(t, err) @@ -56,7 +55,7 @@ func TestMatchString(t *testing.T) { t.Run("fails on unexpected error", rapid.MakeCheck(func(t *rapid.T) { s := rapid.StringN(-1, -1, 100).Draw(t, "s") readErr := pgen.Error().Draw(t, "readErr") - result, err := pbytes.MatchString(s)(gigaparsec.MakeState(cursor.NewReaderAt(ptest.ErrReaderAt(readErr)))) + result, err := pbytes.MatchString(s)(gigaparsec.MakeState(ptest.ErrReaderAt(readErr))) test.ErrorIs(t, err, readErr) success, _, _ := result.Status() test.False(t, success) @@ -68,7 +67,7 @@ func TestMatchString(t *testing.T) { notPrefix := func(b []byte) bool { return !bytes.HasPrefix(input, b) } s := string(bgen.Filter(notPrefix).Draw(t, "s")) - result, err := pbytes.MatchString(s)(gigaparsec.MakeState(cursor.NewReaderAt(bytes.NewReader(input)))) + result, err := pbytes.MatchString(s)(gigaparsec.MakeState(bytes.NewReader(input))) test.NoError(t, err) success, _, _ := result.Status() test.False(t, success) @@ -78,7 +77,7 @@ func TestMatchString(t *testing.T) { input := rapid.SliceOfN(rapid.Byte(), 1, 100).Draw(t, "input") slen := rapid.IntRange(0, len(input)).Draw(t, "slen") s := string(input[:slen]) - result, err := pbytes.MatchString(s)(gigaparsec.MakeState(cursor.NewReaderAt(bytes.NewReader(input)))) + result, err := pbytes.MatchString(s)(gigaparsec.MakeState(bytes.NewReader(input))) must.NoError(t, err) success, value, next := result.Status() must.True(t, success) diff --git a/cursor/cursor.go b/cursor/cursor.go deleted file mode 100644 index c669a4d..0000000 --- a/cursor/cursor.go +++ /dev/null @@ -1,80 +0,0 @@ -// SPDX-License-Identifier: Unlicense - -package cursor - -import ( - "errors" - "io" -) - -// Cursor reads data from a specific spot in a data source. -type Cursor[Datum any] interface { - // I almost parameterized Cursor by its implementation (i.e. the Curiously - // Recurring Template Pattern), but then each parser would need that parameter. - // That might work well in a language with much stronger type inference, but - // not in Go. The upside would have been that for each implementation Impl, - // Impl.Read could have returned an unboxed Impl, which would have slightly - // simplified testing and maybe slightly reduced allocs. - - // Read fill dst with data from this Cursor's position in the underlying - // source. It returns the number of data it read and a new Cursor for - // the position at which the read ended, or an error if the read failed. - // All calls to a given Cursor will return data from the same position. - // If n < len(dst) or if the cursor's position is at the end of the data source, - // Read will return an error explaining why it read fewer bytes than requested. - // If the error was due to the cursor reaching the end of the data source, - // err will be io.EOF. - Read(dst []Datum) (n uint64, next Cursor[Datum], err error) - - // Pos returns the Cursor's position within the source. - Pos() uint64 - - // At returns a new cursor at the position pos. - At(pos uint64) Cursor[Datum] -} - -type ReaderAt[T any] interface { - ReadAt(p []T, off int64) (n int, err error) -} - -type SliceReaderAt[T any] []T - -func (s SliceReaderAt[T]) ReadAt(dst []T, off int64) (n int, err error) { - if off < 0 { - return 0, errors.New("SliceReaderAt.ReadAt: negative offset") - } - if off >= int64(len(s)) { - return 0, io.EOF - } - n = copy(dst, s[off:]) - if n < len(dst) { - err = io.EOF - } - return n, err -} - -type ReaderAtCursor[T any] struct { - r ReaderAt[T] - pos uint64 -} - -func NewReaderAt[T any](r ReaderAt[T]) ReaderAtCursor[T] { - return ReaderAtCursor[T]{r: r} -} - -func (rac ReaderAtCursor[T]) Read(dst []T) (uint64, Cursor[T], error) { - n, err := rac.r.ReadAt(dst, int64(rac.pos)) - if n > 0 { - rac.pos += uint64(n) - } - return uint64(n), rac, err -} - -func (rac ReaderAtCursor[T]) Pos() uint64 { - return rac.pos -} - -func (rac ReaderAtCursor[T]) At(pos uint64) Cursor[T] { - rac.pos = pos - return rac -} diff --git a/gigaparsec.go b/gigaparsec.go index 9cc8091..d96fed0 100644 --- a/gigaparsec.go +++ b/gigaparsec.go @@ -9,8 +9,6 @@ import ( "io" "slices" "strings" - - "git.codemonkeysoftware.net/b/gigaparsec/cursor" ) type Result[In, Out any] struct { @@ -114,29 +112,33 @@ func MessageEnd(pos uint64, expected ...string) Message { return Message{pos: pos, got: "end of input", expected: expected} } -func MakeState[In any](c cursor.Cursor[In]) State[In] { - return State[In]{cursor: c} +type ReaderAt[T any] interface { + ReadAt(p []T, off int64) (n int, err error) +} + +func MakeState[In any](r ReaderAt[In]) State[In] { + return State[In]{r: r} } type State[In any] struct { - cursor cursor.Cursor[In] -} - -func (s State[In]) Cursor() cursor.Cursor[In] { - return s.cursor + r ReaderAt[In] + pos uint64 } func (s State[In]) Read(dst []In) (n uint64, next State[In], err error) { - n, c, err := s.cursor.Read(dst) - return n, State[In]{cursor: c}, err + nread, err := s.r.ReadAt(dst, int64(s.pos)) + if n > 0 { + s.pos += uint64(n) + } + return uint64(nread), s, err } func (s State[In]) Pos() uint64 { - return s.cursor.Pos() + return s.pos } func (s State[In]) At(pos uint64) State[In] { - return State[In]{cursor: s.cursor.At(pos)} + return State[In]{r: s.r, pos: pos} } type Parser[In, Out any] func(State[In]) (Result[In, Out], error) @@ -163,8 +165,8 @@ func (pe ParseError) Error() string { return Message(pe).String() } -func Run[In, Out any](p Parser[In, Out], c cursor.Cursor[In]) (out Out, err error) { - start := MakeState(c) +func Run[In, Out any](p Parser[In, Out], r ReaderAt[In]) (out Out, err error) { + start := MakeState(r) result, err := p(start) if err != nil { err = fmt.Errorf("Run: %w", err) @@ -315,7 +317,7 @@ func Map[In, Out1, Out2 any](p Parser[In, Out1], f func(Out1) Out2) Parser[In, O } func end[In any](s State[In]) (Result[In, struct{}], error) { - _, _, err := s.cursor.Read([]In{}) + _, _, err := s.Read([]In{}) if errors.Is(err, io.EOF) { return Succeed(true, struct{}{}, s, MessageOK(s.Pos())), nil } diff --git a/parser_test.go b/parser_test.go index cf20ad9..acd9deb 100644 --- a/parser_test.go +++ b/parser_test.go @@ -7,7 +7,6 @@ import ( "testing" "git.codemonkeysoftware.net/b/gigaparsec" - "git.codemonkeysoftware.net/b/gigaparsec/cursor" ptest "git.codemonkeysoftware.net/b/gigaparsec/test" "git.codemonkeysoftware.net/b/gigaparsec/test/generator" "github.com/shoenig/test" @@ -30,7 +29,7 @@ func hasPrefix(prefix []byte) func([]byte) bool { func TestSlice(t *testing.T) { assertParseFails := func(t rapid.TB, input []byte, p gigaparsec.Parser[byte, []byte]) { t.Helper() - start := gigaparsec.MakeState(cursor.NewReaderAt(bytes.NewReader(input))) + start := gigaparsec.MakeState(bytes.NewReader(input)) result, err := p(start) must.NoError(t, err) success, _, _ := result.Status() @@ -55,9 +54,9 @@ func TestSlice(t *testing.T) { })) t.Run("fails when read fails", rapid.MakeCheck(func(t *rapid.T) { expectedErr := generator.Error().Draw(t, "expectedErr") - c := ptest.ErrCursor[byte](expectedErr) + r := ptest.ErrReaderAt(expectedErr) s := rapid.SliceOfN(rapid.Byte(), 0, 100).Draw(t, "s") - result, err := gigaparsec.MatchSlice(s)(gigaparsec.MakeState(c)) + result, err := gigaparsec.MatchSlice(s)(gigaparsec.MakeState(r)) test.ErrorIs(t, err, expectedErr) success, _, _ := result.Status() test.False(t, success) @@ -66,7 +65,7 @@ func TestSlice(t *testing.T) { input := rapid.SliceOfN(rapid.Byte(), 1, -1).Draw(t, "input") sLen := rapid.IntRange(0, len(input)).Draw(t, "sLen") s := input[:sLen] - start := gigaparsec.MakeState(cursor.NewReaderAt(bytes.NewReader(input))) + start := gigaparsec.MakeState(bytes.NewReader(input)) result, err := gigaparsec.MatchSlice(s)(start) must.NoError(t, err) @@ -108,7 +107,7 @@ func TestBind(t *testing.T) { p := makeParser(pConsume) q := func(struct{}) gigaparsec.Parser[byte, struct{}] { return makeParser(qConsume) } - result, err := gigaparsec.Bind(p, q)(gigaparsec.MakeState(cursor.NewReaderAt(bytes.NewReader(nil)))) + result, err := gigaparsec.Bind(p, q)(gigaparsec.MakeState(bytes.NewReader(nil))) must.NoError(t, err) must.EqOp(t, pConsume || qConsume, result.Consumed()) })) diff --git a/cursor/cursor_test.go b/state_test.go similarity index 63% rename from cursor/cursor_test.go rename to state_test.go index b7d39a3..d385548 100644 --- a/cursor/cursor_test.go +++ b/state_test.go @@ -1,13 +1,11 @@ -// SPDX-License-Identifier: Unlicense - -package cursor_test +package gigaparsec_test import ( "bytes" "io" "testing" - "git.codemonkeysoftware.net/b/gigaparsec/cursor" + "git.codemonkeysoftware.net/b/gigaparsec" ptest "git.codemonkeysoftware.net/b/gigaparsec/test" pgen "git.codemonkeysoftware.net/b/gigaparsec/test/generator" "github.com/shoenig/test" @@ -15,69 +13,64 @@ import ( "pgregory.net/rapid" ) -func Todo(t *testing.T) { - t.Errorf("TODO") -} - -func testCursor[C cursor.Cursor[byte]](t *testing.T, makeCursor func([]byte) C) { - t.Helper() - t.Run("cursor reads the same position every time", rapid.MakeCheck(func(t *rapid.T) { +func TestState(t *testing.T) { + t.Run("state reads the same position every time", rapid.MakeCheck(func(t *rapid.T) { data := rapid.SliceOfN(rapid.Byte(), 1, 100).Draw(t, "data") dst := pgen.SliceOfNZero[byte](0, len(data)-1).Draw(t, "dst") expected := data[:len(dst)] - c := makeCursor(data) + st := gigaparsec.MakeState(bytes.NewReader(data)) - _, next, err := c.Read(dst) + _, next, err := st.Read(dst) must.NoError(t, err) must.SliceEqOp(t, expected, dst) next.Read(dst) - _, _, err = c.Read(dst) + _, _, err = st.Read(dst) must.NoError(t, err) must.SliceEqOp(t, expected, dst) })) t.Run("Read returns io.EOF iff it overruns source", rapid.MakeCheck(func(t *rapid.T) { data := rapid.SliceOfN(rapid.Byte(), 0, 100).Draw(t, "data") dst := pgen.SliceOfNZero[byte](0, 200).Draw(t, "dst") - c := makeCursor(data) + st := gigaparsec.MakeState(bytes.NewReader(data)) - n, _, err := c.Read(dst) + n, _, err := st.Read(dst) t.Logf("n=%d", n) must.EqOp(t, min(len(data), len(dst)), int(n)) - if len(dst) > len(data) || c.Pos() == uint64(len(data)) { + if len(dst) > len(data) || st.Pos() == uint64(len(data)) { must.ErrorIs(t, err, io.EOF) } else { must.NoError(t, err) } })) - t.Run("next cursor reads next input", rapid.MakeCheck(func(t *rapid.T) { + t.Run("next state reads next input", rapid.MakeCheck(func(t *rapid.T) { const maxLen = 100 data := rapid.SliceOfN(rapid.Byte(), 1, maxLen).Draw(t, "data") skip := rapid.IntRange(0, len(data)-1).Draw(t, "skip") - c := makeCursor(data) + st := gigaparsec.MakeState(bytes.NewReader(data)) - _, next, err := c.Read(make([]byte, skip)) + _, next, err := st.Read(make([]byte, skip)) must.NoError(t, err) must.EqOp(t, skip, int(next.Pos())) dst := make([]byte, maxLen) n, _, _ := next.Read(dst) must.SliceEqOp(t, data[skip:skip+int(n)], dst[:n]) })) - t.Run("Read returns an error if n is less than requested", rapid.MakeCheck(func(t *rapid.T) { + t.Run("Read returns io.EOF if n is less than requested", rapid.MakeCheck(func(t *rapid.T) { data := rapid.SliceOfN(rapid.Byte(), 0, 100).Draw(t, "data") - c := makeCursor(data) + st := gigaparsec.MakeState(bytes.NewReader(data)) - n, _, err := c.Read(make([]byte, len(data)+1)) + n, _, err := st.Read(make([]byte, len(data)+1)) test.ErrorIs(t, err, io.EOF) test.EqOp(t, len(data), int(n)) })) - t.Run("At sets cursor position", rapid.MakeCheck(func(t *rapid.T) { + t.Run("At sets state position", rapid.MakeCheck(func(t *rapid.T) { data := rapid.SliceOfN(rapid.Byte(), 1, 100).Draw(t, "data") pos := rapid.Uint64Range(0, uint64(len(data)-1)).Draw(t, "pos") - c := makeCursor(data).At(pos) + st := gigaparsec.MakeState(bytes.NewReader(data)).At(pos) dst := make([]byte, 1) - n, _, err := c.Read(dst) + n, _, err := st.Read(dst) test.EqOp(t, 1, n) test.NoError(t, err) test.EqOp(t, data[pos], dst[0]) @@ -85,31 +78,25 @@ func testCursor[C cursor.Cursor[byte]](t *testing.T, makeCursor func([]byte) C) t.Run("Pos returns correct position after At", rapid.MakeCheck(func(t *rapid.T) { var data []byte pos := rapid.Uint64().Draw(t, "pos") - c := makeCursor(data).At(pos) - test.EqOp(t, pos, c.Pos()) + st := gigaparsec.MakeState(bytes.NewReader(data)).At(pos) + test.EqOp(t, pos, st.Pos()) })) t.Run("Pos returns correct position after Read", rapid.MakeCheck(func(t *rapid.T) { const maxLen = 100 data := rapid.SliceOfN(rapid.Byte(), 1, maxLen).Draw(t, "data") skip := rapid.Uint64Range(0, uint64(len(data)-1)).Draw(t, "skip") - c := makeCursor(data) + st := gigaparsec.MakeState(bytes.NewReader(data)) - _, next, err := c.Read(make([]byte, skip)) + _, next, err := st.Read(make([]byte, skip)) must.NoError(t, err) test.EqOp(t, skip, next.Pos()) })) -} - -func TestReaderAtCursor(t *testing.T) { - testCursor(t, func(b []byte) cursor.ReaderAtCursor[byte] { - return cursor.NewReaderAt(bytes.NewReader(b)) - }) t.Run("Read returns an error if the ReaderAt fails", rapid.MakeCheck(func(t *rapid.T) { expectedErr := pgen.Error().Draw(t, "expectedErr") startPos := rapid.Uint64().Draw(t, "startPos") dst := pgen.SliceOfNZero[byte](0, 100).Draw(t, "dst") - c := cursor.NewReaderAt(ptest.ErrReaderAt(expectedErr)).At(startPos) - n, next, err := c.Read(dst) + st := gigaparsec.MakeState(ptest.ErrReaderAt(expectedErr)).At(startPos) + n, next, err := st.Read(dst) test.ErrorIs(t, err, expectedErr) test.EqOp(t, startPos, next.Pos()) test.Zero(t, n) diff --git a/test/readerat.go b/test/readerat.go index 14d0136..0c57402 100644 --- a/test/readerat.go +++ b/test/readerat.go @@ -7,7 +7,6 @@ import ( "io" "git.codemonkeysoftware.net/b/gigaparsec" - "git.codemonkeysoftware.net/b/gigaparsec/cursor" "github.com/shoenig/test" ) @@ -24,29 +23,6 @@ func ErrReaderAt(err error) io.ReaderAt { return errReaderAt{err: err} } -type errCursor[T any] struct { - err error - pos uint64 -} - -func (c errCursor[T]) Read([]T) (uint64, cursor.Cursor[T], error) { - return 0, c, c.err -} - -func (c errCursor[T]) At(pos uint64) cursor.Cursor[T] { - c.pos = pos - return c -} - -func (c errCursor[T]) Pos() uint64 { - return c.pos -} - -// ErrCursor return a [cursor.Cursor] with a Read method that always returns err. -func ErrCursor[T any](err error) cursor.Cursor[T] { - return errCursor[T]{err: err} -} - func StateIsAt[Input any](t test.T, s gigaparsec.State[Input], pos uint64) { test.EqOp(t, pos, s.Pos(), test.Sprintf("expected parser state to be at position %d, got %d", pos, s.Pos())) }