State.Read: normalize ReaderAt EOF behavior and test both variants

Reviewed copy of the patch. Line structure is restored; customEOFReaderAt.ReadAt
now overrides the error only on an exact-fit read; a redundant At(pos) call was
dropped from the "Read ends before end of source" test; doc comments were added.
The post-image hashes on the "index" lines for gigaparsec.go and state_test.go
predate these edits; regenerate the patch with git diff if they matter.

diff --git a/TODO.txt b/TODO.txt
index 3df4ac3..848c55f 100644
--- a/TODO.txt
+++ b/TODO.txt
@@ -1,4 +1,3 @@
-Test State against both possible ReaderAt EOF behaviors
 Write Repeat tests
 Think about not requiring so much Pos() when making messages
 Rename Seq2 to Seq
diff --git a/gigaparsec.go b/gigaparsec.go
index c5d7549..cf59916 100644
--- a/gigaparsec.go
+++ b/gigaparsec.go
@@ -7,6 +7,7 @@ import (
 	"errors"
 	"fmt"
 	"io"
+	"math"
 	"slices"
 	"strings"
 )
@@ -142,10 +143,27 @@ type State[In any] struct {
 }
 
+// Read fills dst with input starting at the state's position. It returns the
+// number of elements read and a State advanced past them.
+//
+// io.ReaderAt permits a read that ends exactly at the end of input to return
+// either io.EOF or nil. Read hides that difference: when a non-empty dst is
+// completely filled, an accompanying io.EOF is discarded.
 func (s State[In]) Read(dst []In) (n uint64, next State[In], err error) {
+	// s.pos is converted to int64 for ReadAt; a larger value cannot be represented.
+	if s.pos > math.MaxInt64 {
+		return 0, s, io.EOF
+	}
 	nread, err := s.r.ReadAt(dst, int64(s.pos))
 	if nread > 0 {
 		s.pos += uint64(nread)
 	}
+	if nread == len(dst) && err == io.EOF {
+		if nread == 0 {
+			// Zero-length read: keep the reader's io.EOF.
+			return 0, s, io.EOF
+		}
+		return uint64(nread), s, nil
+	}
 	return uint64(nread), s, err
 }
 
diff --git a/state_test.go b/state_test.go
index e6f3ce0..3a64cbe 100644
--- a/state_test.go
+++ b/state_test.go
@@ -4,6 +4,7 @@ import (
 	"bytes"
 	"cmp"
 	"io"
+	"math"
 	"testing"
 
 	"git.codemonkeysoftware.net/b/gigaparsec"
@@ -15,6 +16,36 @@ import (
 	"pgregory.net/rapid"
 )
 
+// customEOFReaderAt wraps a bytes.Reader so that tests can choose which of
+// the two results io.ReaderAt permits (io.EOF or nil) is returned when a
+// read ends exactly at the end of the source.
+type customEOFReaderAt struct {
+	r             *bytes.Reader
+	eofAtExactFit bool
+}
+
+func newCustomEOFReaderAt(b []byte, eofAtExactFit bool) customEOFReaderAt {
+	return customEOFReaderAt{
+		r:             bytes.NewReader(b),
+		eofAtExactFit: eofAtExactFit,
+	}
+}
+
+// ReadAt reads from the wrapped reader, overriding the error only when the
+// read ends exactly at the end of the source. Every other read keeps the
+// error reported by the wrapped bytes.Reader.
+func (r customEOFReaderAt) ReadAt(p []byte, off int64) (n int, err error) {
+	n, err = r.r.ReadAt(p, off)
+	if int64(len(p))+off == r.r.Size() {
+		if r.eofAtExactFit {
+			err = io.EOF
+		} else {
+			err = nil
+		}
+	}
+	return n, err
+}
+
 func TestState(t *testing.T) {
 	t.Run("state reads the same position every time", rapid.MakeCheck(func(t *rapid.T) {
 		data := rapid.SliceOfN(rapid.Byte(), 1, 100).Draw(t, "data")
@@ -31,19 +62,59 @@ func TestState(t *testing.T) {
 		must.NoError(t, err)
 		must.SliceEqOp(t, expected, dst)
 	}))
-	t.Run("Read returns io.EOF iff it overruns source", rapid.MakeCheck(func(t *rapid.T) {
-		data := rapid.SliceOfN(rapid.Byte(), 0, 100).Draw(t, "data")
-		dst := pgen.SliceOfNZero[byte](0, 200).Draw(t, "dst")
-		st := gigaparsec.MakeState(bytes.NewReader(data))
+	t.Run("Read ends before end of source", rapid.MakeCheck(func(t *rapid.T) {
+		src := rapid.SliceOfN(rapid.Byte(), 1, 100).Draw(t, "src")
+		endReadAt := rapid.IntRange(0, len(src)-1).Draw(t, "endReadAt")
+		pos := rapid.Uint64Range(0, uint64(endReadAt)).Draw(t, "pos")
+		dst := pgen.SliceOfNZero[byte](0, endReadAt-int(pos)).Draw(t, "dst")
+		st := gigaparsec.MakeState(bytes.NewReader(src)).At(pos)
 
-		n, _, err := st.Read(dst)
-		t.Logf("n=%d", n)
-		must.EqOp(t, min(len(data), len(dst)), int(n))
-		if len(dst) > len(data) || st.Pos() == uint64(len(data)) {
-			must.ErrorIs(t, err, io.EOF)
-		} else {
-			must.NoError(t, err)
-		}
+		n, next, err := st.Read(dst)
+
+		test.EqOp(t, uint64(len(dst)), n)
+		ptest.StateIsAt(t, next, pos+n)
+		test.NoError(t, err)
+		test.SliceEqOp(t, src[pos:pos+n], dst)
+	}))
+	t.Run("Non-empty Read ends at end of source", rapid.MakeCheck(func(t *rapid.T) {
+		readerReturnsEOF := rapid.Bool().Draw(t, "readerReturnsEOF")
+		src := rapid.SliceOfN(rapid.Byte(), 1, 100).Draw(t, "src")
+		dst := pgen.SliceOfNZero[byte](1, len(src)).Draw(t, "dst")
+		pos := uint64(len(src) - len(dst))
+		st := gigaparsec.MakeState(newCustomEOFReaderAt(src, readerReturnsEOF))
+
+		n, next, err := st.At(pos).Read(dst)
+
+		test.EqOp(t, uint64(len(dst)), n)
+		ptest.StateIsAt(t, next, pos+n)
+		test.NoError(t, err)
+		test.SliceEqOp(t, src[pos:pos+n], dst)
+	}))
+	t.Run("Read overruns source", rapid.MakeCheck(func(t *rapid.T) {
+		src := rapid.SliceOfN(rapid.Byte(), 1, 100).Draw(t, "src")
+		pos := rapid.Uint64Range(0, uint64(len(src))-1).Draw(t, "pos")
+		minDstLen := len(src) - int(pos) + 1
+		dst := pgen.SliceOfNZero[byte](minDstLen, minDstLen+10).Draw(t, "dst")
+		st := gigaparsec.MakeState(bytes.NewReader(src)).At(pos)
+
+		n, next, err := st.Read(dst)
+
+		test.EqOp(t, uint64(len(src)), n+pos)
+		ptest.StateIsAt(t, next, pos+n)
+		test.ErrorIs(t, err, io.EOF)
+		test.SliceEqOp(t, src[pos:pos+n], dst[:n])
+	}))
+	t.Run("Read starts after end of source", rapid.MakeCheck(func(t *rapid.T) {
+		src := rapid.SliceOfN(rapid.Byte(), 0, 100).Draw(t, "src")
+		dst := pgen.SliceOfNZero[byte](0, 100).Draw(t, "dst")
+		pos := rapid.Uint64Min(uint64(len(src))).Draw(t, "pos")
+		st := gigaparsec.MakeState(bytes.NewReader(src)).At(pos)
+
+		n, next, err := st.Read(dst)
+
+		test.EqOp(t, 0, n)
+		ptest.StateIsAt(t, next, pos)
+		test.ErrorIs(t, err, io.EOF)
 	}))
 	t.Run("next state reads next input", rapid.MakeCheck(func(t *rapid.T) {
 		const maxLen = 100
@@ -58,14 +129,6 @@ func TestState(t *testing.T) {
 		n, _, _ := next.Read(dst)
 		must.SliceEqOp(t, data[skip:skip+int(n)], dst[:n])
 	}))
-	t.Run("Read returns io.EOF if n is less than requested", rapid.MakeCheck(func(t *rapid.T) {
-		data := rapid.SliceOfN(rapid.Byte(), 0, 100).Draw(t, "data")
-		st := gigaparsec.MakeState(bytes.NewReader(data))
-
-		n, _, err := st.Read(make([]byte, len(data)+1))
-		test.ErrorIs(t, err, io.EOF)
-		test.EqOp(t, len(data), int(n))
-	}))
 	t.Run("At sets state position", rapid.MakeCheck(func(t *rapid.T) {
 		data := rapid.SliceOfN(rapid.Byte(), 1, 100).Draw(t, "data")
 		pos := rapid.Uint64Range(0, uint64(len(data)-1)).Draw(t, "pos")
@@ -95,7 +158,7 @@ func TestState(t *testing.T) {
 	}))
 	t.Run("Read returns an error if the ReaderAt fails", rapid.MakeCheck(func(t *rapid.T) {
 		expectedErr := pgen.Error().Draw(t, "expectedErr")
-		startPos := rapid.Uint64().Draw(t, "startPos")
+		startPos := rapid.Uint64Max(math.MaxInt64).Draw(t, "startPos")
 		dst := pgen.SliceOfNZero[byte](0, 100).Draw(t, "dst")
 		st := gigaparsec.MakeState(ptest.ErrReaderAt(expectedErr)).At(startPos)
 		n, next, err := st.Read(dst)