Rename SpotReader, remove cruft, and move helpers

This commit is contained in:
Brandon Dyck 2024-09-02 11:04:00 -06:00
parent 4be0e425ba
commit a00aea29f4
4 changed files with 229 additions and 113 deletions

95
cursor/cursor.go Normal file
View File

@ -0,0 +1,95 @@
package cursor
import (
"io"
)
// Cursor reads data from a specific spot in a data source.
//
// Design note: I almost parameterized Cursor by its implementation (i.e. the
// Curiously Recurring Template Pattern), but then each parser would need that
// parameter. That might work well in a language with much stronger type
// inference, but not in Go. The upside would have been that for each
// implementation Impl, Impl.Read could have returned an unboxed Impl, which
// would have slightly simplified testing and maybe slightly reduced allocs.
type Cursor[Datum any] interface {
	// Read fills dst with data from this Cursor's position in the underlying
	// source. It returns the number of data it read and a new Cursor for
	// the position at which the read ended, or an error if the read failed.
	// All calls to a given Cursor will return data from the same position.
	// If n < len(dst), Read will return an error explaining why it read fewer
	// data than requested. If Read tried to read past the end of the source,
	// err will be io.EOF.
	Read(dst []Datum) (n uint64, next Cursor[Datum], err error)
	// Pos returns the Cursor's position within the source.
	Pos() uint64
}
// SliceCursor is a Cursor backed by an in-memory slice.
type SliceCursor[Datum any] struct {
	data   []Datum
	offset uint64
}

// NewSlice returns a SliceCursor positioned at the start of data.
// The cursor aliases data; it does not copy it.
// (The original was a panic("not implemented") stub.)
func NewSlice[Datum any](data []Datum) SliceCursor[Datum] {
	return SliceCursor[Datum]{data: data}
}

// Read copies data from the cursor's position into dst. It returns the
// number of data copied and a cursor positioned just past them. If fewer
// than len(dst) data remained in the slice, err is io.EOF.
func (sc SliceCursor[Datum]) Read(dst []Datum) (n uint64, next Cursor[Datum], err error) {
	copied := copy(dst, sc.data[sc.offset:])
	if copied < len(dst) {
		err = io.EOF
	}
	n = uint64(copied)
	// sc is a value receiver, so advancing the offset here only affects the
	// cursor we return; the caller's cursor keeps its position.
	sc.offset += n
	return n, sc, err
}

// Pos returns the cursor's offset from the start of the slice.
func (sc SliceCursor[Datum]) Pos() uint64 {
	return sc.offset
}
// ReaderAtCursor is a Cursor over the bytes of an io.ReaderAt.
type ReaderAtCursor struct {
	r   io.ReaderAt
	pos uint64
}

// NewReaderAt returns a ReaderAtCursor positioned at offset 0 of r.
func NewReaderAt(r io.ReaderAt) ReaderAtCursor {
	return ReaderAtCursor{r: r}
}

// Read fills dst from the cursor's offset and returns the count of bytes
// read together with a cursor positioned after them, plus any error
// reported by the underlying ReadAt (io.EOF at end of input).
func (c ReaderAtCursor) Read(dst []byte) (uint64, Cursor[byte], error) {
	count, err := c.r.ReadAt(dst, int64(c.pos))
	if count > 0 {
		// The receiver is a copy, so this only moves the returned cursor.
		c.pos += uint64(count)
	}
	return uint64(count), c, err
}

// Pos reports the cursor's byte offset within the source.
func (c ReaderAtCursor) Pos() uint64 {
	return c.pos
}
// StringCursor is identical to SliceCursor[byte], but uses a string as its data source.
// The advantage is that creating a StringCursor does not require copying the source
// string into a []byte.
// StringCursor is identical to SliceCursor[byte], but uses a string as its data source.
// The advantage is that creating a StringCursor does not require copying the source
// string into a []byte.
type StringCursor struct {
	source string
	offset uint64
}

// NewString returns a StringCursor positioned at the start of s.
func NewString(s string) StringCursor {
	return StringCursor{source: s}
}

// Read copies bytes of the source string into dst. It returns how many
// bytes were copied and a cursor placed just past them; err is io.EOF when
// fewer than len(dst) bytes remained.
func (sc StringCursor) Read(dst []byte) (n uint64, next Cursor[byte], err error) {
	remaining := sc.source[sc.offset:]
	copied := uint64(copy(dst, remaining))
	if int(copied) != len(dst) {
		err = io.EOF
	}
	// Build the advanced cursor explicitly; the receiver stays untouched.
	advanced := StringCursor{source: sc.source, offset: sc.offset + copied}
	return copied, advanced, err
}

// Pos returns the cursor's byte offset within the source string.
func (sc StringCursor) Pos() uint64 {
	return sc.offset
}

38
cursor/helper.go Normal file
View File

@ -0,0 +1,38 @@
package cursor
import (
	"io"
	"unicode/utf8"
)
// BufferedReaderAt uses a buffer to supplement an io.Reader
// with limited backward seeking.
//
// NOTE(review): this is an unimplemented stub — the struct has no fields
// and the functions below are declared without bodies, so this will not
// build as ordinary Go until they are filled in.
type BufferedReaderAt struct{}

// NewBufferedReaderAt wraps r in a BufferedReaderAt that keeps at least
// minBuffer bytes available for re-reading. (Stub: no body yet.)
func NewBufferedReaderAt(r io.Reader, minBuffer uint64) *BufferedReaderAt

// ReadAt reads bytes from the underlying reader. If the offset is after
// the end of the buffer, ReadAt will first read and ignore bytes from the
// underlying reader until it reaches the offset. If the offset is
// before the start of the buffer, ReadAt will return an error.
//
// If your parser needs unlimited lookahead, you should probably
// just read the whole input into a slice and use SliceCursor[byte].
// (The original comment said "BytesCursor", which does not exist.)
func (b *BufferedReaderAt) ReadAt(dst []byte, offset int64) (int, error)
// RuneReader adapts a Cursor[byte] to io.Reader and io.RuneReader, for
// compatibility with the regexp package. regexp.MatchReader and friends
// require io.RuneReader — i.e. a ReadRune method, which the original type
// lacked despite its documentation.
type RuneReader struct {
	cursor Cursor[byte]
}

// NewRuneReader returns a RuneReader that reads from c.
func NewRuneReader(c Cursor[byte]) *RuneReader {
	return &RuneReader{cursor: c}
}

// Read implements io.Reader, advancing the underlying cursor.
func (r *RuneReader) Read(dst []byte) (int, error) {
	n, c, err := r.cursor.Read(dst)
	r.cursor = c
	return int(n), err
}

// ReadRune implements io.RuneReader. It decodes the next UTF-8 rune from
// the cursor, consuming only that rune's bytes. An invalid encoding is
// returned as utf8.RuneError with size 1, matching bytes.Reader and
// strings.Reader.
func (r *RuneReader) ReadRune() (rune, int, error) {
	var buf [utf8.UTFMax]byte
	filled := 0
	for filled < len(buf) {
		// Read one byte at a time so we never consume past the rune.
		n, c, err := r.cursor.Read(buf[filled : filled+1])
		if n == 0 {
			if filled == 0 {
				// Nothing left at all: surface the cursor's error
				// (io.EOF at end of input).
				return 0, 0, err
			}
			// Input ended mid-rune; decode the partial bytes below
			// (yields utf8.RuneError).
			break
		}
		r.cursor = c
		filled += int(n)
		if utf8.FullRune(buf[:filled]) {
			break
		}
	}
	ch, size := utf8.DecodeRune(buf[:filled])
	return ch, size, nil
}

// Cursor returns the reader's current position as a Cursor, so parsing
// can resume where reading stopped.
func (r *RuneReader) Cursor() Cursor[byte] {
	return r.cursor
}

96
gigaparsec.go Normal file
View File

@ -0,0 +1,96 @@
package gigaparsec
import (
"errors"
"fmt"
)
// TODO Everything. See https://smunix.github.io/dev.stephendiehl.com/fun/002_parsers.html.
// ErrNoParse is the sentinel error returned when a parser fails to match
// its input. Compare with errors.Is.
var ErrNoParse = errors.New("no parse")

// Result carries a parser's reply: the parsed Value, the State remaining
// after the parse, and a diagnostic Message.
//
// NOTE(review): Result embeds both State and Message, and each carries a
// Pos, so the promoted selector Result.Pos is ambiguous; callers must use
// Result.State.Pos or Result.Message.Pos explicitly.
type Result[T any] struct {
	Value T
	State
	Message
}

// Message is a diagnostic tied to a position in the input, with the list
// of inputs that were expected there.
type Message struct {
	Pos
	Msg      string
	Expected []string
}

// ParseError wraps a Message as an error value.
type ParseError struct {
	Message
}

// Error implements the error interface, formatting the position and message.
func (pe ParseError) Error() string {
	return fmt.Sprintf("parse error: %d: %s", pe.Message.Pos, pe.Message.Msg)
}

// Pos is a byte offset into the input.
type Pos uint64

// State is the parser state threaded through a parse: the current position
// and the remaining input.
type State struct {
	Pos
	Input []byte
}

// Parser consumes input from a State and reports whether any input was
// consumed, the reply (value, new state, message), and an error on failure.
type Parser[T any] func(State) (consumed bool, reply Result[T], err error)
// Return creates a parser that always succeeds with value, consuming no
// input and leaving the state unchanged.
func Return[T any](value T) Parser[T] {
	return func(s State) (bool, Result[T], error) {
		reply := Result[T]{Value: value, State: s}
		return false, reply, nil
	}
}
// Satisfy creates a parser that consumes a single byte when pred accepts
// it, and fails with ErrNoParse otherwise (including at end of input).
func Satisfy(pred func(byte) bool) Parser[byte] {
	return func(state State) (bool, Result[byte], error) {
		if len(state.Input) == 0 {
			return false, Result[byte]{}, ErrNoParse
		}
		b := state.Input[0]
		if !pred(b) {
			return false, Result[byte]{}, ErrNoParse
		}
		// Advance past the matched byte. The original assigned
		// state.Input[1:] (a []byte) directly to the State field, which
		// does not compile, and never advanced Pos.
		next := State{Pos: state.Pos + 1, Input: state.Input[1:]}
		return true, Result[byte]{Value: b, State: next}, nil
	}
}
// Bind sequences two parsers: it runs p, feeds its value to f, and runs
// the resulting parser on the state p left behind. The consumed flag is
// true when either parser consumed input.
func Bind[A, B any](p Parser[A], f func(A) Parser[B]) Parser[B] {
	return func(input State) (bool, Result[B], error) {
		consumed, replyA, err := p(input)
		if err != nil {
			// Propagate whether p consumed input: Choose and Try depend on
			// an accurate consumed flag to decide when backtracking is
			// legal. (The original hard-coded false here and also used
			// replyA.Rest, a field that does not exist — it is State.)
			return consumed, Result[B]{}, err
		}
		consumed2, replyB, err := f(replyA.Value)(replyA.State)
		return consumed || consumed2, replyB, err
	}
}
// Choose tries p and commits to its reply if p consumed input or
// succeeded; q is attempted only when p failed without consuming
// anything. This is the standard parsec backtracking rule for choice.
func Choose[A any](p, q Parser[A]) Parser[A] {
	return func(input State) (bool, Result[A], error) {
		consumedP, replyP, errP := p(input)
		if consumedP || errP == nil {
			// A consumed reply (success or failure) and an empty success
			// both commit to p. The original fell through and ran q even
			// after p succeeded without consuming, letting a consuming q
			// override p's success.
			return consumedP, replyP, errP
		}
		// p failed without consuming input: q gets a fresh try.
		return q(input)
	}
}
// Try runs p but reports any failure as non-consuming, so an enclosing
// Choose can still backtrack and attempt an alternative.
func Try[A any](p Parser[A]) Parser[A] {
	return func(input State) (bool, Result[A], error) {
		consumed, reply, err := p(input)
		if err == nil {
			return consumed, reply, nil
		}
		// Failure: pretend no input was consumed and drop the reply.
		return false, Result[A]{}, err
	}
}

View File

@ -1,113 +0,0 @@
package spotreader
import (
"io"
"iter"
)
type ReaderSource struct {
io.ReadSeeker
}
// BufferedReadSeeker uses a buffer to supplement an io.Reader
// with limited backward seeking.
type BufferedReadSeeker struct{}
func NewBufferedReadSeeker(r io.Reader, minBuffer uint64) *BufferedReadSeeker
// Read reads bytes from the underlying reader. If the current offset is after
// the end of the buffer, Read will first read and ignore bytes from the
// underlying reader until it reaches the offset. If the current offset is
// before the start of the buffer, Read will return an error.
//
// If your parser needs unlimited lookahead, you should probably
// just read the whole input into a slice and use BytesSpotReader.
func (b *BufferedReadSeeker) Read([]byte) (int, error)
func (b *BufferedReadSeeker) Seek(offset int64, whence int) (int64, error)
// SpotReader reads data from a specific spot in a stream.
type SpotReader[Datum any] interface {
// Read returns n data from this SpotReader's position in the underlying
// stream. It returns the data and a new SpotReader for the position at which
// the read ended, or an error if the read failed.
// All calls to a given SpotReader will return data from the same position.
Read(n uint64) ([]Datum, SpotReader[Datum], error)
// Pos returns the SpotReader's position within the stream.
Pos() int64
}
// TODO Consider parameterizing SpotReader by its implementation so that Read
// doesn't have to box the next SpotReader:
type UnboxedSpotReader[Datum any, Impl any] interface {
Read(n uint64) ([]Datum, Impl, error)
Pos() int64
}
// FakeSpotReader is an example of an UnboxedSpotReader.
// This style would only be worth using after pretty solid benchmarking.
// If this doesn't lower allocs, then I could also try parameterizing
// parsers by type constrained by UnboxedSpotReader, but that would make
// the user write a lot of hideous type signatures.
type FakeSpotReader[Datum any] struct{}
func (f FakeSpotReader[Datum]) Read(uint64) ([]Datum, SpotReader[Datum], error)
func (f FakeSpotReader[Datum]) Pos() int64
func ExampleFakeSpotReader() {
var sr1 SpotReader[int] = FakeSpotReader[int]{}
var sr2 SpotReader[int]
_, sr2, _ = sr1.Read(0)
sr2.Pos()
}
// SeqSpotReader is backed by a sequence of values of some type.
// It is intended for use with a concurrent lexing pass.
// TODO Since this will probably be handling tokens one at a time,
// consider using a circular buffer.
type SeqSpotReader[Datum any] struct{}
func NewSeq[Datum any](seq iter.Seq[Datum], buflen uint) SeqSpotReader[Datum] {
panic("not implemented")
}
type SliceSpotReader struct{}
func NewSlice[Datum any]([]Datum) SliceSpotReader { panic("not implemented") }
type BytesSpotReader struct{}
func NewBytes([]byte) BytesSpotReader
type ReadSeekerSpotReader struct{}
func NewReadSeeker(io.ReadSeeker) ReadSeekerSpotReader
type StringSpotReader struct{}
func NewString(s string) StringSpotReader
// RuneReader is an io.RuneReader backed by a SpotReader, for compatibility
// with the regexp package.
type RuneReader struct{}
func NewRuneReader(s SpotReader[byte]) *RuneReader
func (s *RuneReader) Read([]byte) (int, error)
/*
I don't know how to structure this yet, and I'll need some experimentation to
decide. The idea is that there will be a readseeker that lives outside the
parser calls, and there will be an immutable reader that refers to it and gets
passed through them as part of the parser state. That immutable reader will
also hold an offset from the start of the input, so when it reads, it will
first seek to that point in the ReadSeeker. Thus a given reader can only read
at a particular point in the input. It will return a new reader with an offset
equal to the first reader's offset plus the length of the read.
For using SpotReader with an io.Reader source that is not an io.ReadSeeker,
BufferedReadSeeker allows limited backward seeking. This will not work with
unlimited lookahead/backtracking; its Seek method will return an error if
the desired offset is before the start of the buffer.
*/