package spotreader import ( "io" "iter" ) type ReaderSource struct { io.ReadSeeker } // BufferedReadSeeker uses a buffer to supplement an io.Reader // with limited backward seeking. type BufferedReadSeeker struct{} func NewBufferedReadSeeker(r io.Reader, minBuffer uint64) *BufferedReadSeeker // Read reads bytes from the underlying reader. If the current offset is after // the end of the buffer, Read will first read and ignore bytes from the // underlying reader until it reaches the offset. If the current offset is // before the start of the buffer, Read will return an error. // // If your parser needs unlimited lookahead, you should probably // just read the whole input into a slice and use BytesSpotReader. func (b *BufferedReadSeeker) Read([]byte) (int, error) func (b *BufferedReadSeeker) Seek(offset int64, whence int) (int64, error) // SpotReader reads data from a specific spot in a stream. type SpotReader[Datum any] interface { // Read returns n data from this SpotReader's position in the underlying // stream. It returns the data and a new SpotReader for the position at which // the read ended, or an error if the read failed. // All calls to a given SpotReader will return data from the same position. Read(n uint64) ([]Datum, SpotReader[Datum], error) // Pos returns the SpotReader's position within the stream. Pos() int64 } // TODO Consider parameterizing SpotReader by its implementation so that Read // doesn't have to box the next SpotReader: type UnboxedSpotReader[Datum any, Impl any] interface { Read(n uint64) ([]Datum, Impl, error) Pos() int64 } // FakeSpotReader is an example of an UnboxedSpotReader. // This style would only be worth using after pretty solid benchmarking. // If this doesn't lower allocs, then I could also try parameterizing // parsers by type constrained by UnboxedSpotReader, but that would make // the user write a lot of hideous type signatures. type FakeSpotReader[Datum any] struct{} func (f FakeSpotReader[Datum]) Read(uint64) ([]Datum, SpotReader[Datum], error) func (f FakeSpotReader[Datum]) Pos() int64 func ExampleFakeSpotReader() { var sr1 SpotReader[int] = FakeSpotReader[int]{} var sr2 SpotReader[int] _, sr2, _ = sr1.Read(0) sr2.Pos() } // SeqSpotReader as backed by a sequence of values of some type. // It is intended for use with a concurrent lexing pass. // TODO Since this will probably be handling tokens one at a time, // consider using a circular buffer. type SeqSpotReader[Datum any] struct{} func NewSeq[Datum any](seq iter.Seq[Datum], buflen uint) SeqSpotReader[Datum] { panic("not implemented") } type SliceSpotReader struct{} func NewSlice[Datum any]([]Datum) SliceSpotReader { panic("not implemented") } type BytesSpotReader struct{} func NewBytes([]byte) BytesSpotReader type ReadSeekerSpotReader struct{} func NewReadSeeker(io.ReadSeeker) ReadSeekerSpotReader type StringSpotReader struct{} func NewString(s string) StringSpotReader // RuneReader is an io.RuneReader backed by a SpotReader, for compatibility // with the regexp package. type RuneReader struct{} func NewRuneReader(s SpotReader[byte]) *RuneReader func (s *RuneReader) Read([]byte) (int, error) /* I don't know how to structure this yet, and I'll need some experimentation to decide. The idea is that there will be a readseeker that lives outside the parser calls, and there will be an immutable reader that refers to it and gets passed through them as part of the parser state. That immutable reader will also hold an offset from the start of the input, so when it reads, it will first seek to that point in the ReadSeeker. Thus a given reader can only read at a particular point in the input. It will return a new reader with an offset equal to the first readers offset plus the length of the read. For using SpotReader with an io.Reader source that is not an io.ReadSeeker, BufferedReadSeeker allows limited backward seeking. This will not work with unlimited lookahead/backtracking; its Seek method will return an error if the desired offset is before the start of the buffer. */