Added sketch and notes on reader design

This commit is contained in:
Brandon Dyck 2024-08-31 15:13:27 -06:00
parent 2041afdb94
commit 4be0e425ba
2 changed files with 116 additions and 0 deletions

3
go.mod Normal file
View File

@ -0,0 +1,3 @@
module git.codemonkeysoftware.net/b/gigaparsec
go 1.23

113
spotreader/reader.go Normal file
View File

@ -0,0 +1,113 @@
package spotreader
import (
"io"
"iter"
)
// ReaderSource wraps an io.ReadSeeker by embedding it, so the embedded value's
// Read and Seek methods are promoted onto ReaderSource.
// NOTE(review): presumably this is the shared stream that lives outside the
// parser calls and that readers seek into before reading — confirm once the
// design settles.
type ReaderSource struct {
io.ReadSeeker
}
// BufferedReadSeeker uses a buffer to supplement an io.Reader
// with limited backward seeking.
//
// It is meant for sources that are an io.Reader but not an io.ReadSeeker.
// Backward seeking is bounded by the buffer, so it is not suitable for
// parsers that need unlimited lookahead or backtracking.
type BufferedReadSeeker struct{}
// NewBufferedReadSeeker is the constructor for a BufferedReadSeeker over r.
// It is declared without a body in this sketch.
// NOTE(review): minBuffer is presumably the minimum amount of already-read
// data kept available for backward seeking — confirm once implemented.
func NewBufferedReadSeeker(r io.Reader, minBuffer uint64) *BufferedReadSeeker
// Read reads bytes from the underlying reader, starting at the current
// offset. Its signature matches io.Reader.
//
// If the current offset is after the end of the buffer, Read will first read
// and ignore bytes from the underlying reader until it reaches the offset.
// If the current offset is before the start of the buffer, Read will return
// an error.
//
// If your parser needs unlimited lookahead, you should probably
// just read the whole input into a slice and use BytesSpotReader.
func (b *BufferedReadSeeker) Read([]byte) (int, error)
// Seek has the same signature as io.Seeker's Seek method and is declared
// without a body in this sketch.
// Per the design notes at the end of this file, Seek will return an error if
// the desired offset is before the start of the buffer.
func (b *BufferedReadSeeker) Seek(offset int64, whence int) (int64, error)
// SpotReader reads data from a specific spot in a stream.
//
// A SpotReader is bound to one position: reading does not move it, but
// instead yields a new SpotReader for the position where the read ended.
type SpotReader[Datum any] interface {
// Read returns n data from this SpotReader's position in the underlying
// stream. It returns the data and a new SpotReader for the position at which
// the read ended, or an error if the read failed.
// All calls to a given SpotReader will return data from the same position.
// NOTE(review): what happens when fewer than n data remain is not specified
// here — decide and document.
Read(n uint64) ([]Datum, SpotReader[Datum], error)
// Pos returns the SpotReader's position within the stream.
Pos() int64
}
// UnboxedSpotReader is a variant of SpotReader whose Read returns the next
// reader as the type parameter Impl rather than as a SpotReader interface
// value.
//
// TODO Consider parameterizing SpotReader by its implementation so that Read
// doesn't have to box the next SpotReader.
type UnboxedSpotReader[Datum any, Impl any] interface {
// Read has the same shape as SpotReader.Read, except that the second result
// is an Impl instead of a SpotReader[Datum].
Read(n uint64) ([]Datum, Impl, error)
// Pos has the same signature as SpotReader.Pos.
Pos() int64
}
// FakeSpotReader is an example of an UnboxedSpotReader.
// This style would only be worth using after pretty solid benchmarking.
// If this doesn't lower allocs, then I could also try parameterizing
// parsers by type constrained by UnboxedSpotReader, but that would make
// the user write a lot of hideous type signatures.
//
// NOTE(review): as declared, Read returns the boxed SpotReader[Datum] rather
// than FakeSpotReader[Datum], so this type has the method set of
// SpotReader[Datum]. An unboxed example would presumably return
// FakeSpotReader[Datum] — confirm which is intended.
type FakeSpotReader[Datum any] struct{}
// Read is declared without a body; its signature matches SpotReader.Read.
func (f FakeSpotReader[Datum]) Read(uint64) ([]Datum, SpotReader[Datum], error)
// Pos is declared without a body; its signature matches SpotReader.Pos.
func (f FakeSpotReader[Datum]) Pos() int64
// ExampleFakeSpotReader shows the intended call pattern: read from one
// SpotReader, then ask the SpotReader handed back by that read for its
// position.
func ExampleFakeSpotReader() {
	var first SpotReader[int] = FakeSpotReader[int]{}
	// The data and the error are discarded on purpose; only the reader
	// returned by Read matters for this illustration.
	_, next, _ := first.Read(0)
	next.Pos()
}
// SeqSpotReader as backed by a sequence of values of some type.
// It is intended for use with a concurrent lexing pass.
// TODO Since this will probably be handling tokens one at a time,
// consider using a circular buffer.
type SeqSpotReader[Datum any] struct{}
func NewSeq[Datum any](seq iter.Seq[Datum], buflen uint) SeqSpotReader[Datum] {
panic("not implemented")
}
// SliceSpotReader is a placeholder type with no fields yet.
// NOTE(review): NewSlice is generic over Datum but this type is not; it will
// presumably need a Datum type parameter to hold a []Datum — confirm.
type SliceSpotReader struct{}

// NewSlice is the constructor for a SliceSpotReader over a slice of data.
// It is currently a stub: every call panics with "not implemented".
func NewSlice[Datum any]([]Datum) SliceSpotReader {
	panic("not implemented")
}
// BytesSpotReader is a placeholder type with no fields or methods yet.
// The BufferedReadSeeker.Read documentation recommends it for parsers that
// need unlimited lookahead: read the whole input into a slice and use this.
type BytesSpotReader struct{}
// NewBytes is the constructor for a BytesSpotReader over a byte slice.
// It is declared without a body in this sketch.
func NewBytes([]byte) BytesSpotReader
// ReadSeekerSpotReader is a placeholder type with no fields or methods yet.
// NOTE(review): presumably the immutable reader described in the design notes
// at the end of this file, which holds an offset and seeks the shared
// io.ReadSeeker to it before each read — confirm.
type ReadSeekerSpotReader struct{}
// NewReadSeeker is the constructor for a ReadSeekerSpotReader over an
// io.ReadSeeker. It is declared without a body in this sketch.
func NewReadSeeker(io.ReadSeeker) ReadSeekerSpotReader
// StringSpotReader is a placeholder type with no fields or methods yet.
// NOTE(review): the Datum type it will read from a string (bytes or runes) is
// not decided here — confirm.
type StringSpotReader struct{}
// NewString is the constructor for a StringSpotReader over s.
// It is declared without a body in this sketch.
func NewString(s string) StringSpotReader
// RuneReader is an io.RuneReader backed by a SpotReader, for compatibility
// with the regexp package.
type RuneReader struct{}

// Compile-time check that RuneReader really provides the io.RuneReader method
// set that regexp's reader-based matching functions require.
var _ io.RuneReader = (*RuneReader)(nil)

// NewRuneReader is the constructor for a RuneReader that draws its bytes
// from s. It is declared without a body in this sketch.
func NewRuneReader(s SpotReader[byte]) *RuneReader

// Read has the same signature as io.Reader's Read method. It is kept so that
// existing references to it remain valid, and is declared without a body in
// this sketch.
func (s *RuneReader) Read([]byte) (int, error)

// ReadRune is the method required by io.RuneReader: it returns the next rune,
// its size in bytes, and any error. Without it, *RuneReader cannot be passed
// where an io.RuneReader is expected.
// It is currently a stub: every call panics with "not implemented".
func (s *RuneReader) ReadRune() (r rune, size int, err error) {
	panic("not implemented")
}
/*
I don't know how to structure this yet, and I'll need some experimentation to
decide. The idea is that there will be a readseeker that lives outside the
parser calls, and there will be an immutable reader that refers to it and gets
passed through them as part of the parser state. That immutable reader will
also hold an offset from the start of the input, so when it reads, it will
first seek to that point in the ReadSeeker. Thus a given reader can only read
at a particular point in the input. It will return a new reader with an offset
equal to the first reader's offset plus the length of the read.
For using SpotReader with an io.Reader source that is not an io.ReadSeeker,
BufferedReadSeeker allows limited backward seeking. This will not work with
unlimited lookahead/backtracking; its Seek method will return an error if
the desired offset is before the start of the buffer.
*/