Added sketch and notes on reader design
This commit is contained in:
parent
2041afdb94
commit
4be0e425ba
3
go.mod
Normal file
3
go.mod
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
module git.codemonkeysoftware.net/b/gigaparsec
|
||||||
|
|
||||||
|
go 1.23
|
113
spotreader/reader.go
Normal file
113
spotreader/reader.go
Normal file
@ -0,0 +1,113 @@
|
|||||||
|
package spotreader
|
||||||
|
|
||||||
|
import (
|
||||||
|
"io"
|
||||||
|
"iter"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ReaderSource embeds an io.ReadSeeker, so the wrapped value's Read and Seek
// methods are promoted onto ReaderSource itself.
//
// NOTE(review): presumably this is the long-lived stream that SpotReaders
// seek into, as described in the design notes at the end of this file —
// confirm once the implementation exists.
type ReaderSource struct {
	io.ReadSeeker
}
|
||||||
|
|
||||||
|
// BufferedReadSeeker uses a buffer to supplement an io.Reader
// with limited backward seeking.
//
// The type is a placeholder in this sketch: it has no fields yet.
type BufferedReadSeeker struct{}
|
||||||
|
|
||||||
|
// NewBufferedReadSeeker is the constructor for a BufferedReadSeeker over r.
// It is not implemented yet and always panics.
//
// NOTE(review): minBuffer is presumably the minimum amount of already-read
// input that stays available for backward seeks — confirm when implementing.
func NewBufferedReadSeeker(r io.Reader, minBuffer uint64) *BufferedReadSeeker {
	panic("not implemented")
}
|
||||||
|
|
||||||
|
// Read reads bytes from the underlying reader. If the current offset is after
|
||||||
|
// the end of the buffer, Read will first read and ignore bytes from the
|
||||||
|
// underlying reader until it reaches the offset. If the current offset is
|
||||||
|
// before the start of the buffer, Read will return an error.
|
||||||
|
//
|
||||||
|
// If your parser needs unlimited lookahead, you should probably
|
||||||
|
// just read the whole input into a slice and use BytesSpotReader.
|
||||||
|
func (b *BufferedReadSeeker) Read([]byte) (int, error)
|
||||||
|
|
||||||
|
// Seek has the same signature as the Seek method of io.Seeker. It is not
// implemented yet and always panics.
//
// NOTE(review): the design notes at the end of this file say Seek will return
// an error if the desired offset is before the start of the buffer, while the
// Read documentation says Read reports that condition — confirm which method
// is meant to fail.
func (b *BufferedReadSeeker) Seek(offset int64, whence int) (int64, error) {
	panic("not implemented")
}
|
||||||
|
|
||||||
|
// SpotReader reads data from a specific spot in a stream.
type SpotReader[Datum any] interface {
	// Read returns n data from this SpotReader's position in the underlying
	// stream. It returns the data and a new SpotReader for the position at
	// which the read ended, or an error if the read failed.
	// All calls to a given SpotReader will return data from the same position.
	Read(n uint64) ([]Datum, SpotReader[Datum], error)

	// Pos returns the SpotReader's position within the stream.
	Pos() int64
}
|
||||||
|
|
||||||
|
// UnboxedSpotReader is a variant of SpotReader whose Read method returns the
// next reader as the type parameter Impl instead of as a SpotReader interface
// value.
//
// TODO Consider parameterizing SpotReader by its implementation so that Read
// doesn't have to box the next SpotReader.
type UnboxedSpotReader[Datum any, Impl any] interface {
	// Read has the same shape as SpotReader.Read, except that the second
	// result is an Impl.
	Read(n uint64) ([]Datum, Impl, error)

	// Pos has the same signature as SpotReader.Pos.
	Pos() int64
}
|
||||||
|
|
||||||
|
// FakeSpotReader is an example of an UnboxedSpotReader.
// This style would only be worth using after pretty solid benchmarking.
// If this doesn't lower allocs, then I could also try parameterizing
// parsers by type constrained by UnboxedSpotReader, but that would make
// the user write a lot of hideous type signatures.
//
// The type is a placeholder in this sketch: it has no fields yet.
type FakeSpotReader[Datum any] struct{}
|
||||||
|
|
||||||
|
// Read is FakeSpotReader's stub for the SpotReader Read method. It is not
// implemented yet and always panics.
//
// NOTE(review): FakeSpotReader is described as an example of an
// UnboxedSpotReader, yet this method returns the boxed SpotReader[Datum]
// interface rather than FakeSpotReader[Datum]. ExampleFakeSpotReader assigns
// a FakeSpotReader to a SpotReader variable and so relies on the current
// signature, which is therefore left unchanged — confirm which is intended.
func (f FakeSpotReader[Datum]) Read(uint64) ([]Datum, SpotReader[Datum], error) {
	panic("not implemented")
}
|
||||||
|
// Pos is FakeSpotReader's stub for the SpotReader Pos method. It is not
// implemented yet and always panics.
func (f FakeSpotReader[Datum]) Pos() int64 {
	panic("not implemented")
}
|
||||||
|
|
||||||
|
func ExampleFakeSpotReader() {
|
||||||
|
var sr1 SpotReader[int] = FakeSpotReader[int]{}
|
||||||
|
var sr2 SpotReader[int]
|
||||||
|
_, sr2, _ = sr1.Read(0)
|
||||||
|
sr2.Pos()
|
||||||
|
}
|
||||||
|
|
||||||
|
// SeqSpotReader is backed by a sequence of values of some type.
// It is intended for use with a concurrent lexing pass.
// TODO Since this will probably be handling tokens one at a time,
// consider using a circular buffer.
//
// The type is a placeholder in this sketch: it has no fields yet.
type SeqSpotReader[Datum any] struct{}
|
||||||
|
|
||||||
|
func NewSeq[Datum any](seq iter.Seq[Datum], buflen uint) SeqSpotReader[Datum] {
|
||||||
|
panic("not implemented")
|
||||||
|
}
|
||||||
|
|
||||||
|
// SliceSpotReader is a placeholder type with no fields yet. It is the return
// type of NewSlice.
//
// NOTE(review): unlike SeqSpotReader it has no Datum type parameter, so it
// cannot carry the element type that NewSlice is generic over — confirm
// whether it should be SliceSpotReader[Datum].
type SliceSpotReader struct{}
|
||||||
|
|
||||||
|
// NewSlice is the constructor stub for a SliceSpotReader; it accepts a slice
// of Datum values. It is not implemented yet and always panics.
func NewSlice[Datum any]([]Datum) SliceSpotReader {
	panic("not implemented")
}
|
||||||
|
|
||||||
|
// BytesSpotReader is a placeholder type with no fields yet. It is the return
// type of NewBytes, and the documentation of BufferedReadSeeker.Read
// recommends it for parsers that need unlimited lookahead over input that
// has been read into a slice.
type BytesSpotReader struct{}
|
||||||
|
|
||||||
|
// NewBytes is the constructor stub for a BytesSpotReader; it accepts a byte
// slice. It is not implemented yet and always panics.
func NewBytes([]byte) BytesSpotReader {
	panic("not implemented")
}
|
||||||
|
|
||||||
|
// ReadSeekerSpotReader is a placeholder type with no fields yet. It is the
// return type of NewReadSeeker.
//
// NOTE(review): presumably the immutable, offset-holding reader described in
// the design notes at the end of this file — confirm once implemented.
type ReadSeekerSpotReader struct{}
|
||||||
|
|
||||||
|
// NewReadSeeker is the constructor stub for a ReadSeekerSpotReader; it
// accepts an io.ReadSeeker. It is not implemented yet and always panics.
func NewReadSeeker(io.ReadSeeker) ReadSeekerSpotReader {
	panic("not implemented")
}
|
||||||
|
|
||||||
|
// StringSpotReader is a placeholder type with no fields yet. It is the return
// type of NewString.
type StringSpotReader struct{}
|
||||||
|
|
||||||
|
// NewString is the constructor stub for a StringSpotReader over s. It is not
// implemented yet and always panics.
func NewString(s string) StringSpotReader {
	panic("not implemented")
}
|
||||||
|
|
||||||
|
// RuneReader is an io.RuneReader backed by a SpotReader, for compatibility
// with the regexp package.
//
// The type is a placeholder in this sketch: it has no fields yet.
type RuneReader struct{}
|
||||||
|
|
||||||
|
// NewRuneReader is the constructor stub for a RuneReader that draws its bytes
// from the SpotReader s. It is not implemented yet and always panics.
func NewRuneReader(s SpotReader[byte]) *RuneReader {
	panic("not implemented")
}
|
||||||
|
|
||||||
|
// Read has the same signature as the Read method of io.Reader. It is not
// implemented yet and always panics.
//
// NOTE(review): RuneReader is documented as an io.RuneReader, but that
// interface requires ReadRune() (r rune, size int, err error), which this
// type does not declare. Confirm whether this method should be ReadRune, or
// whether ReadRune should be added alongside it.
func (s *RuneReader) Read([]byte) (int, error) {
	panic("not implemented")
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
I don't know how to structure this yet, and I'll need some experimentation to
decide. The idea is that there will be a ReadSeeker that lives outside the
parser calls, and there will be an immutable reader that refers to it and gets
passed through them as part of the parser state. That immutable reader will
also hold an offset from the start of the input, so when it reads, it will
first seek to that point in the ReadSeeker. Thus a given reader can only read
at a particular point in the input. It will return a new reader with an offset
equal to the first reader's offset plus the length of the read.
|
||||||
|
|
||||||
|
For using SpotReader with an io.Reader source that is not an io.ReadSeeker,
BufferedReadSeeker allows limited backward seeking. This will not work with
unlimited lookahead/backtracking; its Seek method will return an error if
the desired offset is before the start of the buffer.
|
||||||
|
*/
|
Loading…
Reference in New Issue
Block a user