package bytes

import (
	"errors"
	"fmt"
	"io"
	"regexp"
	"strings"
	"unicode/utf8"

	"git.codemonkeysoftware.net/b/gigaparsec"
	"git.codemonkeysoftware.net/b/gigaparsec/cursor"
)

// RuneReader is an io.RuneReader backed by a Cursor, for compatibility
// with the regexp package.
type RuneReader struct {
	cursor cursor.Cursor[byte]
}

// NewRuneReader returns a RuneReader that starts reading at c.
func NewRuneReader(c cursor.Cursor[byte]) *RuneReader {
	return &RuneReader{cursor: c}
}

// ReadRune decodes a single UTF-8 encoded rune at the reader's current cursor
// and advances the cursor past it.
//
// When a rune is returned, err is nil, even if the underlying read reached the
// end of the input while looking ahead. End of input is reported only when no
// bytes are left. Any read error other than io.EOF is wrapped and returned
// with a zero rune and size.
func (rr *RuneReader) ReadRune() (r rune, size int, err error) {
	// Read up to the longest possible UTF-8 encoding; the cursor is advanced
	// only by the size of the rune that actually gets decoded.
	var b [utf8.UTFMax]byte
	n, next, err := rr.cursor.Read(b[:])
	if err != nil && !errors.Is(err, io.EOF) {
		rr.cursor = next
		return 0, 0, fmt.Errorf("ReadRune: %w", err)
	}
	if n == 0 {
		// Nothing left to decode. Never hand back a zero-width rune with a
		// nil error; callers would have no way to make progress.
		if err == nil {
			err = io.EOF
		}
		return 0, 0, err
	}
	r, size = utf8.DecodeRune(b[:n])
	rr.cursor = rr.cursor.At(rr.cursor.Pos() + uint64(size))
	// A short read near the end of the input may come with io.EOF even though
	// a rune was decoded. The regexp package treats any non-nil error as end
	// of text and drops the rune, so the EOF must not be forwarded here; it
	// will surface on a later call once no bytes remain.
	return r, size, nil
}

// Cursor returns the reader's current cursor.
func (rr *RuneReader) Cursor() cursor.Cursor[byte] {
	return rr.cursor
}

// Regexp returns a parser that matches the regular expression str at the
// current input position and yields the matched bytes.
//
// The expression is always anchored to the current position. It panics if str
// is not a valid regular expression.
func Regexp(str string) gigaparsec.Parser[byte, []byte] {
	if !strings.HasPrefix(str, "^") {
		str = "^" + str
	}
	// Anchor the expression as a whole. A bare "^" prefix binds only to the
	// first alternative of a pattern such as `a|b`, which would let the other
	// alternatives match later in the input.
	re := regexp.MustCompile("^(?:" + str + ")")
	expected := fmt.Sprintf("match `%s`", str)
	return func(input gigaparsec.State[byte]) (consumed bool, result gigaparsec.Result[byte, []byte], err error) {
		r := NewRuneReader(input.Cursor())
		idx := re.FindReaderIndex(r)
		// TODO Check error from r; this requires an Error() method on cursor.RuneReader.
		if idx == nil {
			return false, gigaparsec.Result[byte, []byte]{}, gigaparsec.ParseError{
				Pos:      input.Pos(),
				Expected: []string{expected},
				// TODO Not having a Got is unsatisfactory, but how do I extract useful information?
				// Maybe just read a fixed number of bytes or to the end, whichever comes first?
				// I could add extra methods to cursor.RuneReader to figure out how much it had read.
			}
		}
		// Alas, this is a little wasteful because a Regexp can only return indices
		// when searching a RuneReader.
		dst := make([]byte, idx[1]-idx[0])
		n, _, err := input.Cursor().Read(dst)
		// A read that ends exactly at the end of the input may report io.EOF
		// alongside a complete result; that is not a failure.
		if err != nil && !(errors.Is(err, io.EOF) && int(n) == len(dst)) {
			// If we can't access those same bytes again, something is wrong.
			return false, gigaparsec.Result[byte, []byte]{}, fmt.Errorf("Regex: unexpected error: %w", err)
		}
		result = gigaparsec.Result[byte, []byte]{
			State:   input.At(input.Pos() + n),
			Value:   dst,
			Message: gigaparsec.MessageOK(input.Pos()),
		}
		return true, result, nil
	}
}