package bytes

import (
	"errors"
	"fmt"
	"io"
	"regexp"
	"strings"
	"unicode/utf8"

	"git.codemonkeysoftware.net/b/gigaparsec"
	"git.codemonkeysoftware.net/b/gigaparsec/cursor"
)

// RuneReader is an io.RuneReader backed by a Cursor, for compatibility
// with the regexp package.
type RuneReader struct {
	cursor cursor.Cursor[byte]
	err    error
}

// Compile-time check that *RuneReader satisfies io.RuneReader.
var _ io.RuneReader = (*RuneReader)(nil)

// NewRuneReader returns a RuneReader that starts decoding at c.
func NewRuneReader(c cursor.Cursor[byte]) *RuneReader {
	return &RuneReader{cursor: c}
}

// ReadRune decodes one UTF-8 encoded rune at the reader's current position
// and advances the reader past it. Bytes that do not form valid UTF-8 decode
// as utf8.RuneError with size 1, as utf8.DecodeRune does.
//
// A non-nil error is returned only when no rune was produced: io.EOF (as
// reported by the cursor) once the source is exhausted, or the cursor's read
// error wrapped with a "ReadRune: " prefix. The error from the most recent
// call, nil included, is remembered and available from Error.
func (rr *RuneReader) ReadRune() (r rune, size int, err error) {
	defer func() { rr.err = err }()

	// Look ahead by the longest possible encoding. Near the end of the
	// source this read comes up short and the cursor reports io.EOF even
	// though some bytes were delivered.
	var buf [utf8.UTFMax]byte
	n, next, err := rr.cursor.Read(buf[:])
	if err != nil && !errors.Is(err, io.EOF) {
		rr.cursor = next
		return 0, 0, fmt.Errorf("ReadRune: %w", err)
	}
	if n == 0 {
		// Nothing left to decode. Never report (RuneError, 0, nil): a
		// consumer would see a zero-width rune and make no progress.
		if err == nil {
			err = io.EOF
		}
		return 0, 0, err
	}

	r, size = utf8.DecodeRune(buf[:n])
	// Advance only past the bytes of the decoded rune, not the whole look-ahead.
	rr.cursor = rr.cursor.At(rr.cursor.Pos() + uint64(size))
	// The rune is valid, so any io.EOF from the short look-ahead is dropped
	// here. The regexp package discards the rune when err is non-nil, which
	// would make the final bytes of the source unmatchable. EOF is reported
	// by the next call, once nothing remains.
	return r, size, nil
}

// Cursor returns the reader's current cursor, positioned after the last rune
// that was successfully decoded.
func (rr *RuneReader) Cursor() cursor.Cursor[byte] {
	return rr.cursor
}

// Error returns the error produced by the most recent call to ReadRune,
// or nil if that call succeeded or ReadRune has not been called.
func (rr *RuneReader) Error() error {
	return rr.err
}

// Regexp returns a parser that matches the regular expression str against the
// input, starting at the current position, and yields the matched bytes.
//
// The match is always anchored to the current position. The pattern is
// compiled as `^(?:str)` so that the anchor covers every top-level
// alternative of str, not just the first one.
//
// Regexp panics if the pattern does not compile (see regexp.MustCompile).
// The returned parser reports a non-nil error only when reading the input
// fails for a reason other than reaching its end.
func Regexp(str string) gigaparsec.Parser[byte, []byte] {
	// The pattern shown in failure messages keeps its historical form: the
	// caller's pattern, with "^" prepended when it is not already anchored.
	shown := str
	if !strings.HasPrefix(str, "^") && !strings.HasPrefix(str, `\A`) {
		shown = "^" + str
	}
	expected := fmt.Sprintf("match `%s`", shown)

	// Grouping before anchoring is what makes "a|b" mean ^(a|b) rather
	// than (^a)|b. It is harmless when str already starts with ^ or \A.
	re := regexp.MustCompile(`^(?:` + str + `)`)

	return func(input gigaparsec.State[byte]) (gigaparsec.Result[byte, []byte], error) {
		r := NewRuneReader(input.Cursor())
		idx := re.FindReaderIndex(r)
		if err := r.Error(); err != nil && !errors.Is(err, io.EOF) {
			return gigaparsec.Result[byte, []byte]{}, fmt.Errorf("Regexp: reader error: %w", err)
		}
		if idx == nil {
			return gigaparsec.Fail[byte, []byte](false, gigaparsec.Message{
				Pos:      input.Pos(),
				Expected: []string{expected},
				// TODO Not having a Got is unsatisfactory, but how do I extract useful information?
				// Maybe just read a fixed number of bytes or to the end, whichever comes first?
				// I could add extra methods to cursor.RuneReader to figure out how much it had read.
			}), nil
		}

		// Alas, this is a little wasteful because a Regexp can only return indices
		// when searching a RuneReader.
		dst := make([]byte, idx[1]-idx[0])
		n, _, err := input.Cursor().Read(dst)
		// An io.EOF that accompanies a complete read is not a failure. Some
		// sources report it when the read ends exactly at the end of the data,
		// which includes a zero-length match at the end of the input.
		if err != nil && !(errors.Is(err, io.EOF) && uint64(n) == uint64(len(dst))) {
			// If we can't access those same bytes again, something is wrong.
			return gigaparsec.Result[byte, []byte]{}, fmt.Errorf("Regexp: unexpected error: %w", err)
		}
		next := input.At(input.Pos() + n)
		return gigaparsec.Succeed(true, dst, next, gigaparsec.MessageOK(input.Pos())), nil
	}
}