// SPDX-License-Identifier: Unlicense package bytes import ( "bytes" "errors" "fmt" "io" "regexp" "unicode/utf8" "git.codemonkeysoftware.net/b/gigaparsec" ) // RuneReader is an io.RuneReader backed by a Cursor, for compatibility // with the regexp package. type RuneReader struct { state gigaparsec.State[byte] start uint64 err error } func NewRuneReader(state gigaparsec.State[byte]) *RuneReader { return &RuneReader{state: state, start: state.Pos()} } func (rr *RuneReader) ReadRune() (r rune, size int, err error) { defer func() { rr.err = err }() var b [4]byte s := b[:] n, next, err := rr.state.Read(s) if err != nil && !errors.Is(err, io.EOF) { rr.state = next return 0, 0, fmt.Errorf("ReadRune: %w", err) } if n == 0 { return 0, 0, io.EOF } s = s[:n] r, size = utf8.DecodeRune(s) rr.state = rr.state.At(rr.state.Pos() + uint64(size)) return r, size, nil } func (rr *RuneReader) State() gigaparsec.State[byte] { return rr.state } func (rr *RuneReader) Error() error { return rr.err } func (rr *RuneReader) Count() uint64 { return rr.state.Pos() - rr.start } func Regexp(pattern string) gigaparsec.Parser[byte, string] { pattern = fmt.Sprintf(`^(?:%s)`, pattern) re := regexp.MustCompile(pattern) expected := fmt.Sprintf("match `%s`", pattern) return func(input gigaparsec.State[byte]) (gigaparsec.Result[byte, string], error) { r := NewRuneReader(input) idx := re.FindReaderIndex(r) err := r.Error() if err != nil && !errors.Is(err, io.EOF) { return gigaparsec.Result[byte, string]{}, fmt.Errorf("Regexp: reader error: %w", err) } if idx == nil { got := make([]byte, r.Count()) _, _, err = input.Read(got) if err != nil { return gigaparsec.Result[byte, string]{}, fmt.Errorf("Regexp: unexpected error: %w", err) } return gigaparsec.Fail[byte, string](false, gigaparsec.MakeMessage(input.Pos(), string(got), expected)), nil } // Alas, this is a little wasteful because a Regexp can only return indices // when searching a RuneReader. dst := make([]byte, idx[1]-idx[0]) n, _, err := input.Read(dst) if err != nil { return gigaparsec.Result[byte, string]{}, fmt.Errorf("Regexp: unexpected error: %w", err) } next := input.At(input.Pos() + n) return gigaparsec.Succeed(true, string(dst), next, gigaparsec.MessageOK(input.Pos())), nil } } func MatchString(s string) gigaparsec.Parser[byte, string] { expected := fmt.Sprintf("%q", s) b := []byte(s) return func(input gigaparsec.State[byte]) (gigaparsec.Result[byte, string], error) { dst := make([]byte, len(s)) _, next, err := input.Read(dst) if errors.Is(err, io.EOF) { return gigaparsec.Fail[byte, string](false, gigaparsec.MessageEnd(input.Pos(), expected)), nil } if err != nil { return gigaparsec.Result[byte, string]{}, fmt.Errorf("MatchString: %w", err) } if !bytes.Equal(dst, b) { return gigaparsec.Fail[byte, string](false, gigaparsec.MakeMessage(input.Pos(), string(dst), expected)), nil } return gigaparsec.Succeed(true, s, next, gigaparsec.MessageOK(input.Pos())), nil } }