// SPDX-License-Identifier: Unlicense package bytes import ( "bytes" "errors" "fmt" "io" "regexp" "unicode/utf8" "git.codemonkeysoftware.net/b/gigaparsec" "git.codemonkeysoftware.net/b/gigaparsec/cursor" ) // RuneReader is an io.RuneReader backed by a Cursor, for compatibility // with the regexp package. type RuneReader struct { cursor cursor.Cursor[byte] start uint64 err error } func NewRuneReader(c cursor.Cursor[byte]) *RuneReader { return &RuneReader{cursor: c, start: c.Pos()} } func (rr *RuneReader) ReadRune() (r rune, size int, err error) { defer func() { rr.err = err }() var b [4]byte s := b[:] n, next, err := rr.cursor.Read(s) if err != nil && !errors.Is(err, io.EOF) { rr.cursor = next return 0, 0, fmt.Errorf("ReadRune: %w", err) } if n == 0 { return 0, 0, io.EOF } s = s[:n] r, size = utf8.DecodeRune(s) rr.cursor = rr.cursor.At(rr.cursor.Pos() + uint64(size)) return r, size, nil } func (rr *RuneReader) Cursor() cursor.Cursor[byte] { return rr.cursor } func (rr *RuneReader) Error() error { return rr.err } func (rr *RuneReader) Count() uint64 { return rr.cursor.Pos() - rr.start } func Regexp(pattern string) gigaparsec.Parser[byte, string] { pattern = fmt.Sprintf(`^(?:%s)`, pattern) re := regexp.MustCompile(pattern) expected := fmt.Sprintf("match `%s`", pattern) return func(input gigaparsec.State[byte]) (gigaparsec.Result[byte, string], error) { r := NewRuneReader(input.Cursor()) idx := re.FindReaderIndex(r) err := r.Error() if err != nil && !errors.Is(err, io.EOF) { return gigaparsec.Result[byte, string]{}, fmt.Errorf("Regexp: reader error: %w", err) } if idx == nil { got := make([]byte, r.Count()) _, _, err = input.Cursor().Read(got) if err != nil { return gigaparsec.Result[byte, string]{}, fmt.Errorf("Regexp: unexpected error: %w", err) } return gigaparsec.Fail[byte, string](false, gigaparsec.MakeMessage(input.Pos(), string(got), expected)), nil } // Alas, this is a little wasteful because a Regexp can only return indices // when searching a RuneReader. dst := make([]byte, idx[1]-idx[0]) n, _, err := input.Cursor().Read(dst) if err != nil { return gigaparsec.Result[byte, string]{}, fmt.Errorf("Regexp: unexpected error: %w", err) } next := input.At(input.Pos() + n) return gigaparsec.Succeed(true, string(dst), next, gigaparsec.MessageOK(input.Pos())), nil } } func MatchString(s string) gigaparsec.Parser[byte, string] { expected := fmt.Sprintf("%q", s) b := []byte(s) return func(input gigaparsec.State[byte]) (gigaparsec.Result[byte, string], error) { dst := make([]byte, len(s)) _, next, err := input.Read(dst) if errors.Is(err, io.EOF) { return gigaparsec.Fail[byte, string](false, gigaparsec.MessageEnd(input.Pos(), expected)), nil } if err != nil { return gigaparsec.Result[byte, string]{}, fmt.Errorf("MatchString: %w", err) } if !bytes.Equal(dst, b) { return gigaparsec.Fail[byte, string](false, gigaparsec.MakeMessage(input.Pos(), string(dst), expected)), nil } return gigaparsec.Succeed(true, s, next, gigaparsec.MessageOK(input.Pos())), nil } }