gigaparsec/bytes/regexp.go

109 lines
3.0 KiB
Go
Raw Normal View History

// SPDX-License-Identifier: Unlicense
2024-09-10 22:46:31 +00:00
package bytes
import (
2024-09-24 17:53:32 +00:00
"bytes"
2024-09-10 22:52:08 +00:00
"errors"
2024-09-10 22:46:31 +00:00
"fmt"
2024-09-10 22:52:08 +00:00
"io"
2024-09-10 22:46:31 +00:00
"regexp"
2024-09-10 22:52:08 +00:00
"unicode/utf8"
2024-09-10 22:46:31 +00:00
"git.codemonkeysoftware.net/b/gigaparsec"
)
2024-09-10 22:52:08 +00:00
// RuneReader is an io.RuneReader backed by a Cursor, for compatibility
// with the regexp package.
type RuneReader struct {
2024-09-27 15:29:27 +00:00
state gigaparsec.State[byte]
start uint64
err error
2024-09-10 22:52:08 +00:00
}
2024-09-27 15:29:27 +00:00
func NewRuneReader(state gigaparsec.State[byte]) *RuneReader {
return &RuneReader{state: state, start: state.Pos()}
2024-09-10 22:52:08 +00:00
}
func (rr *RuneReader) ReadRune() (r rune, size int, err error) {
2024-09-11 20:23:08 +00:00
defer func() {
rr.err = err
}()
2024-09-10 22:52:08 +00:00
var b [4]byte
s := b[:]
2024-09-27 15:29:27 +00:00
n, next, err := rr.state.Read(s)
2024-09-10 22:52:08 +00:00
if err != nil && !errors.Is(err, io.EOF) {
2024-09-27 15:29:27 +00:00
rr.state = next
2024-09-10 22:52:08 +00:00
return 0, 0, fmt.Errorf("ReadRune: %w", err)
}
if n == 0 {
return 0, 0, io.EOF
}
2024-09-10 22:52:08 +00:00
s = s[:n]
r, size = utf8.DecodeRune(s)
2024-09-27 15:29:27 +00:00
rr.state = rr.state.At(rr.state.Pos() + uint64(size))
return r, size, nil
2024-09-10 22:52:08 +00:00
}
2024-09-27 15:29:27 +00:00
func (rr *RuneReader) State() gigaparsec.State[byte] {
return rr.state
2024-09-10 22:52:08 +00:00
}
2024-09-11 20:23:08 +00:00
// Error returns the error produced by the most recent call to ReadRune.
// It is nil if that call succeeded or if ReadRune has not been called.
func (rr *RuneReader) Error() error {
	return rr.err
}
2024-09-11 20:43:33 +00:00
func (rr *RuneReader) Count() uint64 {
2024-09-27 15:29:27 +00:00
return rr.state.Pos() - rr.start
2024-09-11 20:43:33 +00:00
}
func Regexp(pattern string) gigaparsec.Parser[byte, string] {
pattern = fmt.Sprintf(`^(?:%s)`, pattern)
re := regexp.MustCompile(pattern)
expected := fmt.Sprintf("match `%s`", pattern)
2024-09-11 20:25:49 +00:00
return func(input gigaparsec.State[byte]) (gigaparsec.Result[byte, string], error) {
2024-09-27 15:29:27 +00:00
r := NewRuneReader(input)
2024-09-10 22:46:31 +00:00
idx := re.FindReaderIndex(r)
2024-09-11 20:23:08 +00:00
err := r.Error()
if err != nil && !errors.Is(err, io.EOF) {
2024-09-11 20:25:49 +00:00
return gigaparsec.Result[byte, string]{}, fmt.Errorf("Regexp: reader error: %w", err)
2024-09-11 20:23:08 +00:00
}
2024-09-10 22:46:31 +00:00
if idx == nil {
2024-09-11 20:43:33 +00:00
got := make([]byte, r.Count())
2024-09-27 15:29:27 +00:00
_, _, err = input.Read(got)
2024-09-11 20:43:33 +00:00
if err != nil {
return gigaparsec.Result[byte, string]{}, fmt.Errorf("Regexp: unexpected error: %w", err)
}
2024-09-11 21:27:10 +00:00
return gigaparsec.Fail[byte, string](false, gigaparsec.MakeMessage(input.Pos(), string(got), expected)), nil
2024-09-10 22:46:31 +00:00
}
// Alas, this is a little wasteful because a Regexp can only return indices
// when searching a RuneReader.
dst := make([]byte, idx[1]-idx[0])
2024-09-27 15:29:27 +00:00
n, _, err := input.Read(dst)
2024-09-10 22:46:31 +00:00
if err != nil {
2024-09-11 20:25:49 +00:00
return gigaparsec.Result[byte, string]{}, fmt.Errorf("Regexp: unexpected error: %w", err)
2024-09-10 22:46:31 +00:00
}
next := input.At(input.Pos() + n)
2024-09-11 20:25:49 +00:00
return gigaparsec.Succeed(true, string(dst), next, gigaparsec.MessageOK(input.Pos())), nil
2024-09-10 22:46:31 +00:00
}
}
2024-09-24 17:53:32 +00:00
// MatchString returns a parser that reads len(s) bytes from the input and
// succeeds with s when they equal s exactly.
//
// If the read reports io.EOF, or the bytes differ from s, the parser returns a
// gigaparsec.Fail result; any other read error is returned as an error.
func MatchString(s string) gigaparsec.Parser[byte, string] {
	want := []byte(s)
	expected := fmt.Sprintf("%q", s)

	return func(input gigaparsec.State[byte]) (gigaparsec.Result[byte, string], error) {
		got := make([]byte, len(want))
		_, rest, err := input.Read(got)
		switch {
		case errors.Is(err, io.EOF):
			// Ran out of input before len(s) bytes were available.
			return gigaparsec.Fail[byte, string](false, gigaparsec.MessageEnd(input.Pos(), expected)), nil
		case err != nil:
			return gigaparsec.Result[byte, string]{}, fmt.Errorf("MatchString: %w", err)
		case !bytes.Equal(got, want):
			return gigaparsec.Fail[byte, string](false, gigaparsec.MakeMessage(input.Pos(), string(got), expected)), nil
		}
		return gigaparsec.Succeed(true, s, rest, gigaparsec.MessageOK(input.Pos())), nil
	}
}