2024-09-10 22:46:31 +00:00
|
|
|
package bytes
|
|
|
|
|
|
|
|
import (
|
2024-09-10 22:52:08 +00:00
|
|
|
"errors"
|
2024-09-10 22:46:31 +00:00
|
|
|
"fmt"
|
2024-09-10 22:52:08 +00:00
|
|
|
"io"
|
2024-09-10 22:46:31 +00:00
|
|
|
"regexp"
|
|
|
|
"strings"
|
2024-09-10 22:52:08 +00:00
|
|
|
"unicode/utf8"
|
2024-09-10 22:46:31 +00:00
|
|
|
|
|
|
|
"git.codemonkeysoftware.net/b/gigaparsec"
|
|
|
|
"git.codemonkeysoftware.net/b/gigaparsec/cursor"
|
|
|
|
)
|
|
|
|
|
2024-09-10 22:52:08 +00:00
|
|
|
// RuneReader is an io.RuneReader backed by a Cursor, for compatibility
|
|
|
|
// with the regexp package.
|
|
|
|
type RuneReader struct {
|
|
|
|
cursor cursor.Cursor[byte]
|
2024-09-11 20:43:33 +00:00
|
|
|
start uint64
|
2024-09-11 20:23:08 +00:00
|
|
|
err error
|
2024-09-10 22:52:08 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func NewRuneReader(c cursor.Cursor[byte]) *RuneReader {
|
2024-09-11 20:43:33 +00:00
|
|
|
return &RuneReader{cursor: c, start: c.Pos()}
|
2024-09-10 22:52:08 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (rr *RuneReader) ReadRune() (r rune, size int, err error) {
|
2024-09-11 20:23:08 +00:00
|
|
|
defer func() {
|
|
|
|
rr.err = err
|
|
|
|
}()
|
2024-09-10 22:52:08 +00:00
|
|
|
var b [4]byte
|
|
|
|
s := b[:]
|
|
|
|
n, next, err := rr.cursor.Read(s)
|
|
|
|
if err != nil && !errors.Is(err, io.EOF) {
|
|
|
|
rr.cursor = next
|
|
|
|
return 0, 0, fmt.Errorf("ReadRune: %w", err)
|
|
|
|
}
|
2024-09-18 02:48:38 +00:00
|
|
|
if n == 0 {
|
|
|
|
return 0, 0, io.EOF
|
|
|
|
}
|
2024-09-10 22:52:08 +00:00
|
|
|
s = s[:n]
|
2024-09-18 02:48:38 +00:00
|
|
|
fmt.Println("read bytes:", s)
|
2024-09-10 22:52:08 +00:00
|
|
|
r, size = utf8.DecodeRune(s)
|
|
|
|
rr.cursor = rr.cursor.At(rr.cursor.Pos() + uint64(size))
|
2024-09-18 02:48:38 +00:00
|
|
|
return r, size, nil
|
2024-09-10 22:52:08 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (rr *RuneReader) Cursor() cursor.Cursor[byte] {
|
|
|
|
return rr.cursor
|
|
|
|
}
|
|
|
|
|
2024-09-11 20:23:08 +00:00
|
|
|
func (rr *RuneReader) Error() error {
|
|
|
|
return rr.err
|
|
|
|
}
|
|
|
|
|
2024-09-11 20:43:33 +00:00
|
|
|
func (rr *RuneReader) Count() uint64 {
|
|
|
|
return rr.cursor.Pos() - rr.start
|
|
|
|
}
|
|
|
|
|
2024-09-11 20:25:49 +00:00
|
|
|
func Regexp(str string) gigaparsec.Parser[byte, string] {
|
2024-09-11 00:39:54 +00:00
|
|
|
if !strings.HasPrefix(str, "^") && !strings.HasPrefix(str, `\A`) {
|
2024-09-10 22:46:31 +00:00
|
|
|
str = "^" + str
|
|
|
|
}
|
|
|
|
re := regexp.MustCompile(str)
|
|
|
|
expected := fmt.Sprintf("match `%s`", str)
|
2024-09-11 20:25:49 +00:00
|
|
|
return func(input gigaparsec.State[byte]) (gigaparsec.Result[byte, string], error) {
|
2024-09-10 22:52:08 +00:00
|
|
|
r := NewRuneReader(input.Cursor())
|
2024-09-10 22:46:31 +00:00
|
|
|
idx := re.FindReaderIndex(r)
|
2024-09-11 20:23:08 +00:00
|
|
|
err := r.Error()
|
|
|
|
if err != nil && !errors.Is(err, io.EOF) {
|
2024-09-11 20:25:49 +00:00
|
|
|
return gigaparsec.Result[byte, string]{}, fmt.Errorf("Regexp: reader error: %w", err)
|
2024-09-11 20:23:08 +00:00
|
|
|
}
|
2024-09-10 22:46:31 +00:00
|
|
|
if idx == nil {
|
2024-09-11 20:43:33 +00:00
|
|
|
got := make([]byte, r.Count())
|
|
|
|
_, _, err = input.Cursor().Read(got)
|
|
|
|
if err != nil {
|
|
|
|
return gigaparsec.Result[byte, string]{}, fmt.Errorf("Regexp: unexpected error: %w", err)
|
|
|
|
}
|
2024-09-11 21:27:10 +00:00
|
|
|
return gigaparsec.Fail[byte, string](false, gigaparsec.MakeMessage(input.Pos(), string(got), expected)), nil
|
2024-09-10 22:46:31 +00:00
|
|
|
}
|
|
|
|
// Alas, this is a little wasteful because a Regexp can only return indices
|
|
|
|
// when searching a RuneReader.
|
|
|
|
dst := make([]byte, idx[1]-idx[0])
|
|
|
|
n, _, err := input.Cursor().Read(dst)
|
|
|
|
if err != nil {
|
2024-09-11 20:25:49 +00:00
|
|
|
return gigaparsec.Result[byte, string]{}, fmt.Errorf("Regexp: unexpected error: %w", err)
|
2024-09-10 22:46:31 +00:00
|
|
|
}
|
2024-09-11 16:25:45 +00:00
|
|
|
next := input.At(input.Pos() + n)
|
2024-09-11 20:25:49 +00:00
|
|
|
return gigaparsec.Succeed(true, string(dst), next, gigaparsec.MessageOK(input.Pos())), nil
|
2024-09-10 22:46:31 +00:00
|
|
|
}
|
|
|
|
}
|