gigaparsec/bytes/regexp.go

47 lines
1.6 KiB
Go

package bytes
import (
"fmt"
"regexp"
"strings"
"git.codemonkeysoftware.net/b/gigaparsec"
"git.codemonkeysoftware.net/b/gigaparsec/cursor"
)
// Regexp returns a parser that matches the regular expression str against the
// input, starting at the current position, and yields the matched bytes.
//
// The match is always anchored to the current position, whether or not str
// begins with "^", and the anchor applies to every top-level alternative.
// Regexp panics if str is not a valid regular expression.
//
// When the input does not match, the parser reports consumed == false and a
// gigaparsec.ParseError located at the input position.
func Regexp(str string) gigaparsec.Parser[byte, []byte] {
	if !strings.HasPrefix(str, "^") {
		str = "^" + str
	}
	// Compile the pattern on its own first so that a malformed pattern panics
	// here rather than being accidentally "repaired" by the group added below
	// (e.g. mis-nested parentheses that happen to balance once wrapped).
	_ = regexp.MustCompile(str)
	// "^" binds tighter than "|", so in a pattern like "^a|b" the second
	// alternative is unanchored and could match later in the input; the bytes
	// read back from the cursor below would then not be the matched bytes.
	// Wrapping the whole pattern in a non-capturing group anchors every
	// alternative to the start of the input.
	re := regexp.MustCompile("^(?:" + str + ")")
	expected := fmt.Sprintf("match `%s`", str)
	return func(input gigaparsec.State[byte]) (consumed bool, result gigaparsec.Result[byte, []byte], err error) {
		r := cursor.NewRuneReader(input.Cursor())
		idx := re.FindReaderIndex(r)
		// TODO Check error from r; this requires an Error() method on cursor.RuneReader.
		if idx == nil {
			return false, gigaparsec.Result[byte, []byte]{}, gigaparsec.ParseError{
				Pos:      input.Pos(),
				Expected: []string{expected},
				// TODO Not having a Got is unsatisfactory, but how do I extract useful information?
				// Maybe just read a fixed number of bytes or to the end, whichever comes first?
				// I could add extra methods to cursor.RuneReader to figure out how much it had read.
			}
		}
		// Alas, this is a little wasteful because a Regexp can only return indices
		// when searching a RuneReader, so the matched bytes have to be read again.
		// Because the pattern is anchored, idx[0] is 0 and the match covers the
		// first idx[1] bytes at the cursor.
		dst := make([]byte, idx[1]-idx[0])
		n, _, err := input.Cursor().Read(dst)
		if err != nil {
			// If we can't access those same bytes again, something is wrong.
			return false, gigaparsec.Result[byte, []byte]{}, fmt.Errorf("Regex: unexpected error: %w", err)
		}
		result = gigaparsec.Result[byte, []byte]{
			State:   input.At(input.Pos() + n),
			Value:   dst,
			Message: gigaparsec.MessageOK(input.Pos()),
		}
		// NOTE(review): consumed is reported as true even when the match is
		// empty (n == 0) — confirm that this is the intended convention.
		return true, result, nil
	}
}