package bytes

import (
	"fmt"
	"regexp"
	"strings"

	"git.codemonkeysoftware.net/b/gigaparsec"
	"git.codemonkeysoftware.net/b/gigaparsec/cursor"
)

// Regexp returns a parser that succeeds when the regular expression str
// matches at the start of the input it is given, and yields the matched bytes.
//
// The whole expression is anchored, including every top-level alternative:
// Regexp("a|b") only matches an "a" or a "b" at the start of the input, never
// a "b" found further along.
//
// Regexp panics if str is not a valid regular expression.
//
// On a failed match the parser reports consumed == false and returns a
// gigaparsec.ParseError whose Expected entry names the pattern. If the matched
// bytes cannot be read back from the input, it returns a wrapped error.
func Regexp(str string) gigaparsec.Parser[byte, []byte] {
	// Compile the caller's pattern on its own first. Wrapping it in a group
	// below could otherwise turn a malformed pattern such as `a)(b` into a
	// valid one and silently change its meaning.
	regexp.MustCompile(str)

	// The text shown in error messages keeps its historical form: the pattern
	// with a leading "^" added when it does not already have one.
	shown := str
	if !strings.HasPrefix(shown, "^") {
		shown = "^" + shown
	}
	expected := fmt.Sprintf("match `%s`", shown)

	// Anchor the expression as a unit. Simply prepending "^" is not enough,
	// because "^" binds tighter than "|": `^a|b` leaves the second alternative
	// unanchored, letting a match start past the beginning of the input while
	// the bytes are still read from the beginning.
	re := regexp.MustCompile("^(?:" + str + ")")

	return func(input gigaparsec.State[byte]) (consumed bool, result gigaparsec.Result[byte, []byte], err error) {
		r := cursor.NewRuneReader(input.Cursor())
		idx := re.FindReaderIndex(r)
		// TODO Check error from r; this requires an Error() method on cursor.RuneReader.
		if idx == nil {
			return false, gigaparsec.Result[byte, []byte]{}, gigaparsec.ParseError{
				Pos:      input.Pos(),
				Expected: []string{expected},
				// TODO Not having a Got is unsatisfactory, but how do I extract useful information?
				// Maybe just read a fixed number of bytes or to the end, whichever comes first?
				// I could add extra methods to cursor.RuneReader to figure out how much it had read.
			}
		}

		// Alas, this is a little wasteful because a Regexp can only return indices
		// when searching a RuneReader, so the matched bytes are read a second time.
		// Because the expression is anchored, idx[0] is always 0 here.
		dst := make([]byte, idx[1]-idx[0])
		n, _, err := input.Cursor().Read(dst)
		if err != nil {
			// If we can't access those same bytes again, something is wrong.
			return false, gigaparsec.Result[byte, []byte]{}, fmt.Errorf("Regex: unexpected error: %w", err)
		}

		// NOTE(review): consumed is reported as true even for an empty match
		// (e.g. `a*` against "b"); confirm that is the intended contract.
		result = gigaparsec.Result[byte, []byte]{
			State:   input.At(input.Pos() + n),
			Value:   dst,
			Message: gigaparsec.MessageOK(input.Pos()),
		}
		return true, result, nil
	}
}