From 96d178efefbe2357725d5b1fcb9649f4ec56487e Mon Sep 17 00:00:00 2001 From: Brandon Dyck Date: Tue, 10 Sep 2024 16:46:31 -0600 Subject: [PATCH] Implement most of Regexp parser --- bytes/bytes.go | 46 ++++++++++++++++++++++++++++++++++++++++++++++ gigaparsec.go | 4 ++++ parser_test.go | 4 ++++ 3 files changed, 54 insertions(+) create mode 100644 bytes/bytes.go diff --git a/bytes/bytes.go b/bytes/bytes.go new file mode 100644 index 0000000..e810345 --- /dev/null +++ b/bytes/bytes.go @@ -0,0 +1,46 @@ +package bytes + +import ( + "fmt" + "regexp" + "strings" + + "git.codemonkeysoftware.net/b/gigaparsec" + "git.codemonkeysoftware.net/b/gigaparsec/cursor" +) + +func Regexp(str string) gigaparsec.Parser[byte, []byte] { + if !strings.HasPrefix(str, "^") { + str = "^" + str + } + re := regexp.MustCompile(str) + expected := fmt.Sprintf("match `%s`", str) + return func(input gigaparsec.State[byte]) (consumed bool, result gigaparsec.Result[byte, []byte], err error) { + r := cursor.NewRuneReader(input.Cursor()) + idx := re.FindReaderIndex(r) + // TODO Check error from r; this requires an Error() method on cursor.RuneReader. + if idx == nil { + return false, gigaparsec.Result[byte, []byte]{}, gigaparsec.ParseError{ + Pos: input.Pos(), + Expected: []string{expected}, + // TODO Not having a Got is unsatisfactory, but how do I extract useful information? + // Maybe just read a fixed number of bytes or to the end, whichever comes first? + // I could add extra methods to cursor.RuneReader to figure out how much it had read. + } + } + // Alas, this is a little wasteful because a Regexp can only return indices + // when searching a RuneReader. + dst := make([]byte, idx[1]-idx[0]) + n, _, err := input.Cursor().Read(dst) + if err != nil { + // If we can't access those same bytes again, something is wrong. + return false, gigaparsec.Result[byte, []byte]{}, fmt.Errorf("Regex: unexpected error: %w", err) + } + result = gigaparsec.Result[byte, []byte]{ + State: input.At(input.Pos() + n), + Value: dst, + Message: gigaparsec.MessageOK(input.Pos()), + } + return true, result, nil + } +} diff --git a/gigaparsec.go b/gigaparsec.go index f13e9bd..1d2eaf8 100644 --- a/gigaparsec.go +++ b/gigaparsec.go @@ -57,6 +57,10 @@ func (s State[In]) Pos() uint64 { return s.cursor.Pos() } +func (s State[In]) At(pos uint64) State[In] { + return State[In]{cursor: s.cursor.At(pos)} +} + type Parser[In, Out any] func(State[In]) (consumed bool, reply Result[In, Out], err error) func Return[In, Out any](value Out) Parser[In, Out] { diff --git a/parser_test.go b/parser_test.go index a56ede5..d1de26a 100644 --- a/parser_test.go +++ b/parser_test.go @@ -62,3 +62,7 @@ func TestSlice(t *testing.T) { test.EqOp(t, uint64(len(s)), result.State.Pos()) })) } + +func TestChoose(t *testing.T) { + Todo(t) +}