Compare commits

..

29 Commits

Author SHA1 Message Date
e60f6ae015 Added Bracket, Where, and Token (without tests) 2025-04-11 19:03:45 -06:00
035fa7da14 Added Lazy combinator 2025-04-02 15:50:12 -06:00
82ed6b5546 Regexp: don't return error when match fails at EOF 2025-04-02 14:24:21 -06:00
4e157f7a0e Regexp: succeed on empty match at end of input 2025-04-02 14:06:12 -06:00
1892a97070 Add some TODOs 2025-04-02 13:27:16 -06:00
981edc92f7 Add Seq benchmarks 2024-11-29 23:38:38 -07:00
74237e2593 Merge branch 'master' of git.codemonkeysoftware.net:b/gigaparsec 2024-11-29 11:00:38 -07:00
17c468a3f7 Add naïve Seq series 2024-11-29 11:00:34 -07:00
ac739c0f3b Update todo list 2024-11-29 08:04:50 +00:00
b3a6bfc02e Don't be so negative, man 2024-11-29 00:55:59 -07:00
c85d0d280e Move naïve Bind and Seq into a separate package 2024-11-29 00:53:11 -07:00
fa6c15566d Fix formatting in inspiration.md 2024-11-28 22:11:05 +00:00
13d83e70ad Add missing SPDX headers 2024-10-17 16:02:29 -06:00
56536b04f6 Turn up Bind/Seq count and write a simple Bind benchmark 2024-10-17 16:01:27 -06:00
bc2f7aa911 Added Try tests 2024-10-17 08:57:45 -06:00
abef123f8a Removed unused Todo function 2024-10-16 13:52:13 -06:00
f3a37f5fb6 Test Regexp failure message 2024-10-16 13:50:14 -06:00
c22246b7de Added some Regexp tests 2024-10-16 13:32:25 -06:00
9c5e8fff0e Added Repeat tests 2024-10-16 11:58:12 -06:00
bfc9a9ae58 Test Repeat success and next state 2024-10-16 09:06:25 -06:00
59903ba151 Check source files for missing SPDX headers 2024-09-30 18:56:06 -06:00
6e572d2748 Document State methods 2024-09-30 15:42:26 -06:00
526e40323d Clarify and fix State's EOF behavior 2024-09-30 15:33:24 -06:00
c29be1a7b6 Re-added SliceReaderAt 2024-09-30 13:02:57 -06:00
5e6eafef64 Test Repeat success/failure 2024-09-27 10:40:18 -06:00
2d6f091e0b Update TODO 2024-09-27 09:43:00 -06:00
776b513c44 Return correct next state from Read 2024-09-27 09:39:20 -06:00
e6debbd7dc Improve MatchSlice test output 2024-09-27 09:37:35 -06:00
82ade62274 Combine Cursor with State 2024-09-27 09:29:27 -06:00
18 changed files with 1798 additions and 335 deletions

View File

@ -1,8 +1,8 @@
# Gigaparsec # Gigaparsec
[![Go Reference](https://pkg.go.dev/badge/git.codemonkeysoftware.net/b/gigaparsec.svg)](https://pkg.go.dev/git.codemonkeysoftware.net/b/gigaparsec) [![Go Reference](https://pkg.go.dev/badge/git.codemonkeysoftware.net/b/gigaparsec.svg)](https://pkg.go.dev/git.codemonkeysoftware.net/b/gigaparsec)
![Total garbage.](https://img.shields.io/badge/Total-garbage-red)
by Brandon Dyck <[brandon@dyck.us](mailto:brandon@dyck.us)>
Monadic parser combinators in Go Monadic parser combinators in Go
**I don't recommend using this yet. It is very unfinished and it will break.**

View File

@ -1,8 +1,9 @@
Write Repeat tests
Clean up cursor tests
Combine Cursor with State
Think about not requiring so much Pos() when making messages Think about not requiring so much Pos() when making messages
Think about changing "consume" to "commit"
Rename Seq2 to Seq Rename Seq2 to Seq
Document Seq Document Seq
Should MakeState be private now that there's Run? Should MakeState be private now that there's Run?
What's Megaparsec got that we ain't got? What's Megaparsec got that we ain't got?
Add and benchmark naïve Seq
chainl
whitespace handling

418
bind.go
View File

@ -278,3 +278,421 @@ func Bind5[In, Out, T, T2, T3, T4, T5 any](
return Succeed(anyConsumed, val6, next, MessageOK(s.Pos())), nil return Succeed(anyConsumed, val6, next, MessageOK(s.Pos())), nil
} }
} }
// Bind6 is equivalent to 6 nested calls to Bind.
//
// p runs first from the starting state. Each continuation (f through f6)
// receives the value produced by the step before it and returns the parser to
// run next, which starts from the state at which the previous step stopped.
// The chain stops at the first step that returns an error (propagated as-is
// with a zero Result) or that fails (reported via Fail with that step's
// message). The consumed flag of the final Result is true if any step that ran
// reported consumption. On success the message is MessageOK at the position of
// the starting state.
func Bind6[In, Out, T, T2, T3, T4, T5, T6 any](
	p Parser[In, T],
	f func(T) Parser[In, T2],
	f2 func(T2) Parser[In, T3],
	f3 func(T3) Parser[In, T4],
	f4 func(T4) Parser[In, T5],
	f5 func(T5) Parser[In, T6],
	f6 func(T6) Parser[In, Out],
) Parser[In, Out] {
	return func(s State[In]) (Result[In, Out], error) {
		// Returned alongside any error from an underlying parser.
		var zero Result[In, Out]

		// Step 1: the initial parser.
		r1, err := p(s)
		if err != nil {
			return zero, err
		}
		consumed := r1.Consumed()
		ok, v1, st := r1.Status()
		if !ok {
			return Fail[In, Out](consumed, r1.Message()), nil
		}

		// Step 2.
		r2, err := f(v1)(st)
		if err != nil {
			return zero, err
		}
		consumed = consumed || r2.Consumed()
		ok, v2, st := r2.Status()
		if !ok {
			return Fail[In, Out](consumed, r2.Message()), nil
		}

		// Step 3.
		r3, err := f2(v2)(st)
		if err != nil {
			return zero, err
		}
		consumed = consumed || r3.Consumed()
		ok, v3, st := r3.Status()
		if !ok {
			return Fail[In, Out](consumed, r3.Message()), nil
		}

		// Step 4.
		r4, err := f3(v3)(st)
		if err != nil {
			return zero, err
		}
		consumed = consumed || r4.Consumed()
		ok, v4, st := r4.Status()
		if !ok {
			return Fail[In, Out](consumed, r4.Message()), nil
		}

		// Step 5.
		r5, err := f4(v4)(st)
		if err != nil {
			return zero, err
		}
		consumed = consumed || r5.Consumed()
		ok, v5, st := r5.Status()
		if !ok {
			return Fail[In, Out](consumed, r5.Message()), nil
		}

		// Step 6.
		r6, err := f5(v5)(st)
		if err != nil {
			return zero, err
		}
		consumed = consumed || r6.Consumed()
		ok, v6, st := r6.Status()
		if !ok {
			return Fail[In, Out](consumed, r6.Message()), nil
		}

		// Step 7: the last continuation produces the final value.
		r7, err := f6(v6)(st)
		if err != nil {
			return zero, err
		}
		consumed = consumed || r7.Consumed()
		ok, v7, st := r7.Status()
		if !ok {
			return Fail[In, Out](consumed, r7.Message()), nil
		}

		return Succeed(consumed, v7, st, MessageOK(s.Pos())), nil
	}
}
// Bind7 is equivalent to 7 nested calls to Bind.
//
// p runs first from the starting state. Each continuation (f through f7)
// receives the value produced by the step before it and returns the parser to
// run next, which starts from the state at which the previous step stopped.
// The chain stops at the first step that returns an error (propagated as-is
// with a zero Result) or that fails (reported via Fail with that step's
// message). The consumed flag of the final Result is true if any step that ran
// reported consumption. On success the message is MessageOK at the position of
// the starting state.
func Bind7[In, Out, T, T2, T3, T4, T5, T6, T7 any](
	p Parser[In, T],
	f func(T) Parser[In, T2],
	f2 func(T2) Parser[In, T3],
	f3 func(T3) Parser[In, T4],
	f4 func(T4) Parser[In, T5],
	f5 func(T5) Parser[In, T6],
	f6 func(T6) Parser[In, T7],
	f7 func(T7) Parser[In, Out],
) Parser[In, Out] {
	return func(s State[In]) (Result[In, Out], error) {
		// Returned alongside any error from an underlying parser.
		var zero Result[In, Out]

		// Step 1: the initial parser.
		r1, err := p(s)
		if err != nil {
			return zero, err
		}
		consumed := r1.Consumed()
		ok, v1, st := r1.Status()
		if !ok {
			return Fail[In, Out](consumed, r1.Message()), nil
		}

		// Step 2.
		r2, err := f(v1)(st)
		if err != nil {
			return zero, err
		}
		consumed = consumed || r2.Consumed()
		ok, v2, st := r2.Status()
		if !ok {
			return Fail[In, Out](consumed, r2.Message()), nil
		}

		// Step 3.
		r3, err := f2(v2)(st)
		if err != nil {
			return zero, err
		}
		consumed = consumed || r3.Consumed()
		ok, v3, st := r3.Status()
		if !ok {
			return Fail[In, Out](consumed, r3.Message()), nil
		}

		// Step 4.
		r4, err := f3(v3)(st)
		if err != nil {
			return zero, err
		}
		consumed = consumed || r4.Consumed()
		ok, v4, st := r4.Status()
		if !ok {
			return Fail[In, Out](consumed, r4.Message()), nil
		}

		// Step 5.
		r5, err := f4(v4)(st)
		if err != nil {
			return zero, err
		}
		consumed = consumed || r5.Consumed()
		ok, v5, st := r5.Status()
		if !ok {
			return Fail[In, Out](consumed, r5.Message()), nil
		}

		// Step 6.
		r6, err := f5(v5)(st)
		if err != nil {
			return zero, err
		}
		consumed = consumed || r6.Consumed()
		ok, v6, st := r6.Status()
		if !ok {
			return Fail[In, Out](consumed, r6.Message()), nil
		}

		// Step 7.
		r7, err := f6(v6)(st)
		if err != nil {
			return zero, err
		}
		consumed = consumed || r7.Consumed()
		ok, v7, st := r7.Status()
		if !ok {
			return Fail[In, Out](consumed, r7.Message()), nil
		}

		// Step 8: the last continuation produces the final value.
		r8, err := f7(v7)(st)
		if err != nil {
			return zero, err
		}
		consumed = consumed || r8.Consumed()
		ok, v8, st := r8.Status()
		if !ok {
			return Fail[In, Out](consumed, r8.Message()), nil
		}

		return Succeed(consumed, v8, st, MessageOK(s.Pos())), nil
	}
}
// Bind8 is equivalent to 8 nested calls to Bind.
//
// p runs first from the starting state. Each continuation (f through f8)
// receives the value produced by the step before it and returns the parser to
// run next, which starts from the state at which the previous step stopped.
// The chain stops at the first step that returns an error (propagated as-is
// with a zero Result) or that fails (reported via Fail with that step's
// message). The consumed flag of the final Result is true if any step that ran
// reported consumption. On success the message is MessageOK at the position of
// the starting state.
func Bind8[In, Out, T, T2, T3, T4, T5, T6, T7, T8 any](
	p Parser[In, T],
	f func(T) Parser[In, T2],
	f2 func(T2) Parser[In, T3],
	f3 func(T3) Parser[In, T4],
	f4 func(T4) Parser[In, T5],
	f5 func(T5) Parser[In, T6],
	f6 func(T6) Parser[In, T7],
	f7 func(T7) Parser[In, T8],
	f8 func(T8) Parser[In, Out],
) Parser[In, Out] {
	return func(s State[In]) (Result[In, Out], error) {
		// Returned alongside any error from an underlying parser.
		var zero Result[In, Out]

		// Step 1: the initial parser.
		r1, err := p(s)
		if err != nil {
			return zero, err
		}
		consumed := r1.Consumed()
		ok, v1, st := r1.Status()
		if !ok {
			return Fail[In, Out](consumed, r1.Message()), nil
		}

		// Step 2.
		r2, err := f(v1)(st)
		if err != nil {
			return zero, err
		}
		consumed = consumed || r2.Consumed()
		ok, v2, st := r2.Status()
		if !ok {
			return Fail[In, Out](consumed, r2.Message()), nil
		}

		// Step 3.
		r3, err := f2(v2)(st)
		if err != nil {
			return zero, err
		}
		consumed = consumed || r3.Consumed()
		ok, v3, st := r3.Status()
		if !ok {
			return Fail[In, Out](consumed, r3.Message()), nil
		}

		// Step 4.
		r4, err := f3(v3)(st)
		if err != nil {
			return zero, err
		}
		consumed = consumed || r4.Consumed()
		ok, v4, st := r4.Status()
		if !ok {
			return Fail[In, Out](consumed, r4.Message()), nil
		}

		// Step 5.
		r5, err := f4(v4)(st)
		if err != nil {
			return zero, err
		}
		consumed = consumed || r5.Consumed()
		ok, v5, st := r5.Status()
		if !ok {
			return Fail[In, Out](consumed, r5.Message()), nil
		}

		// Step 6.
		r6, err := f5(v5)(st)
		if err != nil {
			return zero, err
		}
		consumed = consumed || r6.Consumed()
		ok, v6, st := r6.Status()
		if !ok {
			return Fail[In, Out](consumed, r6.Message()), nil
		}

		// Step 7.
		r7, err := f6(v6)(st)
		if err != nil {
			return zero, err
		}
		consumed = consumed || r7.Consumed()
		ok, v7, st := r7.Status()
		if !ok {
			return Fail[In, Out](consumed, r7.Message()), nil
		}

		// Step 8.
		r8, err := f7(v7)(st)
		if err != nil {
			return zero, err
		}
		consumed = consumed || r8.Consumed()
		ok, v8, st := r8.Status()
		if !ok {
			return Fail[In, Out](consumed, r8.Message()), nil
		}

		// Step 9: the last continuation produces the final value.
		r9, err := f8(v8)(st)
		if err != nil {
			return zero, err
		}
		consumed = consumed || r9.Consumed()
		ok, v9, st := r9.Status()
		if !ok {
			return Fail[In, Out](consumed, r9.Message()), nil
		}

		return Succeed(consumed, v9, st, MessageOK(s.Pos())), nil
	}
}
// Bind9 is equivalent to 9 nested calls to Bind.
//
// p runs first from the starting state. Each continuation (f through f9)
// receives the value produced by the step before it and returns the parser to
// run next, which starts from the state at which the previous step stopped.
// The chain stops at the first step that returns an error (propagated as-is
// with a zero Result) or that fails (reported via Fail with that step's
// message). The consumed flag of the final Result is true if any step that ran
// reported consumption. On success the message is MessageOK at the position of
// the starting state.
func Bind9[In, Out, T, T2, T3, T4, T5, T6, T7, T8, T9 any](
	p Parser[In, T],
	f func(T) Parser[In, T2],
	f2 func(T2) Parser[In, T3],
	f3 func(T3) Parser[In, T4],
	f4 func(T4) Parser[In, T5],
	f5 func(T5) Parser[In, T6],
	f6 func(T6) Parser[In, T7],
	f7 func(T7) Parser[In, T8],
	f8 func(T8) Parser[In, T9],
	f9 func(T9) Parser[In, Out],
) Parser[In, Out] {
	return func(s State[In]) (Result[In, Out], error) {
		// Returned alongside any error from an underlying parser.
		var zero Result[In, Out]

		// Step 1: the initial parser.
		r1, err := p(s)
		if err != nil {
			return zero, err
		}
		consumed := r1.Consumed()
		ok, v1, st := r1.Status()
		if !ok {
			return Fail[In, Out](consumed, r1.Message()), nil
		}

		// Step 2.
		r2, err := f(v1)(st)
		if err != nil {
			return zero, err
		}
		consumed = consumed || r2.Consumed()
		ok, v2, st := r2.Status()
		if !ok {
			return Fail[In, Out](consumed, r2.Message()), nil
		}

		// Step 3.
		r3, err := f2(v2)(st)
		if err != nil {
			return zero, err
		}
		consumed = consumed || r3.Consumed()
		ok, v3, st := r3.Status()
		if !ok {
			return Fail[In, Out](consumed, r3.Message()), nil
		}

		// Step 4.
		r4, err := f3(v3)(st)
		if err != nil {
			return zero, err
		}
		consumed = consumed || r4.Consumed()
		ok, v4, st := r4.Status()
		if !ok {
			return Fail[In, Out](consumed, r4.Message()), nil
		}

		// Step 5.
		r5, err := f4(v4)(st)
		if err != nil {
			return zero, err
		}
		consumed = consumed || r5.Consumed()
		ok, v5, st := r5.Status()
		if !ok {
			return Fail[In, Out](consumed, r5.Message()), nil
		}

		// Step 6.
		r6, err := f5(v5)(st)
		if err != nil {
			return zero, err
		}
		consumed = consumed || r6.Consumed()
		ok, v6, st := r6.Status()
		if !ok {
			return Fail[In, Out](consumed, r6.Message()), nil
		}

		// Step 7.
		r7, err := f6(v6)(st)
		if err != nil {
			return zero, err
		}
		consumed = consumed || r7.Consumed()
		ok, v7, st := r7.Status()
		if !ok {
			return Fail[In, Out](consumed, r7.Message()), nil
		}

		// Step 8.
		r8, err := f7(v7)(st)
		if err != nil {
			return zero, err
		}
		consumed = consumed || r8.Consumed()
		ok, v8, st := r8.Status()
		if !ok {
			return Fail[In, Out](consumed, r8.Message()), nil
		}

		// Step 9.
		r9, err := f8(v8)(st)
		if err != nil {
			return zero, err
		}
		consumed = consumed || r9.Consumed()
		ok, v9, st := r9.Status()
		if !ok {
			return Fail[In, Out](consumed, r9.Message()), nil
		}

		// Step 10: the last continuation produces the final value.
		r10, err := f9(v9)(st)
		if err != nil {
			return zero, err
		}
		consumed = consumed || r10.Consumed()
		ok, v10, st := r10.Status()
		if !ok {
			return Fail[In, Out](consumed, r10.Message()), nil
		}

		return Succeed(consumed, v10, st, MessageOK(s.Pos())), nil
	}
}

16
bytes/bytes.go Normal file
View File

@ -0,0 +1,16 @@
package bytes
import (
"git.codemonkeysoftware.net/b/gigaparsec"
)
// Token builds a parser decorator from a whitespace parser. The returned
// function takes a parser p and yields a parser that combines p (via Seq2)
// with a Repeat of the whitespace parser, keeping p's value and discarding
// the whitespace values.
//
// Every whitespace result is passed through Consume(false) before Repeat
// sees it.
// NOTE(review): presumably this stops skipped whitespace from counting as
// consumed input, and Repeat(0, ...) presumably means "zero or more" — confirm
// against Result.Consume and Repeat.
func Token[Out, WSOut any](whitespace gigaparsec.Parser[byte, WSOut]) func(p gigaparsec.Parser[byte, Out]) gigaparsec.Parser[byte, Out] {
	// Erase the whitespace parser's value type; only its effect on the
	// input matters here.
	discard := func(WSOut) struct{} { return struct{}{} }
	unit := gigaparsec.Map(whitespace, discard)

	// One whitespace match, with its result adjusted by Consume(false).
	var skipOne gigaparsec.Parser[byte, struct{}] = func(s gigaparsec.State[byte]) (gigaparsec.Result[byte, struct{}], error) {
		res, err := unit(s)
		return res.Consume(false), err
	}

	// Keep the token's value and drop the collected whitespace placeholders.
	keepFirst := func(val Out, _ []struct{}) Out { return val }

	return func(p gigaparsec.Parser[byte, Out]) gigaparsec.Parser[byte, Out] {
		return gigaparsec.Seq2(p, gigaparsec.Repeat(0, skipOne), keepFirst)
	}
}

View File

@ -11,19 +11,18 @@ import (
"unicode/utf8" "unicode/utf8"
"git.codemonkeysoftware.net/b/gigaparsec" "git.codemonkeysoftware.net/b/gigaparsec"
"git.codemonkeysoftware.net/b/gigaparsec/cursor"
) )
// RuneReader is an io.RuneReader backed by a Cursor, for compatibility // RuneReader is an io.RuneReader backed by a Cursor, for compatibility
// with the regexp package. // with the regexp package.
type RuneReader struct { type RuneReader struct {
cursor cursor.Cursor[byte] state gigaparsec.State[byte]
start uint64 start uint64
err error err error
} }
func NewRuneReader(c cursor.Cursor[byte]) *RuneReader { func NewRuneReader(state gigaparsec.State[byte]) *RuneReader {
return &RuneReader{cursor: c, start: c.Pos()} return &RuneReader{state: state, start: state.Pos()}
} }
func (rr *RuneReader) ReadRune() (r rune, size int, err error) { func (rr *RuneReader) ReadRune() (r rune, size int, err error) {
@ -32,9 +31,9 @@ func (rr *RuneReader) ReadRune() (r rune, size int, err error) {
}() }()
var b [4]byte var b [4]byte
s := b[:] s := b[:]
n, next, err := rr.cursor.Read(s) n, next, err := rr.state.Read(s)
if err != nil && !errors.Is(err, io.EOF) { if err != nil && !errors.Is(err, io.EOF) {
rr.cursor = next rr.state = next
return 0, 0, fmt.Errorf("ReadRune: %w", err) return 0, 0, fmt.Errorf("ReadRune: %w", err)
} }
if n == 0 { if n == 0 {
@ -42,12 +41,12 @@ func (rr *RuneReader) ReadRune() (r rune, size int, err error) {
} }
s = s[:n] s = s[:n]
r, size = utf8.DecodeRune(s) r, size = utf8.DecodeRune(s)
rr.cursor = rr.cursor.At(rr.cursor.Pos() + uint64(size)) rr.state = rr.state.At(rr.state.Pos() + uint64(size))
return r, size, nil return r, size, nil
} }
func (rr *RuneReader) Cursor() cursor.Cursor[byte] { func (rr *RuneReader) State() gigaparsec.State[byte] {
return rr.cursor return rr.state
} }
func (rr *RuneReader) Error() error { func (rr *RuneReader) Error() error {
@ -55,7 +54,7 @@ func (rr *RuneReader) Error() error {
} }
func (rr *RuneReader) Count() uint64 { func (rr *RuneReader) Count() uint64 {
return rr.cursor.Pos() - rr.start return rr.state.Pos() - rr.start
} }
func Regexp(pattern string) gigaparsec.Parser[byte, string] { func Regexp(pattern string) gigaparsec.Parser[byte, string] {
@ -63,15 +62,18 @@ func Regexp(pattern string) gigaparsec.Parser[byte, string] {
re := regexp.MustCompile(pattern) re := regexp.MustCompile(pattern)
expected := fmt.Sprintf("match `%s`", pattern) expected := fmt.Sprintf("match `%s`", pattern)
return func(input gigaparsec.State[byte]) (gigaparsec.Result[byte, string], error) { return func(input gigaparsec.State[byte]) (gigaparsec.Result[byte, string], error) {
r := NewRuneReader(input.Cursor()) r := NewRuneReader(input)
idx := re.FindReaderIndex(r) idx := re.FindReaderIndex(r)
err := r.Error() err := r.Error()
if err != nil && !errors.Is(err, io.EOF) { if err != nil && !errors.Is(err, io.EOF) {
return gigaparsec.Result[byte, string]{}, fmt.Errorf("Regexp: reader error: %w", err) return gigaparsec.Result[byte, string]{}, fmt.Errorf("Regexp: reader error: %w", err)
} }
if idx == nil { if idx == nil {
if err == io.EOF {
return gigaparsec.Fail[byte, string](false, gigaparsec.MessageEnd(input.Pos())), nil
}
got := make([]byte, r.Count()) got := make([]byte, r.Count())
_, _, err = input.Cursor().Read(got) _, _, err = input.Read(got)
if err != nil { if err != nil {
return gigaparsec.Result[byte, string]{}, fmt.Errorf("Regexp: unexpected error: %w", err) return gigaparsec.Result[byte, string]{}, fmt.Errorf("Regexp: unexpected error: %w", err)
} }
@ -80,8 +82,8 @@ func Regexp(pattern string) gigaparsec.Parser[byte, string] {
// Alas, this is a little wasteful because a Regexp can only return indices // Alas, this is a little wasteful because a Regexp can only return indices
// when searching a RuneReader. // when searching a RuneReader.
dst := make([]byte, idx[1]-idx[0]) dst := make([]byte, idx[1]-idx[0])
n, _, err := input.Cursor().Read(dst) n, _, err := input.Read(dst)
if err != nil { if err != nil && (!errors.Is(err, io.EOF) || n < uint64(len(dst))) {
return gigaparsec.Result[byte, string]{}, fmt.Errorf("Regexp: unexpected error: %w", err) return gigaparsec.Result[byte, string]{}, fmt.Errorf("Regexp: unexpected error: %w", err)
} }
next := input.At(input.Pos() + n) next := input.At(input.Pos() + n)

View File

@ -4,12 +4,12 @@ package bytes_test
import ( import (
"bytes" "bytes"
"errors"
"strings" "strings"
"testing" "testing"
"git.codemonkeysoftware.net/b/gigaparsec" "git.codemonkeysoftware.net/b/gigaparsec"
pbytes "git.codemonkeysoftware.net/b/gigaparsec/bytes" pbytes "git.codemonkeysoftware.net/b/gigaparsec/bytes"
"git.codemonkeysoftware.net/b/gigaparsec/cursor"
ptest "git.codemonkeysoftware.net/b/gigaparsec/test" ptest "git.codemonkeysoftware.net/b/gigaparsec/test"
pgen "git.codemonkeysoftware.net/b/gigaparsec/test/generator" pgen "git.codemonkeysoftware.net/b/gigaparsec/test/generator"
"github.com/shoenig/test" "github.com/shoenig/test"
@ -17,30 +17,70 @@ import (
"pgregory.net/rapid" "pgregory.net/rapid"
) )
func Todo(t *testing.T) {
t.Fatalf("TODO")
}
func TestRegexp(t *testing.T) { func TestRegexp(t *testing.T) {
t.Run("only searches the beginning of input", Todo) alpha := rapid.SliceOfBytesMatching(`[A-Za-z]{1,100}`)
t.Run("position is correct after match", Todo) t.Run("position and value are correct after match", rapid.MakeCheck(func(t *rapid.T) {
t.Run("fails on unexpected error", Todo) needle := alpha.Draw(t, "needle")
t.Run("returns a useful Got value", rapid.MakeCheck(func(t *rapid.T) { input := rapid.Map(alpha, func(suffix []byte) []byte { return append(needle, suffix...) }).
Draw(t, "input")
p := pbytes.Regexp(string(needle))
result, err := p(gigaparsec.MakeState(bytes.NewReader(input)))
succeeded, val, next := result.Status()
}))
t.Run("basically works", func(t *testing.T) {
result, err := pbytes.Regexp("a")(gigaparsec.MakeState(cursor.NewReaderAt(strings.NewReader("a"))))
must.NoError(t, err) must.NoError(t, err)
success, value, _ := result.Status() test.True(t, succeeded)
test.True(t, success, test.Sprint(result.Message())) test.EqOp(t, string(needle), val)
test.EqOp(t, "a", value) ptest.StateIsAt(t, next, uint64(len(needle)))
test.True(t, result.Consumed()) }))
t.Run("only searches the beginning of input", rapid.MakeCheck(func(t *rapid.T) {
needle := alpha.Draw(t, "needle")
input := rapid.Map(alpha, func(prefix []byte) []byte { return append(prefix, needle...) }).
Filter(func(b []byte) bool { return !bytes.HasPrefix(b, needle) }).
Draw(t, "input")
p := pbytes.Regexp(string(needle))
result, err := p(gigaparsec.MakeState(bytes.NewReader(input)))
succeeded, _, _ := result.Status()
must.NoError(t, err)
test.False(t, succeeded)
}))
t.Run("fails on unexpected error", func(t *testing.T) {
expectedErr := errors.New("it broke")
p := pbytes.Regexp("nope")
result, err := p(gigaparsec.MakeState(ptest.ErrReaderAt(expectedErr)))
succeeded, _, _ := result.Status()
test.ErrorIs(t, err, expectedErr)
test.False(t, succeeded)
})
t.Run("returns a useful Got value", func(t *testing.T) {
p := pbytes.Regexp("hello")
result, err := p(gigaparsec.MakeState(strings.NewReader("hellaparsec")))
must.NoError(t, err)
test.StrContains(t, result.Message().Got(), "hella")
})
t.Run("succeeds on empty matches", func(t *testing.T) {
p := pbytes.Regexp(".*")
result, err := p(gigaparsec.MakeState(strings.NewReader("")))
succeeded, value, _ := result.Status()
must.NoError(t, err)
must.True(t, succeeded)
must.EqOp(t, "", value)
})
t.Run("fails without an error at EOF", func(t *testing.T) {
p := pbytes.Regexp("a")
result, err := p(gigaparsec.MakeState(strings.NewReader("")))
succeeded, _, _ := result.Status()
must.NoError(t, err)
must.False(t, succeeded)
}) })
} }
func TestRuneReader(t *testing.T) { func TestRuneReader(t *testing.T) {
var s = "abcdefghijklmnopqrstuvwxyz" var s = "abcdefghijklmnopqrstuvwxyz"
rr := pbytes.NewRuneReader(cursor.NewReaderAt(strings.NewReader(s))) rr := pbytes.NewRuneReader(gigaparsec.MakeState(strings.NewReader(s)))
for i, b := range s { for i, b := range s {
r, n, err := rr.ReadRune() r, n, err := rr.ReadRune()
test.NoError(t, err) test.NoError(t, err)
@ -56,7 +96,7 @@ func TestMatchString(t *testing.T) {
t.Run("fails on unexpected error", rapid.MakeCheck(func(t *rapid.T) { t.Run("fails on unexpected error", rapid.MakeCheck(func(t *rapid.T) {
s := rapid.StringN(-1, -1, 100).Draw(t, "s") s := rapid.StringN(-1, -1, 100).Draw(t, "s")
readErr := pgen.Error().Draw(t, "readErr") readErr := pgen.Error().Draw(t, "readErr")
result, err := pbytes.MatchString(s)(gigaparsec.MakeState(cursor.NewReaderAt(ptest.ErrReaderAt(readErr)))) result, err := pbytes.MatchString(s)(gigaparsec.MakeState(ptest.ErrReaderAt(readErr)))
test.ErrorIs(t, err, readErr) test.ErrorIs(t, err, readErr)
success, _, _ := result.Status() success, _, _ := result.Status()
test.False(t, success) test.False(t, success)
@ -68,7 +108,7 @@ func TestMatchString(t *testing.T) {
notPrefix := func(b []byte) bool { return !bytes.HasPrefix(input, b) } notPrefix := func(b []byte) bool { return !bytes.HasPrefix(input, b) }
s := string(bgen.Filter(notPrefix).Draw(t, "s")) s := string(bgen.Filter(notPrefix).Draw(t, "s"))
result, err := pbytes.MatchString(s)(gigaparsec.MakeState(cursor.NewReaderAt(bytes.NewReader(input)))) result, err := pbytes.MatchString(s)(gigaparsec.MakeState(bytes.NewReader(input)))
test.NoError(t, err) test.NoError(t, err)
success, _, _ := result.Status() success, _, _ := result.Status()
test.False(t, success) test.False(t, success)
@ -78,7 +118,7 @@ func TestMatchString(t *testing.T) {
input := rapid.SliceOfN(rapid.Byte(), 1, 100).Draw(t, "input") input := rapid.SliceOfN(rapid.Byte(), 1, 100).Draw(t, "input")
slen := rapid.IntRange(0, len(input)).Draw(t, "slen") slen := rapid.IntRange(0, len(input)).Draw(t, "slen")
s := string(input[:slen]) s := string(input[:slen])
result, err := pbytes.MatchString(s)(gigaparsec.MakeState(cursor.NewReaderAt(bytes.NewReader(input)))) result, err := pbytes.MatchString(s)(gigaparsec.MakeState(bytes.NewReader(input)))
must.NoError(t, err) must.NoError(t, err)
success, value, next := result.Status() success, value, next := result.Status()
must.True(t, success) must.True(t, success)

View File

@ -1,80 +0,0 @@
// SPDX-License-Identifier: Unlicense
package cursor
import (
"errors"
"io"
)
// Cursor reads data from a specific spot in a data source.
type Cursor[Datum any] interface {
	// Design note: I almost parameterized Cursor by its implementation (i.e.
	// the Curiously Recurring Template Pattern), but then each parser would
	// need that parameter. That might work well in a language with much
	// stronger type inference, but not in Go. The upside would have been that
	// for each implementation Impl, Impl.Read could have returned an unboxed
	// Impl, which would have slightly simplified testing and maybe slightly
	// reduced allocs.

	// Read fills dst with data from this Cursor's position in the underlying
	// source. It returns the number of data it read and a new Cursor for
	// the position at which the read ended, or an error if the read failed.
	// All calls to Read on a given Cursor will return data from the same
	// position.
	// If n < len(dst) or if the cursor's position is at the end of the data
	// source, Read will return an error explaining why it read fewer data than
	// requested. If the error was due to the cursor reaching the end of the
	// data source, err will be io.EOF.
	Read(dst []Datum) (n uint64, next Cursor[Datum], err error)
	// Pos returns the Cursor's position within the source.
	Pos() uint64
	// At returns a new cursor at the position pos.
	At(pos uint64) Cursor[Datum]
}
// ReaderAt is a random-access source of values of type T. Its single method
// has the same shape as io.ReaderAt's ReadAt, with []T in place of []byte:
// it reads into p starting at offset off and reports how many elements were
// read along with any error.
type ReaderAt[T any] interface {
	ReadAt(p []T, off int64) (n int, err error)
}
// SliceReaderAt is a slice of T that can be read by offset through ReadAt.
type SliceReaderAt[T any] []T

// ReadAt copies elements of s, starting at index off, into dst and returns
// the number of elements copied.
//
// A negative off yields an error. If off is at or beyond the end of s,
// nothing is copied and err is io.EOF. If fewer than len(dst) elements were
// available, the elements that exist are copied and err is io.EOF. Otherwise
// err is nil.
func (s SliceReaderAt[T]) ReadAt(dst []T, off int64) (n int, err error) {
	switch {
	case off < 0:
		return 0, errors.New("SliceReaderAt.ReadAt: negative offset")
	case off >= int64(len(s)):
		return 0, io.EOF
	}
	// copy stops at the shorter of dst and the remaining source; a short
	// copy means the source ran out before dst was full.
	if n = copy(dst, s[off:]); n < len(dst) {
		return n, io.EOF
	}
	return n, nil
}
// ReaderAtCursor is a Cursor backed by a ReaderAt. It pairs the source with
// an offset into it. All methods use value receivers and return updated
// copies, so a ReaderAtCursor held by a caller never changes position.
type ReaderAtCursor[T any] struct {
	r   ReaderAt[T] // underlying data source
	pos uint64      // offset at which Read starts
}

// NewReaderAt returns a ReaderAtCursor over r positioned at offset 0.
func NewReaderAt[T any](r ReaderAt[T]) ReaderAtCursor[T] {
	return ReaderAtCursor[T]{r: r, pos: 0}
}

// Read reads into dst from the cursor's position by calling ReadAt on the
// underlying source. It returns the count reported by ReadAt, a cursor
// advanced by that count (unchanged when the count is not positive), and the
// error from ReadAt unmodified.
func (c ReaderAtCursor[T]) Read(dst []T) (uint64, Cursor[T], error) {
	count, err := c.r.ReadAt(dst, int64(c.pos))
	next := c
	if count > 0 {
		next.pos = c.pos + uint64(count)
	}
	return uint64(count), next, err
}

// Pos returns the cursor's offset within the source.
func (c ReaderAtCursor[T]) Pos() uint64 {
	return c.pos
}

// At returns a cursor over the same source positioned at pos.
func (c ReaderAtCursor[T]) At(pos uint64) Cursor[T] {
	return ReaderAtCursor[T]{r: c.r, pos: pos}
}

View File

@ -1,117 +0,0 @@
// SPDX-License-Identifier: Unlicense
package cursor_test
import (
"bytes"
"io"
"testing"
"git.codemonkeysoftware.net/b/gigaparsec/cursor"
ptest "git.codemonkeysoftware.net/b/gigaparsec/test"
pgen "git.codemonkeysoftware.net/b/gigaparsec/test/generator"
"github.com/shoenig/test"
"github.com/shoenig/test/must"
"pgregory.net/rapid"
)
// Todo is a placeholder test body: it reports a failure with the message
// "TODO" via t.Errorf, which marks the test failed without stopping it.
func Todo(t *testing.T) {
	t.Errorf("TODO")
}
// testCursor runs a shared suite of property-based checks against a
// cursor.Cursor[byte] implementation. makeCursor must build a cursor of type C
// over the given bytes; each subtest calls it with freshly generated data.
func testCursor[C cursor.Cursor[byte]](t *testing.T, makeCursor func([]byte) C) {
	t.Helper()
	// Reading twice from the same cursor value must yield the same data, even
	// after a read from the cursor returned by the first Read.
	t.Run("cursor reads the same position every time", rapid.MakeCheck(func(t *rapid.T) {
		data := rapid.SliceOfN(rapid.Byte(), 1, 100).Draw(t, "data")
		// dst is strictly shorter than data, so the reads below are not
		// expected to hit the end of the source.
		dst := pgen.SliceOfNZero[byte](0, len(data)-1).Draw(t, "dst")
		expected := data[:len(dst)]
		c := makeCursor(data)
		_, next, err := c.Read(dst)
		must.NoError(t, err)
		must.SliceEqOp(t, expected, dst)
		// Read through the advanced cursor (results ignored) before
		// re-reading from the original cursor c.
		next.Read(dst)
		_, _, err = c.Read(dst)
		must.NoError(t, err)
		must.SliceEqOp(t, expected, dst)
	}))
	// io.EOF is expected when dst is longer than data, or when the cursor
	// already sits at the end of data; otherwise no error is expected.
	t.Run("Read returns io.EOF iff it overruns source", rapid.MakeCheck(func(t *rapid.T) {
		data := rapid.SliceOfN(rapid.Byte(), 0, 100).Draw(t, "data")
		dst := pgen.SliceOfNZero[byte](0, 200).Draw(t, "dst")
		c := makeCursor(data)
		n, _, err := c.Read(dst)
		t.Logf("n=%d", n)
		must.EqOp(t, min(len(data), len(dst)), int(n))
		if len(dst) > len(data) || c.Pos() == uint64(len(data)) {
			must.ErrorIs(t, err, io.EOF)
		} else {
			must.NoError(t, err)
		}
	}))
	// The cursor returned by Read must continue where that read stopped.
	t.Run("next cursor reads next input", rapid.MakeCheck(func(t *rapid.T) {
		const maxLen = 100
		data := rapid.SliceOfN(rapid.Byte(), 1, maxLen).Draw(t, "data")
		skip := rapid.IntRange(0, len(data)-1).Draw(t, "skip")
		c := makeCursor(data)
		_, next, err := c.Read(make([]byte, skip))
		must.NoError(t, err)
		must.EqOp(t, skip, int(next.Pos()))
		dst := make([]byte, maxLen)
		// Error ignored on purpose: dst may be longer than the remaining
		// data; only the n elements actually read are compared.
		n, _, _ := next.Read(dst)
		must.SliceEqOp(t, data[skip:skip+int(n)], dst[:n])
	}))
	// Requesting one element more than the source holds must read everything
	// and report io.EOF.
	t.Run("Read returns an error if n is less than requested", rapid.MakeCheck(func(t *rapid.T) {
		data := rapid.SliceOfN(rapid.Byte(), 0, 100).Draw(t, "data")
		c := makeCursor(data)
		n, _, err := c.Read(make([]byte, len(data)+1))
		test.ErrorIs(t, err, io.EOF)
		test.EqOp(t, len(data), int(n))
	}))
	// After At(pos), a one-element read must return data[pos].
	t.Run("At sets cursor position", rapid.MakeCheck(func(t *rapid.T) {
		data := rapid.SliceOfN(rapid.Byte(), 1, 100).Draw(t, "data")
		pos := rapid.Uint64Range(0, uint64(len(data)-1)).Draw(t, "pos")
		c := makeCursor(data).At(pos)
		dst := make([]byte, 1)
		n, _, err := c.Read(dst)
		test.EqOp(t, 1, n)
		test.NoError(t, err)
		test.EqOp(t, data[pos], dst[0])
	}))
	// Pos must echo whatever position At was given; no read is performed, so
	// the data is left nil and pos ranges over all uint64 values.
	t.Run("Pos returns correct position after At", rapid.MakeCheck(func(t *rapid.T) {
		var data []byte
		pos := rapid.Uint64().Draw(t, "pos")
		c := makeCursor(data).At(pos)
		test.EqOp(t, pos, c.Pos())
	}))
	// After reading skip elements, the returned cursor's Pos must equal skip.
	t.Run("Pos returns correct position after Read", rapid.MakeCheck(func(t *rapid.T) {
		const maxLen = 100
		data := rapid.SliceOfN(rapid.Byte(), 1, maxLen).Draw(t, "data")
		skip := rapid.Uint64Range(0, uint64(len(data)-1)).Draw(t, "skip")
		c := makeCursor(data)
		_, next, err := c.Read(make([]byte, skip))
		must.NoError(t, err)
		test.EqOp(t, skip, next.Pos())
	}))
}
// TestReaderAtCursor runs the shared testCursor suite against
// cursor.ReaderAtCursor backed by a bytes.Reader, then checks that an error
// from the underlying ReaderAt is surfaced by Read.
func TestReaderAtCursor(t *testing.T) {
	testCursor(t, func(b []byte) cursor.ReaderAtCursor[byte] {
		return cursor.NewReaderAt(bytes.NewReader(b))
	})
	// With a source that fails (ptest.ErrReaderAt), Read must return that
	// error, report zero elements read, and leave the position where it was.
	t.Run("Read returns an error if the ReaderAt fails", rapid.MakeCheck(func(t *rapid.T) {
		expectedErr := pgen.Error().Draw(t, "expectedErr")
		startPos := rapid.Uint64().Draw(t, "startPos")
		dst := pgen.SliceOfNZero[byte](0, 100).Draw(t, "dst")
		c := cursor.NewReaderAt(ptest.ErrReaderAt(expectedErr)).At(startPos)
		n, next, err := c.Read(dst)
		test.ErrorIs(t, err, expectedErr)
		test.EqOp(t, startPos, next.Pos())
		test.Zero(t, n)
	}))
}

View File

@ -2,15 +2,14 @@
package gigaparsec package gigaparsec
//go:generate go run ./internal/bindgen -bindpath bind.go -seqpath seq.go -max 5 -pkg gigaparsec //go:generate go run ./internal/bindgen -bindpath bind.go -seqpath seq.go -max 9 -pkg gigaparsec
import ( import (
"errors" "errors"
"fmt" "fmt"
"io" "io"
"math"
"slices" "slices"
"strings" "strings"
"git.codemonkeysoftware.net/b/gigaparsec/cursor"
) )
type Result[In, Out any] struct { type Result[In, Out any] struct {
@ -114,29 +113,67 @@ func MessageEnd(pos uint64, expected ...string) Message {
return Message{pos: pos, got: "end of input", expected: expected} return Message{pos: pos, got: "end of input", expected: expected}
} }
func MakeState[In any](c cursor.Cursor[In]) State[In] { type ReaderAt[T any] interface {
return State[In]{cursor: c} ReadAt(p []T, off int64) (n int, err error)
}
// SliceReaderAt is a slice of T that can be read by offset through ReadAt.
type SliceReaderAt[T any] []T

// ReadAt copies elements of s, starting at index off, into dst and returns
// the number of elements copied.
//
// A negative off yields an error. If off is at or beyond the end of s,
// nothing is copied and err is io.EOF. If fewer than len(dst) elements were
// available, the elements that exist are copied and err is io.EOF. Otherwise
// err is nil.
func (s SliceReaderAt[T]) ReadAt(dst []T, off int64) (n int, err error) {
	switch {
	case off < 0:
		return 0, errors.New("SliceReaderAt.ReadAt: negative offset")
	case off >= int64(len(s)):
		return 0, io.EOF
	}
	// copy stops at the shorter of dst and the remaining source; a short
	// copy means the source ran out before dst was full.
	if n = copy(dst, s[off:]); n < len(dst) {
		return n, io.EOF
	}
	return n, nil
}
func MakeState[In any](r ReaderAt[In]) State[In] {
return State[In]{r: r}
} }
type State[In any] struct { type State[In any] struct {
cursor cursor.Cursor[In] r ReaderAt[In]
} pos uint64
func (s State[In]) Cursor() cursor.Cursor[In] {
return s.cursor
} }
// Read fills dst with data from this State's position in the underlying source.
// It returns the number of data it read and a new State for the position at which
// the read ended, and an error if the read either (1) failed or (2) reached the
// end of the source before filling dst. All reads from a given State will return
// data from the same position in the source.
// If the source had too few data left to fill dst, or if the State's position is
// at or past the end of the source, err will be io.EOF.
func (s State[In]) Read(dst []In) (n uint64, next State[In], err error) { func (s State[In]) Read(dst []In) (n uint64, next State[In], err error) {
n, c, err := s.cursor.Read(dst) if s.pos > math.MaxInt64 {
return n, State[In]{cursor: c}, err return 0, s, io.EOF
}
nread, err := s.r.ReadAt(dst, int64(s.pos))
if nread > 0 {
s.pos += uint64(nread)
}
if nread == len(dst) && err == io.EOF {
if nread == 0 {
return 0, s, io.EOF
}
return uint64(nread), s, nil
}
return uint64(nread), s, err
} }
// Pos returns this State's position.
func (s State[In]) Pos() uint64 { func (s State[In]) Pos() uint64 {
return s.cursor.Pos() return s.pos
} }
// At returns a State pointing at pos in the same data source.
func (s State[In]) At(pos uint64) State[In] { func (s State[In]) At(pos uint64) State[In] {
return State[In]{cursor: s.cursor.At(pos)} return State[In]{r: s.r, pos: pos}
} }
type Parser[In, Out any] func(State[In]) (Result[In, Out], error) type Parser[In, Out any] func(State[In]) (Result[In, Out], error)
@ -157,14 +194,25 @@ func (p Parser[In, Out]) Label(label string) Parser[In, Out] {
} }
} }
// Where returns a parser that runs p and then applies pred to the value of a
// successful result. If pred rejects the value, the result is marked as
// unsuccessful and its message's "got" text is replaced. Unsuccessful results
// and any error from p are passed through unchanged; pred is not called for them.
func (p Parser[In, Out]) Where(pred func(Out) bool) Parser[In, Out] {
	return func(s State[In]) (Result[In, Out], error) {
		result, err := p(s)
		if !result.success || pred(result.value) {
			return result, err
		}
		result.success = false
		result.message.got = "failed Where predicate"
		return result, err
	}
}
type ParseError Message type ParseError Message
func (pe ParseError) Error() string { func (pe ParseError) Error() string {
return Message(pe).String() return Message(pe).String()
} }
func Run[In, Out any](p Parser[In, Out], c cursor.Cursor[In]) (out Out, err error) { func Run[In, Out any](p Parser[In, Out], r ReaderAt[In]) (out Out, err error) {
start := MakeState(c) start := MakeState(r)
result, err := p(start) result, err := p(start)
if err != nil { if err != nil {
err = fmt.Errorf("Run: %w", err) err = fmt.Errorf("Run: %w", err)
@ -315,7 +363,7 @@ func Map[In, Out1, Out2 any](p Parser[In, Out1], f func(Out1) Out2) Parser[In, O
} }
func end[In any](s State[In]) (Result[In, struct{}], error) { func end[In any](s State[In]) (Result[In, struct{}], error) {
_, _, err := s.cursor.Read([]In{}) _, _, err := s.Read([]In{})
if errors.Is(err, io.EOF) { if errors.Is(err, io.EOF) {
return Succeed(true, struct{}{}, s, MessageOK(s.Pos())), nil return Succeed(true, struct{}{}, s, MessageOK(s.Pos())), nil
} }
@ -342,26 +390,40 @@ func Pipe[In, Ignore, Through any](p Parser[In, Ignore]) func(Through) Parser[In
// It succeeds if and only if p succeeds at least minCount times. // It succeeds if and only if p succeeds at least minCount times.
// It consumes if and only if at least one of the applications of p consumes. // It consumes if and only if at least one of the applications of p consumes.
func Repeat[In, Out any](minCount int, p Parser[In, Out]) Parser[In, []Out] { func Repeat[In, Out any](minCount int, p Parser[In, Out]) Parser[In, []Out] {
return func(s State[In]) (Result[In, []Out], error) { return func(state State[In]) (Result[In, []Out], error) {
var values []Out var values []Out
var consumed bool var consumed bool
next := s currState := state
for { for {
result, err := p(next) result, err := p(currState)
if err != nil { if err != nil {
return Result[In, []Out]{}, fmt.Errorf("AtLeastN: %w", err) return Result[In, []Out]{}, fmt.Errorf("AtLeastN: %w", err)
} }
consumed = consumed || result.Consumed() consumed = consumed || result.Consumed()
var value Out var value Out
var success bool var success bool
success, value, next = result.Status() success, value, nextState := result.Status()
if !success { if !success {
if len(values) >= minCount { if len(values) >= minCount {
return Succeed(consumed, values, next, MessageOK(s.Pos())), nil return Succeed(consumed, values, currState, MessageOK(state.Pos())), nil
} }
return Fail[In, []Out](consumed, result.Message()), nil return Fail[In, []Out](consumed, result.Message()), nil
} }
currState = nextState
values = append(values, value) values = append(values, value)
} }
} }
} }
// Lazy wraps a parser constructor so that the constructor is only invoked
// when the resulting parser is applied to a state. p is called on every
// application. This lets a recursive parser refer to itself without the
// constructor functions calling each other while the parser is being defined.
func Lazy[In, Out any](p func() Parser[In, Out]) Parser[In, Out] {
	return func(s State[In]) (Result[In, Out], error) {
		parser := p()
		return parser(s)
	}
}
// Bracket combines left, p, and right with Seq3 and keeps only the value
// produced by p; the values from left and right are discarded.
func Bracket[In, Out, LOut, ROut any](left Parser[In, LOut], p Parser[In, Out], right Parser[In, ROut]) Parser[In, Out] {
	keepMiddle := func(_ LOut, inner Out, _ ROut) Out {
		return inner
	}
	return Seq3(left, p, right, keepMiddle)
}

View File

@ -1,19 +1,19 @@
- Hutton & Meijer, _[Monadic Parser Combinators](http://www.cs.nott.ac.uk/~pszgmh/monparsing.pdf)_ - Hutton & Meijer, _[Monadic Parser Combinators](http://www.cs.nott.ac.uk/~pszgmh/monparsing.pdf)_
The original paper on monadic parser combinators. The original paper on monadic parser combinators.
- Leijen & Meijer, _[Parsec: Direct Style Monadic Parser Combinators For The Real World](https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/parsec-paper-letter.pdf)_ - Leijen & Meijer, _[Parsec: Direct Style Monadic Parser Combinators For The Real World](https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/parsec-paper-letter.pdf)_
Describes Parsec, a monadic parser combinator library that limits backtracking by default and adds better error messages. Describes Parsec, a monadic parser combinator library that limits backtracking by default and adds better error messages.
- [Megaparsec](https://hackage.haskell.org/package/megaparsec) - [Megaparsec](https://hackage.haskell.org/package/megaparsec)
Improved fork of Parsec. Improved fork of Parsec.
- [FParsec](https://github.com/stephan-tolksdorf/fparsec) - [FParsec](https://github.com/stephan-tolksdorf/fparsec)
Parsec implementation in F#. Parsec implementation in F#.
- [Sprache](https://github.com/sprache/Sprache) - [Sprache](https://github.com/sprache/Sprache)
Parsec implementation in C#. Parsec implementation in C#.

90
internal/check/check.go Normal file
View File

@ -0,0 +1,90 @@
// SPDX-License-Identifier: Unlicense
package main
import (
"bufio"
"errors"
"fmt"
"io/fs"
"os"
"path/filepath"
"regexp"
"strings"
)
// goSpdxHeader is the license line looked for in files with the ".go" extension.
var goSpdxHeader = `// SPDX-License-Identifier: Unlicense`
// tmplSpdxHeader is the license line looked for in files with the ".tmpl" extension.
var tmplSpdxHeader = `{{/* SPDX-License-Identifier: Unlicense */`
// MissingSPDXError reports that a file does not contain the expected SPDX
// header line.
type MissingSPDXError struct {
	// Name is the path of the offending file.
	Name string
}

// Error implements the error interface.
func (m MissingSPDXError) Error() string {
	const prefix = "missing or incorrect SPDX header: "
	return fmt.Sprint(prefix, m.Name)
}
// checkFileSPDX opens the file called name and looks for header at the start
// of any line. It returns a MissingSPDXError when no line starts with header,
// or the underlying error if the file cannot be opened or the pattern cannot
// be compiled.
func checkFileSPDX(header string, name string) error {
	file, err := os.Open(name)
	if err != nil {
		return err
	}
	defer file.Close()

	// (?m) makes ^ match at the start of every line, not just the first.
	re, err := regexp.Compile(`(?m:^` + regexp.QuoteMeta(header) + `)`)
	if err != nil {
		return err
	}
	if re.MatchReader(bufio.NewReader(file)) {
		return nil
	}
	return MissingSPDXError{Name: name}
}
// walkSPDX walks the tree rooted at the current directory and runs
// checkFileSPDX with header on every non-directory entry whose extension
// equals extension (including the leading dot, e.g. ".go").
//
// Entries whose walk path starts with "." are ignored, and such directories
// are not descended into. Errors reported by the walk and failed checks are
// collected rather than stopping the walk; the returned error joins all of
// them and is nil when there were none.
func walkSPDX(header string, extension string) error {
	var errs []error
	walkErr := filepath.WalkDir(".", func(path string, d fs.DirEntry, err error) error {
		if path != "." && strings.HasPrefix(path, ".") {
			if d.IsDir() {
				return filepath.SkipDir
			}
			return nil
		}
		if err != nil {
			// Record the problem and keep walking so one unreadable entry
			// does not hide results for the rest of the tree.
			errs = append(errs, err)
			return nil
		}
		// A directory may itself be named like "pkg.go"; it is not a source
		// file and must not be opened and scanned as one.
		if d.IsDir() || filepath.Ext(path) != extension {
			return nil
		}
		if checkErr := checkFileSPDX(header, path); checkErr != nil {
			errs = append(errs, checkErr)
		}
		return nil
	})
	if walkErr != nil {
		errs = append(errs, walkErr)
	}
	return errors.Join(errs...)
}
// checkSPDX verifies the SPDX header of ".go" files and then of ".tmpl"
// files. Any failures from the two passes are joined and wrapped in a single
// error; nil is returned when both passes are clean.
func checkSPDX() error {
	goErr := walkSPDX(goSpdxHeader, ".go")
	tmplErr := walkSPDX(tmplSpdxHeader, ".tmpl")
	if joined := errors.Join(goErr, tmplErr); joined != nil {
		return fmt.Errorf("Check SPDX Headers:\n%w", joined)
	}
	return nil
}
// run performs the checks of this command and returns their combined error.
// At present the only check is checkSPDX.
func run() error {
return checkSPDX()
}
// main runs the checks; on failure it prints the error to standard error and
// exits with status 1.
func main() {
	if err := run(); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}

265
naive/naive.go Normal file
View File

@ -0,0 +1,265 @@
// SPDX-License-Identifier: Unlicense
// Package naive contains naïve implementations of the Bind and Seq combinators.
// The accompanying tests include simple benchmarks comparing their performance.
package naive
import gp "git.codemonkeysoftware.net/b/gigaparsec"
// Bind2 chains p through f and then f2 by applying gp.Bind twice.
func Bind2[In, Out, T, T2 any](
	p gp.Parser[In, T],
	f func(T) gp.Parser[In, T2],
	f2 func(T2) gp.Parser[In, Out],
) gp.Parser[In, Out] {
	b1 := gp.Bind(p, f)
	return gp.Bind(b1, f2)
}
// Bind3 chains p through f, f2, and f3 by nesting three calls to gp.Bind.
//
// Only the type parameters that appear in the parameter list are declared,
// so every one of them can be inferred from the arguments at the call site.
func Bind3[In, Out, T, T2, T3 any](
	p gp.Parser[In, T],
	f func(T) gp.Parser[In, T2],
	f2 func(T2) gp.Parser[In, T3],
	f3 func(T3) gp.Parser[In, Out],
) gp.Parser[In, Out] {
	return gp.Bind(gp.Bind(gp.Bind(p, f), f2), f3)
}
// Bind4 chains p through f, f2, f3, and f4 by applying gp.Bind four times.
func Bind4[In, Out, T, T2, T3, T4 any](
	p gp.Parser[In, T],
	f func(T) gp.Parser[In, T2],
	f2 func(T2) gp.Parser[In, T3],
	f3 func(T3) gp.Parser[In, T4],
	f4 func(T4) gp.Parser[In, Out],
) gp.Parser[In, Out] {
	b1 := gp.Bind(p, f)
	b2 := gp.Bind(b1, f2)
	b3 := gp.Bind(b2, f3)
	return gp.Bind(b3, f4)
}
// Bind5 chains p through f, f2, f3, f4, and f5 by applying gp.Bind five times.
func Bind5[In, Out, T, T2, T3, T4, T5 any](
	p gp.Parser[In, T],
	f func(T) gp.Parser[In, T2],
	f2 func(T2) gp.Parser[In, T3],
	f3 func(T3) gp.Parser[In, T4],
	f4 func(T4) gp.Parser[In, T5],
	f5 func(T5) gp.Parser[In, Out],
) gp.Parser[In, Out] {
	b1 := gp.Bind(p, f)
	b2 := gp.Bind(b1, f2)
	b3 := gp.Bind(b2, f3)
	b4 := gp.Bind(b3, f4)
	return gp.Bind(b4, f5)
}
// Bind6 chains p through f and f2..f6 by applying gp.Bind six times.
func Bind6[In, Out, T, T2, T3, T4, T5, T6 any](
	p gp.Parser[In, T],
	f func(T) gp.Parser[In, T2],
	f2 func(T2) gp.Parser[In, T3],
	f3 func(T3) gp.Parser[In, T4],
	f4 func(T4) gp.Parser[In, T5],
	f5 func(T5) gp.Parser[In, T6],
	f6 func(T6) gp.Parser[In, Out],
) gp.Parser[In, Out] {
	b1 := gp.Bind(p, f)
	b2 := gp.Bind(b1, f2)
	b3 := gp.Bind(b2, f3)
	b4 := gp.Bind(b3, f4)
	b5 := gp.Bind(b4, f5)
	return gp.Bind(b5, f6)
}
// Bind7 chains p through f and f2..f7 by applying gp.Bind seven times.
func Bind7[In, Out, T, T2, T3, T4, T5, T6, T7 any](
	p gp.Parser[In, T],
	f func(T) gp.Parser[In, T2],
	f2 func(T2) gp.Parser[In, T3],
	f3 func(T3) gp.Parser[In, T4],
	f4 func(T4) gp.Parser[In, T5],
	f5 func(T5) gp.Parser[In, T6],
	f6 func(T6) gp.Parser[In, T7],
	f7 func(T7) gp.Parser[In, Out],
) gp.Parser[In, Out] {
	b1 := gp.Bind(p, f)
	b2 := gp.Bind(b1, f2)
	b3 := gp.Bind(b2, f3)
	b4 := gp.Bind(b3, f4)
	b5 := gp.Bind(b4, f5)
	b6 := gp.Bind(b5, f6)
	return gp.Bind(b6, f7)
}
// Bind8 chains p through f and f2..f8 by applying gp.Bind eight times.
func Bind8[In, Out, T, T2, T3, T4, T5, T6, T7, T8 any](
	p gp.Parser[In, T],
	f func(T) gp.Parser[In, T2],
	f2 func(T2) gp.Parser[In, T3],
	f3 func(T3) gp.Parser[In, T4],
	f4 func(T4) gp.Parser[In, T5],
	f5 func(T5) gp.Parser[In, T6],
	f6 func(T6) gp.Parser[In, T7],
	f7 func(T7) gp.Parser[In, T8],
	f8 func(T8) gp.Parser[In, Out],
) gp.Parser[In, Out] {
	b1 := gp.Bind(p, f)
	b2 := gp.Bind(b1, f2)
	b3 := gp.Bind(b2, f3)
	b4 := gp.Bind(b3, f4)
	b5 := gp.Bind(b4, f5)
	b6 := gp.Bind(b5, f6)
	b7 := gp.Bind(b6, f7)
	return gp.Bind(b7, f8)
}
// Bind9 chains p through f and f2..f9 by applying gp.Bind nine times.
func Bind9[In, Out, T, T2, T3, T4, T5, T6, T7, T8, T9 any](
	p gp.Parser[In, T],
	f func(T) gp.Parser[In, T2],
	f2 func(T2) gp.Parser[In, T3],
	f3 func(T3) gp.Parser[In, T4],
	f4 func(T4) gp.Parser[In, T5],
	f5 func(T5) gp.Parser[In, T6],
	f6 func(T6) gp.Parser[In, T7],
	f7 func(T7) gp.Parser[In, T8],
	f8 func(T8) gp.Parser[In, T9],
	f9 func(T9) gp.Parser[In, Out],
) gp.Parser[In, Out] {
	b1 := gp.Bind(p, f)
	b2 := gp.Bind(b1, f2)
	b3 := gp.Bind(b2, f3)
	b4 := gp.Bind(b3, f4)
	b5 := gp.Bind(b4, f5)
	b6 := gp.Bind(b5, f6)
	b7 := gp.Bind(b6, f7)
	b8 := gp.Bind(b7, f8)
	return gp.Bind(b8, f9)
}
// Seq2 sequences p and p2 with nested gp.Bind calls and wraps f applied to
// their two values in gp.Return.
func Seq2[In, Out, T, T2 any](
	p gp.Parser[In, T],
	p2 gp.Parser[In, T2],
	f func(T, T2) Out,
) gp.Parser[In, Out] {
	return gp.Bind(p, func(v1 T) gp.Parser[In, Out] {
		return gp.Bind(p2, func(v2 T2) gp.Parser[In, Out] {
			combined := f(v1, v2)
			return gp.Return[In](combined)
		})
	})
}
// Seq3 sequences p, p2, and p3 with nested gp.Bind calls and wraps f applied
// to their three values in gp.Return.
func Seq3[In, Out, T, T2, T3 any](
	p gp.Parser[In, T],
	p2 gp.Parser[In, T2],
	p3 gp.Parser[In, T3],
	f func(T, T2, T3) Out,
) gp.Parser[In, Out] {
	return gp.Bind(p, func(v1 T) gp.Parser[In, Out] {
		return gp.Bind(p2, func(v2 T2) gp.Parser[In, Out] {
			return gp.Bind(p3, func(v3 T3) gp.Parser[In, Out] {
				combined := f(v1, v2, v3)
				return gp.Return[In](combined)
			})
		})
	})
}
// Seq4 sequences p through p4 with nested gp.Bind calls and wraps f applied
// to their four values in gp.Return.
func Seq4[In, Out, T, T2, T3, T4 any](
	p gp.Parser[In, T],
	p2 gp.Parser[In, T2],
	p3 gp.Parser[In, T3],
	p4 gp.Parser[In, T4],
	f func(T, T2, T3, T4) Out,
) gp.Parser[In, Out] {
	return gp.Bind(p, func(v1 T) gp.Parser[In, Out] {
		return gp.Bind(p2, func(v2 T2) gp.Parser[In, Out] {
			return gp.Bind(p3, func(v3 T3) gp.Parser[In, Out] {
				return gp.Bind(p4, func(v4 T4) gp.Parser[In, Out] {
					combined := f(v1, v2, v3, v4)
					return gp.Return[In](combined)
				})
			})
		})
	})
}
// Seq5 sequences p through p5 with nested gp.Bind calls and wraps f applied
// to their five values in gp.Return.
func Seq5[In, Out, T, T2, T3, T4, T5 any](
	p gp.Parser[In, T],
	p2 gp.Parser[In, T2],
	p3 gp.Parser[In, T3],
	p4 gp.Parser[In, T4],
	p5 gp.Parser[In, T5],
	f func(T, T2, T3, T4, T5) Out,
) gp.Parser[In, Out] {
	return gp.Bind(p, func(v1 T) gp.Parser[In, Out] {
		return gp.Bind(p2, func(v2 T2) gp.Parser[In, Out] {
			return gp.Bind(p3, func(v3 T3) gp.Parser[In, Out] {
				return gp.Bind(p4, func(v4 T4) gp.Parser[In, Out] {
					return gp.Bind(p5, func(v5 T5) gp.Parser[In, Out] {
						combined := f(v1, v2, v3, v4, v5)
						return gp.Return[In](combined)
					})
				})
			})
		})
	})
}
// Seq6 sequences p through p6 with nested gp.Bind calls and wraps f applied
// to their six values in gp.Return.
func Seq6[In, Out, T, T2, T3, T4, T5, T6 any](
	p gp.Parser[In, T],
	p2 gp.Parser[In, T2],
	p3 gp.Parser[In, T3],
	p4 gp.Parser[In, T4],
	p5 gp.Parser[In, T5],
	p6 gp.Parser[In, T6],
	f func(T, T2, T3, T4, T5, T6) Out,
) gp.Parser[In, Out] {
	return gp.Bind(p, func(v1 T) gp.Parser[In, Out] {
		return gp.Bind(p2, func(v2 T2) gp.Parser[In, Out] {
			return gp.Bind(p3, func(v3 T3) gp.Parser[In, Out] {
				return gp.Bind(p4, func(v4 T4) gp.Parser[In, Out] {
					return gp.Bind(p5, func(v5 T5) gp.Parser[In, Out] {
						return gp.Bind(p6, func(v6 T6) gp.Parser[In, Out] {
							combined := f(v1, v2, v3, v4, v5, v6)
							return gp.Return[In](combined)
						})
					})
				})
			})
		})
	})
}
// Seq7 sequences p through p7 with nested gp.Bind calls and wraps f applied
// to their seven values in gp.Return.
func Seq7[In, Out, T, T2, T3, T4, T5, T6, T7 any](
	p gp.Parser[In, T],
	p2 gp.Parser[In, T2],
	p3 gp.Parser[In, T3],
	p4 gp.Parser[In, T4],
	p5 gp.Parser[In, T5],
	p6 gp.Parser[In, T6],
	p7 gp.Parser[In, T7],
	f func(T, T2, T3, T4, T5, T6, T7) Out,
) gp.Parser[In, Out] {
	return gp.Bind(p, func(v1 T) gp.Parser[In, Out] {
		return gp.Bind(p2, func(v2 T2) gp.Parser[In, Out] {
			return gp.Bind(p3, func(v3 T3) gp.Parser[In, Out] {
				return gp.Bind(p4, func(v4 T4) gp.Parser[In, Out] {
					return gp.Bind(p5, func(v5 T5) gp.Parser[In, Out] {
						return gp.Bind(p6, func(v6 T6) gp.Parser[In, Out] {
							return gp.Bind(p7, func(v7 T7) gp.Parser[In, Out] {
								combined := f(v1, v2, v3, v4, v5, v6, v7)
								return gp.Return[In](combined)
							})
						})
					})
				})
			})
		})
	})
}
// Seq8 sequences p through p8 with nested gp.Bind calls and wraps f applied
// to their eight values in gp.Return.
func Seq8[In, Out, T, T2, T3, T4, T5, T6, T7, T8 any](
	p gp.Parser[In, T],
	p2 gp.Parser[In, T2],
	p3 gp.Parser[In, T3],
	p4 gp.Parser[In, T4],
	p5 gp.Parser[In, T5],
	p6 gp.Parser[In, T6],
	p7 gp.Parser[In, T7],
	p8 gp.Parser[In, T8],
	f func(T, T2, T3, T4, T5, T6, T7, T8) Out,
) gp.Parser[In, Out] {
	return gp.Bind(p, func(v1 T) gp.Parser[In, Out] {
		return gp.Bind(p2, func(v2 T2) gp.Parser[In, Out] {
			return gp.Bind(p3, func(v3 T3) gp.Parser[In, Out] {
				return gp.Bind(p4, func(v4 T4) gp.Parser[In, Out] {
					return gp.Bind(p5, func(v5 T5) gp.Parser[In, Out] {
						return gp.Bind(p6, func(v6 T6) gp.Parser[In, Out] {
							return gp.Bind(p7, func(v7 T7) gp.Parser[In, Out] {
								return gp.Bind(p8, func(v8 T8) gp.Parser[In, Out] {
									combined := f(v1, v2, v3, v4, v5, v6, v7, v8)
									return gp.Return[In](combined)
								})
							})
						})
					})
				})
			})
		})
	})
}
// Seq9 sequences p through p9 with nested gp.Bind calls and wraps f applied
// to their nine values in gp.Return.
func Seq9[In, Out, T, T2, T3, T4, T5, T6, T7, T8, T9 any](
	p gp.Parser[In, T],
	p2 gp.Parser[In, T2],
	p3 gp.Parser[In, T3],
	p4 gp.Parser[In, T4],
	p5 gp.Parser[In, T5],
	p6 gp.Parser[In, T6],
	p7 gp.Parser[In, T7],
	p8 gp.Parser[In, T8],
	p9 gp.Parser[In, T9],
	f func(T, T2, T3, T4, T5, T6, T7, T8, T9) Out,
) gp.Parser[In, Out] {
	return gp.Bind(p, func(v1 T) gp.Parser[In, Out] {
		return gp.Bind(p2, func(v2 T2) gp.Parser[In, Out] {
			return gp.Bind(p3, func(v3 T3) gp.Parser[In, Out] {
				return gp.Bind(p4, func(v4 T4) gp.Parser[In, Out] {
					return gp.Bind(p5, func(v5 T5) gp.Parser[In, Out] {
						return gp.Bind(p6, func(v6 T6) gp.Parser[In, Out] {
							return gp.Bind(p7, func(v7 T7) gp.Parser[In, Out] {
								return gp.Bind(p8, func(v8 T8) gp.Parser[In, Out] {
									return gp.Bind(p9, func(v9 T9) gp.Parser[In, Out] {
										combined := f(v1, v2, v3, v4, v5, v6, v7, v8, v9)
										return gp.Return[In](combined)
									})
								})
							})
						})
					})
				})
			})
		})
	})
}

108
naive/naive_test.go Normal file
View File

@ -0,0 +1,108 @@
// SPDX-License-Identifier: Unlicense
package naive_test
import (
"testing"
"git.codemonkeysoftware.net/b/gigaparsec"
"git.codemonkeysoftware.net/b/gigaparsec/naive"
)
func BenchmarkBind5(b *testing.B) {
type Bind5T = func(p gigaparsec.Parser[byte, byte], f func(byte) gigaparsec.Parser[byte, byte], f2 func(byte) gigaparsec.Parser[byte, byte], f3 func(byte) gigaparsec.Parser[byte, byte], f4 func(byte) gigaparsec.Parser[byte, byte], f5 func(byte) gigaparsec.Parser[byte, byte]) gigaparsec.Parser[byte, byte]
f := func(b byte) gigaparsec.Parser[byte, byte] {
return gigaparsec.Return[byte](b + 1)
}
p := func(bind5 Bind5T) gigaparsec.Parser[byte, byte] {
// gigaparsec.Bind5()
return bind5(gigaparsec.Match(byte(0)), f, f, f, f, f)
}
input := gigaparsec.SliceReaderAt[byte]{0}
b.Run("gigaparsec.Bind5", func(b *testing.B) {
for range b.N {
gigaparsec.Run(p(gigaparsec.Bind5), input)
}
})
b.Run("naïve.Bind5", func(b *testing.B) {
for range b.N {
gigaparsec.Run(p(naive.Bind5), input)
}
})
}
func BenchmarkBind9(b *testing.B) {
type Bind9T = func(p gigaparsec.Parser[byte, byte], f func(byte) gigaparsec.Parser[byte, byte], f2 func(byte) gigaparsec.Parser[byte, byte], f3 func(byte) gigaparsec.Parser[byte, byte], f4 func(byte) gigaparsec.Parser[byte, byte], f5 func(byte) gigaparsec.Parser[byte, byte], f6 func(byte) gigaparsec.Parser[byte, byte], f7 func(byte) gigaparsec.Parser[byte, byte], f8 func(byte) gigaparsec.Parser[byte, byte], f9 func(byte) gigaparsec.Parser[byte, byte]) gigaparsec.Parser[byte, byte]
f := func(b byte) gigaparsec.Parser[byte, byte] {
return gigaparsec.Return[byte](b + 1)
}
p := func(bind5 Bind9T) gigaparsec.Parser[byte, byte] {
// gigaparsec.Bind5()
return bind5(gigaparsec.Match(byte(0)), f, f, f, f, f, f, f, f, f)
}
input := gigaparsec.SliceReaderAt[byte]{0}
b.Run("gigaparsec.Bind9", func(b *testing.B) {
for range b.N {
gigaparsec.Run(p(gigaparsec.Bind9), input)
}
})
b.Run("naive.Bind9", func(b *testing.B) {
for range b.N {
gigaparsec.Run(p(naive.Bind9), input)
}
})
}
func BenchmarkSeq5(b *testing.B) {
type P = gigaparsec.Parser[byte, byte]
type Seq5T = func(P, P, P, P, P, func(byte, byte, byte, byte, byte) byte) P
zero := gigaparsec.Return[byte, byte](0)
f := func(a, b, c, d, e byte) byte {
return a + b + c + d + e
}
p := func(seq5 Seq5T) P {
return seq5(zero, zero, zero, zero, zero, f)
}
input := gigaparsec.SliceReaderAt[byte]{0}
b.Run("gigaparsec.Seq5", func(b *testing.B) {
for range b.N {
gigaparsec.Run(p(gigaparsec.Seq5), input)
}
})
b.Run("naive.Seq5", func(b *testing.B) {
for range b.N {
gigaparsec.Run(p(naive.Seq5), input)
}
})
}
func BenchmarkSeq9(b *testing.B) {
type P = gigaparsec.Parser[byte, byte]
type Seq9T = func(P, P, P, P, P, P, P, P, P, func(byte, byte, byte, byte, byte, byte, byte, byte, byte) byte) P
zero := gigaparsec.Return[byte, byte](0)
f := func(a, b, c, d, e, f, g, h, i byte) byte {
return a + b + c + d + e + f + g + h + i
}
p := func(seq9 Seq9T) P {
return seq9(zero, zero, zero, zero, zero, zero, zero, zero, zero, f)
}
input := gigaparsec.SliceReaderAt[byte]{0}
b.Run("gigaparsec.Seq9", func(b *testing.B) {
for range b.N {
gigaparsec.Run(p(gigaparsec.Seq9), input)
}
})
b.Run("naive.Seq9", func(b *testing.B) {
for range b.N {
gigaparsec.Run(p(naive.Seq9), input)
}
})
}

View File

@ -4,10 +4,11 @@ package gigaparsec_test
import ( import (
"bytes" "bytes"
"errors"
"fmt"
"testing" "testing"
"git.codemonkeysoftware.net/b/gigaparsec" "git.codemonkeysoftware.net/b/gigaparsec"
"git.codemonkeysoftware.net/b/gigaparsec/cursor"
ptest "git.codemonkeysoftware.net/b/gigaparsec/test" ptest "git.codemonkeysoftware.net/b/gigaparsec/test"
"git.codemonkeysoftware.net/b/gigaparsec/test/generator" "git.codemonkeysoftware.net/b/gigaparsec/test/generator"
"github.com/shoenig/test" "github.com/shoenig/test"
@ -30,7 +31,7 @@ func hasPrefix(prefix []byte) func([]byte) bool {
func TestSlice(t *testing.T) { func TestSlice(t *testing.T) {
assertParseFails := func(t rapid.TB, input []byte, p gigaparsec.Parser[byte, []byte]) { assertParseFails := func(t rapid.TB, input []byte, p gigaparsec.Parser[byte, []byte]) {
t.Helper() t.Helper()
start := gigaparsec.MakeState(cursor.NewReaderAt(bytes.NewReader(input))) start := gigaparsec.MakeState(bytes.NewReader(input))
result, err := p(start) result, err := p(start)
must.NoError(t, err) must.NoError(t, err)
success, _, _ := result.Status() success, _, _ := result.Status()
@ -49,24 +50,24 @@ func TestSlice(t *testing.T) {
})) }))
t.Run("fails at end of input", rapid.MakeCheck(func(t *rapid.T) { t.Run("fails at end of input", rapid.MakeCheck(func(t *rapid.T) {
s := rapid.SliceOfN(rapid.Byte(), 1, -1).Draw(t, "s") s := rapid.SliceOfN(rapid.Byte(), 1, -1).Draw(t, "s")
inputLen := rapid.IntRange(0, len(s)-1).Draw(t, "inputLen") input := rapid.Map(rapid.IntRange(0, len(s)-1),
input := s[:inputLen] func(n int) []byte { return s[:n] }).Draw(t, "inputLen")
assertParseFails(t, input, gigaparsec.MatchSlice(s)) assertParseFails(t, input, gigaparsec.MatchSlice(s))
})) }))
t.Run("fails when read fails", rapid.MakeCheck(func(t *rapid.T) { t.Run("fails when read fails", rapid.MakeCheck(func(t *rapid.T) {
expectedErr := generator.Error().Draw(t, "expectedErr") expectedErr := generator.Error().Draw(t, "expectedErr")
c := ptest.ErrCursor[byte](expectedErr) r := ptest.ErrReaderAt(expectedErr)
s := rapid.SliceOfN(rapid.Byte(), 0, 100).Draw(t, "s") s := rapid.SliceOfN(rapid.Byte(), 0, 100).Draw(t, "s")
result, err := gigaparsec.MatchSlice(s)(gigaparsec.MakeState(c)) result, err := gigaparsec.MatchSlice(s)(gigaparsec.MakeState(r))
test.ErrorIs(t, err, expectedErr) test.ErrorIs(t, err, expectedErr)
success, _, _ := result.Status() success, _, _ := result.Status()
test.False(t, success) test.False(t, success)
})) }))
t.Run("succeeds when contents match", rapid.MakeCheck(func(t *rapid.T) { t.Run("succeeds when contents match", rapid.MakeCheck(func(t *rapid.T) {
input := rapid.SliceOfN(rapid.Byte(), 1, -1).Draw(t, "input") input := rapid.SliceOfN(rapid.Byte(), 1, -1).Draw(t, "input")
sLen := rapid.IntRange(0, len(input)).Draw(t, "sLen") s := rapid.Map(rapid.IntRange(0, len(input)),
s := input[:sLen] func(n int) []byte { return input[:n] }).Draw(t, "s")
start := gigaparsec.MakeState(cursor.NewReaderAt(bytes.NewReader(input))) start := gigaparsec.MakeState(bytes.NewReader(input))
result, err := gigaparsec.MatchSlice(s)(start) result, err := gigaparsec.MatchSlice(s)(start)
must.NoError(t, err) must.NoError(t, err)
@ -108,7 +109,7 @@ func TestBind(t *testing.T) {
p := makeParser(pConsume) p := makeParser(pConsume)
q := func(struct{}) gigaparsec.Parser[byte, struct{}] { return makeParser(qConsume) } q := func(struct{}) gigaparsec.Parser[byte, struct{}] { return makeParser(qConsume) }
result, err := gigaparsec.Bind(p, q)(gigaparsec.MakeState(cursor.NewReaderAt(bytes.NewReader(nil)))) result, err := gigaparsec.Bind(p, q)(gigaparsec.MakeState(bytes.NewReader(nil)))
must.NoError(t, err) must.NoError(t, err)
must.EqOp(t, pConsume || qConsume, result.Consumed()) must.EqOp(t, pConsume || qConsume, result.Consumed())
})) }))
@ -126,8 +127,32 @@ func TestSatisfy(t *testing.T) {
Todo(t) Todo(t)
} }
func Try(t *testing.T) { func TestTry(t *testing.T) {
Todo(t) type R = ptest.ForcedResult
var cases = []struct{ P, TryP R }{
{P: R{Succeed: false, Consume: false}, TryP: R{Succeed: false, Consume: false}},
{P: R{Succeed: false, Consume: true}, TryP: R{Succeed: false, Consume: false}},
{P: R{Succeed: true, Consume: false}, TryP: R{Succeed: true, Consume: false}},
{P: R{Succeed: true, Consume: true}, TryP: R{Succeed: true, Consume: true}},
}
for _, c := range cases {
t.Run(fmt.Sprintf("%+v", c.P), func(t *testing.T) {
start := gigaparsec.MakeState(gigaparsec.SliceReaderAt[R]{c.P})
result, err := gigaparsec.Try(ptest.ForceResult)(start)
succeeded, _, _ := result.Status()
must.NoError(t, err)
test.EqOp(t, c.TryP.Succeed, succeeded)
test.EqOp(t, c.TryP.Consume, result.Consumed())
})
}
t.Run("fails on error", func(t *testing.T) {
expectedErr := errors.New("it broke")
p := gigaparsec.Try(gigaparsec.Match(byte(0)))
result, err := p(gigaparsec.MakeState(ptest.ErrReaderAt(expectedErr)))
succeeded, _, _ := result.Status()
test.ErrorIs(t, err, expectedErr)
test.False(t, succeeded)
})
} }
func TestLabel(t *testing.T) { func TestLabel(t *testing.T) {
@ -139,10 +164,57 @@ func TestEnd(t *testing.T) {
} }
func TestRepeat(t *testing.T) { func TestRepeat(t *testing.T) {
t.Run("fails when number of successes is less than minCount", Todo) const maxParses = 100
t.Run("succeeds when number of successes is greater than minCount", Todo) t.Run("succeeds iff number of successes minCount", rapid.MakeCheck(func(t *rapid.T) {
t.Run("consumes iff at least one application consumes", Todo) minCount := rapid.IntRange(0, maxParses).Draw(t, "minCount")
t.Run("fails on error", Todo) successes := rapid.IntRange(0, maxParses).Draw(t, "successes")
t.Run("position is unchanged on failure", Todo) shouldSucceed := successes >= minCount
t.Run("position follows last success on overall success", Todo)
input := append(ptest.SliceOfN(true, successes), false)
p := gigaparsec.Repeat(minCount, gigaparsec.Match(true))
result, err := p(gigaparsec.MakeState(gigaparsec.SliceReaderAt[bool](input)))
must.NoError(t, err)
success, _, next := result.Status()
test.EqOp(t, shouldSucceed, success)
if success {
test.EqOp(t, uint64(successes), next.Pos())
}
}))
t.Run("consumes iff at least one application consumes", rapid.MakeCheck(func(t *rapid.T) {
input := rapid.Map(rapid.SliceOfN(rapid.Just(ptest.ForcedResult{Succeed: true}), 0, 100),
func(ts []ptest.ForcedResult) []ptest.ForcedResult { return append(ts, ptest.ForcedResult{}) }).Draw(t, "input")
consumeAt := rapid.Ptr(rapid.IntRange(0, len(input)-1), true).Draw(t, "consumeAt")
if consumeAt != nil {
input[*consumeAt].Consume = true
}
shouldConsume := consumeAt != nil
result, err := gigaparsec.Repeat(0, ptest.ForceResult)(gigaparsec.MakeState(gigaparsec.SliceReaderAt[ptest.ForcedResult](input)))
must.NoError(t, err)
test.EqOp(t, shouldConsume, result.Consumed())
}))
t.Run("does not consume on empty input", func(t *testing.T) {
p := gigaparsec.Repeat(0, gigaparsec.Match(0))
result, err := p(gigaparsec.MakeState(gigaparsec.SliceReaderAt[int](nil)))
must.NoError(t, err)
must.False(t, result.Consumed())
})
t.Run("fails on error", func(t *testing.T) {
expectedErr := errors.New("it broke")
p := gigaparsec.Repeat(0, gigaparsec.Match(byte(0)))
result, err := p(gigaparsec.MakeState(ptest.ErrReaderAt(expectedErr)))
succeeded, _, _ := result.Status()
test.ErrorIs(t, err, expectedErr)
test.False(t, succeeded)
})
}
func TestBracket(t *testing.T) {
Todo(t)
}
func TestWhere(t *testing.T) {
Todo(t)
} }

378
seq.go
View File

@ -205,3 +205,381 @@ func Seq5[In, Out, T, T2, T3, T4, T5 any](
return Succeed(anyConsumed, final, next, MessageOK(s.Pos())), nil return Succeed(anyConsumed, final, next, MessageOK(s.Pos())), nil
} }
} }
// Seq6 returns a parser that applies p through p6 one after another, giving
// each parser the state returned by the one before it, and combines their six
// values with f.
//
// If any parser returns an error, that error is returned with a zero Result
// and the remaining parsers are not run. If any parser does not succeed, the
// result is a failure carrying that parser's message. In both the failure and
// the success case, the result is marked as consumed if any parser run so far
// consumed.
func Seq6[In, Out, T, T2, T3, T4, T5, T6 any](
p Parser[In, T],
p2 Parser[In, T2],
p3 Parser[In, T3],
p4 Parser[In, T4],
p5 Parser[In, T5],
p6 Parser[In, T6],
f func(T, T2, T3, T4, T5, T6) Out,
) Parser[In, Out] {
return func(s State[In]) (Result[In, Out], error) {
var anyConsumed bool
var next = s
r, err := p(next)
if err != nil {
return Result[In, Out]{}, err
}
anyConsumed = anyConsumed || r.Consumed()
success, val, next := r.Status()
if !success {
return Fail[In, Out](anyConsumed, r.Message()), nil
}
r2, err := p2(next)
if err != nil {
return Result[In, Out]{}, err
}
anyConsumed = anyConsumed || r2.Consumed()
success, val2, next := r2.Status()
if !success {
return Fail[In, Out](anyConsumed, r2.Message()), nil
}
r3, err := p3(next)
if err != nil {
return Result[In, Out]{}, err
}
anyConsumed = anyConsumed || r3.Consumed()
success, val3, next := r3.Status()
if !success {
return Fail[In, Out](anyConsumed, r3.Message()), nil
}
r4, err := p4(next)
if err != nil {
return Result[In, Out]{}, err
}
anyConsumed = anyConsumed || r4.Consumed()
success, val4, next := r4.Status()
if !success {
return Fail[In, Out](anyConsumed, r4.Message()), nil
}
r5, err := p5(next)
if err != nil {
return Result[In, Out]{}, err
}
anyConsumed = anyConsumed || r5.Consumed()
success, val5, next := r5.Status()
if !success {
return Fail[In, Out](anyConsumed, r5.Message()), nil
}
r6, err := p6(next)
if err != nil {
return Result[In, Out]{}, err
}
anyConsumed = anyConsumed || r6.Consumed()
success, val6, next := r6.Status()
if !success {
return Fail[In, Out](anyConsumed, r6.Message()), nil
}
final := f(val, val2, val3, val4, val5, val6)
// The success message is positioned at the state the sequence started from.
return Succeed(anyConsumed, final, next, MessageOK(s.Pos())), nil
}
}
// Seq7 returns a parser that applies p through p7 one after another, giving
// each parser the state returned by the one before it, and combines their
// seven values with f.
//
// If any parser returns an error, that error is returned with a zero Result
// and the remaining parsers are not run. If any parser does not succeed, the
// result is a failure carrying that parser's message. In both the failure and
// the success case, the result is marked as consumed if any parser run so far
// consumed.
func Seq7[In, Out, T, T2, T3, T4, T5, T6, T7 any](
p Parser[In, T],
p2 Parser[In, T2],
p3 Parser[In, T3],
p4 Parser[In, T4],
p5 Parser[In, T5],
p6 Parser[In, T6],
p7 Parser[In, T7],
f func(T, T2, T3, T4, T5, T6, T7) Out,
) Parser[In, Out] {
return func(s State[In]) (Result[In, Out], error) {
var anyConsumed bool
var next = s
r, err := p(next)
if err != nil {
return Result[In, Out]{}, err
}
anyConsumed = anyConsumed || r.Consumed()
success, val, next := r.Status()
if !success {
return Fail[In, Out](anyConsumed, r.Message()), nil
}
r2, err := p2(next)
if err != nil {
return Result[In, Out]{}, err
}
anyConsumed = anyConsumed || r2.Consumed()
success, val2, next := r2.Status()
if !success {
return Fail[In, Out](anyConsumed, r2.Message()), nil
}
r3, err := p3(next)
if err != nil {
return Result[In, Out]{}, err
}
anyConsumed = anyConsumed || r3.Consumed()
success, val3, next := r3.Status()
if !success {
return Fail[In, Out](anyConsumed, r3.Message()), nil
}
r4, err := p4(next)
if err != nil {
return Result[In, Out]{}, err
}
anyConsumed = anyConsumed || r4.Consumed()
success, val4, next := r4.Status()
if !success {
return Fail[In, Out](anyConsumed, r4.Message()), nil
}
r5, err := p5(next)
if err != nil {
return Result[In, Out]{}, err
}
anyConsumed = anyConsumed || r5.Consumed()
success, val5, next := r5.Status()
if !success {
return Fail[In, Out](anyConsumed, r5.Message()), nil
}
r6, err := p6(next)
if err != nil {
return Result[In, Out]{}, err
}
anyConsumed = anyConsumed || r6.Consumed()
success, val6, next := r6.Status()
if !success {
return Fail[In, Out](anyConsumed, r6.Message()), nil
}
r7, err := p7(next)
if err != nil {
return Result[In, Out]{}, err
}
anyConsumed = anyConsumed || r7.Consumed()
success, val7, next := r7.Status()
if !success {
return Fail[In, Out](anyConsumed, r7.Message()), nil
}
final := f(val, val2, val3, val4, val5, val6, val7)
// The success message is positioned at the state the sequence started from.
return Succeed(anyConsumed, final, next, MessageOK(s.Pos())), nil
}
}
// Seq8 returns a parser that applies p through p8 one after another, giving
// each parser the state returned by the one before it, and combines their
// eight values with f.
//
// If any parser returns an error, that error is returned with a zero Result
// and the remaining parsers are not run. If any parser does not succeed, the
// result is a failure carrying that parser's message. In both the failure and
// the success case, the result is marked as consumed if any parser run so far
// consumed.
func Seq8[In, Out, T, T2, T3, T4, T5, T6, T7, T8 any](
p Parser[In, T],
p2 Parser[In, T2],
p3 Parser[In, T3],
p4 Parser[In, T4],
p5 Parser[In, T5],
p6 Parser[In, T6],
p7 Parser[In, T7],
p8 Parser[In, T8],
f func(T, T2, T3, T4, T5, T6, T7, T8) Out,
) Parser[In, Out] {
return func(s State[In]) (Result[In, Out], error) {
var anyConsumed bool
var next = s
r, err := p(next)
if err != nil {
return Result[In, Out]{}, err
}
anyConsumed = anyConsumed || r.Consumed()
success, val, next := r.Status()
if !success {
return Fail[In, Out](anyConsumed, r.Message()), nil
}
r2, err := p2(next)
if err != nil {
return Result[In, Out]{}, err
}
anyConsumed = anyConsumed || r2.Consumed()
success, val2, next := r2.Status()
if !success {
return Fail[In, Out](anyConsumed, r2.Message()), nil
}
r3, err := p3(next)
if err != nil {
return Result[In, Out]{}, err
}
anyConsumed = anyConsumed || r3.Consumed()
success, val3, next := r3.Status()
if !success {
return Fail[In, Out](anyConsumed, r3.Message()), nil
}
r4, err := p4(next)
if err != nil {
return Result[In, Out]{}, err
}
anyConsumed = anyConsumed || r4.Consumed()
success, val4, next := r4.Status()
if !success {
return Fail[In, Out](anyConsumed, r4.Message()), nil
}
r5, err := p5(next)
if err != nil {
return Result[In, Out]{}, err
}
anyConsumed = anyConsumed || r5.Consumed()
success, val5, next := r5.Status()
if !success {
return Fail[In, Out](anyConsumed, r5.Message()), nil
}
r6, err := p6(next)
if err != nil {
return Result[In, Out]{}, err
}
anyConsumed = anyConsumed || r6.Consumed()
success, val6, next := r6.Status()
if !success {
return Fail[In, Out](anyConsumed, r6.Message()), nil
}
r7, err := p7(next)
if err != nil {
return Result[In, Out]{}, err
}
anyConsumed = anyConsumed || r7.Consumed()
success, val7, next := r7.Status()
if !success {
return Fail[In, Out](anyConsumed, r7.Message()), nil
}
r8, err := p8(next)
if err != nil {
return Result[In, Out]{}, err
}
anyConsumed = anyConsumed || r8.Consumed()
success, val8, next := r8.Status()
if !success {
return Fail[In, Out](anyConsumed, r8.Message()), nil
}
final := f(val, val2, val3, val4, val5, val6, val7, val8)
// The success message is positioned at the state the sequence started from.
return Succeed(anyConsumed, final, next, MessageOK(s.Pos())), nil
}
}
// Seq9 returns a parser that runs p, p2, ..., p9 in order, starting each one
// from the state left by the previous one, and combines their nine values
// with f.
//
// If any parser returns an error, the sequence stops and that error is
// returned with a zero Result. If any parser does not succeed, the sequence
// stops and fails with that parser's message. The consumed flag handed to
// Fail or Succeed is true once any parser run so far has reported Consumed.
// On success the result carries the state after p9 and MessageOK at the
// position where the sequence started.
func Seq9[In, Out, T, T2, T3, T4, T5, T6, T7, T8, T9 any](
	p Parser[In, T],
	p2 Parser[In, T2],
	p3 Parser[In, T3],
	p4 Parser[In, T4],
	p5 Parser[In, T5],
	p6 Parser[In, T6],
	p7 Parser[In, T7],
	p8 Parser[In, T8],
	p9 Parser[In, T9],
	f func(T, T2, T3, T4, T5, T6, T7, T8, T9) Out,
) Parser[In, Out] {
	return func(s State[In]) (Result[In, Out], error) {
		rest := s

		// Step 1.
		res1, err := p(rest)
		if err != nil {
			return Result[In, Out]{}, err
		}
		consumed := res1.Consumed()
		ok, v1, rest := res1.Status()
		if !ok {
			return Fail[In, Out](consumed, res1.Message()), nil
		}

		// Step 2.
		res2, err := p2(rest)
		if err != nil {
			return Result[In, Out]{}, err
		}
		if !consumed && res2.Consumed() {
			consumed = true
		}
		ok, v2, rest := res2.Status()
		if !ok {
			return Fail[In, Out](consumed, res2.Message()), nil
		}

		// Step 3.
		res3, err := p3(rest)
		if err != nil {
			return Result[In, Out]{}, err
		}
		if !consumed && res3.Consumed() {
			consumed = true
		}
		ok, v3, rest := res3.Status()
		if !ok {
			return Fail[In, Out](consumed, res3.Message()), nil
		}

		// Step 4.
		res4, err := p4(rest)
		if err != nil {
			return Result[In, Out]{}, err
		}
		if !consumed && res4.Consumed() {
			consumed = true
		}
		ok, v4, rest := res4.Status()
		if !ok {
			return Fail[In, Out](consumed, res4.Message()), nil
		}

		// Step 5.
		res5, err := p5(rest)
		if err != nil {
			return Result[In, Out]{}, err
		}
		if !consumed && res5.Consumed() {
			consumed = true
		}
		ok, v5, rest := res5.Status()
		if !ok {
			return Fail[In, Out](consumed, res5.Message()), nil
		}

		// Step 6.
		res6, err := p6(rest)
		if err != nil {
			return Result[In, Out]{}, err
		}
		if !consumed && res6.Consumed() {
			consumed = true
		}
		ok, v6, rest := res6.Status()
		if !ok {
			return Fail[In, Out](consumed, res6.Message()), nil
		}

		// Step 7.
		res7, err := p7(rest)
		if err != nil {
			return Result[In, Out]{}, err
		}
		if !consumed && res7.Consumed() {
			consumed = true
		}
		ok, v7, rest := res7.Status()
		if !ok {
			return Fail[In, Out](consumed, res7.Message()), nil
		}

		// Step 8.
		res8, err := p8(rest)
		if err != nil {
			return Result[In, Out]{}, err
		}
		if !consumed && res8.Consumed() {
			consumed = true
		}
		ok, v8, rest := res8.Status()
		if !ok {
			return Fail[In, Out](consumed, res8.Message()), nil
		}

		// Step 9.
		res9, err := p9(rest)
		if err != nil {
			return Result[In, Out]{}, err
		}
		if !consumed && res9.Consumed() {
			consumed = true
		}
		ok, v9, rest := res9.Status()
		if !ok {
			return Fail[In, Out](consumed, res9.Message()), nil
		}

		// Every step succeeded: combine the values and report the final state.
		combined := f(v1, v2, v3, v4, v5, v6, v7, v8, v9)
		return Succeed(consumed, combined, rest, MessageOK(s.Pos())), nil
	}
}

204
state_test.go Normal file
View File

@ -0,0 +1,204 @@
// SPDX-License-Identifier: Unlicense
package gigaparsec_test
import (
"bytes"
"cmp"
"io"
"math"
"testing"
"git.codemonkeysoftware.net/b/gigaparsec"
ptest "git.codemonkeysoftware.net/b/gigaparsec/test"
"git.codemonkeysoftware.net/b/gigaparsec/test/generator"
pgen "git.codemonkeysoftware.net/b/gigaparsec/test/generator"
"github.com/shoenig/test"
"github.com/shoenig/test/must"
"pgregory.net/rapid"
)
// customEOFReaderAt wraps a bytes.Reader and lets a test choose which of the
// two results permitted by io.ReaderAt is reported when a read ends exactly
// at the end of the data: io.EOF or nil.
type customEOFReaderAt struct {
	r             *bytes.Reader
	eofAtExactFit bool
}

// newCustomEOFReaderAt returns a customEOFReaderAt over b. When eofAtExactFit
// is true, a read that ends exactly at the end of b reports io.EOF; otherwise
// it reports a nil error.
func newCustomEOFReaderAt(b []byte, eofAtExactFit bool) customEOFReaderAt {
	return customEOFReaderAt{
		r:             bytes.NewReader(b),
		eofAtExactFit: eofAtExactFit,
	}
}

// ReadAt reads from the wrapped bytes.Reader and returns its result
// unchanged, except when the read ends exactly at the end of the data. In
// that case the error is io.EOF or nil according to eofAtExactFit.
func (r customEOFReaderAt) ReadAt(p []byte, off int64) (n int, err error) {
	n, err = r.r.ReadAt(p, off)
	// Only the exact-fit case may be overridden. A read that overruns the data
	// returns n < len(p) and must keep its non-nil error to satisfy the
	// io.ReaderAt contract, and a negative offset must keep its own error.
	if off >= 0 && int64(len(p))+off == r.r.Size() {
		if r.eofAtExactFit {
			err = io.EOF
		} else {
			err = nil
		}
	}
	return n, err
}
// TestState exercises the Read, At, and Pos methods of gigaparsec.State with
// property-based subtests driven by rapid.
func TestState(t *testing.T) {
// Reading twice from the same state value must produce the same data.
t.Run("state reads the same position every time", rapid.MakeCheck(func(t *rapid.T) {
data := rapid.SliceOfN(rapid.Byte(), 1, 100).Draw(t, "data")
dst := pgen.SliceOfNZero[byte](0, len(data)-1).Draw(t, "dst")
expected := data[:len(dst)]
st := gigaparsec.MakeState(bytes.NewReader(data))
_, next, err := st.Read(dst)
must.NoError(t, err)
must.SliceEqOp(t, expected, dst)
// The result of this read from the advanced state is ignored; only the
// re-read from st below is checked.
next.Read(dst)
_, _, err = st.Read(dst)
must.NoError(t, err)
must.SliceEqOp(t, expected, dst)
}))
// endReadAt is an index inside src, and pos and dst are bounded by it
// (presumably SliceOfNZero draws a length in [min, max] — confirm in the
// generator package), so the read should stop before the end of src.
t.Run("Read ends before end of source", rapid.MakeCheck(func(t *rapid.T) {
src := rapid.SliceOfN(rapid.Byte(), 1, 100).Draw(t, "src")
endReadAt := rapid.IntRange(0, len(src)-1).Draw(t, "endReadAt")
pos := rapid.Uint64Range(0, uint64(endReadAt)).Draw(t, "pos")
dst := pgen.SliceOfNZero[byte](0, endReadAt-int(pos)).Draw(t, "dst")
st := gigaparsec.MakeState(bytes.NewReader(src)).At(pos)
// NOTE(review): At(pos) is applied here a second time; st is already at pos.
n, next, err := st.At(pos).Read(dst)
test.EqOp(t, uint64(len(dst)), n)
ptest.StateIsAt(t, next, pos+n)
test.NoError(t, err)
test.SliceEqOp(t, src[pos:pos+n], dst)
}))
// pos is chosen so that pos+len(dst) equals len(src). readerReturnsEOF is
// passed to newCustomEOFReaderAt as eofAtExactFit; Read is expected to
// report no error in either case.
t.Run("Non-empty Read ends at end of source", rapid.MakeCheck(func(t *rapid.T) {
readerReturnsEOF := rapid.Bool().Draw(t, "readerReturnsEOF")
src := rapid.SliceOfN(rapid.Byte(), 1, 100).Draw(t, "src")
dst := pgen.SliceOfNZero[byte](1, len(src)).Draw(t, "dst")
pos := uint64(len(src) - len(dst))
st := gigaparsec.MakeState(newCustomEOFReaderAt(src, readerReturnsEOF))
n, next, err := st.At(pos).Read(dst)
test.EqOp(t, uint64(len(dst)), n)
ptest.StateIsAt(t, next, pos+n)
test.NoError(t, err)
test.SliceEqOp(t, src[pos:pos+n], dst)
}))
// minDstLen is one more than the number of bytes remaining after pos, so
// the read is expected to stop at the end of src with io.EOF.
t.Run("Read overruns source", rapid.MakeCheck(func(t *rapid.T) {
src := rapid.SliceOfN(rapid.Byte(), 1, 100).Draw(t, "src")
pos := rapid.Uint64Range(0, uint64(len(src))-1).Draw(t, "pos")
minDstLen := len(src) - int(pos) + 1
dst := pgen.SliceOfNZero[byte](minDstLen, minDstLen+10).Draw(t, "dst")
st := gigaparsec.MakeState(bytes.NewReader(src)).At(pos)
n, next, err := st.Read(dst)
test.EqOp(t, uint64(len(src)), n+pos)
ptest.StateIsAt(t, next, pos+n)
test.ErrorIs(t, err, io.EOF)
test.SliceEqOp(t, src[pos:pos+n], dst[:n])
}))
// pos is drawn with len(src) as its minimum, so nothing can be read: the
// test expects zero bytes, an unchanged position, and io.EOF.
t.Run("Read starts after end of source", rapid.MakeCheck(func(t *rapid.T) {
src := rapid.SliceOfN(rapid.Byte(), 0, 100).Draw(t, "src")
dst := pgen.SliceOfNZero[byte](0, 100).Draw(t, "dst")
pos := rapid.Uint64Min(uint64(len(src))).Draw(t, "pos")
st := gigaparsec.MakeState(bytes.NewReader(src)).At(pos)
n, next, err := st.Read(dst)
test.EqOp(t, 0, n)
ptest.StateIsAt(t, next, pos)
test.ErrorIs(t, err, io.EOF)
}))
// After skipping skip bytes, the state returned by Read must continue
// reading from data[skip].
t.Run("next state reads next input", rapid.MakeCheck(func(t *rapid.T) {
const maxLen = 100
data := rapid.SliceOfN(rapid.Byte(), 1, maxLen).Draw(t, "data")
skip := rapid.IntRange(0, len(data)-1).Draw(t, "skip")
st := gigaparsec.MakeState(bytes.NewReader(data))
_, next, err := st.Read(make([]byte, skip))
must.NoError(t, err)
must.EqOp(t, skip, int(next.Pos()))
dst := make([]byte, maxLen)
// The error is ignored here: dst can be longer than the remaining input.
n, _, _ := next.Read(dst)
must.SliceEqOp(t, data[skip:skip+int(n)], dst[:n])
}))
// Reads a single byte from a state moved to pos and compares it with data[pos].
t.Run("At sets state position", rapid.MakeCheck(func(t *rapid.T) {
data := rapid.SliceOfN(rapid.Byte(), 1, 100).Draw(t, "data")
pos := rapid.Uint64Range(0, uint64(len(data)-1)).Draw(t, "pos")
st := gigaparsec.MakeState(bytes.NewReader(data)).At(pos)
dst := make([]byte, 1)
n, _, err := st.Read(dst)
test.EqOp(t, 1, n)
test.NoError(t, err)
test.EqOp(t, data[pos], dst[0])
}))
// data is left nil: only At and Pos are exercised, nothing is read.
t.Run("Pos returns correct position after At", rapid.MakeCheck(func(t *rapid.T) {
var data []byte
pos := rapid.Uint64().Draw(t, "pos")
st := gigaparsec.MakeState(bytes.NewReader(data)).At(pos)
test.EqOp(t, pos, st.Pos())
}))
// Reading skip bytes from the start must leave the next state at position skip.
t.Run("Pos returns correct position after Read", rapid.MakeCheck(func(t *rapid.T) {
const maxLen = 100
data := rapid.SliceOfN(rapid.Byte(), 1, maxLen).Draw(t, "data")
skip := rapid.Uint64Range(0, uint64(len(data)-1)).Draw(t, "skip")
st := gigaparsec.MakeState(bytes.NewReader(data))
_, next, err := st.Read(make([]byte, skip))
must.NoError(t, err)
test.EqOp(t, skip, next.Pos())
}))
// The state is built over ptest.ErrReaderAt(expectedErr). Read is expected
// to report that error, leave the position unchanged, and return zero bytes.
t.Run("Read returns an error if the ReaderAt fails", rapid.MakeCheck(func(t *rapid.T) {
expectedErr := pgen.Error().Draw(t, "expectedErr")
startPos := rapid.Uint64Max(math.MaxInt64).Draw(t, "startPos")
dst := pgen.SliceOfNZero[byte](0, 100).Draw(t, "dst")
st := gigaparsec.MakeState(ptest.ErrReaderAt(expectedErr)).At(startPos)
n, next, err := st.Read(dst)
test.ErrorIs(t, err, expectedErr)
test.EqOp(t, startPos, next.Pos())
test.Zero(t, n)
}))
}
// TestSliceReaderAt checks gigaparsec.SliceReaderAt.ReadAt with
// property-based subtests, one for non-negative offsets and one for negative
// offsets.
func TestSliceReaderAt(t *testing.T) {
const maxLen = 100
t.Run("offset ≥ 0", rapid.MakeCheck(func(t *rapid.T) {
src := rapid.SliceOfN(rapid.Byte(), 0, maxLen).Draw(t, "src")
dst := generator.SliceOfNZero[byte](0, maxLen).Draw(t, "dst")
// The offset may lie up to 10 elements past the end of src.
offset := rapid.Int64Range(0, int64(len(src))+10).Draw(t, "offset")
n, err := gigaparsec.SliceReaderAt[byte](src).ReadAt(dst, offset)
// Compare the source length with the position just past the requested read.
switch cmp.Compare(len(src), int(offset)+len(dst)) {
case -1:
// Read overruns src.
test.ErrorIs(t, err, io.EOF)
test.EqOp(t, max(0, len(src)-int(offset)), n)
case 0:
// Read exactly reaches end of source.
// io.ReaderAt spec allows error to be either io.EOF or nil.
test.EqOp(t, len(dst), n)
case 1:
// Read ends before end of source.
test.NoError(t, err)
test.EqOp(t, len(dst), n)
}
// The copied data is compared only when the offset lies inside src.
if offset < int64(len(src)) {
test.SliceEqOp(t, src[offset:offset+int64(n)], dst[:n])
}
}))
// A negative offset is expected to produce an error and read nothing.
t.Run("offset < 0", rapid.MakeCheck(func(t *rapid.T) {
src := rapid.SliceOfN(rapid.Byte(), 0, maxLen).Draw(t, "src")
dst := generator.SliceOfNZero[byte](0, maxLen).Draw(t, "dst")
offset := rapid.Int64Max(-1).Draw(t, "offset")
n, err := gigaparsec.SliceReaderAt[byte](src).ReadAt(dst, offset)
test.Error(t, err)
test.EqOp(t, 0, n)
}))
}

56
test/helpers.go Normal file
View File

@ -0,0 +1,56 @@
// SPDX-License-Identifier: Unlicense
// Package test contains helpers for testing parsers.
package test
import (
"errors"
"io"
"git.codemonkeysoftware.net/b/gigaparsec"
"github.com/shoenig/test"
)
// errReaderAt is an io.ReaderAt whose reads always report the same error.
type errReaderAt struct {
	err error
}

// ReadAt ignores its arguments and reports zero bytes read together with the
// stored error.
func (e errReaderAt) ReadAt(_ []byte, _ int64) (int, error) {
	return 0, e.err
}

// ErrReaderAt returns an [io.ReaderAt] whose ReadAt method never reads
// anything and always returns err.
func ErrReaderAt(err error) io.ReaderAt {
	var failing errReaderAt
	failing.err = err
	return failing
}
// StateIsAt asserts that the parser state s is at position pos and reports a
// test failure through t when it is not.
func StateIsAt[Input any](t test.T, s gigaparsec.State[Input], pos uint64) {
	// Mark this function as a helper so a failure is attributed to the calling
	// test rather than to this file.
	t.Helper()
	test.EqOp(t, pos, s.Pos(), test.Sprintf("expected parser state to be at position %d, got %d", pos, s.Pos()))
}
// SliceOfN returns a new slice of length n in which every element is value.
func SliceOfN[T any](value T, n int) []T {
	filled := make([]T, n)
	for idx := 0; idx < n; idx++ {
		filled[idx] = value
	}
	return filled
}
// ForcedResult is the input token type read by ForceResult. Each token
// dictates the outcome ForceResult produces when it reads that token.
type ForcedResult struct {
	// Succeed selects whether ForceResult succeeds or fails.
	Succeed bool
	// Consume is passed on as the consumed flag of the produced result.
	Consume bool
}
// ForceResult is a parser over ForcedResult tokens. It reads one token and
// produces the outcome that token describes.
//
// At end of input it fails without consuming, with MessageEnd at the current
// position. Any other read error is returned with a zero Result. A token with
// Succeed set yields a success that continues from the state after the token;
// otherwise the parser fails. In both cases the token's Consume field is
// passed as the consumed flag, and the message is positioned at the state the
// parser started from.
func ForceResult(state gigaparsec.State[ForcedResult]) (gigaparsec.Result[ForcedResult, struct{}], error) {
	tokens := make([]ForcedResult, 1)
	_, after, readErr := state.Read(tokens)
	switch {
	case errors.Is(readErr, io.EOF):
		// Running out of input is a parse failure, not an error.
		return gigaparsec.Fail[ForcedResult, struct{}](false, gigaparsec.MessageEnd(state.Pos())), nil
	case readErr != nil:
		return gigaparsec.Result[ForcedResult, struct{}]{}, readErr
	}

	forced := tokens[0]
	if !forced.Succeed {
		return gigaparsec.Fail[ForcedResult, struct{}](forced.Consume, gigaparsec.MakeMessage(state.Pos(), "Succeed=false", "Succeed=true")), nil
	}
	return gigaparsec.Succeed(forced.Consume, struct{}{}, after, gigaparsec.MessageOK(state.Pos())), nil
}

View File

@ -1,52 +0,0 @@
// SPDX-License-Identifier: Unlicense
// Package test contains helpers for testing parsers.
package test
import (
"io"
"git.codemonkeysoftware.net/b/gigaparsec"
"git.codemonkeysoftware.net/b/gigaparsec/cursor"
"github.com/shoenig/test"
)
// errReaderAt is an io.ReaderAt whose reads always report the same error.
type errReaderAt struct {
	err error
}

// ReadAt ignores its arguments and reports zero bytes read together with the
// stored error.
func (e errReaderAt) ReadAt(_ []byte, _ int64) (int, error) {
	return 0, e.err
}

// ErrReaderAt returns an [io.ReaderAt] whose ReadAt method never reads
// anything and always returns err.
func ErrReaderAt(err error) io.ReaderAt {
	var failing errReaderAt
	failing.err = err
	return failing
}
// errCursor is a cursor whose Read always fails with a fixed error. It still
// tracks the position it has been moved to with At.
type errCursor[T any] struct {
	err error
	pos uint64
}

// Read reads nothing: it reports zero elements, the cursor itself as the next
// cursor, and the stored error.
func (ec errCursor[T]) Read(_ []T) (uint64, cursor.Cursor[T], error) {
	return 0, ec, ec.err
}

// At returns a copy of the cursor positioned at pos, carrying the same error.
func (ec errCursor[T]) At(pos uint64) cursor.Cursor[T] {
	return errCursor[T]{err: ec.err, pos: pos}
}

// Pos returns the cursor's current position.
func (ec errCursor[T]) Pos() uint64 {
	return ec.pos
}

// ErrCursor returns a [cursor.Cursor] with a Read method that always returns
// err. The returned cursor starts at position zero.
func ErrCursor[T any](err error) cursor.Cursor[T] {
	var failing errCursor[T]
	failing.err = err
	return failing
}
// StateIsAt asserts that the parser state s is at position pos and reports a
// test failure through t when it is not.
func StateIsAt[Input any](t test.T, s gigaparsec.State[Input], pos uint64) {
	// Mark this function as a helper so a failure is attributed to the calling
	// test rather than to this file.
	t.Helper()
	test.EqOp(t, pos, s.Pos(), test.Sprintf("expected parser state to be at position %d, got %d", pos, s.Pos()))
}