Compare commits

..

12 Commits

11 changed files with 400 additions and 103 deletions

View File

@ -1,8 +1,8 @@
# Gigaparsec
[![Go Reference](https://pkg.go.dev/badge/git.codemonkeysoftware.net/b/gigaparsec.svg)](https://pkg.go.dev/git.codemonkeysoftware.net/b/gigaparsec)
![Total garbage.](https://img.shields.io/badge/Total-garbage-red)
by Brandon Dyck <[brandon@dyck.us](mailto:brandon@dyck.us)>
Monadic parser combinators in Go
**I don't recommend using this yet. It is very unfinished and it will break.**

View File

@ -4,3 +4,6 @@ Rename Seq2 to Seq
Document Seq
Should MakeState be private now that there's Run?
What's Megaparsec got that we ain't got?
Add and benchmark naïve Seq
chainl
whitespace handling

16
bytes/bytes.go Normal file
View File

@ -0,0 +1,16 @@
package bytes
import (
"git.codemonkeysoftware.net/b/gigaparsec"
)
// Token builds a wrapper that appends whitespace skipping to a parser.
//
// The output of whitespace is discarded. The returned function takes a parser
// p and combines it, via gigaparsec.Seq2, with zero-or-more repetitions of the
// whitespace parser, yielding only the value produced by p.
func Token[Out, WSOut any](whitespace gigaparsec.Parser[byte, WSOut]) func(p gigaparsec.Parser[byte, Out]) gigaparsec.Parser[byte, Out] {
	// Replace whatever the whitespace parser yields with an empty struct.
	discarded := gigaparsec.Map(whitespace, func(WSOut) struct{} { return struct{}{} })
	skipWS := gigaparsec.Parser[byte, struct{}](func(st gigaparsec.State[byte]) (gigaparsec.Result[byte, struct{}], error) {
		res, err := discarded(st)
		// NOTE(review): Consume(false) presumably flags the result as not
		// having consumed input — confirm against Result.Consume.
		return res.Consume(false), err
	})
	keepValue := func(val Out, _ []struct{}) Out { return val }
	return func(p gigaparsec.Parser[byte, Out]) gigaparsec.Parser[byte, Out] {
		trailing := gigaparsec.Repeat(0, skipWS)
		return gigaparsec.Seq2(p, trailing, keepValue)
	}
}

View File

@ -69,6 +69,9 @@ func Regexp(pattern string) gigaparsec.Parser[byte, string] {
return gigaparsec.Result[byte, string]{}, fmt.Errorf("Regexp: reader error: %w", err)
}
if idx == nil {
if err == io.EOF {
return gigaparsec.Fail[byte, string](false, gigaparsec.MessageEnd(input.Pos())), nil
}
got := make([]byte, r.Count())
_, _, err = input.Read(got)
if err != nil {
@ -80,7 +83,7 @@ func Regexp(pattern string) gigaparsec.Parser[byte, string] {
// when searching a RuneReader.
dst := make([]byte, idx[1]-idx[0])
n, _, err := input.Read(dst)
if err != nil {
if err != nil && (!errors.Is(err, io.EOF) || n < uint64(len(dst))) {
return gigaparsec.Result[byte, string]{}, fmt.Errorf("Regexp: unexpected error: %w", err)
}
next := input.At(input.Pos() + n)

View File

@ -61,6 +61,21 @@ func TestRegexp(t *testing.T) {
must.NoError(t, err)
test.StrContains(t, result.Message().Got(), "hella")
})
t.Run("succeeds on empty matches", func(t *testing.T) {
p := pbytes.Regexp(".*")
result, err := p(gigaparsec.MakeState(strings.NewReader("")))
succeeded, value, _ := result.Status()
must.NoError(t, err)
must.True(t, succeeded)
must.EqOp(t, "", value)
})
t.Run("fails without an error at EOF", func(t *testing.T) {
p := pbytes.Regexp("a")
result, err := p(gigaparsec.MakeState(strings.NewReader("")))
succeeded, _, _ := result.Status()
must.NoError(t, err)
must.False(t, succeeded)
})
}
func TestRuneReader(t *testing.T) {

View File

@ -194,6 +194,17 @@ func (p Parser[In, Out]) Label(label string) Parser[In, Out] {
}
}
// Where returns a parser that runs p and then checks its value with pred.
//
// If p succeeds but pred reports false for the value, the result is marked as
// unsuccessful and its message's "got" text is set to "failed Where predicate".
// pred is not called when p does not succeed. The error returned by p is
// passed through unchanged.
func (p Parser[In, Out]) Where(pred func(Out) bool) Parser[In, Out] {
	return func(s State[In]) (Result[In, Out], error) {
		res, err := p(s)
		if !res.success || pred(res.value) {
			return res, err
		}
		res.success = false
		res.message.got = "failed Where predicate"
		return res, err
	}
}
// ParseError is a Message given its own type so that it can carry an Error
// method and be used as an error value.
type ParseError Message
func (pe ParseError) Error() string {
@ -403,3 +414,16 @@ func Repeat[In, Out any](minCount int, p Parser[In, Out]) Parser[In, []Out] {
}
}
}
// Lazy wraps a parser-producing function so that the parser is only built
// when the returned parser is actually run; p is invoked on every run.
// Use it to break the cycle of function calls that would otherwise occur
// while defining a recursive parser.
func Lazy[In, Out any](p func() Parser[In, Out]) Parser[In, Out] {
	return func(s State[In]) (Result[In, Out], error) {
		parser := p()
		return parser(s)
	}
}
// Bracket combines left, p and right with Seq3 and keeps only the value
// produced by p; the values from left and right are discarded.
func Bracket[In, Out, LOut, ROut any](left Parser[In, LOut], p Parser[In, Out], right Parser[In, ROut]) Parser[In, Out] {
	keepMiddle := func(_ LOut, val Out, _ ROut) Out {
		return val
	}
	return Seq3(left, p, right, keepMiddle)
}

View File

@ -1,88 +0,0 @@
// SPDX-License-Identifier: Unlicense
package gigaparsec
// Bind2Naïve chains p with f and then f2 using two nested calls to Bind.
func Bind2Naïve[In, Out, T, T2 any](
	p Parser[In, T],
	f func(T) Parser[In, T2],
	f2 func(T2) Parser[In, Out],
) Parser[In, Out] {
	after1 := Bind(p, f)
	return Bind(after1, f2)
}
// Bind3Naïve chains p with f, f2 and f3 using three nested calls to Bind.
//
// Only the type parameters that appear in the signature are declared, so all
// of them can be inferred from the arguments at the call site.
func Bind3Naïve[In, Out, T, T2, T3 any](
	p Parser[In, T],
	f func(T) Parser[In, T2],
	f2 func(T2) Parser[In, T3],
	f3 func(T3) Parser[In, Out],
) Parser[In, Out] {
	return Bind(Bind(Bind(p, f), f2), f3)
}
// Bind4Naïve chains p with f through f4 using four nested calls to Bind.
func Bind4Naïve[In, Out, T, T2, T3, T4 any](
	p Parser[In, T],
	f func(T) Parser[In, T2],
	f2 func(T2) Parser[In, T3],
	f3 func(T3) Parser[In, T4],
	f4 func(T4) Parser[In, Out],
) Parser[In, Out] {
	after1 := Bind(p, f)
	after2 := Bind(after1, f2)
	after3 := Bind(after2, f3)
	return Bind(after3, f4)
}
// Bind5Naïve chains p with f through f5 using five nested calls to Bind.
func Bind5Naïve[In, Out, T, T2, T3, T4, T5 any](
	p Parser[In, T],
	f func(T) Parser[In, T2],
	f2 func(T2) Parser[In, T3],
	f3 func(T3) Parser[In, T4],
	f4 func(T4) Parser[In, T5],
	f5 func(T5) Parser[In, Out],
) Parser[In, Out] {
	after1 := Bind(p, f)
	after2 := Bind(after1, f2)
	after3 := Bind(after2, f3)
	after4 := Bind(after3, f4)
	return Bind(after4, f5)
}
// Bind6Naïve chains p with f through f6 using six nested calls to Bind.
func Bind6Naïve[In, Out, T, T2, T3, T4, T5, T6 any](
	p Parser[In, T],
	f func(T) Parser[In, T2],
	f2 func(T2) Parser[In, T3],
	f3 func(T3) Parser[In, T4],
	f4 func(T4) Parser[In, T5],
	f5 func(T5) Parser[In, T6],
	f6 func(T6) Parser[In, Out],
) Parser[In, Out] {
	after1 := Bind(p, f)
	after2 := Bind(after1, f2)
	after3 := Bind(after2, f3)
	after4 := Bind(after3, f4)
	after5 := Bind(after4, f5)
	return Bind(after5, f6)
}
// Bind7Naïve chains p with f through f7 using seven nested calls to Bind.
func Bind7Naïve[In, Out, T, T2, T3, T4, T5, T6, T7 any](
	p Parser[In, T],
	f func(T) Parser[In, T2],
	f2 func(T2) Parser[In, T3],
	f3 func(T3) Parser[In, T4],
	f4 func(T4) Parser[In, T5],
	f5 func(T5) Parser[In, T6],
	f6 func(T6) Parser[In, T7],
	f7 func(T7) Parser[In, Out],
) Parser[In, Out] {
	after1 := Bind(p, f)
	after2 := Bind(after1, f2)
	after3 := Bind(after2, f3)
	after4 := Bind(after3, f4)
	after5 := Bind(after4, f5)
	after6 := Bind(after5, f6)
	return Bind(after6, f7)
}
// Bind8Naïve chains p with f through f8 using eight nested calls to Bind.
func Bind8Naïve[In, Out, T, T2, T3, T4, T5, T6, T7, T8 any](
	p Parser[In, T],
	f func(T) Parser[In, T2],
	f2 func(T2) Parser[In, T3],
	f3 func(T3) Parser[In, T4],
	f4 func(T4) Parser[In, T5],
	f5 func(T5) Parser[In, T6],
	f6 func(T6) Parser[In, T7],
	f7 func(T7) Parser[In, T8],
	f8 func(T8) Parser[In, Out],
) Parser[In, Out] {
	after1 := Bind(p, f)
	after2 := Bind(after1, f2)
	after3 := Bind(after2, f3)
	after4 := Bind(after3, f4)
	after5 := Bind(after4, f5)
	after6 := Bind(after5, f6)
	after7 := Bind(after6, f7)
	return Bind(after7, f8)
}
// Bind9Naïve chains p with f through f9 using nine nested calls to Bind.
func Bind9Naïve[In, Out, T, T2, T3, T4, T5, T6, T7, T8, T9 any](
	p Parser[In, T],
	f func(T) Parser[In, T2],
	f2 func(T2) Parser[In, T3],
	f3 func(T3) Parser[In, T4],
	f4 func(T4) Parser[In, T5],
	f5 func(T5) Parser[In, T6],
	f6 func(T6) Parser[In, T7],
	f7 func(T7) Parser[In, T8],
	f8 func(T8) Parser[In, T9],
	f9 func(T9) Parser[In, Out],
) Parser[In, Out] {
	after1 := Bind(p, f)
	after2 := Bind(after1, f2)
	after3 := Bind(after2, f3)
	after4 := Bind(after3, f4)
	after5 := Bind(after4, f5)
	after6 := Bind(after5, f6)
	after7 := Bind(after6, f7)
	after8 := Bind(after7, f8)
	return Bind(after8, f9)
}

265
naive/naive.go Normal file
View File

@ -0,0 +1,265 @@
// SPDX-License-Identifier: Unlicense
// Package naive contains naïve implementations of the Bind and Seq combinators.
// The accompanying tests include simple benchmarks comparing their performance.
package naive
import gp "git.codemonkeysoftware.net/b/gigaparsec"
// Bind2 chains p with f and then f2 using two nested calls to gp.Bind.
func Bind2[In, Out, T, T2 any](
	p gp.Parser[In, T],
	f func(T) gp.Parser[In, T2],
	f2 func(T2) gp.Parser[In, Out],
) gp.Parser[In, Out] {
	after1 := gp.Bind(p, f)
	return gp.Bind(after1, f2)
}
// Bind3 chains p with f, f2 and f3 using three nested calls to gp.Bind.
//
// Only the type parameters that appear in the signature are declared, so all
// of them can be inferred from the arguments at the call site.
func Bind3[In, Out, T, T2, T3 any](
	p gp.Parser[In, T],
	f func(T) gp.Parser[In, T2],
	f2 func(T2) gp.Parser[In, T3],
	f3 func(T3) gp.Parser[In, Out],
) gp.Parser[In, Out] {
	return gp.Bind(gp.Bind(gp.Bind(p, f), f2), f3)
}
// Bind4 chains p with f through f4 using four nested calls to gp.Bind.
func Bind4[In, Out, T, T2, T3, T4 any](
	p gp.Parser[In, T],
	f func(T) gp.Parser[In, T2],
	f2 func(T2) gp.Parser[In, T3],
	f3 func(T3) gp.Parser[In, T4],
	f4 func(T4) gp.Parser[In, Out],
) gp.Parser[In, Out] {
	after1 := gp.Bind(p, f)
	after2 := gp.Bind(after1, f2)
	after3 := gp.Bind(after2, f3)
	return gp.Bind(after3, f4)
}
// Bind5 chains p with f through f5 using five nested calls to gp.Bind.
func Bind5[In, Out, T, T2, T3, T4, T5 any](
	p gp.Parser[In, T],
	f func(T) gp.Parser[In, T2],
	f2 func(T2) gp.Parser[In, T3],
	f3 func(T3) gp.Parser[In, T4],
	f4 func(T4) gp.Parser[In, T5],
	f5 func(T5) gp.Parser[In, Out],
) gp.Parser[In, Out] {
	after1 := gp.Bind(p, f)
	after2 := gp.Bind(after1, f2)
	after3 := gp.Bind(after2, f3)
	after4 := gp.Bind(after3, f4)
	return gp.Bind(after4, f5)
}
// Bind6 chains p with f through f6 using six nested calls to gp.Bind.
func Bind6[In, Out, T, T2, T3, T4, T5, T6 any](
	p gp.Parser[In, T],
	f func(T) gp.Parser[In, T2],
	f2 func(T2) gp.Parser[In, T3],
	f3 func(T3) gp.Parser[In, T4],
	f4 func(T4) gp.Parser[In, T5],
	f5 func(T5) gp.Parser[In, T6],
	f6 func(T6) gp.Parser[In, Out],
) gp.Parser[In, Out] {
	after1 := gp.Bind(p, f)
	after2 := gp.Bind(after1, f2)
	after3 := gp.Bind(after2, f3)
	after4 := gp.Bind(after3, f4)
	after5 := gp.Bind(after4, f5)
	return gp.Bind(after5, f6)
}
// Bind7 chains p with f through f7 using seven nested calls to gp.Bind.
func Bind7[In, Out, T, T2, T3, T4, T5, T6, T7 any](
	p gp.Parser[In, T],
	f func(T) gp.Parser[In, T2],
	f2 func(T2) gp.Parser[In, T3],
	f3 func(T3) gp.Parser[In, T4],
	f4 func(T4) gp.Parser[In, T5],
	f5 func(T5) gp.Parser[In, T6],
	f6 func(T6) gp.Parser[In, T7],
	f7 func(T7) gp.Parser[In, Out],
) gp.Parser[In, Out] {
	after1 := gp.Bind(p, f)
	after2 := gp.Bind(after1, f2)
	after3 := gp.Bind(after2, f3)
	after4 := gp.Bind(after3, f4)
	after5 := gp.Bind(after4, f5)
	after6 := gp.Bind(after5, f6)
	return gp.Bind(after6, f7)
}
// Bind8 chains p with f through f8 using eight nested calls to gp.Bind.
func Bind8[In, Out, T, T2, T3, T4, T5, T6, T7, T8 any](
	p gp.Parser[In, T],
	f func(T) gp.Parser[In, T2],
	f2 func(T2) gp.Parser[In, T3],
	f3 func(T3) gp.Parser[In, T4],
	f4 func(T4) gp.Parser[In, T5],
	f5 func(T5) gp.Parser[In, T6],
	f6 func(T6) gp.Parser[In, T7],
	f7 func(T7) gp.Parser[In, T8],
	f8 func(T8) gp.Parser[In, Out],
) gp.Parser[In, Out] {
	after1 := gp.Bind(p, f)
	after2 := gp.Bind(after1, f2)
	after3 := gp.Bind(after2, f3)
	after4 := gp.Bind(after3, f4)
	after5 := gp.Bind(after4, f5)
	after6 := gp.Bind(after5, f6)
	after7 := gp.Bind(after6, f7)
	return gp.Bind(after7, f8)
}
// Bind9 chains p with f through f9 using nine nested calls to gp.Bind.
func Bind9[In, Out, T, T2, T3, T4, T5, T6, T7, T8, T9 any](
	p gp.Parser[In, T],
	f func(T) gp.Parser[In, T2],
	f2 func(T2) gp.Parser[In, T3],
	f3 func(T3) gp.Parser[In, T4],
	f4 func(T4) gp.Parser[In, T5],
	f5 func(T5) gp.Parser[In, T6],
	f6 func(T6) gp.Parser[In, T7],
	f7 func(T7) gp.Parser[In, T8],
	f8 func(T8) gp.Parser[In, T9],
	f9 func(T9) gp.Parser[In, Out],
) gp.Parser[In, Out] {
	after1 := gp.Bind(p, f)
	after2 := gp.Bind(after1, f2)
	after3 := gp.Bind(after2, f3)
	after4 := gp.Bind(after3, f4)
	after5 := gp.Bind(after4, f5)
	after6 := gp.Bind(after5, f6)
	after7 := gp.Bind(after6, f7)
	after8 := gp.Bind(after7, f8)
	return gp.Bind(after8, f9)
}
// Seq2 is built from nested gp.Bind calls: the values bound from p and p2
// are passed to f, and f's result is wrapped with gp.Return.
func Seq2[In, Out, T, T2 any](
	p gp.Parser[In, T],
	p2 gp.Parser[In, T2],
	f func(T, T2) Out,
) gp.Parser[In, Out] {
	return gp.Bind(p, func(v1 T) gp.Parser[In, Out] {
		combine := func(v2 T2) gp.Parser[In, Out] {
			return gp.Return[In](f(v1, v2))
		}
		return gp.Bind(p2, combine)
	})
}
// Seq3 is built from nested gp.Bind calls: the values bound from p, p2 and
// p3 are passed to f, and f's result is wrapped with gp.Return.
func Seq3[In, Out, T, T2, T3 any](
	p gp.Parser[In, T],
	p2 gp.Parser[In, T2],
	p3 gp.Parser[In, T3],
	f func(T, T2, T3) Out,
) gp.Parser[In, Out] {
	return gp.Bind(p, func(v1 T) gp.Parser[In, Out] {
		return gp.Bind(p2, func(v2 T2) gp.Parser[In, Out] {
			combine := func(v3 T3) gp.Parser[In, Out] {
				return gp.Return[In](f(v1, v2, v3))
			}
			return gp.Bind(p3, combine)
		})
	})
}
// Seq4 is built from nested gp.Bind calls: the values bound from p through
// p4 are passed to f, and f's result is wrapped with gp.Return.
func Seq4[In, Out, T, T2, T3, T4 any](
	p gp.Parser[In, T],
	p2 gp.Parser[In, T2],
	p3 gp.Parser[In, T3],
	p4 gp.Parser[In, T4],
	f func(T, T2, T3, T4) Out,
) gp.Parser[In, Out] {
	return gp.Bind(p, func(v1 T) gp.Parser[In, Out] {
		return gp.Bind(p2, func(v2 T2) gp.Parser[In, Out] {
			return gp.Bind(p3, func(v3 T3) gp.Parser[In, Out] {
				combine := func(v4 T4) gp.Parser[In, Out] {
					return gp.Return[In](f(v1, v2, v3, v4))
				}
				return gp.Bind(p4, combine)
			})
		})
	})
}
// Seq5 is built from nested gp.Bind calls: the values bound from p through
// p5 are passed to f, and f's result is wrapped with gp.Return.
func Seq5[In, Out, T, T2, T3, T4, T5 any](
	p gp.Parser[In, T],
	p2 gp.Parser[In, T2],
	p3 gp.Parser[In, T3],
	p4 gp.Parser[In, T4],
	p5 gp.Parser[In, T5],
	f func(T, T2, T3, T4, T5) Out,
) gp.Parser[In, Out] {
	return gp.Bind(p, func(v1 T) gp.Parser[In, Out] {
		return gp.Bind(p2, func(v2 T2) gp.Parser[In, Out] {
			return gp.Bind(p3, func(v3 T3) gp.Parser[In, Out] {
				return gp.Bind(p4, func(v4 T4) gp.Parser[In, Out] {
					combine := func(v5 T5) gp.Parser[In, Out] {
						return gp.Return[In](f(v1, v2, v3, v4, v5))
					}
					return gp.Bind(p5, combine)
				})
			})
		})
	})
}
// Seq6 is built from nested gp.Bind calls: the values bound from p through
// p6 are passed to f, and f's result is wrapped with gp.Return.
func Seq6[In, Out, T, T2, T3, T4, T5, T6 any](
	p gp.Parser[In, T],
	p2 gp.Parser[In, T2],
	p3 gp.Parser[In, T3],
	p4 gp.Parser[In, T4],
	p5 gp.Parser[In, T5],
	p6 gp.Parser[In, T6],
	f func(T, T2, T3, T4, T5, T6) Out,
) gp.Parser[In, Out] {
	return gp.Bind(p, func(v1 T) gp.Parser[In, Out] {
		return gp.Bind(p2, func(v2 T2) gp.Parser[In, Out] {
			return gp.Bind(p3, func(v3 T3) gp.Parser[In, Out] {
				return gp.Bind(p4, func(v4 T4) gp.Parser[In, Out] {
					return gp.Bind(p5, func(v5 T5) gp.Parser[In, Out] {
						combine := func(v6 T6) gp.Parser[In, Out] {
							return gp.Return[In](f(v1, v2, v3, v4, v5, v6))
						}
						return gp.Bind(p6, combine)
					})
				})
			})
		})
	})
}
// Seq7 is built from nested gp.Bind calls: the values bound from p through
// p7 are passed to f, and f's result is wrapped with gp.Return.
func Seq7[In, Out, T, T2, T3, T4, T5, T6, T7 any](
	p gp.Parser[In, T],
	p2 gp.Parser[In, T2],
	p3 gp.Parser[In, T3],
	p4 gp.Parser[In, T4],
	p5 gp.Parser[In, T5],
	p6 gp.Parser[In, T6],
	p7 gp.Parser[In, T7],
	f func(T, T2, T3, T4, T5, T6, T7) Out,
) gp.Parser[In, Out] {
	return gp.Bind(p, func(v1 T) gp.Parser[In, Out] {
		return gp.Bind(p2, func(v2 T2) gp.Parser[In, Out] {
			return gp.Bind(p3, func(v3 T3) gp.Parser[In, Out] {
				return gp.Bind(p4, func(v4 T4) gp.Parser[In, Out] {
					return gp.Bind(p5, func(v5 T5) gp.Parser[In, Out] {
						return gp.Bind(p6, func(v6 T6) gp.Parser[In, Out] {
							combine := func(v7 T7) gp.Parser[In, Out] {
								return gp.Return[In](f(v1, v2, v3, v4, v5, v6, v7))
							}
							return gp.Bind(p7, combine)
						})
					})
				})
			})
		})
	})
}
// Seq8 is built from nested gp.Bind calls: the values bound from p through
// p8 are passed to f, and f's result is wrapped with gp.Return.
func Seq8[In, Out, T, T2, T3, T4, T5, T6, T7, T8 any](
	p gp.Parser[In, T],
	p2 gp.Parser[In, T2],
	p3 gp.Parser[In, T3],
	p4 gp.Parser[In, T4],
	p5 gp.Parser[In, T5],
	p6 gp.Parser[In, T6],
	p7 gp.Parser[In, T7],
	p8 gp.Parser[In, T8],
	f func(T, T2, T3, T4, T5, T6, T7, T8) Out,
) gp.Parser[In, Out] {
	return gp.Bind(p, func(v1 T) gp.Parser[In, Out] {
		return gp.Bind(p2, func(v2 T2) gp.Parser[In, Out] {
			return gp.Bind(p3, func(v3 T3) gp.Parser[In, Out] {
				return gp.Bind(p4, func(v4 T4) gp.Parser[In, Out] {
					return gp.Bind(p5, func(v5 T5) gp.Parser[In, Out] {
						return gp.Bind(p6, func(v6 T6) gp.Parser[In, Out] {
							return gp.Bind(p7, func(v7 T7) gp.Parser[In, Out] {
								combine := func(v8 T8) gp.Parser[In, Out] {
									return gp.Return[In](f(v1, v2, v3, v4, v5, v6, v7, v8))
								}
								return gp.Bind(p8, combine)
							})
						})
					})
				})
			})
		})
	})
}
// Seq9 is built from nested gp.Bind calls: the values bound from p through
// p9 are passed to f, and f's result is wrapped with gp.Return.
func Seq9[In, Out, T, T2, T3, T4, T5, T6, T7, T8, T9 any](
	p gp.Parser[In, T],
	p2 gp.Parser[In, T2],
	p3 gp.Parser[In, T3],
	p4 gp.Parser[In, T4],
	p5 gp.Parser[In, T5],
	p6 gp.Parser[In, T6],
	p7 gp.Parser[In, T7],
	p8 gp.Parser[In, T8],
	p9 gp.Parser[In, T9],
	f func(T, T2, T3, T4, T5, T6, T7, T8, T9) Out,
) gp.Parser[In, Out] {
	return gp.Bind(p, func(v1 T) gp.Parser[In, Out] {
		return gp.Bind(p2, func(v2 T2) gp.Parser[In, Out] {
			return gp.Bind(p3, func(v3 T3) gp.Parser[In, Out] {
				return gp.Bind(p4, func(v4 T4) gp.Parser[In, Out] {
					return gp.Bind(p5, func(v5 T5) gp.Parser[In, Out] {
						return gp.Bind(p6, func(v6 T6) gp.Parser[In, Out] {
							return gp.Bind(p7, func(v7 T7) gp.Parser[In, Out] {
								return gp.Bind(p8, func(v8 T8) gp.Parser[In, Out] {
									combine := func(v9 T9) gp.Parser[In, Out] {
										return gp.Return[In](f(v1, v2, v3, v4, v5, v6, v7, v8, v9))
									}
									return gp.Bind(p9, combine)
								})
							})
						})
					})
				})
			})
		})
	})
}

View File

@ -1,11 +1,12 @@
// SPDX-License-Identifier: Unlicense
package gigaparsec_test
package naive_test
import (
"testing"
"git.codemonkeysoftware.net/b/gigaparsec"
"git.codemonkeysoftware.net/b/gigaparsec/naive"
)
func BenchmarkBind5(b *testing.B) {
@ -19,15 +20,15 @@ func BenchmarkBind5(b *testing.B) {
return bind5(gigaparsec.Match(byte(0)), f, f, f, f, f)
}
input := gigaparsec.SliceReaderAt[byte]{0}
b.Run("Bind5", func(b *testing.B) {
b.Run("gigaparsec.Bind5", func(b *testing.B) {
for range b.N {
gigaparsec.Run(p(gigaparsec.Bind5), input)
}
})
b.Run("Bind5Naïve", func(b *testing.B) {
b.Run("naïve.Bind5", func(b *testing.B) {
for range b.N {
gigaparsec.Run(p(gigaparsec.Bind5Naïve), input)
gigaparsec.Run(p(naive.Bind5), input)
}
})
}
@ -43,15 +44,65 @@ func BenchmarkBind9(b *testing.B) {
return bind5(gigaparsec.Match(byte(0)), f, f, f, f, f, f, f, f, f)
}
input := gigaparsec.SliceReaderAt[byte]{0}
b.Run("Bind9", func(b *testing.B) {
b.Run("gigaparsec.Bind9", func(b *testing.B) {
for range b.N {
gigaparsec.Run(p(gigaparsec.Bind9), input)
}
})
b.Run("Bind9Naïve", func(b *testing.B) {
b.Run("naive.Bind9", func(b *testing.B) {
for range b.N {
gigaparsec.Run(p(gigaparsec.Bind9Naïve), input)
gigaparsec.Run(p(naive.Bind9), input)
}
})
}
func BenchmarkSeq5(b *testing.B) {
type P = gigaparsec.Parser[byte, byte]
type Seq5T = func(P, P, P, P, P, func(byte, byte, byte, byte, byte) byte) P
zero := gigaparsec.Return[byte, byte](0)
f := func(a, b, c, d, e byte) byte {
return a + b + c + d + e
}
p := func(seq5 Seq5T) P {
return seq5(zero, zero, zero, zero, zero, f)
}
input := gigaparsec.SliceReaderAt[byte]{0}
b.Run("gigaparsec.Seq5", func(b *testing.B) {
for range b.N {
gigaparsec.Run(p(gigaparsec.Seq5), input)
}
})
b.Run("naive.Seq5", func(b *testing.B) {
for range b.N {
gigaparsec.Run(p(naive.Seq5), input)
}
})
}
func BenchmarkSeq9(b *testing.B) {
type P = gigaparsec.Parser[byte, byte]
type Seq9T = func(P, P, P, P, P, P, P, P, P, func(byte, byte, byte, byte, byte, byte, byte, byte, byte) byte) P
zero := gigaparsec.Return[byte, byte](0)
f := func(a, b, c, d, e, f, g, h, i byte) byte {
return a + b + c + d + e + f + g + h + i
}
p := func(seq9 Seq9T) P {
return seq9(zero, zero, zero, zero, zero, zero, zero, zero, zero, f)
}
input := gigaparsec.SliceReaderAt[byte]{0}
b.Run("gigaparsec.Seq9", func(b *testing.B) {
for range b.N {
gigaparsec.Run(p(gigaparsec.Seq9), input)
}
})
b.Run("naive.Seq9", func(b *testing.B) {
for range b.N {
gigaparsec.Run(p(naive.Seq9), input)
}
})
}

View File

@ -210,3 +210,11 @@ func TestRepeat(t *testing.T) {
test.False(t, succeeded)
})
}
// TestBracket is a placeholder for tests of Bracket; it only calls Todo.
// NOTE(review): Todo is defined elsewhere and presumably flags the test as
// not yet written — confirm whether it skips or fails.
func TestBracket(t *testing.T) {
Todo(t)
}
// TestWhere is a placeholder for tests of Parser.Where; it only calls Todo.
// NOTE(review): Todo is defined elsewhere and presumably flags the test as
// not yet written — confirm whether it skips or fails.
func TestWhere(t *testing.T) {
Todo(t)
}