Skip to content

Commit

Permalink
Merge pull request #355 from okp4/feat/uri_encoded
Browse files Browse the repository at this point in the history
🧠 Logic: 🔗 implement `uri_encoded/3`
  • Loading branch information
bdeneux authored May 12, 2023
2 parents 1ab81d9 + 43b4cdf commit f7b9d5d
Show file tree
Hide file tree
Showing 3 changed files with 391 additions and 0 deletions.
1 change: 1 addition & 0 deletions x/logic/interpreter/registry.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ var registry = map[string]any{
"bech32_address/2": predicate.Bech32Address,
"source_file/1": predicate.SourceFile,
"json_prolog/2": predicate.JSONProlog,
"uri_encoded/3": predicate.URIEncoded,
}

// RegistryNames is the list of the predicate names in the Registry.
Expand Down
171 changes: 171 additions & 0 deletions x/logic/predicate/uri.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
package predicate

import (
"context"
"fmt"
"net/url"

"github.com/ichiban/prolog/engine"
"github.com/okp4/okp4d/x/logic/util"
)

// Component names the part of a URI that a string is escaped for.
type Component string

// The component names accepted by NewComponent.
const (
	QueryComponent    Component = "query"
	FragmentComponent Component = "fragment"
	PathComponent     Component = "path"
	SegmentComponent  Component = "segment"
)

// upperhex lists the upper-case hexadecimal digits; indexing it with a
// 4-bit value yields the matching digit.
const upperhex = "0123456789ABCDEF"

// NewComponent converts v into a Component.
//
// It returns the matching constant when v is exactly one of `query`,
// `fragment`, `path` or `segment` (the comparison is case-sensitive), and an
// empty Component together with an error for any other value.
func NewComponent(v string) (Component, error) {
	switch candidate := Component(v); candidate {
	case QueryComponent, FragmentComponent, PathComponent, SegmentComponent:
		return candidate, nil
	}

	return "", fmt.Errorf("invalid component name %s, expected `query`, `fragment`, `path` or `segment`", v)
}

// shouldEscape reports whether the byte c must be percent-encoded when it
// appears in the URI component comp, following
// [RFC 3986](https://www.rfc-editor.org/rfc/rfc3986).
//
// This is a re-implementation of the unexported shouldEscape of net/url. It is
// needed because the native implementation does not follow RFC 3986 exactly,
// and because net/url only exposes component escaping for the path segment
// (url.PathEscape) and the query (url.QueryEscape). Without it, the result
// would not match the escaping performed by SWI-Prolog.
//
// One further difference concerns the query component: the Go library turns
// the space character ' ' into '+', whereas the
// [SWI-Prolog implementation](https://www.swi-prolog.org/pldoc/doc/_SWI_/library/uri.pl?show=src#uri_encoded/3)
// escapes it as '%20', which is what this function leads to.
//
// Reported Go issues about the RFC non-compliance:
//   - golang.org/issue/5684.
//   - https://github.com/golang/go/issues/27559
//
//nolint:gocognit
func shouldEscape(c byte, comp Component) bool {
	// §2.3 Unreserved characters: never escaped, whatever the component.
	switch {
	case c >= 'a' && c <= 'z', c >= 'A' && c <= 'Z', c >= '0' && c <= '9':
		return false
	case c == '-', c == '.', c == '_', c == '~':
		return false
	}

	// Only the reserved characters listed below may stay unescaped in some
	// components; every other remaining byte is always escaped.
	switch c {
	case '!', '$', '&', '\'', '(', ')', '*', '+', ',', '/', ':', ';', '=', '?', '@':
	default:
		return true
	}

	// For each component, the reserved characters that still get escaped.
	var escaped string
	switch comp {
	case PathComponent: // §3.3
		escaped = "?:"
	case SegmentComponent: // §3.3, '/' would otherwise end the segment.
		escaped = "/?:"
	case QueryComponent: // §3.4
		escaped = "&+:;="
	case FragmentComponent: // §4.1, every listed reserved character is kept.
		escaped = ""
	default:
		// Unknown component: escape the reserved character.
		return true
	}

	for i := 0; i < len(escaped); i++ {
		if escaped[i] == c {
			return true
		}
	}
	return false
}

// Escape returns v with every byte that shouldEscape flags for the component
// comp replaced by its percent-encoded form: '%' followed by two upper-case
// hexadecimal digits. When no byte needs escaping, v is returned unchanged.
//
// This is a re-implementation of the unexported escape of net/url; see the
// comment on shouldEscape for the reasons.
func (comp Component) Escape(v string) string {
	// First pass: count the bytes to escape so the output can be sized exactly.
	toEscape := 0
	for _, b := range []byte(v) {
		if shouldEscape(b, comp) {
			toEscape++
		}
	}
	if toEscape == 0 {
		return v
	}

	// Each escaped byte grows from one byte to three ("%XX").
	out := make([]byte, 0, len(v)+2*toEscape)
	for _, b := range []byte(v) {
		if !shouldEscape(b, comp) {
			out = append(out, b)
			continue
		}
		out = append(out, '%', upperhex[b>>4], upperhex[b&15])
	}

	return string(out)
}

// Unescape decodes the percent-encoded sequences of v and returns the result.
//
// The decoding is delegated to url.PathUnescape for every component, so the
// receiver does not influence the result and a '+' is left as is. The error
// returned by url.PathUnescape is passed through untouched.
func (comp Component) Unescape(v string) (string, error) {
	unescaped, err := url.PathUnescape(v)
	return unescaped, err
}

// URIEncoded implements the uri_encoded/3 predicate, which relates a decoded
// text to its percent-encoded form for a given URI component:
//
//	uri_encoded(+Component, +Decoded, -Encoded)
//	uri_encoded(+Component, -Decoded, +Encoded)
//	uri_encoded(+Component, +Decoded, +Encoded)
//
// Component must resolve to an atom accepted by NewComponent (`query`,
// `fragment`, `path` or `segment`). Decoded and Encoded must each resolve to
// an atom or an unbound variable, and at least one of them must be an atom.
//
// When Encoded is unbound, it is unified with the escaped form of Decoded.
// When Encoded is an atom, it is unescaped and the result is unified with
// Decoded, which either binds Decoded or checks it against the given value.
//
// The returned promise fails with an error prefixed by "uri_encoded/3: " when
// an argument has an unsupported type, when the component name is unknown,
// when Encoded holds an invalid escape sequence, or when Decoded and Encoded
// are both unbound.
func URIEncoded(vm *engine.VM, component, decoded, encoded engine.Term, cont engine.Cont, env *engine.Env) *engine.Promise {
	return engine.Delay(func(_ context.Context) *engine.Promise {
		var comp Component
		switch c := env.Resolve(component).(type) {
		case engine.Atom:
			cc, err := NewComponent(c.String())
			if err != nil {
				return engine.Error(fmt.Errorf("uri_encoded/3: %w", err))
			}
			comp = cc
		default:
			// Report the type of the resolved term, not of the raw argument:
			// a variable bound to a non-atom must not show up as a Variable.
			return engine.Error(fmt.Errorf("uri_encoded/3: invalid component type: %T, should be Atom", c))
		}

		var (
			escaped    string // escaped form of Decoded, valid only when hasDecoded is true
			hasDecoded bool
		)
		switch d := env.Resolve(decoded).(type) {
		case engine.Variable:
			// Decoded is unbound: it can only be computed from Encoded below.
		case engine.Atom:
			escaped = comp.Escape(d.String())
			hasDecoded = true
		default:
			return engine.Error(fmt.Errorf("uri_encoded/3: invalid decoded type: %T, should be Variable or Atom", d))
		}

		switch e := env.Resolve(encoded).(type) {
		case engine.Variable:
			if !hasDecoded {
				// Nothing to convert from: fail explicitly instead of
				// binding Encoded to the empty atom.
				return engine.Error(fmt.Errorf("uri_encoded/3: decoded and encoded are both unbound, at least one should be an Atom"))
			}
			return engine.Unify(vm, encoded, util.StringToTerm(escaped), cont, env)
		case engine.Atom:
			unescaped, err := comp.Unescape(e.String())
			if err != nil {
				return engine.Error(fmt.Errorf("uri_encoded/3: %w", err))
			}
			// Unifying on the decoded side also covers the case where both
			// arguments are bound: the predicate then acts as a check.
			return engine.Unify(vm, decoded, util.StringToTerm(unescaped), cont, env)
		default:
			return engine.Error(fmt.Errorf("uri_encoded/3: invalid encoded type: %T, should be Variable or Atom", e))
		}
	})
}
219 changes: 219 additions & 0 deletions x/logic/predicate/uri_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,219 @@
//nolint:gocognit,lll
package predicate

import (
"fmt"
"testing"

tmdb "github.com/cometbft/cometbft-db"
"github.com/cometbft/cometbft/libs/log"
tmproto "github.com/cometbft/cometbft/proto/tendermint/types"
"github.com/cosmos/cosmos-sdk/store"
sdk "github.com/cosmos/cosmos-sdk/types"
"github.com/ichiban/prolog/engine"
"github.com/okp4/okp4d/x/logic/testutil"
"github.com/okp4/okp4d/x/logic/types"

. "github.com/smartystreets/goconvey/convey"
)

// TestURIEncoded runs a table of uri_encoded/3 queries, each against a freshly
// built interpreter, and checks either the variable bindings of the solutions
// or the error reported while iterating over them.
func TestURIEncoded(t *testing.T) {
Convey("Given a test cases", t, func() {
cases := []struct {
// program is the Prolog source compiled before the query is run (left empty by every case below).
program string
// query is the Prolog goal submitted to the interpreter.
query string
// wantResult lists, per expected solution, the expected textual value of each bound variable.
wantResult []types.TermResults
// wantError, when set, carries the message expected from the solutions iterator error.
wantError error
// wantSuccess tells whether at least one solution is expected (only checked when wantError is nil).
wantSuccess bool
}{
{
query: `uri_encoded(hey, foo, Decoded).`,
wantSuccess: false,
wantError: fmt.Errorf("uri_encoded/3: invalid component name hey, expected `query`, `fragment`, `path` or `segment`"),
},
{
query: `uri_encoded(path, Decoded, foo).`,
wantSuccess: true,
wantResult: []types.TermResults{{
"Decoded": "foo",
}},
},
{
query: `uri_encoded(path, Decoded, 'foo%20bar').`,
wantSuccess: true,
wantResult: []types.TermResults{{
"Decoded": "'foo bar'",
}},
},
{
query: `uri_encoded(path, foo, Encoded).`,
wantSuccess: true,
wantResult: []types.TermResults{{
"Encoded": "foo",
}},
},
{
query: `uri_encoded(query, 'foo bar', Encoded).`,
wantSuccess: true,
wantResult: []types.TermResults{{
"Encoded": "'foo%20bar'",
}},
},
// Encoding of the printable ASCII range (space to '~') for each component.
{
query: "uri_encoded(query, ' !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~', Encoded).",
wantSuccess: true,
wantResult: []types.TermResults{{
"Encoded": "'%20!%22%23$%25%26\\'()*%2B,-./0123456789%3A%3B%3C%3D%3E?@ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D~'",
}},
},
{
query: "uri_encoded(path, ' !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~', Encoded).",
wantSuccess: true,
wantResult: []types.TermResults{{
"Encoded": "'%20!%22%23$%25&\\'()*+,-./0123456789%3A;%3C=%3E%3F@ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D~'",
}},
},
{
query: "uri_encoded(segment, ' !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~', Encoded).",
wantSuccess: true,
wantResult: []types.TermResults{{
"Encoded": "'%20!%22%23$%25&\\'()*+,-.%2F0123456789%3A;%3C=%3E%3F@ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D~'",
}},
},
{
query: "uri_encoded(fragment, ' !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~', Encoded).",
wantSuccess: true,
wantResult: []types.TermResults{{
"Encoded": "'%20!%22%23$%25&\\'()*+,-./0123456789:;%3C=%3E?@ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D~'",
}},
},
// Decoding of the same range; the query case ends with a literal '+', which is expected to stay a '+'.
{
query: "uri_encoded(query, Decoded, '%20!%22%23$%25%26\\'()*%2B,-./0123456789%3A%3B%3C%3D%3E?@ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D~+').",
wantSuccess: true,
wantResult: []types.TermResults{{
"Decoded": "' !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~+'",
}},
},
{
query: "uri_encoded(path, Decoded, '%20!%22%23$%25&\\'()*+,-./0123456789%3A;%3C=%3E%3F@ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D~').",
wantSuccess: true,
wantResult: []types.TermResults{{
"Decoded": "' !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~'",
}},
},
{
query: "uri_encoded(segment, Decoded, '%20!%22%23$%25&\\'()*+,-.%2F0123456789%3A;%3C=%3E%3F@ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D~').",
wantSuccess: true,
wantResult: []types.TermResults{{
"Decoded": "' !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~'",
}},
},
{
query: "uri_encoded(fragment, Decoded, '%20!%22%23$%25&\\'()*+,-./0123456789:;%3C=%3E?@ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D~').",
wantSuccess: true,
wantResult: []types.TermResults{{
"Decoded": "' !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~'",
}},
},
// Both arguments bound: the query succeeds once and binds no variable.
{
query: "uri_encoded(query, ' !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~+', '%20!%22%23$%25%26\\'()*%2B,-./0123456789%3A%3B%3C%3D%3E?@ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D~+').",
wantSuccess: true,
wantResult: []types.TermResults{{}},
},
{
query: "uri_encoded(path, ' !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~', '%20!%22%23$%25&\\'()*+,-./0123456789%3A;%3C=%3E%3F@ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D~').",
wantSuccess: true,
wantResult: []types.TermResults{{}},
},
{
query: "uri_encoded(segment, ' !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~', '%20!%22%23$%25&\\'()*+,-.%2F0123456789%3A;%3C=%3E%3F@ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D~').",
wantSuccess: true,
wantResult: []types.TermResults{{}},
},
{
query: "uri_encoded(fragment, ' !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~', '%20!%22%23$%25&\\'()*+,-./0123456789:;%3C=%3E?@ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D~').",
wantSuccess: true,
wantResult: []types.TermResults{{}},
},
// Both arguments bound but not matching: no solution and no error.
{
query: "uri_encoded(fragment, 'foo bar', 'bar%20foo').",
wantSuccess: false,
},
// Error cases: unbound component, non-atom arguments, malformed escape sequence.
{
query: "uri_encoded(Var, 'foo bar', 'bar%20foo').",
wantSuccess: false,
wantError: fmt.Errorf("uri_encoded/3: invalid component type: engine.Variable, should be Atom"),
},
{
query: "uri_encoded(path, compound(2), 'bar%20foo').",
wantSuccess: false,
wantError: fmt.Errorf("uri_encoded/3: invalid decoded type: *engine.compound, should be Variable or Atom"),
},
{
query: "uri_encoded(path, 'foo', compound(2)).",
wantSuccess: false,
wantError: fmt.Errorf("uri_encoded/3: invalid encoded type: *engine.compound, should be Variable or Atom"),
},
{
query: "uri_encoded(path, Decoded, 'bar%%3foo').",
wantSuccess: false,
// fmt.Errorf turns each "%%" into a single '%', so the expected message contains "%%3".
wantError: fmt.Errorf("uri_encoded/3: invalid URL escape \"%%%%3\""),
},
}
for nc, tc := range cases {
Convey(fmt.Sprintf("Given the query #%d: %s", nc, tc.query), func() {
Convey("and a context", func() {
// Every case gets its own in-memory store and SDK context.
db := tmdb.NewMemDB()
stateStore := store.NewCommitMultiStore(db)
ctx := sdk.NewContext(stateStore, tmproto.Header{}, false, log.NewNopLogger())

Convey("and a vm", func() {
// Register the predicate under test on the interpreter built for this case.
interpreter := testutil.NewLightInterpreterMust(ctx)
interpreter.Register3(engine.NewAtom("uri_encoded"), URIEncoded)

err := interpreter.Compile(ctx, tc.program)
So(err, ShouldBeNil)

Convey("When the predicate is called", func() {
sols, err := interpreter.QueryContext(ctx, tc.query)

Convey("Then the error should be nil", func() {
// QueryContext itself must not fail; predicate errors are checked through sols.Err() below.
So(err, ShouldBeNil)
So(sols, ShouldNotBeNil)

Convey("and the bindings should be as expected", func() {
// Collect the bindings of every solution.
var got []types.TermResults
for sols.Next() {
m := types.TermResults{}
err := sols.Scan(m)
So(err, ShouldBeNil)

got = append(got, m)
}
if tc.wantError != nil {
// Errors are compared by message only.
So(sols.Err(), ShouldNotBeNil)
So(sols.Err().Error(), ShouldEqual, tc.wantError.Error())
} else {
So(sols.Err(), ShouldBeNil)

if tc.wantSuccess {
So(len(got), ShouldBeGreaterThan, 0)
So(len(got), ShouldEqual, len(tc.wantResult))
// Each binding obtained is compared with the expected value of the same solution and variable.
for iGot, resultGot := range got {
for varGot, termGot := range resultGot {
So(testutil.ReindexUnknownVariables(termGot), ShouldEqual, tc.wantResult[iGot][varGot])
}
}
} else {
So(len(got), ShouldEqual, 0)
}
}
})
})
})
})
})
})
}
})
}

0 comments on commit f7b9d5d

Please sign in to comment.