Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🧠 Logic: 🔗 implement uri_encoded/3 #355

Merged
merged 7 commits into from
May 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions x/logic/interpreter/registry.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ var registry = map[string]any{
"bech32_address/2": predicate.Bech32Address,
"source_file/1": predicate.SourceFile,
"json_prolog/2": predicate.JSONProlog,
"uri_encoded/3": predicate.URIEncoded,
}

// RegistryNames is the list of the predicate names in the Registry.
Expand Down
171 changes: 171 additions & 0 deletions x/logic/predicate/uri.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
package predicate

import (
"context"
"fmt"
"net/url"

"github.com/ichiban/prolog/engine"
"github.com/okp4/okp4d/x/logic/util"
)

// Component names the part of a URI a value is escaped for. Its underlying
// string is the component name as written in Prolog (see NewComponent).
type Component string

// Component values accepted by NewComponent. Each one selects a different set
// of reserved characters left unescaped by shouldEscape.
const (
QueryComponent Component = "query"
FragmentComponent Component = "fragment"
PathComponent Component = "path"
SegmentComponent Component = "segment"
)

// upperhex is indexed with a 4-bit value to obtain the matching uppercase
// hexadecimal digit when Escape writes a percent-encoded byte.
const upperhex = "0123456789ABCDEF"

// NewComponent turns the textual name v into the matching Component.
//
// The comparison is exact (case-sensitive). When v is none of `query`,
// `fragment`, `path` or `segment`, the empty Component and an error naming the
// rejected value are returned.
func NewComponent(v string) (Component, error) {
	known := [...]Component{QueryComponent, FragmentComponent, PathComponent, SegmentComponent}
	for _, candidate := range known {
		if string(candidate) == v {
			return candidate, nil
		}
	}

	return "", fmt.Errorf("invalid component name %s, expected `query`, `fragment`, `path` or `segment`", v)
}

// shouldEscape reports whether the byte c has to be percent-encoded when it is
// placed in the URI component comp, following
// [RFC 3986](https://www.rfc-editor.org/rfc/rfc3986).
//
// It stands in for url.shouldEscape of net/url for two reasons: the standard
// library does not follow RFC 3986 exactly, and it only exposes component
// escaping for the Path component (which in reality behaves like a path
// segment) and the Query component. Both points make its output differ from
// the SWI-Prolog escaping.
//
// The query component is a further difference: the Go library turns the space
// character ' ' into '+', whereas the
// [SWI-Prolog implementation](https://www.swi-prolog.org/pldoc/doc/_SWI_/library/uri.pl?show=src#uri_encoded/3)
// produces '%20'.
//
// Go issues reporting the RFC non-compliance:
//   - golang.org/issue/5684.
//   - https://github.com/golang/go/issues/27559
//
//nolint:gocognit
func shouldEscape(c byte, comp Component) bool {
	// §2.3 Unreserved characters (alphanum and mark) are never escaped.
	switch {
	case c >= 'a' && c <= 'z', c >= 'A' && c <= 'Z', c >= '0' && c <= '9':
		return false
	case c == '-', c == '.', c == '_', c == '~':
		return false
	}

	// §2.2 Reserved characters: only these may stay literal, depending on the
	// component. Anything outside this set is always escaped.
	const reserved = "!$&'()*+,/:;=?@"
	isReserved := false
	for i := 0; i < len(reserved); i++ {
		if reserved[i] == c {
			isReserved = true
			break
		}
	}
	if !isReserved {
		return true
	}

	switch comp {
	case FragmentComponent: // §4.1: every reserved character is kept.
		return false
	case PathComponent: // §3.3
		return c == ':' || c == '?'
	case SegmentComponent: // §3.3: '/' would end the segment, so it is escaped too.
		return c == ':' || c == '?' || c == '/'
	case QueryComponent: // §3.4
		switch c {
		case '&', '+', ':', ';', '=':
			return true
		}
		return false
	default:
		// Unknown component: stay conservative and escape reserved characters.
		return true
	}
}

// Escape returns v with every byte that shouldEscape flags for the component
// comp replaced by its percent-encoded form ('%' followed by two uppercase
// hexadecimal digits). The input is processed byte by byte, so a multi-byte
// UTF-8 character yields one escape per byte. When nothing needs escaping, v is
// returned unchanged.
//
// This mirrors the unexported url.escape of net/url; see the comment on
// shouldEscape for why the standard implementation is not used.
func (comp Component) Escape(v string) string {
	// First pass: count the escapes so the output can be sized exactly.
	escapes := 0
	for _, b := range []byte(v) {
		if shouldEscape(b, comp) {
			escapes++
		}
	}
	if escapes == 0 {
		return v
	}

	// Each escaped byte grows from one to three output bytes.
	out := make([]byte, 0, len(v)+2*escapes)
	for _, b := range []byte(v) {
		if !shouldEscape(b, comp) {
			out = append(out, b)
			continue
		}
		out = append(out, '%', upperhex[b>>4], upperhex[b&15])
	}

	return string(out)
}

// Unescape reverses percent-encoding in v and returns the decoded string.
//
// Decoding is delegated to url.PathUnescape whatever the component is, so the
// receiver does not influence the result and a literal '+' is left untouched.
// A malformed escape sequence yields an empty string and the error reported by
// net/url.
func (comp Component) Unescape(v string) (string, error) {
	decoded, err := url.PathUnescape(v)
	if err != nil {
		return "", err
	}

	return decoded, nil
}

// URIEncoded is the implementation of the uri_encoded/3 predicate, relating a
// plain value to its percent-encoded form for a given URI component.
//
// Arguments, once resolved in env:
//   - component must be an Atom naming a component accepted by NewComponent.
//   - decoded must be an Atom or a Variable.
//   - encoded must be an Atom or a Variable.
//
// Behavior:
//   - encoded is an Atom: it is unescaped and the result is unified with
//     decoded (this covers both decoding and checking two bound arguments).
//   - encoded is a Variable and decoded is an Atom: decoded is escaped for the
//     component and the result is unified with encoded.
//   - both are Variables: an error is returned, since there is nothing to
//     convert from.
//
// Every failure is reported through engine.Error with a message prefixed by
// "uri_encoded/3: ".
func URIEncoded(vm *engine.VM, component, decoded, encoded engine.Term, cont engine.Cont, env *engine.Env) *engine.Promise {
	return engine.Delay(func(_ context.Context) *engine.Promise {
		var comp Component
		switch c := env.Resolve(component).(type) {
		case engine.Atom:
			parsed, err := NewComponent(c.String())
			if err != nil {
				return engine.Error(fmt.Errorf("uri_encoded/3: %w", err))
			}
			comp = parsed
		default:
			// Report the resolved term: the raw argument is typically a
			// Variable even when it is bound to something else.
			return engine.Error(fmt.Errorf("uri_encoded/3: invalid component type: %T, should be Atom", c))
		}

		var (
			plain      string
			plainBound bool
		)
		switch d := env.Resolve(decoded).(type) {
		case engine.Variable:
		case engine.Atom:
			plain, plainBound = d.String(), true
		default:
			return engine.Error(fmt.Errorf("uri_encoded/3: invalid decoded type: %T, should be Variable or Atom", d))
		}

		switch e := env.Resolve(encoded).(type) {
		case engine.Variable:
			if !plainBound {
				// Without this guard Encoded would be silently bound to the
				// empty atom while Decoded stays unbound.
				return engine.Error(fmt.Errorf("uri_encoded/3: decoded and encoded are both unbound, at least one should be an Atom"))
			}
			return engine.Unify(vm, encoded, util.StringToTerm(comp.Escape(plain)), cont, env)
		case engine.Atom:
			unescaped, err := comp.Unescape(e.String())
			if err != nil {
				return engine.Error(fmt.Errorf("uri_encoded/3: %w", err))
			}
			return engine.Unify(vm, decoded, util.StringToTerm(unescaped), cont, env)
		default:
			return engine.Error(fmt.Errorf("uri_encoded/3: invalid encoded type: %T, should be Variable or Atom", e))
		}
	})
}
219 changes: 219 additions & 0 deletions x/logic/predicate/uri_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,219 @@
//nolint:gocognit,lll
package predicate

import (
"fmt"
"testing"

tmdb "github.com/cometbft/cometbft-db"
"github.com/cometbft/cometbft/libs/log"
tmproto "github.com/cometbft/cometbft/proto/tendermint/types"
"github.com/cosmos/cosmos-sdk/store"
sdk "github.com/cosmos/cosmos-sdk/types"
"github.com/ichiban/prolog/engine"
"github.com/okp4/okp4d/x/logic/testutil"
"github.com/okp4/okp4d/x/logic/types"

. "github.com/smartystreets/goconvey/convey"
)

// TestURIEncoded runs a table of Prolog queries against the uri_encoded/3
// predicate and checks, for each one, either the variable bindings of the
// solutions or the error reported while iterating over them.
func TestURIEncoded(t *testing.T) {
Convey("Given a test cases", t, func() {
// program: Prolog source compiled before the query (left empty by every case below).
// query: the goal submitted to the interpreter.
// wantResult: expected bindings, one entry per solution.
// wantError: when set, the message expected from sols.Err().
// wantSuccess: whether at least one solution is expected.
cases := []struct {
program string
query string
wantResult []types.TermResults
wantError error
wantSuccess bool
}{
// Unknown component name.
{
query: `uri_encoded(hey, foo, Decoded).`,
wantSuccess: false,
wantError: fmt.Errorf("uri_encoded/3: invalid component name hey, expected `query`, `fragment`, `path` or `segment`"),
},
// Decoding: the third argument is bound, the second one is a variable.
{
query: `uri_encoded(path, Decoded, foo).`,
wantSuccess: true,
wantResult: []types.TermResults{{
"Decoded": "foo",
}},
},
{
query: `uri_encoded(path, Decoded, 'foo%20bar').`,
wantSuccess: true,
wantResult: []types.TermResults{{
"Decoded": "'foo bar'",
}},
},
// Encoding: the second argument is bound, the third one is a variable.
{
query: `uri_encoded(path, foo, Encoded).`,
wantSuccess: true,
wantResult: []types.TermResults{{
"Encoded": "foo",
}},
},
{
query: `uri_encoded(query, 'foo bar', Encoded).`,
wantSuccess: true,
wantResult: []types.TermResults{{
"Encoded": "'foo%20bar'",
}},
},
// Encoding the printable ASCII range, once per component.
{
query: "uri_encoded(query, ' !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~', Encoded).",
wantSuccess: true,
wantResult: []types.TermResults{{
"Encoded": "'%20!%22%23$%25%26\\'()*%2B,-./0123456789%3A%3B%3C%3D%3E?@ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D~'",
}},
},
{
query: "uri_encoded(path, ' !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~', Encoded).",
wantSuccess: true,
wantResult: []types.TermResults{{
"Encoded": "'%20!%22%23$%25&\\'()*+,-./0123456789%3A;%3C=%3E%3F@ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D~'",
}},
},
{
query: "uri_encoded(segment, ' !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~', Encoded).",
wantSuccess: true,
wantResult: []types.TermResults{{
"Encoded": "'%20!%22%23$%25&\\'()*+,-.%2F0123456789%3A;%3C=%3E%3F@ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D~'",
}},
},
{
query: "uri_encoded(fragment, ' !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~', Encoded).",
wantSuccess: true,
wantResult: []types.TermResults{{
"Encoded": "'%20!%22%23$%25&\\'()*+,-./0123456789:;%3C=%3E?@ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D~'",
}},
},
// Decoding the same range, once per component. The query case ends with a
// literal '+', which is expected to come back unchanged.
{
query: "uri_encoded(query, Decoded, '%20!%22%23$%25%26\\'()*%2B,-./0123456789%3A%3B%3C%3D%3E?@ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D~+').",
wantSuccess: true,
wantResult: []types.TermResults{{
"Decoded": "' !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~+'",
}},
},
{
query: "uri_encoded(path, Decoded, '%20!%22%23$%25&\\'()*+,-./0123456789%3A;%3C=%3E%3F@ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D~').",
wantSuccess: true,
wantResult: []types.TermResults{{
"Decoded": "' !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~'",
}},
},
{
query: "uri_encoded(segment, Decoded, '%20!%22%23$%25&\\'()*+,-.%2F0123456789%3A;%3C=%3E%3F@ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D~').",
wantSuccess: true,
wantResult: []types.TermResults{{
"Decoded": "' !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~'",
}},
},
{
query: "uri_encoded(fragment, Decoded, '%20!%22%23$%25&\\'()*+,-./0123456789:;%3C=%3E?@ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D~').",
wantSuccess: true,
wantResult: []types.TermResults{{
"Decoded": "' !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~'",
}},
},
// Both arguments bound and matching: one solution without any binding.
{
query: "uri_encoded(query, ' !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~+', '%20!%22%23$%25%26\\'()*%2B,-./0123456789%3A%3B%3C%3D%3E?@ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D~+').",
wantSuccess: true,
wantResult: []types.TermResults{{}},
},
{
query: "uri_encoded(path, ' !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~', '%20!%22%23$%25&\\'()*+,-./0123456789%3A;%3C=%3E%3F@ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D~').",
wantSuccess: true,
wantResult: []types.TermResults{{}},
},
{
query: "uri_encoded(segment, ' !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~', '%20!%22%23$%25&\\'()*+,-.%2F0123456789%3A;%3C=%3E%3F@ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D~').",
wantSuccess: true,
wantResult: []types.TermResults{{}},
},
{
query: "uri_encoded(fragment, ' !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~', '%20!%22%23$%25&\\'()*+,-./0123456789:;%3C=%3E?@ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D~').",
wantSuccess: true,
wantResult: []types.TermResults{{}},
},
// Both arguments bound but not matching: no solution and no error.
{
query: "uri_encoded(fragment, 'foo bar', 'bar%20foo').",
wantSuccess: false,
},
// Arguments of an unsupported type.
{
query: "uri_encoded(Var, 'foo bar', 'bar%20foo').",
wantSuccess: false,
wantError: fmt.Errorf("uri_encoded/3: invalid component type: engine.Variable, should be Atom"),
},
{
query: "uri_encoded(path, compound(2), 'bar%20foo').",
wantSuccess: false,
wantError: fmt.Errorf("uri_encoded/3: invalid decoded type: *engine.compound, should be Variable or Atom"),
},
{
query: "uri_encoded(path, 'foo', compound(2)).",
wantSuccess: false,
wantError: fmt.Errorf("uri_encoded/3: invalid encoded type: *engine.compound, should be Variable or Atom"),
},
// Malformed percent sequence. In the fmt.Errorf format string each "%%"
// renders as a single '%', so the expected message contains "%%3".
{
query: "uri_encoded(path, Decoded, 'bar%%3foo').",
wantSuccess: false,
wantError: fmt.Errorf("uri_encoded/3: invalid URL escape \"%%%%3\""),
},
}
for nc, tc := range cases {
Convey(fmt.Sprintf("Given the query #%d: %s", nc, tc.query), func() {
Convey("and a context", func() {
// Each case gets its own in-memory store and SDK context.
db := tmdb.NewMemDB()
stateStore := store.NewCommitMultiStore(db)
ctx := sdk.NewContext(stateStore, tmproto.Header{}, false, log.NewNopLogger())

Convey("and a vm", func() {
// The predicate under test is registered explicitly on the interpreter.
interpreter := testutil.NewLightInterpreterMust(ctx)
interpreter.Register3(engine.NewAtom("uri_encoded"), URIEncoded)

err := interpreter.Compile(ctx, tc.program)
So(err, ShouldBeNil)

Convey("When the predicate is called", func() {
sols, err := interpreter.QueryContext(ctx, tc.query)

// QueryContext itself is expected to succeed for every case; errors
// raised by the predicate are checked through sols.Err() below.
Convey("Then the error should be nil", func() {
So(err, ShouldBeNil)
So(sols, ShouldNotBeNil)

Convey("and the bindings should be as expected", func() {
// Collect the bindings of every solution.
var got []types.TermResults
for sols.Next() {
m := types.TermResults{}
err := sols.Scan(m)
So(err, ShouldBeNil)

got = append(got, m)
}
if tc.wantError != nil {
So(sols.Err(), ShouldNotBeNil)
So(sols.Err().Error(), ShouldEqual, tc.wantError.Error())
} else {
So(sols.Err(), ShouldBeNil)

if tc.wantSuccess {
So(len(got), ShouldBeGreaterThan, 0)
So(len(got), ShouldEqual, len(tc.wantResult))
// Only the variables present in each obtained solution are compared
// with the expected bindings.
for iGot, resultGot := range got {
for varGot, termGot := range resultGot {
So(testutil.ReindexUnknownVariables(termGot), ShouldEqual, tc.wantResult[iGot][varGot])
}
}
} else {
So(len(got), ShouldEqual, 0)
}
}
})
})
})
})
})
})
}
})
}