From b74380ee2c89115fa6d3b30b2ca5851d6a0a7286 Mon Sep 17 00:00:00 2001 From: jchappelow <140431406+jchappelow@users.noreply.github.com> Date: Tue, 19 Mar 2024 10:10:48 -0500 Subject: [PATCH] engine: create a max procedure call stack depth (#609) --- common/testdata/procedures.go | 33 +++++++++++++++++ common/testdata/schema.go | 3 ++ extensions/precompiles/actions.go | 19 ++++++---- internal/engine/execution/dataset.go | 6 ++-- internal/engine/execution/execution_test.go | 38 ++++++++++++++++++++ internal/engine/execution/global.go | 1 + internal/engine/execution/procedure.go | 39 ++++++++++++++++++--- 7 files changed, 126 insertions(+), 13 deletions(-) diff --git a/common/testdata/procedures.go b/common/testdata/procedures.go index 581cc1276..f78a969b0 100644 --- a/common/testdata/procedures.go +++ b/common/testdata/procedures.go @@ -82,4 +82,37 @@ var ( "SELECT * FROM users;", }, } + + // ProcedureRecursive is a recursive procedure that should hit a max stack + // depth error before using the system's max stack memory, which is fatal. + ProcedureRecursive = &types.Procedure{ + Name: "recursive_procedure", + Args: []string{"$id", "$a", "$b"}, + Public: true, + Statements: []string{ + "recursive_procedure($id, $a, $b);", + }, + } + + // ProcedureRecursiveSneakyA is procedure that calls + // ProcedureRecursiveSneakyB, which calls ProcedureRecursiveSneakyA, which + // calls ProcedureRecursiveSneakyB, which calls... + ProcedureRecursiveSneakyA = &types.Procedure{ + Name: "recursive_procedure_a", + Args: []string{}, + Public: true, + Statements: []string{ + "recursive_procedure_b();", + }, + } + + // ProcedureRecursiveSneakyB is procedure that calls ProcedureRecursiveSneakyA. + ProcedureRecursiveSneakyB = &types.Procedure{ + Name: "recursive_procedure_b", + Args: []string{}, + Public: true, + Statements: []string{ + "recursive_procedure_a();", + }, + } ) diff --git a/common/testdata/schema.go b/common/testdata/schema.go index def0d3a45..409710a55 100644 --- a/common/testdata/schema.go +++ b/common/testdata/schema.go @@ -20,6 +20,9 @@ var TestSchema = &types.Schema{ ProcedureAdminDeleteUser, ProcedureCallsPrivate, ProcedurePrivate, + ProcedureRecursive, + ProcedureRecursiveSneakyA, + ProcedureRecursiveSneakyB, }, Extensions: []*types.Extension{}, } diff --git a/extensions/precompiles/actions.go b/extensions/precompiles/actions.go index 049e43c51..0309bebef 100644 --- a/extensions/precompiles/actions.go +++ b/extensions/precompiles/actions.go @@ -62,6 +62,10 @@ type ProcedureContext struct { Procedure string // Result is the result of the most recent SQL query. Result *sql.ResultSet + + // StackDepth tracks the current depth of the procedure call stack. It is + // incremented each time a procedure calls another procedure. + StackDepth int } // SetValue sets a value in the scope. @@ -96,15 +100,16 @@ func (p *ProcedureContext) Values() map[string]any { // NewScope creates a new procedure context for a child procedure. // It will not inherit the values or last result from the parent. -// It will inherit the dbid and procedure from the parent. +// It will inherit the dbid, procedure, and stack depth from the parent. func (p *ProcedureContext) NewScope() *ProcedureContext { return &ProcedureContext{ - Ctx: p.Ctx, - Signer: p.Signer, - Caller: p.Caller, - values: make(map[string]any), - DBID: p.DBID, - Procedure: p.Procedure, + Ctx: p.Ctx, + Signer: p.Signer, + Caller: p.Caller, + values: make(map[string]any), + DBID: p.DBID, + Procedure: p.Procedure, + StackDepth: p.StackDepth, } } diff --git a/internal/engine/execution/dataset.go b/internal/engine/execution/dataset.go index 423045547..e457cf25b 100644 --- a/internal/engine/execution/dataset.go +++ b/internal/engine/execution/dataset.go @@ -8,7 +8,7 @@ import ( ) // baseDataset is a deployed database schema. -// It implements the Dataset interface. +// It implements the precompiles.Instance interface. type baseDataset struct { // schema is the schema of the dataset. schema *common.Schema @@ -23,9 +23,11 @@ type baseDataset struct { global *GlobalContext } +var _ precompiles.Instance = (*baseDataset)(nil) + // Call calls a procedure from the dataset. // If the procedure is not public, it will return an error. -// It implements the Namespace interface. +// It satisfies precompiles.Instance. func (d *baseDataset) Call(caller *precompiles.ProcedureContext, app *common.App, method string, inputs []any) ([]any, error) { proc, ok := d.procedures[method] if !ok { diff --git a/internal/engine/execution/execution_test.go b/internal/engine/execution/execution_test.go index d5785b211..de60482f4 100644 --- a/internal/engine/execution/execution_test.go +++ b/internal/engine/execution/execution_test.go @@ -118,6 +118,44 @@ func Test_Execution(t *testing.T) { assert.Error(t, err) }, }, + { + name: "call a recursive procedure", + fn: func(t *testing.T, eng *GlobalContext) { + ctx := context.Background() + db := newDB(false) + + err := eng.CreateDataset(ctx, db, testdata.TestSchema, testdata.TestSchema.Owner) + assert.NoError(t, err) + + _, err = eng.Procedure(ctx, db, &common.ExecutionData{ + Dataset: testdata.TestSchema.DBID(), + Procedure: testdata.ProcedureRecursive.Name, + Args: []any{"id000000", "asdfasdfasdfasdf", "bigbigbigbigbigbigbigbigbigbig"}, + Signer: testdata.TestSchema.Owner, + Caller: string(testdata.TestSchema.Owner), + }) + assert.ErrorIs(t, err, ErrMaxStackDepth) + }, + }, + { + name: "call a procedure that hits max call stack depth less directly", + fn: func(t *testing.T, eng *GlobalContext) { + ctx := context.Background() + db := newDB(false) + + err := eng.CreateDataset(ctx, db, testdata.TestSchema, testdata.TestSchema.Owner) + assert.NoError(t, err) + + _, err = eng.Procedure(ctx, db, &common.ExecutionData{ + Dataset: testdata.TestSchema.DBID(), + Procedure: testdata.ProcedureRecursiveSneakyA.Name, + Args: []any{}, + Signer: testdata.TestSchema.Owner, + Caller: string(testdata.TestSchema.Owner), + }) + assert.ErrorIs(t, err, ErrMaxStackDepth) + }, + }, { name: "call a non-view action fails if not mutative; view action succeeds", fn: func(t *testing.T, eng *GlobalContext) { diff --git a/internal/engine/execution/global.go b/internal/engine/execution/global.go index 8f3452403..5cd514c4b 100644 --- a/internal/engine/execution/global.go +++ b/internal/engine/execution/global.go @@ -153,6 +153,7 @@ func (g *GlobalContext) Procedure(ctx context.Context, tx sql.DB, options *commo Caller: options.Caller, DBID: options.Dataset, Procedure: options.Procedure, + // starting with stack depth 0, increment in each action call } _, err = dataset.Call(procedureCtx, &common.App{ diff --git a/internal/engine/execution/procedure.go b/internal/engine/execution/procedure.go index dd840529f..bf88e6d22 100644 --- a/internal/engine/execution/procedure.go +++ b/internal/engine/execution/procedure.go @@ -18,10 +18,28 @@ import ( "github.com/kwilteam/kwil-db/parse/sql/tree" ) +// MaxStackDepth is the limit on the number of nested procedure calls allowed. +// This is different from the Go call stack depth, which may be much higher as +// it depends on the program design. The value 1,000 was empirically selected to +// be a call stack size of about 1MB and to provide a very high limit that no +// reasonable schema would exceed (even 100 would suggest a poorly designed +// schema). +// +// In addition to exorbitant memory required to support a call stack 1 million +// deep (>1GB), the execution of that many calls can take seconds, even if they +// do nothing else. +// +// Progressive gas metering may be used in the future to limit resources used by +// abusive recursive calls, but a hard upper limit will likely be necessary +// unless the price of an action call is extremely expensive or rises +// exponentially at each level of the call stack. +const MaxStackDepth = 1000 + var ( - ErrIncorrectNumberOfArguments = fmt.Errorf("incorrect number of arguments") - ErrPrivateProcedure = fmt.Errorf("procedure is private") - ErrMutativeProcedure = fmt.Errorf("procedure is mutative") + ErrIncorrectNumberOfArguments = errors.New("incorrect number of arguments") + ErrPrivateProcedure = errors.New("procedure is private") + ErrMutativeProcedure = errors.New("procedure is mutative") + ErrMaxStackDepth = errors.New("max call stack depth reached") ) // instruction is an instruction that can be executed. @@ -264,9 +282,21 @@ type callMethod struct { // If no namespace is specified, the local namespace is used. // It will pass all arguments to the method, and assign the return values to the receivers. func (e *callMethod) execute(scope *precompiles.ProcedureContext, global *GlobalContext, db sql.DB) error { + // This instruction is about to call into another procedure in this dataset + // or another baseDataset. Check current call stack depth first. + if scope.StackDepth >= MaxStackDepth { + // NOTE: the actual Go call stack depth can be much more (e.g. more than + // double) the procedure call depth depending on program design and the + // number of Go function calls for each procedure. As of writing, it is + // approximately double plus a handful from the caller: + // + // var pcs [4096]uintptr; fmt.Println("call stack depth", runtime.Callers(0, pcs[:])) + return ErrMaxStackDepth + } + dataset, ok := global.datasets[scope.DBID] if !ok { - return fmt.Errorf(`dataset "%s" not found`, scope.DBID) + return fmt.Errorf("%w: %s", ErrDatasetNotFound, scope.DBID) } // getting these types to match the type required by the the ultimate DML @@ -291,6 +321,7 @@ func (e *callMethod) execute(scope *precompiles.ProcedureContext, global *Global var err error newScope := scope.NewScope() + newScope.StackDepth++ // not done by NewScope since (*baseDataset).Call would do it again // if no namespace is specified, we call a local procedure. // this can access public and private procedures.