Modify interface and implementation of BulkInsertWithAssigningIDs #53

Open · wants to merge 12 commits into base: epic-assigning-ids
83 changes: 27 additions & 56 deletions bulk_insert.go
@@ -25,79 +25,58 @@ import (
func BulkInsert(db *gorm.DB, objects []interface{}, chunkSize int, excludeColumns ...string) error {
// Split records with specified size not to exceed Database parameter limit
for _, objSet := range splitObjects(objects, chunkSize) {
if err := insertObjSet(db, objSet, excludeColumns...); err != nil {
_, err := insertObjSet(db, objSet, excludeColumns...)
if err != nil {
return err
}
}
return nil
}

// BulkInsertWithAssigningIDs executes the query to insert multiple records at once.
// it will scan the result of `returning id` or `returning *` to [returnedValue] after every insert.
// it's necessary to set "gorm:insert_option"="returning id" in *gorm.DB
//
// [returnedValue] slice of primary_key or model, must be a *[]uint(for integer), *[]string(for uuid), *[]struct(for `returning *`)
// BulkInsertWithReturningValues executes the query to insert multiple records at once.
// This will scan the returned values into `returnedVals` after every insert.
// It's necessary to set "gorm:insert_option" so that a "returning" query is executed.
//
// [objects] must be a slice of structs.
//
// [returnedVals] must be a pointer to a slice of structs. Struct fields must correspond to the returned columns.
//
// [chunkSize] is the number of variables embedded in a single query. It prevents the error that occurs when
// embedding too many variables at once and exceeding the prepared statement limit. A larger size normally leads to better performance; in most cases 2000 to 3000 is reasonable.
//
// [excludeColumns] is the column names to exclude from the insert.
func BulkInsertWithAssigningIDs(db *gorm.DB, returnedValue interface{}, objects []interface{}, chunkSize int, excludeColumns ...string) error {
typ := reflect.TypeOf(returnedValue)
if typ.Kind() != reflect.Ptr || typ.Elem().Kind() != reflect.Slice {
return errors.New("returningId must be a slice ptr")
func BulkInsertWithReturningValues(db *gorm.DB, objects []interface{}, returnedVals interface{}, chunkSize int, excludeColumns ...string) error {
typ := reflect.TypeOf(returnedVals)
if typ.Kind() != reflect.Ptr || typ.Elem().Kind() != reflect.Slice || typ.Elem().Elem().Kind() != reflect.Struct {
return errors.New("returnedVals must be a pointer to a slice of struct")

Review comment:

we can also accept *[]uint I think, it will be more useful.

@t-tiger (Owner, Nov 9, 2021):

I'm not a fan of accepting a different type in a single method; rather, I'd like to have a dedicated method if we accept a slice.

The reason I changed it not to accept a slice is that the returning statement is not only for the PK; we can get values with multiple fields. Therefore, I thought we should support a wide range of use cases. However, I guess one of the major use cases is the "returning id" that you proposed in the previous PR, so I can re-consider having a dedicated method like this:

func BulkInsertWithReturningIDs(db *gorm.DB, objects []interface{}, ids interface{}, chunkSize int, excludeColumns ...string) error

// Or should we rename it to use "PK" instead of "ID"? But PK is not always a single column🤔
func BulkInsertWithReturningPKs(db *gorm.DB, objects []interface{}, pks interface{}, chunkSize int, excludeColumns ...string) error

What do you think?

Reply:

how about

func BulkInsertPluckingReturning(db *gorm.DB, objects []interface{}, column string, columnData interface{}, chunkSize int, excludeColumns ...string) error

@t-tiger (Owner, Nov 16, 2021):

Hmm, I feel BulkInsertWithReturningIDs seems better. If the caller wants to get values other than the id column, it is still able to use BulkInsertWithReturningValues. If you agree with this, I'll implement it as soon as possible and merge it.

Reply:

good idea
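For reference, a rough sketch of what that dedicated BulkInsertWithReturningIDs method might look like (hypothetical, not part of this diff), reusing splitObjects, insertObjSet, and the Pluck-based pattern from the code removed below:

func BulkInsertWithReturningIDs(db *gorm.DB, objects []interface{}, ids interface{}, chunkSize int, excludeColumns ...string) error {
	// Hypothetical sketch only; error messages and behavior are illustrative.
	typ := reflect.TypeOf(ids)
	if typ.Kind() != reflect.Ptr || typ.Elem().Kind() != reflect.Slice {
		return errors.New("ids must be a pointer to a slice")
	}
	if len(objects) == 0 {
		return nil
	}

	// Ask gorm for the primary key column and request it back from the insert.
	pk := db.NewScope(objects[0]).PrimaryKey()
	db = db.Set("gorm:insert_option", "RETURNING "+pk)

	allIDs := reflect.Indirect(reflect.ValueOf(ids))
	for _, objSet := range splitObjects(objects, chunkSize) {
		insertedDB, err := insertObjSet(db, objSet, excludeColumns...)
		if err != nil {
			return err
		}
		// Pluck the returned primary keys for this chunk, then append them to the caller's slice.
		chunkIDs := reflect.New(allIDs.Type())
		if err := insertedDB.Pluck(pk, chunkIDs.Interface()).Error; err != nil {
			return err
		}
		allIDs.Set(reflect.AppendSlice(allIDs, chunkIDs.Elem()))
	}
	return nil
}

A caller would then pass, for example, a *[]uint as ids.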

}

allIds := reflect.Indirect(reflect.ValueOf(returnedValue))
typ = allIds.Type()

// Deference value of slice
valueTyp := typ.Elem()
for valueTyp.Kind() == reflect.Ptr {
valueTyp = valueTyp.Elem()
}
refDst := reflect.Indirect(reflect.ValueOf(returnedVals))
typ = refDst.Type()

// Split records with specified size not to exceed Database parameter limit
for _, objSet := range splitObjects(objects, chunkSize) {
returnValueSlice := reflect.New(typ)
var scanReturningId func(*gorm.DB) error
switch valueTyp.Kind() {
case reflect.Struct:
// If user want to scan `returning *` with returnedValue=[]struct{...}
scanReturningId = func(db *gorm.DB) error {
return db.Scan(returnValueSlice.Interface()).Error
}
default:
// If user want to scan primary key `returning pk` with returnedValue=[]struct{...}
pk := db.NewScope(objects[0]).PrimaryKey()
scanReturningId = func(db *gorm.DB) error {
return db.Pluck(pk, returnValueSlice.Interface()).Error
}
db, err := insertObjSet(db, objSet, excludeColumns...)
if err != nil {
return err
}

if err := insertObjSetWithCallback(db, objSet, scanReturningId, excludeColumns...); err != nil {
scanned := reflect.New(typ)
if err := db.Scan(scanned.Interface()).Error; err != nil {
return err
}

allIds.Set(reflect.AppendSlice(allIds, returnValueSlice.Elem()))
refDst.Set(reflect.AppendSlice(refDst, scanned.Elem()))
}
return nil
}

func insertObjSet(db *gorm.DB, objects []interface{}, excludeColumns ...string) error {
return insertObjSetWithCallback(db, objects, nil, excludeColumns...)
}

func insertObjSetWithCallback(db *gorm.DB, objects []interface{}, postInsert func(*gorm.DB) error, excludeColumns ...string) error {
func insertObjSet(db *gorm.DB, objects []interface{}, excludeColumns ...string) (*gorm.DB, error) {
if len(objects) == 0 {
return nil
return db, nil
}

firstAttrs, err := extractMapValue(objects[0], excludeColumns)
if err != nil {
return err
return db, err
}

attrSize := len(firstAttrs)
@@ -116,12 +95,12 @@ func insertObjSetWithCallback(db *gorm.DB, objects []interface{}, postInsert fun
for _, obj := range objects {
objAttrs, err := extractMapValue(obj, excludeColumns)
if err != nil {
return err
return db, err
}

// If object sizes are different, SQL statement loses consistency
if len(objAttrs) != attrSize {
return errors.New("attribute sizes are inconsistent")
return db, errors.New("attribute sizes are inconsistent")
}

scope := db.NewScope(obj)
@@ -144,7 +123,7 @@ func insertObjSetWithCallback(db *gorm.DB, objects []interface{}, postInsert fun
if val, ok := db.Get("gorm:insert_option"); ok {
strVal, ok := val.(string)
if !ok {
return errors.New("gorm:insert_option should be a string")
return db, errors.New("gorm:insert_option should be a string")
}
insertOption = strVal
}
@@ -157,18 +136,10 @@ func insertObjSetWithCallback(db *gorm.DB, objects []interface{}, postInsert fun
))

db = db.Raw(mainScope.SQL, mainScope.SQLVars...)

if err := db.Error; err != nil {
return err
}

if postInsert != nil {
if err := postInsert(db); err != nil {
return err
}
return db, err
}

return nil
return db, nil
}

// Obtain columns and values required for insert from interface
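For context, caller-side usage of the new BulkInsertWithReturningValues would look roughly like the following. This is a minimal sketch, written as if inside the package; the Book type, column list, and chunk size are illustrative and assume a dialect that supports RETURNING (e.g. PostgreSQL):

type Book struct {
	ID    uint `gorm:"primary_key;auto_increment"`
	Title string
}

func insertBooks(db *gorm.DB, titles []string) ([]Book, error) {
	objects := make([]interface{}, 0, len(titles))
	for _, title := range titles {
		objects = append(objects, Book{Title: title})
	}

	// The caller must set "gorm:insert_option" so the generated INSERT ends with a RETURNING clause.
	db = db.Set("gorm:insert_option", "RETURNING id, title")

	// Scan the RETURNING results into a slice of structs whose fields match the returned columns.
	var returned []Book
	if err := BulkInsertWithReturningValues(db, objects, &returned, 3000); err != nil {
		return nil, err
	}
	return returned, nil
}

Note that the package only appends the insert option to the generated SQL; whether RETURNING is valid depends on the dialect in use.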
128 changes: 50 additions & 78 deletions bulk_insert_test.go
@@ -26,6 +26,55 @@ type fakeTable struct {
UpdatedAt time.Time
}

func TestBulkInsertWithReturningValues(t *testing.T) {
Collaborator:

Still think this test case should be simplified, without column options and fields that are not needed.

@t-tiger (Owner):

You mean we don't need insert_option, related to #53 (comment)? Sorry, I could not understand very well.

Collaborator:

It's a super nit, but I just mean the test was copied from the one testing custom field tags, so this test is also using the field tag gorm:"column:ThisIsCamelCase". I think that's confusing since we already test that feature in a different test; this test could use a simpler struct.

type Table struct {
	ID        uint `gorm:"primary_key;auto_increment"`
	ColumnOne string
	ColumnTwo string
}

And then just refer to the default column names column_one and column_two.

type Table struct {
ID uint `gorm:"primary_key;auto_increment"`
RegularColumn string
Custom string `gorm:"column:ThisIsCamelCase"`
}

db, mock, err := sqlmock.New()
require.NoError(t, err)

defer db.Close()

gdb, err := gorm.Open("mysql", db)
require.NoError(t, err)

mock.ExpectQuery(
"INSERT INTO `tables` \\(`ThisIsCamelCase`, `regular_column`\\)",
Comment on lines +43 to +44

@t-tiger (Owner):

I'd like to test with actual DBs using Docker containers as a follow-up task.

Collaborator:

Personally I don't think that's necessary, since this supports multiple dialects, so it would require multiple containers. Also, the only goal for this project is to generate SQL, not to handle or parse the dialect nor handle connections.

An example directory with a docker-compose file starting the DBMSs to use as an integration test might make sense, but I don't think the unit tests should rely on that.

@t-tiger (Owner):

I have been testing manually to see if it works in various cases, like ID being int or string, composite PKs, and so on. But when I make a change like this, I feel it's inconvenient without automated tests.

"An example directory with a docker-compose file starting the DBMSs to use as an integration test might make sense, but I don't think the unit tests should rely on that."

Can I ask why we should not use DBMSs for unit tests? With docker-compose we can easily test against real DBs, and since there are only two public methods, I don't feel separate integration tests are necessary.

Collaborator:

I'm just used to unit tests being small and quick, testing the implementation but not the dependencies. But it's easy enough to use Docker for unit tests as well, so if you feel it makes sense, go for it!

Are you planning on testing all DBMSs that Gorm supports or only a subset?

@t-tiger (Owner, Nov 11, 2021):

"Are you planning on testing all DBMSs that Gorm supports or only a subset?"

I'm planning to test with MySQL and PostgreSQL, since those are what most users are likely to use, I guess.

).WithArgs(
"first custom", "first regular",
"second custom", "second regular",
).WillReturnRows(
sqlmock.NewRows([]string{"id", "ThisIsCamelCase", "regular_column"}).
AddRow(1, "first custom", "first regular").
AddRow(2, "second custom", "second regular"),
)

var returnedVals []Table
obj := []interface{}{
Table{
RegularColumn: "first regular",
Custom: "first custom",
},
Table{
RegularColumn: "second regular",
Custom: "second custom",
},
}

gdb = gdb.Set("gorm:insert_option", "RETURNING id, ThisIsCamelCase, regular_column")
err = BulkInsertWithReturningValues(gdb, obj, &returnedVals, 1000)
require.NoError(t, err)

expected := []Table{
{ID: 1, RegularColumn: "first regular", Custom: "first custom"},
{ID: 2, RegularColumn: "second regular", Custom: "second custom"},
}
assert.Equal(t, expected, returnedVals)
}
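
Following up on the docker-compose discussion in the thread above, an opt-in test against a real PostgreSQL instance could look roughly like this. It is only a sketch: the POSTGRES_DSN environment variable, the Item model, and the extra imports ("os" and the gorm postgres dialect) are assumptions, not part of this PR:

func TestBulkInsertWithReturningValues_Postgres(t *testing.T) {
	// Opt-in integration test: skipped unless a DSN is provided, e.g. by a docker-compose setup.
	dsn := os.Getenv("POSTGRES_DSN")
	if dsn == "" {
		t.Skip("POSTGRES_DSN not set; skipping integration test")
	}

	// Requires the dialect import: _ "github.com/jinzhu/gorm/dialects/postgres"
	gdb, err := gorm.Open("postgres", dsn)
	require.NoError(t, err)
	defer gdb.Close()

	type Item struct {
		ID   uint `gorm:"primary_key;auto_increment"`
		Name string
	}
	require.NoError(t, gdb.AutoMigrate(&Item{}).Error)

	objects := []interface{}{
		Item{Name: "first"},
		Item{Name: "second"},
	}

	var returned []Item
	gdb = gdb.Set("gorm:insert_option", "RETURNING id, name")
	require.NoError(t, BulkInsertWithReturningValues(gdb, objects, &returned, 1000))

	require.Len(t, returned, 2)
	assert.NotZero(t, returned[0].ID)
	assert.Equal(t, "first", returned[0].Name)
}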

func Test_extractMapValue(t *testing.T) {
collectKeys := func(val map[string]interface{}) []string {
keys := make([]string, 0, len(val))
@@ -99,7 +148,7 @@ func Test_insertObject(t *testing.T) {
sqlmock.NewResult(1, 1),
)

err = insertObjSet(gdb, []interface{}{
_, err = insertObjSet(gdb, []interface{}{
Table{
RegularColumn: "first regular",
Custom: "first custom",
@@ -113,59 +162,6 @@
require.NoError(t, err)
}

func Test_insertObjSetWithCallback(t *testing.T) {
type Table struct {
ID uint `gorm:"primary_key;auto_increment"`
RegularColumn string
Custom string `gorm:"column:ThisIsCamelCase"`
}

db, mock, err := sqlmock.New()
require.NoError(t, err)

defer db.Close()

gdb, err := gorm.Open("mysql", db)
require.NoError(t, err)

mock.ExpectQuery(
"INSERT INTO `tables` \\(`ThisIsCamelCase`, `regular_column`\\)",
).WithArgs(
"first custom", "first regular",
"second custom", "second regular",
).WillReturnRows(
sqlmock.NewRows([]string{"id"}).AddRow(1).AddRow(2),
)

returningIdScope := func(db *gorm.DB) *gorm.DB {
return db.Set("gorm:insert_option", "returning id")
}

err = insertObjSetWithCallback(gdb.Scopes(returningIdScope), []interface{}{
Table{
RegularColumn: "first regular",
Custom: "first custom",
},
Table{
RegularColumn: "second regular",
Custom: "second custom",
},
}, func(db *gorm.DB) error {
var ids []uint
if err := db.Pluck("id", &ids).Error; err != nil {
return err
}
require.Len(t, ids, 2, "must return 2 ids")
return nil
})

if err != nil {
t.Fatal(err)
}

require.NoError(t, err)
}

func Test_fieldIsAutoIncrement(t *testing.T) {
type explicitSetTable struct {
ID int `gorm:"column:id;auto_increment"`
@@ -195,27 +191,3 @@ func Test_fieldIsAutoIncrement(t *testing.T) {
}
}
}

func Test_fieldIsPrimaryAndBlank(t *testing.T) {
type notPrimaryTable struct {
Dummy int
}
type primaryKeyTable struct {
ID int `gorm:"column:id;primary_key"`
}

cases := []struct {
Value interface{}
Expected bool
}{
{notPrimaryTable{Dummy: 0}, false},
{notPrimaryTable{Dummy: 1}, false},
{primaryKeyTable{ID: 0}, true},
{primaryKeyTable{ID: 1}, false},
}
for _, c := range cases {
for _, field := range (&gorm.Scope{Value: c.Value}).Fields() {
assert.Equal(t, fieldIsPrimaryAndBlank(field), c.Expected)
}
}
}