From bba63d511b4603aa7d2a209b13776f8c98407ab8 Mon Sep 17 00:00:00 2001 From: Vee Zhang Date: Wed, 7 Dec 2022 15:57:23 +0800 Subject: [PATCH] feat: support alternative indices and default value --- README.md | 13 +++- README_zh-CN.md | 18 +++++- examples/v2/basic_type_test.csv | 5 ++ examples/v2/example.yaml | 13 ++++ pkg/config/config.go | 51 +++++++++++++--- pkg/config/config_test.go | 104 +++++++++++++++++++++++++++++++- 6 files changed, 190 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index aa938337..595fb054 100644 --- a/README.md +++ b/README.md @@ -203,13 +203,20 @@ schema: index: 1 - name: gender type: string + defaultValue: "male" - name: phone type: string nullable: true - - name: wechat + - name: email type: string nullable: true nullValue: "__NULL__" + - name: address + type: string + nullable: true + alternativeIndices: + - 7 + - 8 ``` ##### `schema.vertex.vid` @@ -233,7 +240,9 @@ Each tag contains the following two properties: * `type`: **Optional**. The property type, currently `bool`, `int`, `float`, `double`, `string`, `time`, `timestamp`, `date`, `datetime`, `geography`, `geography(point)`, `geography(linestring)` and `geography(polygon)` are supported. * `index`: **Optional**. The column number in the CSV file. * `nullable`: **Optional**. Whether this prop property can be `NULL`, optional values is `true` or `false`, default `false`. - * `nullValue`: **Optional**. If `nullable` is set to `true`, the property will be set to `NULL` when the value is equal to `nullValue`, default `""`. + * `nullValue`: **Optional**. Ignored when `nullable` is `false`. The property is set to `NULL` when the value is equal to `nullValue`, default `""`. + * `alternativeIndices`: **Optional**. Ignored when `nullable` is `false`. When the value at `index` is equal to `nullValue`, the columns listed here are tried in order and the first value that is not equal to `nullValue` is used. + * `defaultValue`: **Optional**. Ignored when `nullable` is `false`. 
The value assigned to the property when every value read through `index` and `alternativeIndices` is equal to `nullValue`. > **NOTE**: The properties in the preceding `prop` parameter must be sorted in the **same** way as in the CSV data file. diff --git a/README_zh-CN.md b/README_zh-CN.md index 3539a58e..b0ee1b2c 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -177,6 +177,20 @@ schema: index: 1 - name: gender type: string + defaultValue: "male" + - name: phone + type: string + nullable: true + - name: email + type: string + nullable: true + nullValue: "__NULL__" + - name: address + type: string + nullable: true + alternativeIndices: + - 7 + - 8 ``` ##### `schema.vertex.vid` @@ -199,7 +213,9 @@ schema: - `type`:**必填**。属性类型,目前支持 `bool`、`int`、`float`、`double`、`timestamp`、`string`、`geography`、`geography(point)`、`geography(linestring)`和`geography(polygon)` 几种类型。 - `index`:**可选**。在 CSV 文件中的列标。 - `nullable`:**可选**。此属性是否可以为 `NULL`,可选 `true` 或者 `false`,默认值为 `false` 。 - - `nullValue`:**可选**。如果 `nullable` 设置为 `true`,则当值等于 `nullValue` 的时候属性将被设置为 `NULL` ,默认值为 `""`。 + - `nullValue`:**可选**。当 `nullable` 为 `false` 时被忽略。当值等于 `nullValue` 的时候属性将被设置为 `NULL` ,默认值为 `""`。 + - `alternativeIndices`:**可选**。当 `nullable` 为 `false` 时被忽略。该属性根据索引顺序从 csv 中获取,直到不等于 `nullValue`。 + - `defaultValue`:**可选**。当 `nullable` 为 `false` 时被忽略。属性默认值,当根据 `index` 和 `alternativeIndices` 获取的所有值为 `nullValue` 时设置默认值。 > **注意**:上述 `props` 中的属性描述**顺序**必须同数据文件中的对应数据排列顺序一致。 diff --git a/examples/v2/basic_type_test.csv b/examples/v2/basic_type_test.csv index 2961ca8f..008124a9 100644 --- a/examples/v2/basic_type_test.csv +++ b/examples/v2/basic_type_test.csv @@ -5,3 +5,8 @@ b4,false,3,2.0,3.3,0a bd b5,true,-3,2,3,abcd efg bnull1,,,,, bnull2,,,,,__NULL__ +bnull3,,-4,4,4.4, +bnull4,,-4,,4.4, +bnull5,,,4,, +bnull6,,,4,4.4, +bnull7,,,,4.4, diff --git a/examples/v2/example.yaml b/examples/v2/example.yaml index 292596f0..687b6a65 100644 --- a/examples/v2/example.yaml +++ b/examples/v2/example.yaml @@ -484,14 +484,21 @@ files: type: 
int index: 2 nullable: true + defaultValue: "0" - name: f type: float index: 3 nullable: true + alternativeIndices: + - 2 - name: d type: double index: 4 nullable: true + alternativeIndices: + - 3 + - 2 + defaultValue: "0" - name: s type: string index: 5 @@ -528,10 +535,16 @@ files: type: float index: 3 nullable: true + alternativeIndices: + - 2 + defaultValue: "0" - name: d type: double index: 4 nullable: true + alternativeIndices: + - 3 + - 2 - name: s type: string index: 5 diff --git a/pkg/config/config.go b/pkg/config/config.go index ccfad26e..4ed3dbc5 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -51,11 +51,13 @@ type NebulaClientSettings struct { } type Prop struct { - Name *string `json:"name" yaml:"name"` - Type *string `json:"type" yaml:"type"` - Index *int `json:"index" yaml:"index"` - Nullable bool `json:"nullable" yaml:"nullable"` - NullValue string `json:"nullValue" yaml:"nullValue"` + Name *string `json:"name" yaml:"name"` + Type *string `json:"type" yaml:"type"` + Index *int `json:"index" yaml:"index"` + Nullable bool `json:"nullable" yaml:"nullable"` + NullValue string `json:"nullValue" yaml:"nullValue"` + AlternativeIndices []int `json:"alternativeIndices" yaml:"alternativeIndices"` + DefaultValue *string `json:"defaultValue" yaml:"defaultValue"` } type VID struct { @@ -832,12 +834,12 @@ func (p *Prop) IsGeographyType() bool { } func (p *Prop) FormatValue(record base.Record) (string, error) { - if p.Index != nil && *p.Index >= len(record) { - return "", fmt.Errorf("Prop index %d out range %d of record(%v)", *p.Index, len(record), record) + r, isNull, err := p.getValue(record) + if err != nil { + return "", err } - r := record[*p.Index] - if p.Nullable && r == p.NullValue { - return dbNULL, nil + if isNull { + return r, err } if p.IsStringType() { return fmt.Sprintf("%q", r), nil @@ -856,6 +858,35 @@ func (p *Prop) FormatValue(record base.Record) (string, error) { return r, nil } +func (p *Prop) getValue(record base.Record) 
(string, bool, error) { + if p.Index != nil && *p.Index >= len(record) { + return "", false, fmt.Errorf("Prop index %d out range %d of record(%v)", *p.Index, len(record), record) + } + + r := record[*p.Index] + if !p.Nullable { + return r, false, nil + } + + if r != p.NullValue { + return r, false, nil + } + + for _, idx := range p.AlternativeIndices { + if idx >= len(record) { + return "", false, fmt.Errorf("Prop index %d out range %d of record(%v)", idx, len(record), record) + } + r = record[idx] + if r != p.NullValue { + return r, false, nil + } + } + if p.DefaultValue != nil { + return *p.DefaultValue, false, nil + } + return dbNULL, true, nil +} + func (p *Prop) String(prefix string) string { return fmt.Sprintf("%s.%s:%s", prefix, *p.Name, *p.Type) } diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index 45e235d4..53b4be59 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -582,6 +582,7 @@ func TestPropFormatValue(t *testing.T) { var ( idx0 = 0 idx1 = 1 + vZero = "0" tBool = "bool" tInt = "int" tFloat = "float" @@ -898,6 +899,107 @@ func TestPropFormatValue(t *testing.T) { record: base.Record{""}, want: dbNULL, }, + { + name: "alternative indices 0", + prop: Prop{ + Index: &idx0, + Type: &tInt, + Nullable: true, + AlternativeIndices: []int{}, + }, + record: base.Record{""}, + want: dbNULL, + }, + { + name: "alternative indices 1 out range", + prop: Prop{ + Index: &idx0, + Type: &tInt, + Nullable: true, + AlternativeIndices: []int{1}, + }, + record: base.Record{""}, + wantErrString: "out range", + }, + { + name: "alternative indices 1 use index", + prop: Prop{ + Index: &idx0, + Type: &tInt, + Nullable: true, + AlternativeIndices: []int{1}, + }, + record: base.Record{"1"}, + want: "1", + }, + { + name: "alternative indices 1 null", + prop: Prop{ + Index: &idx0, + Type: &tInt, + Nullable: true, + AlternativeIndices: []int{1}, + }, + record: base.Record{"", ""}, + want: dbNULL, + }, + { + name: "alternative indices 1 not 
null", + prop: Prop{ + Index: &idx0, + Type: &tInt, + Nullable: true, + AlternativeIndices: []int{2}, + }, + record: base.Record{"", "1", "2"}, + want: "2", + }, + { + name: "alternative indices n not null", + prop: Prop{ + Index: &idx0, + Type: &tInt, + Nullable: true, + AlternativeIndices: []int{3, 2, 1}, + }, + record: base.Record{"", "1", "2", ""}, + want: "2", + }, + { + name: "default value not nullable", + prop: Prop{ + Index: &idx0, + Type: &tInt, + Nullable: false, + AlternativeIndices: []int{1}, + DefaultValue: &vZero, + }, + record: base.Record{"", "1", "2"}, + want: "", + }, + { + name: "default value nullable", + prop: Prop{ + Index: &idx0, + Type: &tInt, + Nullable: true, + DefaultValue: &vZero, + }, + record: base.Record{""}, + want: "0", + }, + { + name: "default value nullable alternative indices", + prop: Prop{ + Index: &idx0, + Type: &tInt, + Nullable: true, + AlternativeIndices: []int{1, 2, 3, 4, 5, 6}, + DefaultValue: &vZero, + }, + record: base.Record{"", "", "", "", "", "", ""}, + want: "0", + }, } for _, tc := range testcases { t.Run(tc.name, func(t *testing.T) { @@ -908,7 +1010,7 @@ func TestPropFormatValue(t *testing.T) { ast.Contains(err.Error(), tc.wantErrString) } else { ast.NoError(err) - ast.Contains(str, tc.want) + ast.Equal(str, tc.want) } }) }