-
Notifications
You must be signed in to change notification settings - Fork 78
/
Copy pathmain.go
226 lines (202 loc) · 6.82 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
// Copyright (C) 2019-2025 vdaas.org vald team <vald@vdaas.org>
//
// Licensed under the Apache License, Version 2.0 (the "License");
// You may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
	"context"
	"encoding/json"
	"flag"
	"fmt"
	"time"

	"github.com/kpango/fuid"
	"github.com/kpango/glg"
	"github.com/vdaas/vald-client-go/v1/payload"
	"github.com/vdaas/vald-client-go/v1/vald"
	"gonum.org/v1/hdf5"
	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"
)
// Fixed workload sizes used by the example run.
const (
	// removeCount is the number of ids the remove phase iterates over.
	removeCount = 200
	// testCount is the number of test vectors the search phase iterates over.
	testCount = 20
)
// Run-time settings. Each one is bound to a command-line flag in init.
var (
	// grpcServerAddr is the target of the gRPC connection ("-addr");
	// datasetPath is the HDF5 file passed to load ("-path").
	grpcServerAddr, datasetPath string

	// indexingWaitSeconds is how long main sleeps after inserting ("-wait");
	// insertCount is how many training vectors main inserts ("-insert").
	indexingWaitSeconds, insertCount uint
)
func init() {
/**
Path option specifies hdf file by path. Default value is `fashion-mnist-784-euclidean.hdf5`.
Addr option specifies grpc server address. Default value is `127.0.0.1:8081`.
Insert option specifies insert count. Default value is `400`.
Wait option specifies indexing wait time (in seconds). Default value is `60`.
**/
flag.StringVar(&datasetPath, "path", "fashion-mnist-784-euclidean.hdf5", "dataset path")
flag.StringVar(&grpcServerAddr, "addr", "localhost:8081", "gRPC server address")
flag.UintVar(&insertCount, "insert", 400, "insert count")
flag.UintVar(&indexingWaitSeconds, "wait", 60, "indexing wait seconds")
flag.Parse()
}
// main runs the example end to end against a Vald cluster: it loads the
// dataset, inserts training vectors, waits for indexing, searches with test
// vectors, removes some of the inserted vectors and finally flushes.
//
// The insert, search and remove counts are clamped to the amount of data that
// is actually available, so a small dataset or a small -insert value cannot
// cause an out-of-range slice or a remove of an id that was never inserted.
func main() {
	/**
	Gets training data, test data and ids based on the dataset path.
	the number of ids is equal to that of training dataset.
	**/
	ids, train, test, err := load(datasetPath)
	if err != nil {
		glg.Fatal(err)
	}

	// Never insert more vectors than the dataset provides.
	inserts := len(ids)
	if insertCount < uint(inserts) {
		inserts = int(insertCount)
	}
	// Never search with more vectors than the test set provides.
	searches := testCount
	if len(test) < searches {
		searches = len(test)
	}
	// Only ids that were inserted in this run can be removed.
	removes := removeCount
	if inserts < removes {
		removes = inserts
	}

	ctx := context.Background()

	// Create a gRPC connection to the Vald cluster.
	conn, err := grpc.NewClient(grpcServerAddr, grpc.WithTransportCredentials(insecure.NewCredentials()))
	if err != nil {
		glg.Fatal(err)
	}
	// Release the connection when main returns.
	defer conn.Close()

	// Creates Vald client for gRPC.
	client := vald.NewValdClient(conn)

	glg.Infof("Start Inserting %d training vector to Vald", inserts)
	// Insert the example vectors into the Vald cluster.
	for i := 0; i < inserts; i++ {
		// Calls `Insert` function of Vald client.
		// Sends set of vector and id to server via gRPC.
		_, err := client.Insert(ctx, &payload.Insert_Request{
			Vector: &payload.Object_Vector{
				Id:     ids[i],
				Vector: train[i],
			},
			Config: &payload.Insert_Config{
				SkipStrictExistCheck: true,
			},
		})
		if err != nil {
			glg.Fatal(err)
		}
		// Report progress with the number of vectors actually inserted so far.
		if done := i + 1; done%10 == 0 || done == inserts {
			glg.Infof("Inserted: %d", done)
		}
	}
	glg.Info("Finish Inserting dataset. \n\n")

	// Vald starts indexing automatically after insert. It needs to wait until the indexing is completed before a search action is performed.
	wt := time.Duration(indexingWaitSeconds) * time.Second
	glg.Infof("Wait %s for indexing to finish", wt)
	time.Sleep(wt)

	/**
	Gets approximate vectors, which is based on the value of `SearchConfig`, from the indexed tree based on the training data.
	In this example, Vald gets 10 approximate vectors each search vector.
	**/
	glg.Infof("Start searching %d times", searches)
	for i, vec := range test[:searches] {
		// Send searching vector and configuration object to the Vald server via gRPC.
		res, err := client.Search(ctx, &payload.Search_Request{
			Vector: vec,
			// Conditions for hitting the search.
			Config: &payload.Search_Config{
				Num:     10,        // the number of search results
				Radius:  -1,        // Radius is used to determine the space of search candidate radius for neighborhood vectors. -1 means infinite circle.
				Epsilon: 0.1,       // Epsilon is used to determines how much to expand from search candidate radius.
				Timeout: 100000000, // Timeout is used for search time deadline. The unit is nano-seconds (100000000ns = 100ms).
			},
		})
		if err != nil {
			glg.Fatal(err)
		}

		// The JSON rendering is only for display; report an encoding failure
		// instead of silently printing an empty result.
		b, err := json.MarshalIndent(res.GetResults(), "", " ")
		if err != nil {
			glg.Errorf("%d - failed to encode results: %v", i+1, err)
		} else {
			glg.Infof("%d - Results : %s\n\n", i+1, string(b))
		}
		time.Sleep(1 * time.Second)
	}
	glg.Infof("Finish searching %d times", searches)

	glg.Info("Start removing vector")
	// Remove indexed vectors from the Vald cluster.
	for i := 0; i < removes; i++ {
		// Call `Remove` function of Vald client.
		// Sends id to server via gRPC.
		_, err := client.Remove(ctx, &payload.Remove_Request{
			Id: &payload.Object_ID{
				Id: ids[i],
			},
		})
		if err != nil {
			glg.Fatal(err)
		}
		// Report progress with the number of vectors actually removed so far.
		if done := i + 1; done%10 == 0 || done == removes {
			glg.Infof("Removed: %d", done)
		}
	}
	glg.Info("Finish removing vector")

	glg.Info("Start flushing vector")
	if _, err := client.Flush(ctx, &payload.Flush_Request{}); err != nil {
		glg.Fatal(err)
	}
	glg.Info("Finish flushing vector")
}
// load reads the "train" and "test" datasets from the HDF5 file at path and
// creates one id per training vector.
//
// It returns the ids (one fuid.String() value per training row, so
// len(ids) == len(train)), the training vectors and the test vectors.
// On failure all three results are nil and err describes the failing step.
func load(path string) (ids []string, train, test [][]float32, err error) {
	f, err := hdf5.OpenFile(path, hdf5.F_ACC_RDONLY)
	if err != nil {
		return nil, nil, nil, fmt.Errorf("opening hdf5 file %q: %w", path, err)
	}
	defer f.Close()

	// readFn reads the dataset with the given name and returns it as rows of
	// float32 values.
	readFn := func(name string) ([][]float32, error) {
		// Opens the named dataset; it is closed when readFn returns.
		d, err := f.OpenDataset(name)
		if err != nil {
			return nil, fmt.Errorf("opening dataset %q: %w", name, err)
		}
		defer d.Close()

		// Space returns the dataspace of the dataset.
		// NOTE(review): guarded against nil because a nil dataspace would
		// make the calls below fail — confirm against the hdf5 package.
		sp := d.Space()
		if sp == nil {
			return nil, fmt.Errorf("dataset %q has no dataspace", name)
		}
		defer sp.Close()

		// SimpleExtentDims returns dataspace dimension size and maximum size.
		dims, _, err := sp.SimpleExtentDims()
		if err != nil {
			return nil, fmt.Errorf("reading dimensions of dataset %q: %w", name, err)
		}
		// The conversion below needs exactly (rows, columns).
		if len(dims) != 2 {
			return nil, fmt.Errorf("dataset %q has %d dimensions, want 2", name, len(dims))
		}
		row, dim := int(dims[0]), int(dims[1])

		// Gets the stored vector. All are represented as one-dimensional arrays.
		// The type of the slice depends on your dataset.
		// For fashion-mnist-784-euclidean.hdf5, the datatype is float32.
		flat := make([]float32, sp.SimpleExtentNPoints())
		if err := d.Read(&flat); err != nil {
			return nil, fmt.Errorf("reading dataset %q: %w", name, err)
		}
		// Make sure every row slice taken below is in range.
		if len(flat) != row*dim {
			return nil, fmt.Errorf("dataset %q has %d values, want %d x %d", name, len(flat), row, dim)
		}

		// Converts the one-dimensional array to a two-dimensional one by
		// slicing it every `dim` values. The capacity of each row is capped
		// so appending to one row cannot overwrite the next.
		vecs := make([][]float32, row)
		for i := range vecs {
			vecs[i] = flat[i*dim : (i+1)*dim : (i+1)*dim]
		}
		return vecs, nil
	}

	// Gets vector of `train` hierarchy.
	train, err = readFn("train")
	if err != nil {
		return nil, nil, nil, err
	}
	// Gets vector of `test` hierarchy.
	test, err = readFn("test")
	if err != nil {
		return nil, nil, nil, err
	}

	// Generate one id per training vector.
	ids = make([]string, 0, len(train))
	for range train {
		ids = append(ids, fuid.String())
	}
	return ids, train, test, nil
}