forked from galsalomon66/s3select
-
Notifications
You must be signed in to change notification settings - Fork 22
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added a query generator which generates equivalent queries for AWS and Ceph. After running these queries, their results are matched. Signed-off-by: Girjesh Rajoria <[email protected]>
- Loading branch information
Showing
4 changed files
with
409 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
#include <iostream> | ||
#include <fstream> | ||
|
||
using namespace std; | ||
|
||
/**
 * Converts every line of "aws_queries.txt" into one
 * `aws s3api select-object-content` command and writes the commands to the
 * shell script "aws_cmds.sh".
 *
 * The bucket name and object key are read interactively from standard input.
 * The result of query number i (1-based line number) is directed to
 * "aws_results/output<i>.csv"; the generated script creates that directory.
 *
 * @return 0 on success, 1 when a file cannot be opened, the user input is
 *         missing, or writing the script fails.
 */
int main()
{
  std::ifstream query_file("aws_queries.txt");
  if (!query_file.is_open())
  {
    std::cerr << "error: cannot open aws_queries.txt for reading\n";
    return 1;
  }

  std::ofstream cmd_file("aws_cmds.sh");
  if (!cmd_file.is_open())
  {
    std::cerr << "error: cannot open aws_cmds.sh for writing\n";
    return 1;
  }

  // Script preamble: trace commands, stop on first failure, prepare output dir.
  cmd_file << "#!/bin/sh\nset -x\nset -e\n\n";
  cmd_file << "mkdir -p aws_results\n";

  std::string bucket, csv_file;
  std::cout << "Enter bucket name: ";
  std::cin >> bucket;
  std::cout << "Enter file name: ";
  std::cin >> csv_file;
  if (bucket.empty() || csv_file.empty())
  {
    // A failed read leaves the strings empty; the commands would be unusable.
    std::cerr << "error: bucket name and file name are both required\n";
    return 1;
  }

  std::string query;
  for (int i = 1; std::getline(query_file, query); ++i)
  {
    // The query is wrapped in double quotes because the generated SQL itself
    // contains single-quoted literals.
    cmd_file << "aws s3api select-object-content --bucket " << bucket
             << " --key " << csv_file
             << " --expression-type 'SQL'"
             << " --input-serialization '{\"CSV\": {}, \"CompressionType\": \"NONE\"}'"
             << " --output-serialization '{\"CSV\": {}}'"
             << " --profile openshift-dev"
             << " --expression \"" << query << "\""
             << " \"aws_results/output" << std::to_string(i) << ".csv\""
             << '\n';
  }

  cmd_file.flush();
  if (!cmd_file)
  {
    std::cerr << "error: failed while writing aws_cmds.sh\n";
    return 1;
  }
  return 0;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,325 @@ | ||
#include <iostream> | ||
#include <fstream> | ||
#include <vector> | ||
#include <bits/stdc++.h> | ||
#define NUM_COLUMN 3 | ||
|
||
using namespace std; | ||
|
||
// Kind of expression requested from random_query_expr(). The numeric values
// matter: main() selects a kind with `rand() % 4`, i.e. one of the first four.
enum Return_type
{
  INTEGER     = 0,  // integer-valued expression
  STRING      = 1,  // string-valued expression
  TIMESTAMP   = 2,  // timestamp-valued expression
  MIX_COL_NUM = 3,  // arithmetic over integer columns and integer literals
  COLUMN      = 4,  // a single integer-cast column reference
  NUMBER      = 5   // a single integer literal
};
|
||
// Returns one arithmetic operator character ('+', '-', '*' or '/'),
// selected with a single rand() draw.
auto random_arth_op = []()
{
  std::string operators = "+-*/";
  const auto pick = rand() % operators.size();
  return operators[pick];
};
|
||
// Returns one comparison operator (">", "<", ">=", "<=", "==" or "!=") as a
// string, selected with a single rand() draw.
auto random_compare_op = []()
{
  const std::vector<std::string> comparators{">", "<", ">=", "<=", "==", "!="};
  const auto pick = rand() % comparators.size();
  return comparators[pick];
};
|
||
// Returns one date-part keyword ("year" ... "second") as a string, selected
// with a single rand() draw.
auto random_date_part = []() -> std::string
{
  const std::vector<std::string> parts{"year", "month", "day",
                                       "hour", "minute", "second"};
  const auto pick = rand() % parts.size();
  return parts[pick];
};
|
||
/*auto random_date_part_extract = []() | ||
{vector<string> op={"year", "month", "day", "hour", "minute", "second", | ||
"timezone_hour", "timezone_minute"}; | ||
return op[ rand() % op.size() ]; | ||
};*/ | ||
|
||
// Produces a random timestamp of the form YYYY-MM-DDThh:mm:ss.<fraction>Z.
//
// Year is 1900..1999, month 1..12, day 1..28, and the time fields cover their
// usual ranges. Month through seconds are zero-padded to two digits; the
// fractional part (0..999999) is written without padding.
//
// aws_expr receives the generated text, and the same text is returned.
std::string random_timestamp_string(std::string& aws_expr)
{
  // Draw the fields in the order they appear in the output.
  const int year_value   = rand() % 100 + 1900;
  const int month_value  = 1 + rand() % 12;
  const int day_value    = 1 + rand() % 28;
  const int hour_value   = rand() % 24;
  const int minute_value = rand() % 60;
  const int second_value = rand() % 60;
  const int fraction     = rand() % 1000000;

  std::stringstream formatted;
  formatted << std::setfill('0');
  formatted << year_value;
  formatted << "-" << std::setw(2) << month_value;
  formatted << "-" << std::setw(2) << day_value;
  formatted << "T" << std::setw(2) << hour_value;
  formatted << ":" << std::setw(2) << minute_value;
  formatted << ":" << std::setw(2) << second_value;
  formatted << "." << fraction << "Z";

  aws_expr = formatted.str();
  return aws_expr;
}
|
||
// Builds a random timestamp format string by concatenating 0..9 randomly
// chosen format tokens (each token already carries its trailing space).
std::string random_tm_format_string()
{
  const std::vector<std::string> tokens = {
      "yyyyy ", "yyyy ", "yyy ", "yy ", "y ",
      "MMMMM ", "MMMM ", "MMM ", "MM ", "M ",
      "dd ", "d ", "a ",
      "hh ", "h ", "HH ", "H ",
      "mm ", "m ", "ss ", "s ",
      "SSSSSSSSS ", "SSSSSS ", "SSSSS ", "SSS ", "SS ", "S ",
      "n ", ": ", "- ", " "};

  std::string format;
  // The first draw decides how many tokens follow.
  for (int remaining = rand() % 10; remaining != 0; --remaining)
  {
    format += tokens[rand() % tokens.size()];
  }
  return format;
}
|
||
string random_col(string& aws_expr) | ||
{ | ||
int num = 1 + (rand() % NUM_COLUMN); | ||
aws_expr = "cast(_" + to_string(num) + " as int)"; | ||
return "int(_" + to_string(num) + ")"; | ||
} | ||
|
||
// Picks a random integer literal in 1..10.
//
// aws_expr receives the bare literal (e.g. "7");
// the return value is the ceph spelling, "int(7)".
std::string random_number(std::string& aws_expr)
{
  const std::string literal = std::to_string(rand() % 10 + 1);
  aws_expr = literal;
  return "int(" + literal + ")";
}
|
||
string random_num_expr(int depth, string& aws_expr) | ||
{ | ||
string aws_expr1, aws_expr2, ceph_expr, op; | ||
if (depth == 0) | ||
{ | ||
ceph_expr = random_number(aws_expr1); | ||
aws_expr = aws_expr1; | ||
return ceph_expr; | ||
} | ||
op = random_arth_op(); | ||
ceph_expr = random_num_expr(depth-1, aws_expr1) + op + | ||
random_num_expr(depth-1, aws_expr2); | ||
aws_expr = aws_expr1 + op + aws_expr2; | ||
return ceph_expr; | ||
} | ||
|
||
string random_num_col_expr(int depth, string& aws_expr) | ||
{ | ||
string aws_expr1, aws_expr2, ceph_expr, op; | ||
if (depth == 0) | ||
{ | ||
if ((rand() % 2) == 0) | ||
{ | ||
ceph_expr = random_col(aws_expr1); | ||
aws_expr = aws_expr1; | ||
return ceph_expr; | ||
} | ||
else | ||
{ | ||
ceph_expr = random_number(aws_expr1); | ||
aws_expr = aws_expr1; | ||
return ceph_expr; | ||
} | ||
} | ||
op = random_arth_op(); | ||
ceph_expr = random_num_col_expr(depth-1, aws_expr1) + op + | ||
random_num_col_expr(depth-1, aws_expr2); | ||
aws_expr = aws_expr1 + op + aws_expr2; | ||
return ceph_expr; | ||
} | ||
|
||
string random_query_expr(int depth, string& input_str, int type, string& aws_expr) | ||
{ | ||
string ceph_expr; | ||
if (depth == 0) | ||
{ | ||
switch (type) | ||
{ | ||
case INTEGER: | ||
ceph_expr = random_number(aws_expr); | ||
break; | ||
case STRING: | ||
ceph_expr = "\'" + input_str + "\'"; | ||
aws_expr = "\'" + input_str + "\'"; | ||
break; | ||
case MIX_COL_NUM: | ||
ceph_expr = random_num_col_expr(depth, aws_expr); | ||
break; | ||
case TIMESTAMP: | ||
ceph_expr = "to_timestamp(\'" + random_timestamp_string(aws_expr) + "\')"; | ||
aws_expr = "to_timestamp(\'" + aws_expr + "\')"; | ||
break; | ||
} | ||
return ceph_expr; | ||
} | ||
|
||
int option; | ||
if (type == INTEGER) //return type is int | ||
{ | ||
string ceph_col, aws_col, aws_expr1, aws_expr2, op1, op2; | ||
switch (option = rand() % 9) | ||
{ | ||
case 0: | ||
ceph_col = random_col(aws_col); | ||
op1 = random_arth_op(); | ||
op2 = random_arth_op(); | ||
ceph_expr = "int(avg(" + ceph_col + op1 + random_num_col_expr(depth-1, aws_expr1) + | ||
") " + op2 + " " + random_num_expr(depth-1, aws_expr2) + ")"; | ||
aws_expr = "cast((avg(" + aws_col + op1 + aws_expr1 + ") " + op2 + " " + aws_expr2 + | ||
") as int)"; | ||
break; | ||
case 1: | ||
ceph_col = random_col(aws_col); | ||
op1 = random_arth_op(); | ||
op2 = random_arth_op(); | ||
ceph_expr = "count(" + ceph_col + op1 + random_num_col_expr(depth-1, aws_expr1) + | ||
") " + op2 + " " + random_num_expr(depth-1, aws_expr2); | ||
aws_expr = "count(" + aws_col + op1 + aws_expr1 + ") " + op2 + " " + aws_expr2; | ||
break; | ||
case 2: | ||
ceph_col = random_col(aws_col); | ||
op1 = random_arth_op(); | ||
op2 = random_arth_op(); | ||
ceph_expr = "max(" + ceph_col + op1 + random_num_col_expr(depth-1,aws_expr1) + ") " + | ||
op2 + " " + random_num_expr(depth-1, aws_expr2); | ||
aws_expr = "max(" + aws_col + op1 + aws_expr1 + ") " + op2 + " " + aws_expr2; | ||
break; | ||
case 3: | ||
ceph_col = random_col(aws_col); | ||
op1 = random_arth_op(); | ||
op2 = random_arth_op(); | ||
ceph_expr = "min(" + ceph_col + op1 + random_num_col_expr(depth-1, aws_expr1) + ") " + | ||
op2 + " " + random_num_expr(depth-1, aws_expr2); | ||
aws_expr = "min(" + aws_col + op1 + aws_expr1 + ") " + op2 + " " + aws_expr2; | ||
break; | ||
case 4: | ||
ceph_col = random_col(aws_col); | ||
op1 = random_arth_op(); | ||
op2 = random_arth_op(); | ||
ceph_expr = "sum(" + ceph_col + op1 + random_num_col_expr(depth-1, aws_expr1) + | ||
") " + op2 + " " + random_num_expr(depth-1, aws_expr2); | ||
aws_expr = "sum(" + aws_col + op1 + aws_expr1 + ") " + op2 + " " + aws_expr2; | ||
break; | ||
case 5: | ||
ceph_expr = "char_length(" + random_query_expr(depth-1, input_str, STRING, | ||
aws_expr1) + ")"; | ||
aws_expr = "char_length(" + aws_expr1 + ")"; | ||
break; | ||
case 6: | ||
ceph_expr = "character_length(" + random_query_expr(depth-1, input_str, STRING, | ||
aws_expr1) + ")"; | ||
aws_expr = "character_length(" + aws_expr1 + ")"; | ||
break; | ||
case 7: | ||
op1 = random_date_part(); | ||
ceph_expr = "extract(" + op1 + " from " + random_query_expr(depth-1, input_str, | ||
TIMESTAMP, aws_expr1) + ")"; | ||
aws_expr = "extract(" + op1 + " from " + aws_expr1 + ")"; | ||
break; | ||
case 8: | ||
op1 = random_date_part(); | ||
ceph_expr = "date_diff(" + op1 + ", " + random_query_expr(depth-1, input_str, | ||
TIMESTAMP, aws_expr1) + ", " + random_query_expr(depth-1, input_str, | ||
TIMESTAMP, aws_expr2) + ")"; | ||
aws_expr = "date_diff(" + op1 + ", " + aws_expr1 + ", " + aws_expr2 + ")"; | ||
break; | ||
} | ||
} | ||
else if (type == STRING) // return type is string | ||
{ | ||
string aws_expr1, aws_expr2, aws_expr3; | ||
switch (option = rand() % 4) | ||
{ | ||
case 0: | ||
ceph_expr = "lower(" + random_query_expr(depth-1, input_str, STRING, aws_expr1) + | ||
")"; | ||
aws_expr = "lower(" + aws_expr1 + ")"; | ||
break; | ||
case 1: | ||
ceph_expr = "upper(" + random_query_expr(depth-1, input_str, STRING, aws_expr1) + | ||
")"; | ||
aws_expr = "upper(" + aws_expr1 + ")"; | ||
break; | ||
case 2: | ||
ceph_expr = "substring(" + random_query_expr(depth-1, input_str, STRING, aws_expr1) + | ||
", " + random_query_expr(depth-1, input_str, INTEGER, aws_expr2) + ", " + | ||
random_query_expr(depth-1, input_str, INTEGER, aws_expr3) + ")"; | ||
aws_expr = "substring(" + aws_expr1 + ", " + aws_expr2 + ", " + aws_expr3 + ")"; | ||
break; | ||
case 3: | ||
aws_expr2 = random_tm_format_string(); | ||
ceph_expr = "to_string(" + random_query_expr(depth-1, input_str, TIMESTAMP, aws_expr1) | ||
+ ", \'" + aws_expr2 + "\')"; | ||
aws_expr = "to_string(" + aws_expr1 + ", \'" + aws_expr2 + "\')"; | ||
break; | ||
} | ||
} | ||
else if (type == TIMESTAMP) // return type is TIMESTAMP | ||
{ | ||
string aws_expr1, aws_expr2, date_part; | ||
switch (option = rand() % 2) | ||
{ | ||
case 0: | ||
date_part = random_date_part(); | ||
ceph_expr = "date_add(" + date_part + ", " + random_number(aws_expr1) + ", " + | ||
random_query_expr(depth-1, input_str, TIMESTAMP, aws_expr2) + ")"; | ||
aws_expr = "date_add(" + date_part + ", " + aws_expr1 + ", " + aws_expr2 + ")"; | ||
break; | ||
case 1: | ||
ceph_expr = "to_timestamp(\'" + random_timestamp_string(aws_expr1) + "\')"; | ||
aws_expr = "to_timestamp(\'" + aws_expr1 + "\')"; | ||
break; | ||
} | ||
} | ||
else if (type == MIX_COL_NUM) | ||
{ | ||
ceph_expr = random_num_col_expr(depth-1, aws_expr); | ||
} | ||
else if (type == COLUMN) // return type integer column number | ||
{ | ||
ceph_expr = random_col(aws_expr); | ||
} | ||
else if (type == NUMBER) // return type randon number | ||
{ | ||
ceph_expr = random_number(aws_expr); | ||
} | ||
else | ||
{ | ||
aws_expr = "error"; | ||
ceph_expr = "error"; | ||
} | ||
return ceph_expr; | ||
} | ||
|
||
int main() | ||
{ | ||
srand(time(0)); | ||
int reps, depth; | ||
fstream query_file, aws_query_file; | ||
query_file.open("queries.txt", ios::out); | ||
aws_query_file.open("aws_queries.txt", ios::out); | ||
string input_str = " %%AbCdEfGhIjKlMnOpQrStUvWxYz## "; | ||
cout << "Enter number of quries to be generated: "; | ||
cin >> reps; | ||
cout << "Enter depth of queries to be generated: "; | ||
cin >> depth; | ||
if(query_file.is_open() && aws_query_file.is_open()) //checking whether the file is open | ||
{ | ||
while (reps) | ||
{ | ||
string aws_expr; | ||
int type; | ||
string ceph_query = "select "; | ||
string aws_query = "select "; | ||
/*int projection = rand() % 4; | ||
while (projection > 1) | ||
{ | ||
type = rand() % 4; | ||
ceph_query = ceph_query + random_query_expr(depth, input_str, | ||
type, aws_expr) + ", "; | ||
aws_query = aws_query + aws_expr + ", "; | ||
projection--; | ||
}*/ | ||
type = rand() % 4; | ||
ceph_query = ceph_query + random_query_expr(depth, input_str, type, | ||
aws_expr)+ " from stdin;"; | ||
aws_query = aws_query + aws_expr + " from s3object;"; | ||
query_file << ceph_query << endl; | ||
aws_query_file << aws_query <<endl; | ||
reps--; | ||
} | ||
query_file.close(); | ||
} | ||
return 0; | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
#!/bin/sh
# Builds the two generator programs, runs them, then executes the AWS command
# script they produce.
set -x
set -e

# Compile each generator from the source file of the same name.
for tool in queries_generator generate_aws_cmds
do
  g++ -o "$tool" "$tool.cpp"
done

# Both programs prompt on standard input.
./queries_generator
./generate_aws_cmds

chmod +x aws_cmds.sh

./aws_cmds.sh
Oops, something went wrong.