Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Neural Solution SQL/CMD injection #1627

Merged
merged 6 commits into from
Feb 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion neural_solution/backend/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ def prepare_task(self, task: Task):
if not task.optimized:
# Generate quantization code with Neural Coder API
neural_coder_cmd = ["python -m neural_coder --enable --approach"]
# for users to define approach: "static, ""static_ipex", "dynamic", "auto"
# for users to define approach: "static", "static_ipex", "dynamic", "auto"
approach = task.approach
neural_coder_cmd.append(approach)
if is_remote_url(task.script_url):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ optional arguments:
"script_url": "tf_example1",
"optimized": "True",
"arguments": [
"--dataset_location=dataset --model_path=model"
"--dataset_location=dataset", "--model_path=model"
],
"approach": "static",
"requirements": [
Expand All @@ -106,7 +106,7 @@ When using distributed quantization, the `workers` needs to be set to greater th
"script_url": "tf_example1",
"optimized": "True",
"arguments": [
"--dataset_location=dataset --model_path=model"
"--dataset_location=dataset", "--model_path=model"
],
"approach": "static",
"requirements": [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"script_url": "custom_models_optimized/tf_example1",
"optimized": "True",
"arguments": [
"--dataset_location=dataset --model_path=model"
"--dataset_location=dataset", "--model_path=model"
],
"approach": "static",
"requirements": ["tensorflow"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"script_url": "custom_models_optimized/tf_example1",
"optimized": "True",
"arguments": [
"--dataset_location=dataset --model_path=model"
"--dataset_location=dataset", "--model_path=model"
],
"approach": "static",
"requirements": ["tensorflow"
Expand Down
2 changes: 1 addition & 1 deletion neural_solution/examples/hf_models/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ optional arguments:
"script_url": "https://github.com/huggingface/transformers/blob/v4.21-release/examples/pytorch/text-classification/run_glue.py",
"optimized": "False",
"arguments": [
"--model_name_or_path bert-base-cased --task_name mrpc --do_eval --output_dir result"
"--model_name_or_path=bert-base-cased", "--task_name=mrpc", "--do_eval", "--output_dir=result"
],
"approach": "static",
"requirements": [],
Expand Down
2 changes: 1 addition & 1 deletion neural_solution/examples/hf_models/task_request.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"script_url": "https://github.com/huggingface/transformers/blob/v4.21-release/examples/pytorch/text-classification/run_glue.py",
"optimized": "False",
"arguments": [
"--model_name_or_path bert-base-cased --task_name mrpc --do_eval --output_dir result"
"--model_name_or_path=bert-base-cased", "--task_name=mrpc", "--do_eval", "--output_dir=result"
],
"approach": "static",
"requirements": ["datasets", "transformers=4.21.0", "torch"],
Expand Down
2 changes: 1 addition & 1 deletion neural_solution/examples/hf_models_grpc/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ optional arguments:
"script_url": "https://github.com/huggingface/transformers/blob/v4.21-release/examples/pytorch/text-classification/run_glue.py",
"optimized": "False",
"arguments": [
"--model_name_or_path bert-base-cased --task_name mrpc --do_eval --output_dir result"
"--model_name_or_path=bert-base-cased", "--task_name=mrpc", "--do_eval", "--output_dir=result"
],
"approach": "static",
"requirements": [],
Expand Down
2 changes: 1 addition & 1 deletion neural_solution/examples/hf_models_grpc/task_request.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"script_url": "https://github.com/huggingface/transformers/blob/v4.21-release/examples/pytorch/text-classification/run_glue.py",
"optimized": "False",
"arguments": [
"--model_name_or_path bert-base-cased --task_name mrpc --do_eval --output_dir result"
"--model_name_or_path=bert-base-cased", "--task_name=mrpc", "--do_eval", "--output_dir=result"
],
"approach": "static",
"requirements": ["datasets", "transformers=4.21.0", "torch"],
Expand Down
5 changes: 5 additions & 0 deletions neural_solution/frontend/fastapi/main_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
get_cluster_info,
get_cluster_table,
get_res_during_tuning,
is_valid_task,
list_to_string,
serialize,
)
Expand Down Expand Up @@ -153,10 +154,14 @@ async def submit_task(task: Task):
Returns:
json: status , id of task and messages.
"""
if not is_valid_task(task.dict()):
raise HTTPException(status_code=422, detail="Invalid task")

msg = "Task submitted successfully"
status = "successfully"
# search the current
db_path = get_db_path(config.workspace)

if os.path.isfile(db_path):
conn = sqlite3.connect(db_path)
cursor = conn.cursor()
Expand Down
58 changes: 58 additions & 0 deletions neural_solution/frontend/utility.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,3 +295,61 @@ def list_to_string(lst: list):
str: string
"""
return " ".join(str(i) for i in lst)


def is_invalid_str(to_test_str: str):
"""Verify whether the to_test_str is valid.

Args:
to_test_str (str): string to be tested.

Returns:
bool: valid or invalid
"""
return any(char in to_test_str for char in [" ", '"', "'", "&", "|", ";", "`", ">"])


def is_valid_task(task: dict) -> bool:
"""Verify whether the task is valid.

Args:
task (dict): task request

Returns:
bool: valid or invalid
"""
required_fields = ["script_url", "optimized", "arguments", "approach", "requirements", "workers"]

for field in required_fields:
if field not in task:
return False

if not isinstance(task["script_url"], str) or is_invalid_str(task["script_url"]):
return False

if (isinstance(task["optimized"], str) and task["optimized"] not in ["True", "False"]) or (
not isinstance(task["optimized"], str) and not isinstance(task["optimized"], bool)
):
return False

if not isinstance(task["arguments"], list):
return False
else:
for argument in task["arguments"]:
if is_invalid_str(argument):
return False

if not isinstance(task["approach"], str) or task["approach"] not in ["static", "static_ipex", "dynamic", "auto"]:
return False

if not isinstance(task["requirements"], list):
return False
else:
for requirement in task["requirements"]:
if is_invalid_str(requirement):
return False

if not isinstance(task["workers"], int) or task["workers"] < 1:
return False

return True
22 changes: 18 additions & 4 deletions neural_solution/test/frontend/fastapi/test_main_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,12 +118,26 @@ def test_get_description(self):
def test_submit_task(self, mock_submit_task):
task = {
"script_url": "http://example.com/script.py",
"optimized": True,
"optimized": "True",
"arguments": ["arg1", "arg2"],
"approach": "approach1",
"approach": "static",
"requirements": ["req1", "req2"],
"workers": 3,
}

# test invalid task
task_invalid = {
"script_url": "http://example.com/script.py",
"optimized": "True",
"arguments": "invalid str, should be list",
"approach": "static",
"requirements": ["req1", "req2"],
"workers": 3,
}
response = client.post("/task/submit/", json=task_invalid)
print(response)
self.assertEqual(response.status_code, 422)
self.assertIn("arguments", response.text)

# test no db case
delete_db()
Expand Down Expand Up @@ -174,7 +188,7 @@ def test_get_task_by_id(self, mock_submit_task):
"script_url": "http://example.com/script.py",
"optimized": True,
"arguments": ["arg1", "arg2"],
"approach": "approach1",
"approach": "static",
"requirements": ["req1", "req2"],
"workers": 3,
}
Expand All @@ -200,7 +214,7 @@ def test_get_task_status_by_id(self, mock_submit_task):
"script_url": "http://example.com/script.py",
"optimized": True,
"arguments": ["arg1", "arg2"],
"approach": "approach1",
"approach": "static",
"requirements": ["req1", "req2"],
"workers": 3,
}
Expand Down
97 changes: 97 additions & 0 deletions neural_solution/test/frontend/fastapi/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
get_cluster_info,
get_cluster_table,
get_res_during_tuning,
is_valid_task,
list_to_string,
serialize,
)
Expand Down Expand Up @@ -110,6 +111,102 @@ def test_list_to_string(self):
expected_result = "Hello Neural Solution"
self.assertEqual(list_to_string(lst), expected_result)

def test_is_valid_task(self):
task_sql_injection = {
"script_url": "https://github.com/huggingface/transformers/blob/v4.21-release/examples/pytorch/text-classification/run_glue.py",
"optimized": "False",
"arguments": [],
"approach": "5', '6', 7, 'pending'), ('1b9ff5c2fd2143d58522bd71d18845a3', '2', 3, '4', '5', '6', 7, 'pending') ON CONFLICT (id) DO UPDATE SET id = '1b9ff5c2fd2143d58522bd71d18845a3', q_model_path = '/home/victim/.ssh' --",
"requirements": [],
"workers": 1,
}
self.assertFalse(is_valid_task(task_sql_injection))

task_cmd_injection = {
"script_url": 'https://github.com/huggingface/transformers/blob/v4.21-release/examples/pytorch/text-classification/run_glue.py & eval "$(echo ZWNobyAiRG9tYWluIGV4cGFuc2lvbiIgPiB+L2F0dGFjay5weSI= | base64 --decode)"',
"optimized": "False",
"arguments": ["--model_name_or_path bert-base-cased --task_name mrpc --do_eval --output_dir result"],
"approach": "static",
"requirements": [],
"workers": 1,
}
self.assertFalse(is_valid_task(task_cmd_injection))

task_lack_field = {
"optimized": "True",
}
self.assertFalse(is_valid_task(task_lack_field))

task_script_url_not_str = {
"script_url": ["custom_models_optimized/tf_example1"],
"optimized": "True",
"arguments": ["--dataset_location=dataset --model_path=model"],
"approach": "static",
"requirements": ["tensorflow"],
"workers": 1,
}
self.assertFalse(is_valid_task(task_script_url_not_str))

task_optimized_not_bool_str = {
"script_url": ["custom_models_optimized/tf_example1"],
"optimized": "True or False",
"arguments": ["--dataset_location=dataset", "--model_path=model"],
"approach": "static",
"requirements": ["tensorflow"],
"workers": 1,
}
self.assertFalse(is_valid_task(task_optimized_not_bool_str))

task_arguments_not_list = {
"script_url": ["custom_models_optimized/tf_example1"],
"optimized": "True",
"arguments": 123,
"approach": "static",
"requirements": ["tensorflow"],
"workers": 1,
}
self.assertFalse(is_valid_task(task_arguments_not_list))

task_arguments_invalid = {
"script_url": ["custom_models_optimized/tf_example1"],
"optimized": "True",
"arguments": ["--dataset_location=dataset --model_path=model"],
"approach": "static",
"requirements": ["tensorflow"],
"workers": 1,
}
self.assertFalse(is_valid_task(task_arguments_not_list))

task_approach_is_invalid = {
"script_url": ["custom_models_optimized/tf_example1"],
"optimized": "True",
"arguments": [],
"approach": "static or dynamic",
"requirements": ["tensorflow"],
"workers": 1,
}
self.assertFalse(is_valid_task(task_approach_is_invalid))

task_requirements_not_list = {
"script_url": ["custom_models_optimized/tf_example1"],
"optimized": "True",
"arguments": [],
"approach": "static",
"requirements": "tensorflow",
"workers": 1,
}
self.assertFalse(is_valid_task(task_requirements_not_list))

task_normal = {
"script_url": "custom_models_optimized/tf_example1",
"optimized": "True",
"arguments": ["--dataset_location=dataset", "--model_path=model"],
"approach": "static",
"requirements": ["tensorflow"],
"workers": 1,
}
self.assertTrue(is_valid_task(task_normal))


if __name__ == "__main__":
unittest.main()