From ed7c7b20262e452542e725dc0af9847be31d66fe Mon Sep 17 00:00:00 2001 From: m-1-k-3 Date: Sun, 23 Jul 2023 20:48:52 +0200 Subject: [PATCH 01/10] rating --- README.md | 7 +++ helpers/helpers_emba_defaults.sh | 2 +- modules/F05_qs_resolver.sh | 2 +- modules/Q02_openai_question.sh | 60 ++++++++++++++++++-------- modules/S15_radare_decompile_checks.sh | 2 +- modules/S20_shell_check.sh | 23 ++++++++-- modules/S21_python_check.sh | 4 +- modules/S22_php_check.sh | 6 +-- modules/S23_lua_check.sh | 5 ++- 9 files changed, 81 insertions(+), 30 deletions(-) diff --git a/README.md b/README.md index 84ddd908b..dc605f1d5 100644 --- a/README.md +++ b/README.md @@ -69,6 +69,13 @@ For further details on EMBA's system-emulation engine check the [wiki](https://g sudo ./emba -l ~/log -f ~/firmware -p ./scan-profiles/default-scan-emulation.emba ``` +## Quick start with AI-assisted analysis enabled +For further details on EMBA's AI analysis engine check the [wiki](https://github.com/e-m-b-a/emba/wiki/AI-supported-firmware-analysis). +```console +sudo ./emba -l ~/log -f ~/firmware -p ./scan-profiles/default-scan-gpt.emba + +``` + --- *EMBA* supports multiple testing and reporting [options](https://github.com/e-m-b-a/emba/wiki/Usage#arguments). For more details check the [wiki](https://github.com/e-m-b-a/emba/wiki/Usage). diff --git a/helpers/helpers_emba_defaults.sh b/helpers/helpers_emba_defaults.sh index 25e207cc6..c5e0e14bb 100755 --- a/helpers/helpers_emba_defaults.sh +++ b/helpers/helpers_emba_defaults.sh @@ -71,7 +71,7 @@ set_defaults() { export QUEST_CONTAINER="" export GPT_OPTION=0 # 0 -> off 1-> unpayed plan 2 -> no rate-limit export GPT_QUESTION="For the following code I need you to tell me how an attacker could exploit it and point out all vulnerabilities:" - export MINIMUM_GPT_PRIO=2 # [3 downto 0] 3 -> everything gets checked; 0 -> nothing gets checked + export MINIMUM_GPT_PRIO=1 # everything above this value gets checked export SHORT_PATH=0 # short paths in cli output export THREADED=0 # 0 -> single thread diff --git a/modules/F05_qs_resolver.sh b/modules/F05_qs_resolver.sh index 4106c7c11..ebd4768cf 100755 --- a/modules/F05_qs_resolver.sh +++ b/modules/F05_qs_resolver.sh @@ -24,7 +24,7 @@ F05_qs_resolver() { grep -q "Q02_openai_question finished" "${LOG_DIR}"/"${MAIN_LOG_FILE}" || sleep 1m fi - local _GPT_INPUT_FILE_="" + # local _GPT_INPUT_FILE_="" local GPT_ANCHOR_="" local _GPT_PRIO_=3 local GPT_QUESTION_="" diff --git a/modules/Q02_openai_question.sh b/modules/Q02_openai_question.sh index dc429effd..9d47d568a 100755 --- a/modules/Q02_openai_question.sh +++ b/modules/Q02_openai_question.sh @@ -33,6 +33,7 @@ Q02_openai_question() { done fi + export GTP_CHECKED_ARR=() while ! grep -q "Testing phase ended" "${LOG_DIR}"/"${MAIN_LOG_FILE}"; do if [[ "${CHATGPT_RESULT_CNT}" -ge 0 ]]; then ask_chatgpt @@ -64,36 +65,52 @@ ask_chatgpt() { mkdir "${GPT_FILE_DIR_}" fi - while IFS=";" read -r COL1_ COL2_ COL3_ COL4_ COL5_ COL6_ COL7_; do - SCRIPT_PATH_TMP_="${COL1_}" - GPT_ANCHOR_="${COL2_}" - GPT_PRIO_="${COL3_//GPT-Prio-/}" - GPT_QUESTION_="${COL4_}" - GPT_OUTPUT_FILE_="${COL5_}" - GPT_TOKENS_="${COL6_//cost\=/}" - GPT_RESPONSE_="${COL7_}" + # generating Array for GPT requests - sorting according the prio in field 3 + # this array gets regenerated on every round + readarray -t Q02_OPENAI_QUESTIONS < <(sort -u -k 3 -t ';' -r "${CSV_DIR}/q02_openai_question.csv.tmp") + + for ELEM in "${Q02_OPENAI_QUESTIONS[@]}"; do + SCRIPT_PATH_TMP_="$(echo "${ELEM}" | cut -d\; -f1)" + + # already tested entry: + if [[ " ${GTP_CHECKED_ARR[*]} " =~ ${SCRIPT_PATH_TMP_} ]]; then + print_output "[*] GPT - Already tested ${SCRIPT_PATH_TMP_}" "no_log" + continue + fi + + GPT_ANCHOR_="$(echo "${ELEM}" | cut -d\; -f2)" + GPT_PRIO_="$(echo "${ELEM}" | cut -d\; -f3)" + # GPT_PRIO_="${GPT_PRIO_//GPT-Prio-/}" + GPT_QUESTION_="$(echo "${ELEM}" | cut -d\; -f4)" + GPT_OUTPUT_FILE_="$(echo "${ELEM}" | cut -d\; -f5)" + GPT_TOKENS_="$(echo "${ELEM}" | cut -d\; -f6)" + GPT_TOKENS_="${GPT_TOKENS_//cost\=/}" + GPT_RESPONSE_="$(echo "${ELEM}" | cut -d\; -f7)" GPT_INPUT_FILE_="$(basename "${SCRIPT_PATH_TMP_}")" # in case we have nothing we are going to move on [[ -z "${SCRIPT_PATH_TMP_}" ]] && continue - print_output "[*] Trying to check inside ${ORANGE}${LOG_DIR}/firmware${NC}" "no_log" + print_output "[*] Identification of ${ORANGE}${SCRIPT_PATH_TMP_} / ${GPT_INPUT_FILE_}${NC} inside ${ORANGE}${LOG_DIR}/firmware${NC}" "no_log" SCRIPT_PATH_TMP_="$(find "${LOG_DIR}/firmware" -wholename "*${SCRIPT_PATH_TMP_}")" + # in case we have nothing we are going to move on ! [[ -f "${SCRIPT_PATH_TMP_}" ]] && continue [[ -f "${SCRIPT_PATH_TMP_}" ]] && cp "${SCRIPT_PATH_TMP_}" "${GPT_FILE_DIR_}/${GPT_INPUT_FILE_}.log" - print_output "[*] Trying to check ${ORANGE}${SCRIPT_PATH_TMP_}${NC} with Question ${ORANGE}${GPT_QUESTION_}${NC}" "no_log" - print_output "[*] Prio for testing is ${GPT_PRIO_}" "no_log" + print_output "[*] AI-Assisted analysis of script ${ORANGE}${SCRIPT_PATH_TMP_}${NC} with question ${ORANGE}${GPT_QUESTION_}${NC}" "no_log" + print_output "[*] Current priority for testing is ${GPT_PRIO_}" "no_log" - if [[ -z ${GPT_RESPONSE_} ]] && [[ ${GPT_PRIO_} -le ${MINIMUM_GPT_PRIO} ]] && [[ "${SCRIPT_PATH_TMP_}" != '' ]]; then + if [[ -z ${GPT_RESPONSE_} ]] && [[ ${GPT_PRIO_} -ge ${MINIMUM_GPT_PRIO} ]] && [[ "${SCRIPT_PATH_TMP_}" != '' ]]; then if [[ -f "${SCRIPT_PATH_TMP_}" ]]; then # add navbar-item for file sub_module_title "${GPT_INPUT_FILE_}" - print_output "[*] Asking ChatGPT about ${ORANGE}$(print_path "${SCRIPT_PATH_TMP_}")${NC}" "" "${GPT_FILE_DIR_}/${GPT_INPUT_FILE_}.log" + + print_output "[*] AI-Assisted analysis for ${ORANGE}$(print_path "${SCRIPT_PATH_TMP_}")${NC}" "" "${GPT_FILE_DIR_}/${GPT_INPUT_FILE_}.log" head -n -2 "${CONFIG_DIR}/gpt_template.json" > "${TMP_DIR}/chat.json" CHATGPT_CODE_=$(sed 's/\\//g;s/"/\\\"/g' "${SCRIPT_PATH_TMP_}" | tr -d '[:space:]') printf '"%s %s"\n}]}' "${GPT_QUESTION_}" "${CHATGPT_CODE_}" >> "${TMP_DIR}/chat.json" print_output "[*] The Combined Cost of the OpenAI request / the length is: ${ORANGE}${#GPT_QUESTION_} + ${#CHATGPT_CODE_}${NC}" "no_log" + HTTP_CODE_=$(curl https://api.openai.com/v1/chat/completions -H "Content-Type: application/json" \ -H "Authorization: Bearer ${OPENAI_API_KEY}" \ -d @"${TMP_DIR}/chat.json" -o "${TMP_DIR}/${GPT_INPUT_FILE_}_response.json" --write-out "%{http_code}" || true) @@ -104,7 +121,7 @@ ask_chatgpt() { print_output "[-] ERROR response: $(cat "${TMP_DIR}/${GPT_INPUT_FILE_}_response.json")" if jq '.error.type' "${TMP_DIR}/${GPT_INPUT_FILE_}_response.json" | grep -q "insufficient_quota" ; then - print_output "[-] Stopping OpenAI requests since the API key has reached its quota" + print_output "[-] Stopping OpenAI requests since the API key has reached its quota limit" CHATGPT_RESULT_CNT=-1 sleep 20 break @@ -132,6 +149,7 @@ ask_chatgpt() { fi sleep 1 done + # TODO: now we should redo the last test else print_output "[-] Stopping OpenAI requests since the API key has reached its rate_limit" CHATGPT_RESULT_CNT=-1 @@ -140,6 +158,7 @@ ask_chatgpt() { fi cat "${TMP_DIR}/${GPT_INPUT_FILE_}_response.json" >> "${GPT_FILE_DIR_}/openai_server_errors.log" + readarray -t Q02_OPENAI_QUESTIONS < <(sort -u -k 3 -t ';' -r "${CSV_DIR}/q02_openai_question.csv.tmp") sleep 30s continue fi @@ -150,16 +169,18 @@ ask_chatgpt() { print_output "[-] Something went wrong with the ChatGPT request for ${GPT_INPUT_FILE_}" break fi + GPT_RESPONSE_=("$(jq '.choices[] | .message.content' "${TMP_DIR}/${GPT_INPUT_FILE_}_response.json")") GPT_RESPONSE_CLEANED_="${GPT_RESPONSE_[*]//\;/}" #remove ; from response GPT_TOKENS_=$(jq '.usage.total_tokens' "${TMP_DIR}/${GPT_INPUT_FILE_}_response.json") if [[ ${GPT_TOKENS_} -ne 0 ]]; then + GTP_CHECKED_ARR+=("${SCRIPT_PATH_TMP_}") # write new into done csv - write_csv_gpt "${GPT_INPUT_FILE_}" "${GPT_ANCHOR_}" "GPT-Prio-${GPT_PRIO_}" "${GPT_QUESTION_}" "${GPT_OUTPUT_FILE_}" "cost=${GPT_TOKENS_}" "'${GPT_RESPONSE_CLEANED_//\'/}'" + write_csv_gpt "${GPT_INPUT_FILE_}" "${GPT_ANCHOR_}" "${GPT_PRIO_}" "${GPT_QUESTION_}" "${GPT_OUTPUT_FILE_}" "cost=${GPT_TOKENS_}" "'${GPT_RESPONSE_CLEANED_//\'/}'" # print openai response print_ln - print_output "[*] ${ORANGE}OpenAI responded with the following details:${NC}" + print_output "[*] ${ORANGE}AI-assisted analysis results via OpenAI ChatGPT:${NC}\\n" echo -e "${GPT_RESPONSE_[*]}" | tee -a "${LOG_FILE}" # add proper module link print_ln @@ -187,7 +208,12 @@ ask_chatgpt() { if [[ "${GPT_OPTION}" -ne 2 ]]; then sleep 20s fi - done < "${CSV_DIR}/q02_openai_question.csv.tmp" + + # reload q02 results: + print_output "[*] Regenerate analysis array ..." + readarray -t Q02_OPENAI_QUESTIONS < <(sort -u -k 3 -t ';' -r "${CSV_DIR}/q02_openai_question.csv.tmp") + done + # done < "${CSV_DIR}/q02_openai_question.csv.tmp" if [[ -f "${CSV_DIR}/q02_openai_question.csv" ]]; then while IFS=";" read -r COL1_ COL2_ COL3_ COL4_ COL5_ COL6_ COL7_; do diff --git a/modules/S15_radare_decompile_checks.sh b/modules/S15_radare_decompile_checks.sh index 03ac47b1d..4947edd78 100755 --- a/modules/S15_radare_decompile_checks.sh +++ b/modules/S15_radare_decompile_checks.sh @@ -197,7 +197,7 @@ radare_decomp_print_top10_statistics() { print_output "[*] Asking OpenAI chatbot about ${LOG_PATH_MODULE}/vul_func_${F_COUNTER}_${FUNCTION}-${SEARCH_TERM}.txt" GPT_ANCHOR_="$(openssl rand -hex 8)" # "${GPT_INPUT_FILE_}" "$GPT_ANCHOR_" "GPT-Prio-$GPT_PRIO_" "$GPT_QUESTION_" "$GPT_OUTPUT_FILE_" "cost=$GPT_TOKENS_" "$GPT_RESPONSE_" - write_csv_gpt_tmp "${LOG_PATH_MODULE}/vul_func_${F_COUNTER}_${FUNCTION}-${SEARCH_TERM}.txt" "${GPT_ANCHOR}" "GPT-Prio-${GPT_PRIO}" "Can you give me a side by side desciption of the following code in a table, where on the left is the code and on the right the desciption. And please use proper spacing and | to make it terminal friendly:" "${LOG_PATH_MODULE}/vul_func_${F_COUNTER}_${FUNCTION}-${SEARCH_TERM}.txt" "" "" + write_csv_gpt_tmp "${LOG_PATH_MODULE}/vul_func_${F_COUNTER}_${FUNCTION}-${SEARCH_TERM}.txt" "${GPT_ANCHOR}" "${GPT_PRIO}" "Can you give me a side by side desciption of the following code in a table, where on the left is the code and on the right the desciption. And please use proper spacing and | to make it terminal friendly:" "${LOG_PATH_MODULE}/vul_func_${F_COUNTER}_${FUNCTION}-${SEARCH_TERM}.txt" "" "" # add ChatGPT link printf '%s\n\n' "" >> "${LOG_PATH_MODULE}/vul_func_${F_COUNTER}_${FUNCTION}-${SEARCH_TERM}.txt" write_anchor_gpt "${GPT_ANCHOR_}" "${LOG_PATH_MODULE}/vul_func_${F_COUNTER}_${FUNCTION}-${SEARCH_TERM}.txt" diff --git a/modules/S20_shell_check.sh b/modules/S20_shell_check.sh index 03d0a19bc..18f00aed6 100755 --- a/modules/S20_shell_check.sh +++ b/modules/S20_shell_check.sh @@ -115,6 +115,8 @@ S20_shell_check() s20_eval_script_check() { local SH_SCRIPTS_=("${@}") local SH_SCRIPT="" + local GPT_PRIO_=3 + local GPT_ANCHOR_="" sub_module_title "Check shell scripts for eval usage" @@ -122,10 +124,20 @@ s20_eval_script_check() { print_output "[*] Testing ${ORANGE}${SH_SCRIPT}${NC} for eval usage" "no_log" if grep "eval " "${SH_SCRIPT}" | grep -q -v "^#.*"; then SH_SCRIPT_NAME="$(basename "${SH_SCRIPT}")" + local SHELL_LOG="$LOG_PATH_MODULE"/sh_eval_sources/"${SH_SCRIPT_NAME}".log ! [[ -d "$LOG_PATH_MODULE"/sh_eval_sources/ ]] && mkdir "$LOG_PATH_MODULE"/sh_eval_sources/ - [[ -f "${SH_SCRIPT}" ]] && cp "${SH_SCRIPT}" "$LOG_PATH_MODULE"/sh_eval_sources/"${SH_SCRIPT_NAME}".log - sed -i -r "s/.*eval\ .*/\x1b[32m&\x1b[0m/" "$LOG_PATH_MODULE"/sh_eval_sources/"${SH_SCRIPT_NAME}".log - print_output "[+] Found ${ORANGE}eval${GREEN} usage in ${ORANGE}${SH_SCRIPT_NAME}${NC}" "" "${LOG_PATH_MODULE}/sh_eval_sources/${SH_SCRIPT_NAME}.log" + [[ -f "${SH_SCRIPT}" ]] && cp "${SH_SCRIPT}" "${SHELL_LOG}" + sed -i -r "s/.*eval\ .*/\x1b[32m&\x1b[0m/" "${SHELL_LOG}" + print_output "[+] Found ${ORANGE}eval${GREEN} usage in ${ORANGE}${SH_SCRIPT_NAME}${NC}" "" "${SHELL_LOG}" + + if [[ "${GPT_OPTION}" -gt 0 ]]; then + GPT_ANCHOR_="$(openssl rand -hex 8)" + # "${GPT_INPUT_FILE_}" "$GPT_ANCHOR_" "GPT-Prio-$GPT_PRIO_" "$GPT_QUESTION_" "$GPT_OUTPUT_FILE_" "cost=$GPT_TOKENS_" "$GPT_RESPONSE_" + write_csv_gpt_tmp "$(cut_path "${SH_SCRIPT}")" "${GPT_ANCHOR_}" "${GPT_PRIO_}" "${GPT_QUESTION}" "${SHELL_LOG}" "" "" + # add ChatGPT link + printf '%s\n\n' "" >> "${SHELL_LOG}" + write_anchor_gpt "${GPT_ANCHOR_}" "${SHELL_LOG}" + fi fi done } @@ -151,6 +163,7 @@ s20_reporter() { local SHELL_LOG="${3:0}" local GPT_PRIO_=2 local GPT_ANCHOR_="" + if [[ "$VULNS" -ne 0 ]] ; then # check if this is common linux file: local COMMON_FILES_FOUND @@ -172,14 +185,16 @@ s20_reporter() { print_output "[+] Found ""$ORANGE""$VULNS"" issues""$GREEN"" in script ""$COMMON_FILES_FOUND"":""$NC"" ""$(print_path "$SH_SCRIPT")" "" "$SHELL_LOG" fi write_csv_log "$(print_path "$SH_SCRIPT")" "$VULNS" "$CFF" "NA" + if [[ "${GPT_OPTION}" -gt 0 ]]; then GPT_ANCHOR_="$(openssl rand -hex 8)" # "${GPT_INPUT_FILE_}" "$GPT_ANCHOR_" "GPT-Prio-$GPT_PRIO_" "$GPT_QUESTION_" "$GPT_OUTPUT_FILE_" "cost=$GPT_TOKENS_" "$GPT_RESPONSE_" - write_csv_gpt_tmp "$(cut_path "${SH_SCRIPT}")" "${GPT_ANCHOR_}" "GPT-Prio-${GPT_PRIO_}" "${GPT_QUESTION}" "${SHELL_LOG}" "" "" + write_csv_gpt_tmp "$(cut_path "${SH_SCRIPT}")" "${GPT_ANCHOR_}" "${GPT_PRIO_}" "${GPT_QUESTION}" "${SHELL_LOG}" "" "" # add ChatGPT link printf '%s\n\n' "" >> "${SHELL_LOG}" write_anchor_gpt "${GPT_ANCHOR_}" "${SHELL_LOG}" fi + echo "$VULNS" >> "$TMP_DIR"/S20_VULNS.tmp fi } diff --git a/modules/S21_python_check.sh b/modules/S21_python_check.sh index 6c6ee598a..a5a66c4f8 100755 --- a/modules/S21_python_check.sh +++ b/modules/S21_python_check.sh @@ -77,7 +77,7 @@ s21_script_bandit() { local NAME="" local PY_LOG="" local VULNS="" - local GPT_PRIO_=3 + local GPT_PRIO_=2 local GPT_ANCHOR_="" NAME=$(basename "$PY_SCRIPT_" 2> /dev/null | sed -e 's/:/_/g') @@ -111,7 +111,7 @@ s21_script_bandit() { if [[ "${GPT_OPTION}" -gt 0 ]]; then GPT_ANCHOR_="$(openssl rand -hex 8)" # "${GPT_INPUT_FILE_}" "$GPT_ANCHOR_" "GPT-Prio-$GPT_PRIO_" "$GPT_QUESTION_" "$GPT_OUTPUT_FILE_" "cost=$GPT_TOKENS_" "$GPT_RESPONSE_" - write_csv_gpt_tmp "$(cut_path "${PY_SCRIPT_}")" "${GPT_ANCHOR_}" "GPT-Prio-${GPT_PRIO_}" "${GPT_QUESTION}" "${PY_LOG}" "" "" + write_csv_gpt_tmp "$(cut_path "${PY_SCRIPT_}")" "${GPT_ANCHOR_}" "${GPT_PRIO_}" "${GPT_QUESTION}" "${PY_LOG}" "" "" # add ChatGPT link to output file printf '%s\n\n' "" >> "${PY_LOG}" write_anchor_gpt "${GPT_ANCHOR_}" "${PY_LOG}" diff --git a/modules/S22_php_check.sh b/modules/S22_php_check.sh index 9971e2fd5..2a2a6ca18 100755 --- a/modules/S22_php_check.sh +++ b/modules/S22_php_check.sh @@ -93,7 +93,7 @@ s22_vuln_check_semgrep() { local SEMG_SOURCE_FILE="" local SEMG_SOURCE_FILE_NAME="" local SEMG_LINE_NR="" - local GPT_PRIO_=2 + local GPT_PRIO_=3 local GPT_ANCHOR_="" ! [[ -d "$LOG_PATH_MODULE"/semgrep_sources/ ]] && mkdir "$LOG_PATH_MODULE"/semgrep_sources/ @@ -116,7 +116,7 @@ s22_vuln_check_semgrep() { if [[ "${GPT_OPTION}" -gt 0 ]]; then GPT_ANCHOR_="$(openssl rand -hex 8)" # "${GPT_INPUT_FILE_}" "$GPT_ANCHOR_" "GPT-Prio-$GPT_PRIO_" "$GPT_QUESTION_" "$GPT_OUTPUT_FILE_" "cost=$GPT_TOKENS_" "$GPT_RESPONSE_" - write_csv_gpt_tmp "$(cut_path "${SEMG_SOURCE_FILE}")" "${GPT_ANCHOR_}" "GPT-Prio-${GPT_PRIO_}" "${GPT_QUESTION} And I think there might be something in line ${SEMG_LINE_NR}" "${LOG_PATH_MODULE}/semgrep_sources/${SEMG_SOURCE_FILE_NAME}.log" "" "" + write_csv_gpt_tmp "$(cut_path "${SEMG_SOURCE_FILE}")" "${GPT_ANCHOR_}" "${GPT_PRIO_}" "${GPT_QUESTION} And I think there might be something in line ${SEMG_LINE_NR}" "${LOG_PATH_MODULE}/semgrep_sources/${SEMG_SOURCE_FILE_NAME}.log" "" "" # add ChatGPT link printf '%s\n\n' "" >> "${LOG_PATH_MODULE}/semgrep_sources/${SEMG_SOURCE_FILE_NAME}.log" write_anchor_gpt "${GPT_ANCHOR_}" "${LOG_PATH_MODULE}/semgrep_sources/${SEMG_SOURCE_FILE_NAME}.log" @@ -207,7 +207,7 @@ s22_vuln_check() { if [[ "${GPT_OPTION}" -gt 0 ]]; then GPT_ANCHOR_="$(openssl rand -hex 8)" # "${GPT_INPUT_FILE_}" "$GPT_ANCHOR_" "GPT-Prio-$GPT_PRIO_" "$GPT_QUESTION_" "$GPT_OUTPUT_FILE_" "cost=$GPT_TOKENS_" "$GPT_RESPONSE_" - write_csv_gpt_tmp "$(cut_path "${PHP_SCRIPT_}")" "${GPT_ANCHOR_}" "GPT-Prio-${GPT_PRIO_}" "${GPT_QUESTION}" "${TMP_DIR}/S22_VULNS.tmp" "" "" + write_csv_gpt_tmp "$(cut_path "${PHP_SCRIPT_}")" "${GPT_ANCHOR_}" "${GPT_PRIO_}" "${GPT_QUESTION}" "${TMP_DIR}/S22_VULNS.tmp" "" "" # add ChatGPT link printf '%s\n\n' "" >> "${TMP_DIR}"/S22_VULNS.tmp write_anchor_gpt "${GPT_ANCHOR_}" "${TMP_DIR}"/S22_VULNS.tmp diff --git a/modules/S23_lua_check.sh b/modules/S23_lua_check.sh index 803ee4483..70be96bd9 100755 --- a/modules/S23_lua_check.sh +++ b/modules/S23_lua_check.sh @@ -65,6 +65,7 @@ s23_luaseccheck() { local NAME="" local LUA_LOG="" local GPT_ANCHOR_="" + local GPT_PRIO_=2 sub_module_title "LUA Security checks module" mapfile -t LUA_CGI_FILES < <(find "${FIRMWARE_PATH}" -type f -exec grep -H cgilua\. {} \; 2>/dev/null | cut -d ':' -f1 | sort -u) @@ -86,6 +87,7 @@ s23_luaseccheck() { ISSUES_FILE=$((ISSUES_FILE+1)) print_output "[+] Found lua QUERY (GET/POST) entry: ${ORANGE}${ENTRY}${GREEN} in file ${ORANGE}${QUERY_FILE}${GREEN} with file access capabilities." S23_ISSUE_FOUND=1 + GPT_PRIO=$((GPT_PRIO+1)) fi if grep "$ENTRY" "${QUERY_FILE}" | grep -q "os.execute"; then # command exec - critical @@ -93,6 +95,7 @@ s23_luaseccheck() { ISSUES_FILE=$((ISSUES_FILE+1)) print_output "[+] Found lua QUERY (GET/POST) entry: ${ORANGE}${ENTRY}${GREEN} in file ${ORANGE}${QUERY_FILE}${GREEN} with command execution capabilities." S23_ISSUE_FOUND=1 + GPT_PRIO=$((GPT_PRIO+1)) fi done if [[ "${ISSUES_FILE}" -eq 0 ]] && grep -q "os.execute" "${QUERY_FILE}"; then @@ -111,7 +114,7 @@ s23_luaseccheck() { if [[ "${GPT_OPTION}" -gt 0 ]]; then GPT_ANCHOR_="$(openssl rand -hex 8)" # "${GPT_INPUT_FILE_}" "$GPT_ANCHOR_" "GPT-Prio-$GPT_PRIO_" "$GPT_QUESTION_" "$GPT_OUTPUT_FILE_" "cost=$GPT_TOKENS_" "$GPT_RESPONSE_" - write_csv_gpt_tmp "$(cut_path "${QUERY_FILE}")" "${GPT_ANCHOR_}" "GPT-Prio-2" "${GPT_QUESTION}" "${CSV_DIR}/s23_lua_check.csv" "" "" + write_csv_gpt_tmp "$(cut_path "${QUERY_FILE}")" "${GPT_ANCHOR_}" "${GPT_PRIO_}" "${GPT_QUESTION}" "${CSV_DIR}/s23_lua_check.csv" "" "" # add ChatGPT link print_ln print_ln From 17a819d373a50e3d4ddecd3c1602e8e9ee99785d Mon Sep 17 00:00:00 2001 From: m-1-k-3 Date: Mon, 24 Jul 2023 08:55:11 +0200 Subject: [PATCH 02/10] looping --- modules/Q02_openai_question.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/Q02_openai_question.sh b/modules/Q02_openai_question.sh index 9d47d568a..df0d184be 100755 --- a/modules/Q02_openai_question.sh +++ b/modules/Q02_openai_question.sh @@ -210,14 +210,14 @@ ask_chatgpt() { fi # reload q02 results: - print_output "[*] Regenerate analysis array ..." + print_output "[*] Regenerate analysis array ..." "no_log" readarray -t Q02_OPENAI_QUESTIONS < <(sort -u -k 3 -t ';' -r "${CSV_DIR}/q02_openai_question.csv.tmp") done - # done < "${CSV_DIR}/q02_openai_question.csv.tmp" if [[ -f "${CSV_DIR}/q02_openai_question.csv" ]]; then - while IFS=";" read -r COL1_ COL2_ COL3_ COL4_ COL5_ COL6_ COL7_; do - GPT_ANCHOR_="${COL2_}" + local GPT_ENTRY_LINE="" + while read -r GPT_ENTRY_LINE; do + GPT_ANCHOR_="$(echo "${GPT_ENTRY_LINE}" | cut -d ';' -f2)" sed -i "/${GPT_ANCHOR_}/d" "${CSV_DIR}/q02_openai_question.csv.tmp" # TODO remove [CHATGPT] line in output file done < "${CSV_DIR}/q02_openai_question.csv" From 2f3cca8a06f52492072d3bc819d00a998257dab4 Mon Sep 17 00:00:00 2001 From: m-1-k-3 Date: Mon, 24 Jul 2023 09:01:07 +0200 Subject: [PATCH 03/10] cleanup --- modules/F05_qs_resolver.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/F05_qs_resolver.sh b/modules/F05_qs_resolver.sh index ebd4768cf..e9f8b43de 100755 --- a/modules/F05_qs_resolver.sh +++ b/modules/F05_qs_resolver.sh @@ -38,13 +38,13 @@ F05_qs_resolver() { while IFS=";" read -r COL1_ COL2_ COL3_ COL4_ COL5_ COL6_ COL7_; do GPT_INPUT_FILE_="${COL1_}" GPT_ANCHOR_="${COL2_}" - _GPT_PRIO_="${COL3_//GPT-Prio-/}" + _GPT_PRIO_="${COL3_}" GPT_QUESTION_="${COL4_}" GPT_OUTPUT_FILE_="${COL5_}" GPT_TOKENS_="${COL6_//cost\=/}" GPT_RESPONSE_="${COL7_//\"/}" - print_output "[*] Trying to resolve Anchor=${GPT_ANCHOR_} in Output_file=${GPT_OUTPUT_FILE_}" + print_output "[*] Trying to resolve ${ORANGE}Anchor ${GPT_ANCHOR_}${NC} in ${ORANGE}Output_file ${GPT_OUTPUT_FILE_}${NC}." if [[ ${GPT_TOKENS_} -ne 0 ]]; then if ! [ -f "${GPT_OUTPUT_FILE_}" ]; then From 3588d8759f6d6407ceb213cf28417fdd7c3bc7c7 Mon Sep 17 00:00:00 2001 From: m-1-k-3 Date: Mon, 24 Jul 2023 11:13:41 +0200 Subject: [PATCH 04/10] prio --- modules/Q02_openai_question.sh | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/modules/Q02_openai_question.sh b/modules/Q02_openai_question.sh index df0d184be..669811adc 100755 --- a/modules/Q02_openai_question.sh +++ b/modules/Q02_openai_question.sh @@ -69,12 +69,14 @@ ask_chatgpt() { # this array gets regenerated on every round readarray -t Q02_OPENAI_QUESTIONS < <(sort -u -k 3 -t ';' -r "${CSV_DIR}/q02_openai_question.csv.tmp") - for ELEM in "${Q02_OPENAI_QUESTIONS[@]}"; do + for (( ELE_INDEX=0; ELE_INDEX<"${#Q02_OPENAI_QUESTIONS[@]}"; ELE_INDEX++ )); do + ELEM="${Q02_OPENAI_QUESTIONS["${ELE_INDEX}"]}" SCRIPT_PATH_TMP_="$(echo "${ELEM}" | cut -d\; -f1)" - # already tested entry: + # as we always start with the highest rated entry, we need to check if this entry was already tested: if [[ " ${GTP_CHECKED_ARR[*]} " =~ ${SCRIPT_PATH_TMP_} ]]; then print_output "[*] GPT - Already tested ${SCRIPT_PATH_TMP_}" "no_log" + # lets test the next entry continue fi @@ -110,6 +112,10 @@ ask_chatgpt() { CHATGPT_CODE_=$(sed 's/\\//g;s/"/\\\"/g' "${SCRIPT_PATH_TMP_}" | tr -d '[:space:]') printf '"%s %s"\n}]}' "${GPT_QUESTION_}" "${CHATGPT_CODE_}" >> "${TMP_DIR}/chat.json" print_output "[*] The Combined Cost of the OpenAI request / the length is: ${ORANGE}${#GPT_QUESTION_} + ${#CHATGPT_CODE_}${NC}" "no_log" + if [[ "${#CHATGPT_CODE_}" -gt 4561 ]]; then + print_output "[-] GPT request is too big ... skipping it now" + continue + fi HTTP_CODE_=$(curl https://api.openai.com/v1/chat/completions -H "Content-Type: application/json" \ -H "Authorization: Bearer ${OPENAI_API_KEY}" \ @@ -159,6 +165,8 @@ ask_chatgpt() { cat "${TMP_DIR}/${GPT_INPUT_FILE_}_response.json" >> "${GPT_FILE_DIR_}/openai_server_errors.log" readarray -t Q02_OPENAI_QUESTIONS < <(sort -u -k 3 -t ';' -r "${CSV_DIR}/q02_openai_question.csv.tmp") + # reset the array index to start again with the highest rated entry + ELE_INDEX=0 sleep 30s continue fi @@ -212,6 +220,8 @@ ask_chatgpt() { # reload q02 results: print_output "[*] Regenerate analysis array ..." "no_log" readarray -t Q02_OPENAI_QUESTIONS < <(sort -u -k 3 -t ';' -r "${CSV_DIR}/q02_openai_question.csv.tmp") + # reset the array index to start again with the highest rated entry + ELE_INDEX=0 done if [[ -f "${CSV_DIR}/q02_openai_question.csv" ]]; then From bd268302a42f1bf70b598b042581e434354657bc Mon Sep 17 00:00:00 2001 From: m-1-k-3 Date: Mon, 24 Jul 2023 13:55:27 +0200 Subject: [PATCH 05/10] update cnt --- modules/F50_base_aggregator.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/F50_base_aggregator.sh b/modules/F50_base_aggregator.sh index f951fe484..2f5224655 100755 --- a/modules/F50_base_aggregator.sh +++ b/modules/F50_base_aggregator.sh @@ -110,7 +110,7 @@ output_overview() { fi if [[ -f "${LOG_DIR}"/"${Q02_LOG}" ]] && [[ "${GPT_OPTION}" -gt 0 ]]; then - GPT_RESULTS=$(grep -c "OpenAI responded with the following details" "${LOG_DIR}"/"${Q02_LOG}" || true) + GPT_RESULTS=$(grep -c "AI-assisted analysis results via OpenAI ChatGPT" "${LOG_DIR}"/"${Q02_LOG}" || true) if [[ "${GPT_RESULTS}" -gt 0 ]]; then print_output "[+] EMBA AI analysis enabled." write_link "q02" From 371f9b461e9d0ce8ebc410da3b65505381f17b5a Mon Sep 17 00:00:00 2001 From: Michael Messner Date: Mon, 24 Jul 2023 14:44:39 +0200 Subject: [PATCH 06/10] Update modules/S15_radare_decompile_checks.sh Co-authored-by: Benedikt Kuehne <62940240+BenediktMKuehne@users.noreply.github.com> --- modules/S15_radare_decompile_checks.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/S15_radare_decompile_checks.sh b/modules/S15_radare_decompile_checks.sh index 4947edd78..54a732bd9 100755 --- a/modules/S15_radare_decompile_checks.sh +++ b/modules/S15_radare_decompile_checks.sh @@ -196,7 +196,7 @@ radare_decomp_print_top10_statistics() { if [[ "${GPT_OPTION}" -gt 0 ]]; then print_output "[*] Asking OpenAI chatbot about ${LOG_PATH_MODULE}/vul_func_${F_COUNTER}_${FUNCTION}-${SEARCH_TERM}.txt" GPT_ANCHOR_="$(openssl rand -hex 8)" - # "${GPT_INPUT_FILE_}" "$GPT_ANCHOR_" "GPT-Prio-$GPT_PRIO_" "$GPT_QUESTION_" "$GPT_OUTPUT_FILE_" "cost=$GPT_TOKENS_" "$GPT_RESPONSE_" + # "${GPT_INPUT_FILE_}" "$GPT_ANCHOR_" "$GPT_PRIO_" "$GPT_QUESTION_" "$GPT_OUTPUT_FILE_" "cost=$GPT_TOKENS_" "$GPT_RESPONSE_" write_csv_gpt_tmp "${LOG_PATH_MODULE}/vul_func_${F_COUNTER}_${FUNCTION}-${SEARCH_TERM}.txt" "${GPT_ANCHOR}" "${GPT_PRIO}" "Can you give me a side by side desciption of the following code in a table, where on the left is the code and on the right the desciption. And please use proper spacing and | to make it terminal friendly:" "${LOG_PATH_MODULE}/vul_func_${F_COUNTER}_${FUNCTION}-${SEARCH_TERM}.txt" "" "" # add ChatGPT link printf '%s\n\n' "" >> "${LOG_PATH_MODULE}/vul_func_${F_COUNTER}_${FUNCTION}-${SEARCH_TERM}.txt" From 27059db1e7db9b1ccb7f985d5195260922ff46a8 Mon Sep 17 00:00:00 2001 From: Michael Messner Date: Mon, 24 Jul 2023 14:44:46 +0200 Subject: [PATCH 07/10] Update modules/S21_python_check.sh Co-authored-by: Benedikt Kuehne <62940240+BenediktMKuehne@users.noreply.github.com> --- modules/S21_python_check.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/S21_python_check.sh b/modules/S21_python_check.sh index a5a66c4f8..c2d411679 100755 --- a/modules/S21_python_check.sh +++ b/modules/S21_python_check.sh @@ -110,7 +110,7 @@ s21_script_bandit() { write_csv_log "$(print_path "$PY_SCRIPT_")" "$VULNS" "$CFF" "NA" if [[ "${GPT_OPTION}" -gt 0 ]]; then GPT_ANCHOR_="$(openssl rand -hex 8)" - # "${GPT_INPUT_FILE_}" "$GPT_ANCHOR_" "GPT-Prio-$GPT_PRIO_" "$GPT_QUESTION_" "$GPT_OUTPUT_FILE_" "cost=$GPT_TOKENS_" "$GPT_RESPONSE_" + # "${GPT_INPUT_FILE_}" "$GPT_ANCHOR_" "$GPT_PRIO_" "$GPT_QUESTION_" "$GPT_OUTPUT_FILE_" "cost=$GPT_TOKENS_" "$GPT_RESPONSE_" write_csv_gpt_tmp "$(cut_path "${PY_SCRIPT_}")" "${GPT_ANCHOR_}" "${GPT_PRIO_}" "${GPT_QUESTION}" "${PY_LOG}" "" "" # add ChatGPT link to output file printf '%s\n\n' "" >> "${PY_LOG}" From 966c396cb83a2628f9ed16300a0647fb04707b8a Mon Sep 17 00:00:00 2001 From: Michael Messner Date: Mon, 24 Jul 2023 14:44:54 +0200 Subject: [PATCH 08/10] Update modules/S22_php_check.sh Co-authored-by: Benedikt Kuehne <62940240+BenediktMKuehne@users.noreply.github.com> --- modules/S22_php_check.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/S22_php_check.sh b/modules/S22_php_check.sh index 2a2a6ca18..aa585c28e 100755 --- a/modules/S22_php_check.sh +++ b/modules/S22_php_check.sh @@ -115,7 +115,7 @@ s22_vuln_check_semgrep() { if [[ "${GPT_OPTION}" -gt 0 ]]; then GPT_ANCHOR_="$(openssl rand -hex 8)" - # "${GPT_INPUT_FILE_}" "$GPT_ANCHOR_" "GPT-Prio-$GPT_PRIO_" "$GPT_QUESTION_" "$GPT_OUTPUT_FILE_" "cost=$GPT_TOKENS_" "$GPT_RESPONSE_" + # "${GPT_INPUT_FILE_}" "$GPT_ANCHOR_" "$GPT_PRIO_" "$GPT_QUESTION_" "$GPT_OUTPUT_FILE_" "cost=$GPT_TOKENS_" "$GPT_RESPONSE_" write_csv_gpt_tmp "$(cut_path "${SEMG_SOURCE_FILE}")" "${GPT_ANCHOR_}" "${GPT_PRIO_}" "${GPT_QUESTION} And I think there might be something in line ${SEMG_LINE_NR}" "${LOG_PATH_MODULE}/semgrep_sources/${SEMG_SOURCE_FILE_NAME}.log" "" "" # add ChatGPT link printf '%s\n\n' "" >> "${LOG_PATH_MODULE}/semgrep_sources/${SEMG_SOURCE_FILE_NAME}.log" From 2dd490d33d7ca654a0cbf8e12ded548f95b581d4 Mon Sep 17 00:00:00 2001 From: Michael Messner Date: Mon, 24 Jul 2023 14:45:01 +0200 Subject: [PATCH 09/10] Update modules/S23_lua_check.sh Co-authored-by: Benedikt Kuehne <62940240+BenediktMKuehne@users.noreply.github.com> --- modules/S23_lua_check.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/S23_lua_check.sh b/modules/S23_lua_check.sh index 70be96bd9..c1bddd52f 100755 --- a/modules/S23_lua_check.sh +++ b/modules/S23_lua_check.sh @@ -113,7 +113,7 @@ s23_luaseccheck() { write_csv_log "$(print_path "$QUERY_FILE")" "0" "$ISSUES_FILE" "NA" if [[ "${GPT_OPTION}" -gt 0 ]]; then GPT_ANCHOR_="$(openssl rand -hex 8)" - # "${GPT_INPUT_FILE_}" "$GPT_ANCHOR_" "GPT-Prio-$GPT_PRIO_" "$GPT_QUESTION_" "$GPT_OUTPUT_FILE_" "cost=$GPT_TOKENS_" "$GPT_RESPONSE_" + # "${GPT_INPUT_FILE_}" "$GPT_ANCHOR_" "$GPT_PRIO_" "$GPT_QUESTION_" "$GPT_OUTPUT_FILE_" "cost=$GPT_TOKENS_" "$GPT_RESPONSE_" write_csv_gpt_tmp "$(cut_path "${QUERY_FILE}")" "${GPT_ANCHOR_}" "${GPT_PRIO_}" "${GPT_QUESTION}" "${CSV_DIR}/s23_lua_check.csv" "" "" # add ChatGPT link print_ln From a8d53eec09a5f6c8887c070656d8d48e849501f8 Mon Sep 17 00:00:00 2001 From: m-1-k-3 Date: Mon, 24 Jul 2023 14:48:09 +0200 Subject: [PATCH 10/10] requested changes --- helpers/helpers_emba_print.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/helpers/helpers_emba_print.sh b/helpers/helpers_emba_print.sh index 25366df1b..0cc1781cd 100755 --- a/helpers/helpers_emba_print.sh +++ b/helpers/helpers_emba_print.sh @@ -834,7 +834,7 @@ print_notification() { } # writes inputs into csv for chatgpt -# Args: "${GPT_INPUT_FILE_}" "${GPT_ANCHOR_}" "GPT-Prio-${GPT_PRIO_}" "${GPT_QUESTION_}" "${GPT_OUTPUT_FILE_}" "cost=${GPT_TOKENS_}" "${GPT_RESPONSE_}" +# Args: "${GPT_INPUT_FILE_}" "${GPT_ANCHOR_}" "${GPT_PRIO_}" "${GPT_QUESTION_}" "${GPT_OUTPUT_FILE_}" "cost=${GPT_TOKENS_}" "${GPT_RESPONSE_}" write_csv_gpt() { local CSV_ITEMS=("$@") if ! [[ -d "$CSV_DIR" ]]; then @@ -846,7 +846,7 @@ write_csv_gpt() { } # writes inputs into tmp csv for chatgpt -# Args: "${GPT_INPUT_FILE_}" "${GPT_ANCHOR_}" "GPT-Prio-${GPT_PRIO_}" "${GPT_QUESTION_}" "${GPT_OUTPUT_FILE_}" "cost=${GPT_TOKENS_}" "${GPT_RESPONSE_}" +# Args: "${GPT_INPUT_FILE_}" "${GPT_ANCHOR_}" "${GPT_PRIO_}" "${GPT_QUESTION_}" "${GPT_OUTPUT_FILE_}" "cost=${GPT_TOKENS_}" "${GPT_RESPONSE_}" write_csv_gpt_tmp() { local CSV_ITEMS=("$@") if ! [[ -d "$CSV_DIR" ]]; then