Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes in download-file script for windows #318

Merged
merged 5 commits into from
Oct 2, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 41 additions & 10 deletions script/download-file/customize.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,29 @@
import os
import subprocess

def escape_special_chars(text, tool=None):
special_chars = [
'&', '|', '(', ')'
]

for char in special_chars:
text = text.replace(char, f'^{char}')

#handle URL special cases
if tool != "rclone":
text = text.replace('%', "%%")

return text

def preprocess(i):

os_info = i['os_info']
env = i['env']

# env to be passed to the subprocess
subprocess_env = os.environ.copy()
subprocess_env['PATH'] += os.pathsep + os.pathsep.join(env.get('+PATH', ''))

meta = i['meta']

automation = i['automation']
Expand Down Expand Up @@ -86,16 +105,21 @@ def preprocess(i):
env['CM_DOWNLOAD_FILENAME'] = "index.html"

if tool == "cmutil":
print ('')
cmutil_require_download = 0
if env.get('CM_DOWNLOAD_CHECKSUM_FILE', '') != '':
checksum_cmd = f"cd {q}{filepath}{q} {xsep} md5sum -c{x_c} {x}{q}{env['CM_DOWNLOAD_CHECKSUM_FILE']}{q}"
checksum_result = subprocess.run(checksum_cmd, cwd=f'{q}{filepath}{q}', capture_output=True, text=True, shell=True)
if os_info['platform'] == 'windows':
checksum_cmd = f"cd {q}{filepath}{q} {xsep} md5sum -c{x_c} {x}{escape_special_chars(env['CM_DOWNLOAD_CHECKSUM_FILE'])}"
else:
checksum_cmd = f"cd {q}{filepath}{q} {xsep} md5sum -c{x_c} {x}{q}{env['CM_DOWNLOAD_CHECKSUM_FILE']}{q}"
checksum_result = subprocess.run(checksum_cmd, cwd=f'{q}{filepath}{q}', capture_output=True, text=True, shell=True, env=subprocess_env)
elif env.get('CM_DOWNLOAD_CHECKSUM', '') != '':
checksum_cmd = f"echo {env.get('CM_DOWNLOAD_CHECKSUM')} {x}{q}{env['CM_DOWNLOAD_FILENAME']}{q} | md5sum -c{x_c} -"
checksum_result = subprocess.run(checksum_cmd, capture_output=True, text=True, shell=True)
if os_info['platform'] == 'windows':
checksum_cmd = f"echo {env.get('CM_DOWNLOAD_CHECKSUM')} {x}{escape_special_chars(env['CM_DOWNLOAD_FILENAME'])} | md5sum -c{x_c} -"
else:
checksum_cmd = f"echo {env.get('CM_DOWNLOAD_CHECKSUM')} {x}{q}{env['CM_DOWNLOAD_FILENAME']}{q} | md5sum -c{x_c} -"
checksum_result = subprocess.run(checksum_cmd, capture_output=True, text=True, shell=True, env=subprocess_env)
if env.get('CM_DOWNLOAD_CHECKSUM_FILE', '') != '' or env.get('CM_DOWNLOAD_CHECKSUM', '') != '':
#print(checksum_result) #for debugging
# print(checksum_result) #for debugging
if "checksum did not match" in checksum_result.stderr.lower():
computed_checksum = subprocess.run(f"md5sum {env['CM_DOWNLOAD_FILENAME']}", capture_output=True, text=True, shell=True).stdout.split(" ")[0]
print(f"WARNING: File already present, mismatch between original checksum({env.get('CM_DOWNLOAD_CHECKSUM')}) and computed checksum({computed_checksum}). Deleting the already present file and downloading new.")
Expand All @@ -108,7 +132,7 @@ def preprocess(i):
elif "no such file" in checksum_result.stderr.lower():
#print(f"No file {env['CM_DOWNLOAD_FILENAME']}. Downloading through cmutil.")
cmutil_require_download = 1
elif checksum_result.returncode == 1:
elif checksum_result.returncode > 0:
return {"return":1, "error":f"Error while checking checksum: {checksum_result.stderr}"}
else:
print(f"File {env['CM_DOWNLOAD_FILENAME']} already present, original checksum and computed checksum matches! Skipping Download..")
Expand Down Expand Up @@ -195,19 +219,26 @@ def preprocess(i):
if env.get('CM_DOWNLOAD_CHECKSUM_FILE', '') != '':
env['CM_DOWNLOAD_CHECKSUM_CMD'] = f"cd {q}{filepath}{q} {xsep} md5sum -c {x_c} {x}{q}{env['CM_DOWNLOAD_CHECKSUM_FILE']}{q}"
elif env.get('CM_DOWNLOAD_CHECKSUM', '') != '':
env['CM_DOWNLOAD_CHECKSUM_CMD'] = "echo {} {}{}{}{} | md5sum {} -c -".format(env.get('CM_DOWNLOAD_CHECKSUM'), x, q, env['CM_DOWNLOAD_FILENAME'], q, x_c)
if os_info['platform'] == 'windows':
env['CM_DOWNLOAD_CHECKSUM_CMD'] = "echo {} {}{} | md5sum {} -c -".format(env.get('CM_DOWNLOAD_CHECKSUM'), x, escape_special_chars(env['CM_DOWNLOAD_FILENAME']), x_c)
else:
env['CM_DOWNLOAD_CHECKSUM_CMD'] = "echo {} {}{}{}{} | md5sum {} -c -".format(env.get('CM_DOWNLOAD_CHECKSUM'), x, q, env['CM_DOWNLOAD_FILENAME'], q, x_c)
for i in range(1,5):
if env.get('CM_DOWNLOAD_CHECKSUM'+str(i),'') == '':
break
env['CM_DOWNLOAD_CHECKSUM_CMD'] += " || echo {} {}{}{}{} | md5sum {} -c -".format(env.get('CM_DOWNLOAD_CHECKSUM'+str(i)), x, q, env['CM_DOWNLOAD_FILENAME'], q, x_c)
if os_info['platform'] == 'windows':
env['CM_DOWNLOAD_CHECKSUM_CMD'] += " || echo {} {}{} | md5sum {} -c -".format(env.get('CM_DOWNLOAD_CHECKSUM'+str(i)), x, escape_special_chars(env['CM_DOWNLOAD_FILENAME']), x_c)
else:
env['CM_DOWNLOAD_CHECKSUM_CMD'] += " || echo {} {}{}{}{} | md5sum {} -c -".format(env.get('CM_DOWNLOAD_CHECKSUM'+str(i)), x, q, env['CM_DOWNLOAD_FILENAME'].replace("%", "%%"), q, x_c)
# print(env['CM_DOWNLOAD_CHECKSUM_CMD'])
else:
env['CM_DOWNLOAD_CHECKSUM_CMD'] = ""

if not pre_clean:
env['CM_PRE_DOWNLOAD_CMD'] = ''

if os_info['platform'] == 'windows' and env.get('CM_DOWNLOAD_CMD', '') != '':
env['CM_DOWNLOAD_CMD'] = env['CM_DOWNLOAD_CMD'].replace('&', '^&').replace('|', '^|').replace('(', '^(').replace(')', '^)')
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

don't we need this replace?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tested it on both powershell and command prompt. Both worked fine only when the escaping was removed.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@anandhu-eng - may I suggest you to add # to this line instead of removing. Probably there was a reason why it was added. If we encounter an issue in the future, we can then trace it back ... Thanks a lot!!!

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will then merge it and test on my machine before running another PR ...

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@anandhu-eng It broke the Windows test of ABTF POC - it was working fine earlier as it uses wget for the download I believe.

https://github.com/mlcommons/cm4mlops/actions/runs/11126891963/job/30917906688?pr=318#step:5:3440

env['CM_DOWNLOAD_CMD'] = escape_special_chars(env['CM_DOWNLOAD_CMD'], tool)
arjunsuresh marked this conversation as resolved.
Show resolved Hide resolved
if pre_clean:
env['CM_PRE_DOWNLOAD_CLEAN_CMD'] = "del /Q %CM_DOWNLOAD_FILENAME%"
# Check that if empty CMD, should add ""
Expand Down
Loading