Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CLI: 1) -dd to specify a custom download directory, 2) -o can now set the full filename #263

Merged
merged 3 commits into from
Nov 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 4 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,6 @@ Search for words, documents, images, videos, news, maps and text translation usi
```python
pip install -U duckduckgo_search
```
There is also a beta release that uses the `httpx` library:
```python
pip install -U duckduckgo_search==6.2.11b1
```
> [!NOTE]
> you can install lxml to use the `text` function with `backend='html'` or `backend='lite'` (size ≈ 12 MB)<br>
> `pip install -U duckduckgo_search[lxml]`
Expand All @@ -44,13 +40,13 @@ CLI examples:
# AI chat
ddgs chat
# text search
ddgs text -k "standard oil"
ddgs text -k "Assyrian siege of Jerusalem"
# find and download pdf files via proxy
ddgs text -k "pushkin filetype:pdf" -r wt-wt -m 50 -d -p https://1.2.3.4:1234
ddgs text -k "Economics in one lesson filetype:pdf" -r wt-wt -m 50 -p https://1.2.3.4:1234 -d -dd economics_reading
# using Tor Browser as a proxy (`tb` is an alias for `socks5://127.0.0.1:9150`)
ddgs text -k "'to kill a mockingbird' filetype:doc" -m 50 -d -p tb
ddgs text -k "'The history of the Standard Oil Company' filetype:doc" -m 50 -d -p tb
# find and save to csv
ddgs text -k "'neuroscience exploring the brain' filetype:pdf" -m 70 -o csv
ddgs text -k "'neuroscience exploring the brain' filetype:pdf" -m 70 -o neuroscience_list.csv
# don't verify SSL when making the request
ddgs text -k "Mississippi Burning" -v false
# find and download images
Expand Down
158 changes: 90 additions & 68 deletions duckduckgo_search/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,15 @@
}


def _save_data(keywords, data, function_name, filename):
    """Save search results to a csv or json file selected by the ``-o`` value.

    Args:
        keywords: Search keywords (callers pass them already sanitized); used
            only when the file name has to be generated.
        data: Search results, handed unchanged to ``_save_csv``/``_save_json``.
        function_name: Name of the CLI command ("text", "images", ...); used as
            the prefix of a generated file name.
        filename: Either a bare format, "csv" or "json", in which case the file
            is named ``{function_name}_{keywords}_{timestamp}.{format}``, or a
            file name ending in ".csv"/".json", which is used as given.

    Raises:
        click.BadParameter: If ``filename`` is neither a supported format nor a
            file name with a supported extension. Previously such values were
            silently ignored, so nothing was saved and nothing was printed.
    """
    if not filename:
        # Callers only invoke this when -o was given; keep the no-op for safety.
        return

    savers = {"csv": _save_csv, "json": _save_json}

    if filename.endswith((".csv", ".json")):
        stem, ext = filename.rsplit(".", 1)
    else:
        # No recognised extension: the whole value must itself be the format.
        stem, ext = None, filename

    if ext not in savers:
        raise click.BadParameter(
            f"unsupported output {filename!r}: use 'csv', 'json' "
            "or a file name ending in .csv or .json",
            param_hint="--output",
        )

    if not stem:
        # Bare format (or a name that is only an extension, e.g. ".csv").
        stem = f"{function_name}_{keywords}_{datetime.now():%Y%m%d_%H%M%S}"

    savers[ext](f"{stem}.{ext}", data)


def _save_json(jsonfile, data):
with open(jsonfile, "w", encoding="utf-8") as file:
file.write(json_dumps(data))
Expand Down Expand Up @@ -91,16 +100,15 @@ def _download_file(url, dir_path, filename, proxy, verify):
logger.debug(f"download_file url={url} {type(ex).__name__} {ex}")


def _download_results(keywords, results, images=False, proxy=None, threads=None, verify=True):
path_type = "images" if images else "text"
path = f"{path_type}_{keywords}_{datetime.now():%Y%m%d_%H%M%S}"
def _download_results(keywords, results, function_name, proxy=None, threads=None, verify=True, pathname=None):
path = pathname if pathname else f"{function_name}_{keywords}_{datetime.now():%Y%m%d_%H%M%S}"
os.makedirs(path, exist_ok=True)

threads = 10 if threads is None else threads
with ThreadPoolExecutor(max_workers=threads) as executor:
futures = []
for i, res in enumerate(results, start=1):
url = res["image"] if images else res["href"]
url = res["image"] if function_name == "images" else res["href"]
filename = unquote(url.split("/")[-1].split("?")[0])
f = executor.submit(_download_file, url, path, f"{i}_{filename}", proxy, verify)
futures.append(f)
Expand All @@ -115,7 +123,7 @@ def _download_results(keywords, results, images=False, proxy=None, threads=None,

@click.group(chain=True)
def cli():
"""dukduckgo_search CLI tool"""
"""duckduckgo_search CLI tool"""
pass


Expand Down Expand Up @@ -186,13 +194,27 @@ def chat(load, proxy, multiline, timeout, verify, model):
@click.option("-s", "--safesearch", default="moderate", type=click.Choice(["on", "moderate", "off"]))
@click.option("-t", "--timelimit", default=None, type=click.Choice(["d", "w", "m", "y"]), help="day, week, month, year")
@click.option("-m", "--max_results", default=20, help="maximum number of results, default=20")
@click.option("-o", "--output", default="print", help="csv, json (save the results to a csv or json file)")
@click.option("-d", "--download", is_flag=True, default=False, help="download results to 'keywords' folder")
@click.option("-o", "--output", help="csv, json or filename.csv|json (save the results to a csv or json file)")
@click.option("-d", "--download", is_flag=True, default=False, help="download results. -dd to set custom directory")
@click.option("-dd", "--download-directory", help="Specify custom download directory")
@click.option("-b", "--backend", default="api", type=click.Choice(["api", "html", "lite"]), help="which backend to use")
@click.option("-th", "--threads", default=10, help="download threads, default=10")
@click.option("-p", "--proxy", default=None, help="the proxy to send requests, example: socks5://127.0.0.1:9150")
@click.option("-v", "--verify", default=True, help="verify SSL when making the request")
def text(keywords, region, safesearch, timelimit, backend, output, download, threads, max_results, proxy, verify):
def text(
keywords,
region,
safesearch,
timelimit,
backend,
output,
download,
download_directory,
threads,
max_results,
proxy,
verify,
):
"""CLI function to perform a text search using DuckDuckGo API."""
data = DDGS(proxy=_expand_proxy_tb_alias(proxy), verify=verify).text(
keywords=keywords,
Expand All @@ -203,32 +225,35 @@ def text(keywords, region, safesearch, timelimit, backend, output, download, thr
max_results=max_results,
)
keywords = _sanitize_keywords(keywords)
filename = f"text_{keywords}_{datetime.now():%Y%m%d_%H%M%S}"
if output == "print" and not download:
_print_data(data)
elif output == "csv":
_save_csv(f"{filename}.csv", data)
elif output == "json":
_save_json(f"{filename}.json", data)
if output:
_save_data(keywords, data, "text", filename=output)
if download:
_download_results(keywords, data, proxy=proxy, threads=threads, verify=verify)
_download_results(
keywords,
data,
function_name="text",
proxy=proxy,
threads=threads,
verify=verify,
pathname=download_directory,
)
if not output and not download:
_print_data(data)


@cli.command()
@click.option("-k", "--keywords", required=True, help="answers search, keywords for query")
@click.option("-o", "--output", default="print", help="csv, json (save the results to a csv or json file)")
@click.option("-o", "--output", help="csv, json or filename.csv|json (save the results to a csv or json file)")
@click.option("-p", "--proxy", default=None, help="the proxy to send requests, example: socks5://127.0.0.1:9150")
@click.option("-v", "--verify", default=True, help="verify SSL when making the request")
def answers(keywords, output, proxy, verify):
"""CLI function to perform a answers search using DuckDuckGo API."""
data = DDGS(proxy=_expand_proxy_tb_alias(proxy), verify=verify).answers(keywords=keywords)
filename = f"answers_{_sanitize_keywords(keywords)}_{datetime.now():%Y%m%d_%H%M%S}"
if output == "print":
keywords = _sanitize_keywords(keywords)
if output:
_save_data(keywords, data, function_name="answers", filename=output)
else:
_print_data(data)
elif output == "csv":
_save_csv(f"{filename}.csv", data)
elif output == "json":
_save_json(f"{filename}.json", data)


@cli.command()
Expand Down Expand Up @@ -271,8 +296,9 @@ def answers(keywords, output, proxy, verify):
type=click.Choice(["any", "Public", "Share", "ShareCommercially", "Modify", "ModifyCommercially"]),
)
@click.option("-m", "--max_results", default=90, help="maximum number of results, default=90")
@click.option("-o", "--output", default="print", help="csv, json (save the results to a csv or json file)")
@click.option("-d", "--download", is_flag=True, default=False, help="download and save images to 'keywords' folder")
@click.option("-o", "--output", help="csv, json or filename.csv|json (save the results to a csv or json file)")
@click.option("-d", "--download", is_flag=True, default=False, help="download results. -dd to set custom directory")
@click.option("-dd", "--download-directory", help="Specify custom download directory")
@click.option("-th", "--threads", default=10, help="download threads, default=10")
@click.option("-p", "--proxy", default=None, help="the proxy to send requests, example: socks5://127.0.0.1:9150")
@click.option("-v", "--verify", default=True, help="verify SSL when making the request")
Expand All @@ -287,6 +313,7 @@ def images(
layout,
license_image,
download,
download_directory,
threads,
max_results,
output,
Expand All @@ -307,15 +334,20 @@ def images(
max_results=max_results,
)
keywords = _sanitize_keywords(keywords)
filename = f"images_{_sanitize_keywords(keywords)}_{datetime.now():%Y%m%d_%H%M%S}"
if output == "print" and not download:
_print_data(data)
elif output == "csv":
_save_csv(f"{filename}.csv", data)
elif output == "json":
_save_json(f"{filename}.json", data)
if output:
_save_data(keywords, data, function_name="images", filename=output)
if download:
_download_results(keywords, data, images=True, proxy=proxy, threads=threads, verify=verify)
_download_results(
keywords,
data,
function_name="images",
proxy=proxy,
threads=threads,
verify=verify,
pathname=download_directory,
)
if not output and not download:
_print_data(data)


@cli.command()
Expand All @@ -327,7 +359,7 @@ def images(
@click.option("-d", "--duration", default=None, type=click.Choice(["short", "medium", "long"]))
@click.option("-lic", "--license_videos", default=None, type=click.Choice(["creativeCommon", "youtube"]))
@click.option("-m", "--max_results", default=50, help="maximum number of results, default=50")
@click.option("-o", "--output", default="print", help="csv, json (save the results to a csv or json file)")
@click.option("-o", "--output", help="csv, json or filename.csv|json (save the results to a csv or json file)")
@click.option("-p", "--proxy", default=None, help="the proxy to send requests, example: socks5://127.0.0.1:9150")
@click.option("-v", "--verify", default=True, help="verify SSL when making the request")
def videos(
Expand All @@ -344,13 +376,11 @@ def videos(
license_videos=license_videos,
max_results=max_results,
)
filename = f"videos_{_sanitize_keywords(keywords)}_{datetime.now():%Y%m%d_%H%M%S}"
if output == "print":
keywords = _sanitize_keywords(keywords)
if output:
_save_data(keywords, data, function_name="videos", filename=output)
else:
_print_data(data)
elif output == "csv":
_save_csv(f"{filename}.csv", data)
elif output == "json":
_save_json(f"{filename}.json", data)


@cli.command()
Expand All @@ -359,21 +389,19 @@ def videos(
@click.option("-s", "--safesearch", default="moderate", type=click.Choice(["on", "moderate", "off"]))
@click.option("-t", "--timelimit", default=None, type=click.Choice(["d", "w", "m", "y"]), help="day, week, month, year")
@click.option("-m", "--max_results", default=25, help="maximum number of results, default=25")
@click.option("-o", "--output", default="print", help="csv, json (save the results to a csv or json file)")
@click.option("-o", "--output", help="csv, json or filename.csv|json (save the results to a csv or json file)")
@click.option("-p", "--proxy", default=None, help="the proxy to send requests, example: socks5://127.0.0.1:9150")
@click.option("-v", "--verify", default=True, help="verify SSL when making the request")
def news(keywords, region, safesearch, timelimit, max_results, output, proxy, verify):
"""CLI function to perform a news search using DuckDuckGo API."""
data = DDGS(proxy=_expand_proxy_tb_alias(proxy), verify=verify).news(
keywords=keywords, region=region, safesearch=safesearch, timelimit=timelimit, max_results=max_results
)
filename = f"news_{_sanitize_keywords(keywords)}_{datetime.now():%Y%m%d_%H%M%S}"
if output == "print":
keywords = _sanitize_keywords(keywords)
if output:
_save_data(keywords, data, function_name="news", filename=output)
else:
_print_data(data)
elif output == "csv":
_save_csv(f"{filename}.csv", data)
elif output == "json":
_save_json(f"{filename}.json", data)


@cli.command()
Expand All @@ -389,7 +417,7 @@ def news(keywords, region, safesearch, timelimit, max_results, output, proxy, ve
@click.option("-lon", "--longitude", default=None, help="""if lat and long are set, the other params are not used""")
@click.option("-r", "--radius", default=0, help="expand the search square by the distance in kilometers")
@click.option("-m", "--max_results", default=50, help="number of results, default=50")
@click.option("-o", "--output", default="print", help="csv, json (save the results to a csv or json file)")
@click.option("-o", "--output", help="csv, json or filename.csv|json (save the results to a csv or json file)")
@click.option("-proxy", "--proxy", default=None, help="the proxy to send requests, example: socks5://127.0.0.1:9150")
@click.option("-v", "--verify", default=True, help="verify SSL when making the request")
def maps(
Expand Down Expand Up @@ -424,50 +452,44 @@ def maps(
radius=radius,
max_results=max_results,
)
filename = f"maps_{_sanitize_keywords(keywords)}_{datetime.now():%Y%m%d_%H%M%S}"
if output == "print":
keywords = _sanitize_keywords(keywords)
if output:
_save_data(keywords, data, function_name="maps", filename=output)
else:
_print_data(data)
elif output == "csv":
_save_csv(f"{filename}.csv", data)
elif output == "json":
_save_json(f"{filename}.json", data)


@cli.command()
@click.option("-k", "--keywords", required=True, help="text for translation")
@click.option("-f", "--from_", help="What language to translate from (defaults automatically)")
@click.option("-t", "--to", default="en", help="de, ru, fr, etc. What language to translate, defaults='en'")
@click.option("-o", "--output", default="print", help="csv, json (save the results to a csv or json file)")
@click.option("-o", "--output", help="csv, json or filename.csv|json (save the results to a csv or json file)")
@click.option("-p", "--proxy", default=None, help="the proxy to send requests, example: socks5://127.0.0.1:9150")
@click.option("-v", "--verify", default=True, help="verify SSL when making the request")
def translate(keywords, from_, to, output, proxy, verify):
"""CLI function to perform translate using DuckDuckGo API."""
data = DDGS(proxy=_expand_proxy_tb_alias(proxy), verify=verify).translate(keywords=keywords, from_=from_, to=to)
filename = f"translate_{_sanitize_keywords(keywords)}_{datetime.now():%Y%m%d_%H%M%S}"
if output == "print":
keywords = _sanitize_keywords(keywords)
if output:
_save_data(keywords, data, function_name="translate", filename=output)
else:
_print_data(data)
elif output == "csv":
_save_csv(f"{filename}.csv", data)
elif output == "json":
_save_json(f"{filename}.json", data)


@cli.command()
@click.option("-k", "--keywords", required=True, help="keywords for query")
@click.option("-r", "--region", default="wt-wt", help="wt-wt, us-en, ru-ru, etc. -region https://duckduckgo.com/params")
@click.option("-o", "--output", default="print", help="csv, json (save the results to a csv or json file)")
@click.option("-o", "--output", help="csv, json or filename.csv|json (save the results to a csv or json file)")
@click.option("-p", "--proxy", default=None, help="the proxy to send requests, example: socks5://127.0.0.1:9150")
@click.option("-v", "--verify", default=True, help="verify SSL when making the request")
def suggestions(keywords, region, output, proxy, verify):
"""CLI function to perform a suggestions search using DuckDuckGo API."""
data = DDGS(proxy=_expand_proxy_tb_alias(proxy), verify=verify).suggestions(keywords=keywords, region=region)
filename = f"suggestions_{_sanitize_keywords(keywords)}_{datetime.now():%Y%m%d_%H%M%S}"
if output == "print":
keywords = _sanitize_keywords(keywords)
if output:
_save_data(keywords, data, function_name="suggestions", filename=output)
else:
_print_data(data)
elif output == "csv":
_save_csv(f"{filename}.csv", data)
elif output == "json":
_save_json(f"{filename}.json", data)


if __name__ == "__main__":
Expand Down
10 changes: 5 additions & 5 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

@pytest.fixture(autouse=True)
def pause_between_tests():
time.sleep(0.5)
time.sleep(1)


def test_version_command():
Expand Down Expand Up @@ -70,7 +70,7 @@ def test_save_csv(tmp_path):
keywords = "butterfly"
with DDGS() as ddgs:
results = ddgs.text(keywords, max_results=30)
assert 27 <= len(results) <= 30
assert 23 <= len(results) <= 30

temp_file = tmp_path / f"{keywords}.csv"
_save_csv(temp_file, results)
Expand All @@ -81,7 +81,7 @@ def test_save_json(tmp_path):
keywords = "chicago"
with DDGS() as ddgs:
results = ddgs.text(keywords, max_results=30)
assert 27 <= len(results) <= 30
assert 23 <= len(results) <= 30

temp_file = tmp_path / f"{keywords}.json"
_save_json(temp_file, results)
Expand All @@ -94,7 +94,7 @@ def test_text_download():
results = ddgs.text(keywords, max_results=8)
assert 7 <= len(results) <= 8

_download_results(keywords, results)
_download_results(keywords, results, function_name="text")

# delete files contains keyword in name
files = False
Expand All @@ -121,7 +121,7 @@ def test_images_download():
results = ddgs.images(keywords, max_results=8)
assert len(results) >= 8

_download_results(keywords, results, images=True)
_download_results(keywords, results, function_name="images")

# delete files contains keyword in name
files = False
Expand Down
Loading