Skip to content

Commit

Permalink
Protection
Browse files Browse the repository at this point in the history
  • Loading branch information
pseudotensor committed Oct 31, 2024
1 parent f3ce8a2 commit d6fa55b
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 9 deletions.
20 changes: 12 additions & 8 deletions src/gpt_langchain.py
Original file line number Diff line number Diff line change
Expand Up @@ -4815,14 +4815,18 @@ def file_to_doc(file,
docs1.extend(docs1a)
if len(docs1) == 0 and have_playwright or do_playwright:
# then something went wrong, try another loader:
from langchain_community.document_loaders import PlaywrightURLLoader
docs1a = asyncio.run(PlaywrightURLLoader(urls=final_urls).aload())
# docs1 = PlaywrightURLLoader(urls=[file]).load()
docs1a = [x for x in docs1a if
x.page_content and x.page_content != '403 Forbidden' and not x.page_content.startswith(
'Access Denied')]
add_parser(docs1a, 'PlaywrightURLLoader')
docs1.extend(docs1a)
try:
    # Imported inside the try so that a missing/broken optional playwright
    # install is handled by the except below instead of aborting the load.
    from langchain_community.document_loaders import PlaywrightURLLoader
    docs1a = asyncio.run(PlaywrightURLLoader(urls=final_urls).aload())
    # docs1 = PlaywrightURLLoader(urls=[file]).load()
    # Keep only pages that have content and are not an access-denied response.
    docs1a = [x for x in docs1a if
              x.page_content and x.page_content != '403 Forbidden' and not x.page_content.startswith(
                  'Access Denied')]
    add_parser(docs1a, 'PlaywrightURLLoader')
    docs1.extend(docs1a)
except Exception as e0:
    # Best-effort loader: report the failure and fall through to the next loader.
    # traceback.print_exception() returns None (and prints as a side effect), so
    # format the traceback into the message once via format_exc() instead.
    print("playwright failed: %s: %s" % (str(e0), traceback.format_exc()), flush=True)
if len(docs1) == 0 and have_selenium or do_selenium:
# then something went wrong, try another loader:
# but requires Chrome binary, else get: selenium.common.exceptions.WebDriverException:
Expand Down
2 changes: 1 addition & 1 deletion src/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "f5a3cf5b09f5845a7177d1fff1c97b5267804202"
__version__ = "f3ce8a2491b1387b727280424a61680be896013b"

0 comments on commit d6fa55b

Please sign in to comment.