forked from run-llama/llama_index
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
move pandas to experimental (run-llama#12419)
- Loading branch information
1 parent: d96c966
commit 047576b
Showing 26 changed files with 614 additions and 364 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
::: llama_index.core.query_engine | ||
::: llama_index.experimental.query_engine | ||
options: | ||
members: | ||
- PandasQueryEngine |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
84 changes: 13 additions & 71 deletions
84
llama-index-core/llama_index/core/indices/struct_store/pandas.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,83 +1,25 @@ | ||
"""Pandas csv structured store.""" | ||
"""Pandas csv structured store. | ||
import logging | ||
from typing import Any, Optional, Sequence | ||
DEPRECATED: Please use :class:`PandasQueryEngine` in `llama-index-experimental` instead. | ||
""" | ||
|
||
import pandas as pd | ||
from llama_index.core.base.base_query_engine import BaseQueryEngine | ||
from llama_index.core.base.base_retriever import BaseRetriever | ||
from llama_index.core.data_structs.table import PandasStructTable | ||
from llama_index.core.indices.struct_store.base import BaseStructStoreIndex | ||
from llama_index.core.llms.utils import LLMType | ||
from llama_index.core.schema import BaseNode | ||
from typing import Any | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
class PandasIndex(BaseStructStoreIndex[PandasStructTable]): | ||
"""Pandas Index. | ||
Deprecated. Please use :class:`PandasQueryEngine` instead. | ||
The PandasIndex is an index that stores | ||
a Pandas dataframe under the hood. | ||
Currently index "construction" is not supported. | ||
During query time, the user can either specify a raw SQL query | ||
or a natural language query to retrieve their data. | ||
Args: | ||
pandas_df (Optional[pd.DataFrame]): Pandas dataframe to use. | ||
See :ref:`Ref-Struct-Store` for more details. | ||
""" | ||
|
||
index_struct_cls = PandasStructTable | ||
|
||
class PandasIndex: | ||
def __init__( | ||
self, | ||
df: pd.DataFrame, | ||
nodes: Optional[Sequence[BaseNode]] = None, | ||
index_struct: Optional[PandasStructTable] = None, | ||
*args: Any, | ||
**kwargs: Any, | ||
) -> None: | ||
"""Initialize params.""" | ||
logger.warning( | ||
"PandasIndex is deprecated. \ | ||
Please directly use `PandasQueryEngine(df)` instead." | ||
raise DeprecationWarning( | ||
"PandasQueryEngine has been moved to `llama-index-experimental`.\n" | ||
"`pip install llama-index-experimental`\n" | ||
"`from llama_index.experimental.query_engine import PandasQueryEngine`\n" | ||
"Note that the PandasQueryEngine allows for arbitrary code execution, \n" | ||
"and should be used in a secure environment." | ||
) | ||
|
||
if nodes is not None: | ||
raise ValueError("We currently do not support indexing documents or nodes.") | ||
self.df = df | ||
|
||
super().__init__( | ||
nodes=[], | ||
index_struct=index_struct, | ||
**kwargs, | ||
) | ||
|
||
def as_retriever(self, **kwargs: Any) -> BaseRetriever: | ||
raise NotImplementedError("Not supported") | ||
|
||
def as_query_engine( | ||
self, llm: Optional[LLMType] = None, **kwargs: Any | ||
) -> BaseQueryEngine: | ||
# NOTE: lazy import | ||
from llama_index.core.query_engine.pandas.pandas_query_engine import ( | ||
PandasQueryEngine, | ||
) | ||
|
||
return PandasQueryEngine.from_index(self, llm=llm, **kwargs) | ||
|
||
def _build_index_from_nodes(self, nodes: Sequence[BaseNode]) -> PandasStructTable: | ||
"""Build index from documents.""" | ||
return self.index_struct_cls() | ||
|
||
def _insert(self, nodes: Sequence[BaseNode], **insert_kwargs: Any) -> None: | ||
"""Insert a document.""" | ||
raise NotImplementedError("We currently do not support inserting documents.") | ||
|
||
|
||
# legacy | ||
# Legacy | ||
GPTPandasIndex = PandasIndex |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
91 changes: 14 additions & 77 deletions
91
llama-index-core/llama_index/core/query_engine/pandas/output_parser.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,86 +1,23 @@ | ||
"""Pandas output parser.""" | ||
"""Pandas output parser. | ||
import logging | ||
from typing import Any, Dict, Optional | ||
DEPRECATED: This class has been moved to `llama-index-experimental`. | ||
""" | ||
|
||
import numpy as np | ||
import pandas as pd | ||
from llama_index.core.exec_utils import safe_eval, safe_exec | ||
from llama_index.core.output_parsers.base import ChainableOutputParser | ||
from llama_index.core.output_parsers.utils import parse_code_markdown | ||
from typing import Any | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
def default_output_processor( | ||
output: str, df: pd.DataFrame, **output_kwargs: Any | ||
) -> str: | ||
"""Process outputs in a default manner.""" | ||
import ast | ||
import sys | ||
import traceback | ||
|
||
if sys.version_info < (3, 9): | ||
logger.warning( | ||
"Python version must be >= 3.9 in order to use " | ||
"the default output processor, which executes " | ||
"the Python query. Instead, we will return the " | ||
"raw Python instructions as a string." | ||
) | ||
return output | ||
|
||
local_vars = {"df": df} | ||
global_vars = {"np": np, "pd": pd} | ||
|
||
output = parse_code_markdown(output, only_last=True)[0] | ||
|
||
# NOTE: inspired from langchain's tool | ||
# see langchain.tools.python.tool (PythonAstREPLTool) | ||
try: | ||
tree = ast.parse(output) | ||
module = ast.Module(tree.body[:-1], type_ignores=[]) | ||
safe_exec(ast.unparse(module), {}, local_vars) # type: ignore | ||
module_end = ast.Module(tree.body[-1:], type_ignores=[]) | ||
module_end_str = ast.unparse(module_end) # type: ignore | ||
if module_end_str.strip("'\"") != module_end_str: | ||
# if there's leading/trailing quotes, then we need to eval | ||
# string to get the actual expression | ||
module_end_str = safe_eval(module_end_str, global_vars, local_vars) | ||
try: | ||
# str(pd.dataframe) will truncate output by display.max_colwidth | ||
# set width temporarily to extract more text | ||
if "max_colwidth" in output_kwargs: | ||
pd.set_option("display.max_colwidth", output_kwargs["max_colwidth"]) | ||
output_str = str(safe_eval(module_end_str, global_vars, local_vars)) | ||
pd.reset_option("display.max_colwidth") | ||
return output_str | ||
|
||
except Exception: | ||
raise | ||
except Exception as e: | ||
err_string = ( | ||
"There was an error running the output as Python code. " | ||
f"Error message: {e}" | ||
) | ||
traceback.print_exc() | ||
return err_string | ||
|
||
|
||
class PandasInstructionParser(ChainableOutputParser): | ||
class PandasInstructionParser: | ||
"""Pandas instruction parser. | ||
This 'output parser' takes in pandas instructions (in Python code) and | ||
executes them to return an output. | ||
DEPRECATED: This class has been moved to `llama-index-experimental`. | ||
""" | ||
|
||
def __init__( | ||
self, df: pd.DataFrame, output_kwargs: Optional[Dict[str, Any]] = None | ||
) -> None: | ||
"""Initialize params.""" | ||
self.df = df | ||
self.output_kwargs = output_kwargs or {} | ||
|
||
def parse(self, output: str) -> Any: | ||
"""Parse, validate, and correct errors programmatically.""" | ||
return default_output_processor(output, self.df, **self.output_kwargs) | ||
def __init__(self, *args: Any, **kwargs: Any) -> None: | ||
raise DeprecationWarning( | ||
"PandasInstructionParser has been moved to `llama-index-experimental`.\n" | ||
"`pip install llama-index-experimental`\n" | ||
"`from llama_index.experimental.query_engine.pandas import PandasInstructionParser`\n" | ||
"Note that the PandasInstructionParser allows for arbitrary code execution, \n" | ||
"and should be used in a secure environment." | ||
) |
Oops, something went wrong.