From 74c391586280b55c35d66c697167122d72c13386 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Fri, 29 Mar 2024 18:28:16 -0500 Subject: [PATCH 1/4] docs: add "Supported pandas APIs" reference to the documentation (#542) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) 🦕 --- .kokoro/release-nightly.sh | 1 + docs/index.rst | 1 + docs/supported_pandas_apis.rst | 62 +++++++ docs/supported_pandas_apis/.gitignore | 1 + docs/templates/toc.yml | 2 + noxfile.py | 12 ++ scripts/publish_api_coverage.py | 222 ++++++++++++++++++++++++-- scripts/test_publish_api_coverage.py | 2 + 8 files changed, 291 insertions(+), 12 deletions(-) create mode 100644 docs/supported_pandas_apis.rst create mode 100644 docs/supported_pandas_apis/.gitignore diff --git a/.kokoro/release-nightly.sh b/.kokoro/release-nightly.sh index 5624df3b8d..7da0881bbe 100755 --- a/.kokoro/release-nightly.sh +++ b/.kokoro/release-nightly.sh @@ -106,6 +106,7 @@ for gcs_path in gs://vertex_sdk_private_releases/bigframe/ \ # write access to COVERAGE_TABLE=bigframes-metrics.coverage_report.bigframes_coverage_nightly python3.10 scripts/publish_api_coverage.py \ + bigquery \ --bigframes_version=$BIGFRAMES_VERSION \ --release_version=$RELEASE_VERSION \ --bigquery_table=$COVERAGE_TABLE diff --git a/docs/index.rst b/docs/index.rst index d239ea3a78..b17ac7cbd9 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -7,6 +7,7 @@ API reference :maxdepth: 3 reference/index + supported_pandas_apis Changelog --------- diff --git a/docs/supported_pandas_apis.rst b/docs/supported_pandas_apis.rst new file mode 100644 index 0000000000..f4b57f05d1 --- /dev/null +++ b/docs/supported_pandas_apis.rst @@ -0,0 +1,62 @@ +Supported pandas APIs +===================== + +The following tables show the pandas APIs that have been implemented (or not) +in BigQuery DataFrames. + +* 'Y' means it implements all parameters. +* 'P' means it implements only some parameters. + +DataFrame +--------- + +.. raw:: html + :file: supported_pandas_apis/bf_dataframe.html + +DataFrameGroupBy +---------------- + +.. raw:: html + :file: supported_pandas_apis/bf_dataframegroupby.html + +Index +----- + +.. raw:: html + :file: supported_pandas_apis/bf_index.html + +pandas module +------------- + +.. raw:: html + :file: supported_pandas_apis/bf_pandas.html + +Series +------ + +.. raw:: html + :file: supported_pandas_apis/bf_series.html + +Series.dt methods +----------------- + +.. raw:: html + :file: supported_pandas_apis/bf_datetimemethods.html + +Series.str methods +------------------ + +.. raw:: html + :file: supported_pandas_apis/bf_stringmethods.html + +SeriesGroupBy +------------- + +.. raw:: html + :file: supported_pandas_apis/bf_seriesgroupby.html + +Window +------ + +.. raw:: html + :file: supported_pandas_apis/bf_window.html diff --git a/docs/supported_pandas_apis/.gitignore b/docs/supported_pandas_apis/.gitignore new file mode 100644 index 0000000000..2d19fc766d --- /dev/null +++ b/docs/supported_pandas_apis/.gitignore @@ -0,0 +1 @@ +*.html diff --git a/docs/templates/toc.yml b/docs/templates/toc.yml index c07e6141f1..57b0522d04 100644 --- a/docs/templates/toc.yml +++ b/docs/templates/toc.yml @@ -72,6 +72,8 @@ name: Series - name: Window uid: bigframes.core.window.Window + - href: supported_pandas_apis.html + name: Supported pandas APIs name: bigframes.pandas - items: - items: diff --git a/noxfile.py b/noxfile.py index a5e77964f1..4ac3a81723 100644 --- a/noxfile.py +++ b/noxfile.py @@ -467,6 +467,12 @@ def docs(session): ) shutil.rmtree(os.path.join("docs", "_build"), ignore_errors=True) + + session.run( + "python", + "scripts/publish_api_coverage.py", + "docs", + ) session.run( "sphinx-build", "-W", # warnings as errors @@ -503,6 +509,12 @@ def docfx(session): ) shutil.rmtree(os.path.join("docs", "_build"), ignore_errors=True) + + session.run( + "python", + "scripts/publish_api_coverage.py", + "docs", + ) session.run( "sphinx-build", "-T", # show full traceback on exception diff --git a/scripts/publish_api_coverage.py b/scripts/publish_api_coverage.py index 856307e440..4a35ade9ef 100644 --- a/scripts/publish_api_coverage.py +++ b/scripts/publish_api_coverage.py @@ -17,27 +17,110 @@ import argparse import inspect +import pathlib +import sys import pandas as pd +import pandas.core.groupby +import pandas.core.indexes.accessors +import pandas.core.strings.accessor +import pandas.core.window.rolling +import bigframes +import bigframes.core.groupby +import bigframes.core.window +import bigframes.operations.datetimes import bigframes.pandas as bpd +REPO_ROOT = pathlib.Path(__file__).parent.parent + +URL_PREFIX = { + "pandas": ( + "https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.pandas#bigframes_pandas_" + ), + "dataframe": ( + "https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.dataframe.DataFrame#bigframes_dataframe_DataFrame_" + ), + "dataframegroupby": ( + "https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.core.groupby.DataFrameGroupBy#bigframes_core_groupby_DataFrameGroupBy_" + ), + "series": ( + "https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.series.Series#bigframes_series_Series_" + ), + "seriesgroupby": ( + "https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.core.groupby.SeriesGroupBy#bigframes_core_groupby_SeriesGroupBy_" + ), + "datetimemethods": ( + "https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.operations.datetimes.DatetimeMethods#bigframes_operations_datetimes_DatetimeMethods_" + ), + "stringmethods": ( + "https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.operations.strings.StringMethods#bigframes_operations_strings_StringMethods_" + ), + "window": ( + "https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.core.window.Window#bigframes_core_window_Window_" + ), + # TODO: Index not documented. +} + + +PANDAS_TARGETS = [ + ("pandas", pd, bpd), + ("dataframe", pd.DataFrame, bpd.DataFrame), + ( + "dataframegroupby", + pandas.core.groupby.DataFrameGroupBy, + bigframes.core.groupby.DataFrameGroupBy, + ), + ("series", pd.Series, bpd.Series), + ( + "seriesgroupby", + pandas.core.groupby.DataFrameGroupBy, + bigframes.core.groupby.DataFrameGroupBy, + ), + ( + "datetimemethods", + pandas.core.indexes.accessors.CombinedDatetimelikeProperties, + bigframes.operations.datetimes.DatetimeMethods, + ), + ( + "stringmethods", + pandas.core.strings.accessor.StringMethods, + bigframes.operations.strings.StringMethods, + ), + ( + "window", + pandas.core.window.rolling.Rolling, + bigframes.core.window.Window, + ), + ("index", pd.Index, bpd.Index), +] + + +def names_from_signature(signature): + """Extract the names of parameters from signature + + See: https://docs.python.org/3/library/inspect.html#inspect.signature + """ + return frozenset({parameter for parameter in signature.parameters}) + + +def calculate_missing_parameters(bigframes_function, target_function): + bigframes_params = names_from_signature(inspect.signature(bigframes_function)) + target_params = names_from_signature(inspect.signature(target_function)) + return target_params - bigframes_params + def generate_pandas_api_coverage(): """Inspect all our pandas objects, and compare with the real pandas objects, to see which methods we implement. For each, generate a regex that can be used to check if its present in a notebook""" - header = ["api", "pattern", "kind", "is_in_bigframes"] + header = ["api", "pattern", "kind", "is_in_bigframes", "missing_parameters"] api_patterns = [] - targets = [ - ("pandas", pd, bpd), - ("dataframe", pd.DataFrame, bpd.DataFrame), - ("series", pd.Series, bpd.Series), - ("index", pd.Index, bpd.Index), - ] indexers = ["loc", "iloc", "iat", "ix", "at"] - for name, pandas_obj, bigframes_obj in targets: + for name, pandas_obj, bigframes_obj in PANDAS_TARGETS: for member in dir(pandas_obj): + missing_parameters = "" + # skip private functions and properties if member[0] == "_" and member[1] != "_": continue @@ -50,6 +133,17 @@ def generate_pandas_api_coverage(): # Function, match .member( token = f"\\.{member}\\(" token_type = "function" + + if hasattr(bigframes_obj, member): + bigframes_function = getattr(bigframes_obj, member) + pandas_function = getattr(pandas_obj, member) + missing_parameters = ", ".join( + sorted( + calculate_missing_parameters( + bigframes_function, pandas_function + ) + ) + ) elif member in indexers: # Indexer, match .indexer[ token = f"\\.{member}\\[" @@ -62,7 +156,13 @@ def generate_pandas_api_coverage(): is_in_bigframes = hasattr(bigframes_obj, member) api_patterns.append( - [f"{name}.{member}", token, token_type, is_in_bigframes] + [ + f"{name}.{member}", + token, + token_type, + is_in_bigframes, + missing_parameters, + ] ) return pd.DataFrame(api_patterns, columns=header) @@ -165,14 +265,112 @@ def build_api_coverage_table(bigframes_version: str, release_version: str): return combined_df.infer_objects().convert_dtypes() +def format_api(api_names, is_in_bigframes, api_prefix): + api_names = api_names.str.slice(start=len(f"{api_prefix}.")) + formatted = "" + api_names + "" + url_prefix = URL_PREFIX.get(api_prefix) + if url_prefix is None: + return formatted + + linked = '' + formatted + "" + return formatted.mask(is_in_bigframes, linked) + + +def generate_api_coverage(df, api_prefix): + dataframe_apis = df.loc[df["api"].str.startswith(f"{api_prefix}.")] + fully_implemented = ( + dataframe_apis["missing_parameters"].str.len() == 0 + ) & dataframe_apis["is_in_bigframes"] + partial_implemented = ( + dataframe_apis["missing_parameters"].str.len() != 0 + ) & dataframe_apis["is_in_bigframes"] + not_implemented = ~dataframe_apis["is_in_bigframes"] + dataframe_table = pd.DataFrame( + { + "API": format_api( + dataframe_apis["api"], + dataframe_apis["is_in_bigframes"], + api_prefix, + ), + "Implemented": "", + "Missing parameters": dataframe_apis["missing_parameters"], + } + ) + dataframe_table.loc[fully_implemented, "Implemented"] = "Y" + dataframe_table.loc[partial_implemented, "Implemented"] = "P" + dataframe_table.loc[not_implemented, "Implemented"] = "N" + return dataframe_table + + +def generate_api_coverage_doc(df, api_prefix): + dataframe_table = generate_api_coverage(df, api_prefix) + dataframe_table = dataframe_table.loc[~(dataframe_table["Implemented"] == "N")] + dataframe_table["Implemented"] = dataframe_table["Implemented"].map( + { + "Y": "Y", + "P": "P", + } + ) + + with open( + REPO_ROOT / "docs" / "supported_pandas_apis" / f"bf_{api_prefix}.html", + "w", + ) as html_file: + dataframe_table.to_html( + html_file, index=False, header=True, escape=False, border=0, col_space="8em" + ) + + +def generate_api_coverage_docs(df): + for target in PANDAS_TARGETS: + api_prefix = target[0] + generate_api_coverage_doc(df, api_prefix) + + +def print_api_coverage_summary(df, api_prefix): + dataframe_table = generate_api_coverage(df, api_prefix) + + print(api_prefix) + print(dataframe_table[["Implemented", "API"]].groupby(["Implemented"]).count()) + print(f"{api_prefix} APIs: {dataframe_table.shape[0]}\n") + + +def print_api_coverage_summaries(df): + for target in PANDAS_TARGETS: + api_prefix = target[0] + print_api_coverage_summary(df, api_prefix) + + print(f"\nAll APIs: {len(df.index)}") + fully_implemented = (df["missing_parameters"].str.len() == 0) & df[ + "is_in_bigframes" + ] + print(f"Y: {fully_implemented.sum()}") + partial_implemented = (df["missing_parameters"].str.len() != 0) & df[ + "is_in_bigframes" + ] + print(f"P: {partial_implemented.sum()}") + not_implemented = ~df["is_in_bigframes"] + print(f"N: {not_implemented.sum()}") + + def main(): parser = argparse.ArgumentParser() - parser.add_argument("--bigframes_version") - parser.add_argument("--release_version") + parser.add_argument("output_type") + parser.add_argument("--bigframes_version", default=bigframes.__version__) + parser.add_argument("--release_version", default="") parser.add_argument("--bigquery_table_name") args = parser.parse_args() df = build_api_coverage_table(args.bigframes_version, args.release_version) - df.to_gbq(args.bigquery_table_name, if_exists="append") + + if args.output_type == "bigquery": + df.to_gbq(args.bigquery_table_name, if_exists="append") + elif args.output_type == "docs": + generate_api_coverage_docs(df) + elif args.output_type == "summary": + print_api_coverage_summaries(df) + else: + print(f"Unexpected output_type {repr(args.output_type)}") + sys.exit(1) if __name__ == "__main__": diff --git a/scripts/test_publish_api_coverage.py b/scripts/test_publish_api_coverage.py index 96b2d1bb48..061cc1c25c 100644 --- a/scripts/test_publish_api_coverage.py +++ b/scripts/test_publish_api_coverage.py @@ -27,6 +27,7 @@ def test_api_coverage_produces_expected_schema(): "string", "boolean", "string", + "string", "datetime64[ns]", "string", "string", @@ -36,6 +37,7 @@ def test_api_coverage_produces_expected_schema(): "pattern", "kind", "is_in_bigframes", + "missing_parameters", "module", "timestamp", "bigframes_version", From 347f2dda2298e17cd44a298f04a723f2d20c080a Mon Sep 17 00:00:00 2001 From: Ashley Xu <139821907+ashleyxuu@users.noreply.github.com> Date: Fri, 29 Mar 2024 17:08:16 -0700 Subject: [PATCH 2/4] fix: sync the notebook with embedding changes (#550) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes # 🦕 --- .../bq_dataframes_llm_kmeans.ipynb | 1074 +++++++++-------- 1 file changed, 547 insertions(+), 527 deletions(-) diff --git a/notebooks/generative_ai/bq_dataframes_llm_kmeans.ipynb b/notebooks/generative_ai/bq_dataframes_llm_kmeans.ipynb index 61445d85c5..2c6d109ba8 100644 --- a/notebooks/generative_ai/bq_dataframes_llm_kmeans.ipynb +++ b/notebooks/generative_ai/bq_dataframes_llm_kmeans.ipynb @@ -289,7 +289,7 @@ { "data": { "text/html": [ - "Query job d5778724-6966-42ba-b8a6-2a1865a1184c is DONE. 2.3 GB processed. Open Job" + "Query job 030e5d08-f690-47e4-b7cc-342731245575 is DONE. 2.3 GB processed. Open Job" ], "text/plain": [ "" @@ -301,7 +301,7 @@ { "data": { "text/html": [ - "Query job 4d48bf69-571c-4773-8486-0232840597d5 is DONE. 55.1 MB processed. Open Job" + "Query job a9c5f416-c5d2-4209-b639-bccb81a25d7e is DONE. 58.8 MB processed. Open Job" ], "text/plain": [ "" @@ -336,36 +336,36 @@ " \n", " \n", " \n", - " 24\n", - " I sent disputed to Transunion, XXXX and XXXX f...\n", + " 1053364\n", + " My Macy 's American Express account was taken ...\n", " \n", " \n", - " 942\n", - " on XX/XX/2017 I sent XXXX, transunion, XXXX pr...\n", + " 1053757\n", + " I am a victim of identity theft. The informati...\n", " \n", " \n", - " 1193\n", - " On Wednesday, XXXX XXXX , I initiated a wir...\n", + " 1053784\n", + " In XXXX 2016, Amex took out $ XXXX.+ unauthori...\n", " \n", " \n", - " 1292\n", - " Dear Sir or Madam, I am a victim of identity t...\n", + " 1054237\n", + " I am not for sure the exact date of my loan it...\n", " \n", " \n", - " 1377\n", - " For the purpose of this complaint, I will refe...\n", + " 1054244\n", + " I entered a consumer credit transaction with t...\n", " \n", " \n", "\n", "" ], "text/plain": [ - " consumer_complaint_narrative\n", - "24 I sent disputed to Transunion, XXXX and XXXX f...\n", - "942 on XX/XX/2017 I sent XXXX, transunion, XXXX pr...\n", - "1193 On Wednesday, XXXX XXXX , I initiated a wir...\n", - "1292 Dear Sir or Madam, I am a victim of identity t...\n", - "1377 For the purpose of this complaint, I will refe..." + " consumer_complaint_narrative\n", + "1053364 My Macy 's American Express account was taken ...\n", + "1053757 I am a victim of identity theft. The informati...\n", + "1053784 In XXXX 2016, Amex took out $ XXXX.+ unauthori...\n", + "1054237 I am not for sure the exact date of my loan it...\n", + "1054244 I entered a consumer credit transaction with t..." ] }, "execution_count": 7, @@ -418,7 +418,7 @@ { "data": { "text/html": [ - "Query job 15b352c2-783c-42b1-bc03-e5772f00381a is DONE. 0 Bytes processed. Open Job" + "Query job 77eee871-31eb-4939-a015-f5505c94786e is DONE. 0 Bytes processed. Open Job" ], "text/plain": [ "" @@ -444,7 +444,7 @@ { "data": { "text/html": [ - "Query job e2152e81-b736-4a68-a25a-c5eb2b03d734 is DONE. 1.3 GB processed. Open Job" + "Query job 63cdd004-21b6-41bf-8876-aa646f1f268e is DONE. 1.3 GB processed. Open Job" ], "text/plain": [ "" @@ -456,7 +456,7 @@ { "data": { "text/html": [ - "Query job b1a3d20b-aee3-424c-a0c5-5b36f1177709 is DONE. 80.0 kB processed. Open Job" + "Query job cda12546-9931-48f6-8b22-74a9ab85fa28 is DONE. 80.0 kB processed. Open Job" ], "text/plain": [ "" @@ -468,7 +468,7 @@ { "data": { "text/html": [ - "Query job 6b2fad50-cbc8-42ea-83c1-b5d3eaac10b9 is DONE. 20.0 kB processed. Open Job" + "Query job 759a13c5-c02f-4ae8-9b22-d7ef423ffe8d is DONE. 20.0 kB processed. Open Job" ], "text/plain": [ "" @@ -480,19 +480,7 @@ { "data": { "text/html": [ - "Query job 31896ae6-fbb5-42fb-98c4-13bd19d1adfa is DONE. 0 Bytes processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Query job 43f04543-f59b-4f1b-8598-c529324904be is DONE. 72.1 MB processed. Open Job" + "Query job 1bad8ef3-8103-4a98-bec4-699d97673b9a is DONE. 72.0 MB processed. Open Job" ], "text/plain": [ "" @@ -522,187 +510,188 @@ " \n", " \n", " \n", - " text_embedding\n", - " statistics\n", - " ml_embed_text_status\n", + " ml_generate_embedding_result\n", + " ml_generate_embedding_statistics\n", + " ml_generate_embedding_status\n", " content\n", " \n", " \n", " \n", " \n", - " 545\n", - " [ 1.82510037e-02 -1.27867460e-02 -1.57095697e-...\n", - " {\"token_count\":178,\"truncated\":false}\n", + " 357\n", + " [ 1.33585772e-02 -3.76597494e-02 -6.14452176e-...\n", + " {\"token_count\":306,\"truncated\":false}\n", " \n", - " My payments have been approximately {$89.00} w...\n", + " I decided to try XXXX services for my wife and...\n", " \n", " \n", - " 614\n", - " [ 5.40032536e-02 -5.28502129e-02 -5.33268750e-...\n", - " {\"token_count\":399,\"truncated\":false}\n", + " 428\n", + " [ 3.10036819e-04 -3.82593311e-02 -3.41922641e-...\n", + " {\"token_count\":134,\"truncated\":false}\n", " \n", - " Hi, I have contacted Trans Union XXXX XXXX abo...\n", + " XXXX I went to the bank in question ( XXXX XXX...\n", " \n", " \n", - " 1236\n", - " [-5.32836001e-03 -5.84292673e-02 -5.86670786e-...\n", - " {\"token_count\":129,\"truncated\":false}\n", + " 1319\n", + " [ 1.97481886e-02 -1.97448786e-02 -5.13443351e-...\n", + " {\"token_count\":215,\"truncated\":false}\n", " \n", - " I have a XXXX XXXX XXXX credit card on my Exp...\n", + " I currently have a home loan with my ex husban...\n", " \n", " \n", - " 1477\n", - " [ 3.02605387e-02 -4.37121317e-02 -2.70802993e-...\n", - " {\"token_count\":16,\"truncated\":false}\n", + " 1993\n", + " [ 9.83821880e-03 -6.55664057e-02 -5.46210706e-...\n", + " {\"token_count\":536,\"truncated\":false}\n", " \n", - " Wrongs information, selling my information to ...\n", + " NOT MY ACCOUNT, NOT AN AUTHORIZED USER {$1800....\n", " \n", " \n", - " 2261\n", - " [ 2.35723313e-02 -3.73509154e-02 -6.44604117e-...\n", - " {\"token_count\":33,\"truncated\":false}\n", + " 1997\n", + " [ 0.03145148 -0.01011822 -0.02316323 -0.025078...\n", + " {\"token_count\":123,\"truncated\":false}\n", " \n", - " Please investigate and delete disputed item th...\n", + " After a while the payments became harder and h...\n", " \n", " \n", - " 2361\n", - " [ 1.04440488e-02 -9.37070698e-03 -7.36323372e-...\n", - " {\"token_count\":45,\"truncated\":false}\n", + " 2469\n", + " [ 4.74590808e-03 -4.56819348e-02 -2.49751769e-...\n", + " {\"token_count\":60,\"truncated\":false}\n", " \n", - " By the provisions of the Fair Credit Reporting...\n", + " In the course of my student loan, I have been ...\n", " \n", " \n", - " 2378\n", - " [ 3.04989032e-02 -4.08191867e-02 -6.18648790e-...\n", - " {\"token_count\":892,\"truncated\":false}\n", + " 2624\n", + " [ 3.91883589e-03 -3.26644145e-02 -7.10378587e-...\n", + " {\"token_count\":254,\"truncated\":false}\n", " \n", - " Since XX/XX/XXXX I have been trying to dispute...\n", + " In accordance with the Fair Credit Reporting A...\n", " \n", " \n", - " 3133\n", - " [ 0.00152804 -0.04189068 -0.04220504 -0.053740...\n", - " {\"token_count\":90,\"truncated\":false}\n", + " 2832\n", + " [ 8.35181400e-03 -2.91643552e-02 -4.30776961e-...\n", + " {\"token_count\":79,\"truncated\":false}\n", " \n", - " Out of the blue I received a debt collection n...\n", + " LVNV FUNDING LLC is continually placing a coll...\n", " \n", " \n", - " 3140\n", - " [ 3.11435573e-02 -4.44000624e-02 -2.10917685e-...\n", - " {\"token_count\":372,\"truncated\":false}\n", + " 3328\n", + " [ 2.71253809e-02 -1.77491009e-02 -5.32273464e-...\n", + " {\"token_count\":156,\"truncated\":false}\n", " \n", - " My wife and I have been sending money to XXXX ...\n", + " On XX/XX/2020 I sent a letter regarding inaccu...\n", " \n", " \n", - " 3322\n", - " [ 2.75927987e-02 -6.23729872e-03 -3.83295454e-...\n", - " {\"token_count\":36,\"truncated\":false}\n", + " 3650\n", + " [-6.10093866e-03 -5.93599863e-02 -8.04531425e-...\n", + " {\"token_count\":175,\"truncated\":false}\n", " \n", - " Phone calls from Convergent Outsourcing XXXX. ...\n", + " Over a year and a half ago we started the proc...\n", " \n", " \n", - " 3583\n", - " [ 9.20385588e-03 -3.83387171e-02 -6.46291822e-...\n", - " {\"token_count\":52,\"truncated\":false}\n", + " 3860\n", + " [ 5.84836192e-02 -2.43354496e-03 -5.57337068e-...\n", + " {\"token_count\":1267,\"truncated\":false}\n", " \n", - " I recently received a copy of my credit report...\n", + " The issue is 26 late payments on me and my wif...\n", " \n", " \n", - " 4134\n", - " [-7.04960374e-04 -3.52595337e-02 -1.65264793e-...\n", - " {\"token_count\":412,\"truncated\":false}\n", + " 4464\n", + " [ 6.05084226e-02 -3.21578234e-02 -7.51668587e-...\n", + " {\"token_count\":906,\"truncated\":false}\n", " \n", - " I have been sending the creditor what they hav...\n", + " I purchased as replacement for a lost XXXX XXX...\n", " \n", " \n", - " 4496\n", - " [ 3.67735326e-02 1.21120387e-03 -5.20942472e-...\n", - " {\"token_count\":182,\"truncated\":false}\n", + " 4470\n", + " [ 1.28689921e-02 -3.25881056e-02 -6.53645024e-...\n", + " {\"token_count\":200,\"truncated\":false}\n", " \n", - " This is my second complaint. Their response to...\n", + " in accordance with the Fair Credit Reporting a...\n", " \n", " \n", - " 5260\n", - " [ 2.07133405e-02 -1.69602726e-02 -5.07124476e-...\n", - " {\"token_count\":103,\"truncated\":false}\n", + " 4567\n", + " [-5.49167022e-03 -3.84587422e-02 -8.56091827e-...\n", + " {\"token_count\":110,\"truncated\":false}\n", " \n", - " XX/XX/XXXX and XX/XX/XXXX, {$3200.00} contacte...\n", + " I have submitted multiple disputes through the...\n", " \n", " \n", - " 5400\n", - " [ 1.44114876e-02 -2.34710164e-02 -6.58538565e-...\n", - " {\"token_count\":60,\"truncated\":false}\n", + " 4713\n", + " [ 2.68485844e-02 -3.46762352e-02 -4.59849052e-...\n", + " {\"token_count\":549,\"truncated\":false}\n", " \n", - " Upon checking my XXXX credit report I noticed ...\n", + " While shopping for furniture for my home I ope...\n", " \n", " \n", - " 5425\n", - " [ 3.10326386e-02 -2.19427086e-02 -6.56386837e-...\n", - " {\"token_count\":87,\"truncated\":false}\n", + " 5181\n", + " [ 2.05754172e-02 -3.83999050e-02 -9.29225236e-...\n", + " {\"token_count\":77,\"truncated\":false}\n", " \n", - " Follow up to previous complaint XXXX XXXX XXXX...\n", + " I had opened a Wells Fargo checking account wi...\n", " \n", " \n", - " 6014\n", - " [ 1.90773793e-02 -2.27493346e-02 -3.27166244e-...\n", - " {\"token_count\":175,\"truncated\":false}\n", + " 5511\n", + " [-0.00217485 -0.04031368 -0.06604777 -0.052006...\n", + " {\"token_count\":262,\"truncated\":false}\n", " \n", - " My new XXXX lease was over always paid on time...\n", + " I recently disputed ( see attached letter ) wi...\n", " \n", " \n", - " 8192\n", - " [ 0.01937891 -0.05466933 -0.06070872 -0.059028...\n", - " {\"token_count\":131,\"truncated\":false}\n", + " 5888\n", + " [-8.15972779e-03 -3.46563384e-02 -5.91776446e-...\n", + " {\"token_count\":176,\"truncated\":false}\n", " \n", - " I have no idea where this account cane from. B...\n", + " XXXX XXXX XXXX XXXX \n", + "I have disputed this acco...\n", " \n", " \n", - " 8240\n", - " [ 4.34123818e-03 -3.40953320e-02 -4.06381376e-...\n", - " {\"token_count\":87,\"truncated\":false}\n", + " 6299\n", + " [ 4.80043218e-02 -4.13420722e-02 -6.12363108e-...\n", + " {\"token_count\":151,\"truncated\":false}\n", " \n", - " I TIED TO BUY CAR AT XXXX, THEY GOT APPROVAL F...\n", + " XXXX ; XXXX and Transunion are reporting ( 30 ...\n", " \n", " \n", - " 8720\n", - " [ 0.03133732 -0.03972461 -0.00178199 -0.035876...\n", - " {\"token_count\":645,\"truncated\":false}\n", + " 7143\n", + " [ 4.39200476e-02 -3.04005221e-02 -3.47866341e-...\n", + " {\"token_count\":234,\"truncated\":false}\n", " \n", - " XXXX XXXX XXXX XXXX, NY XXXX XX/XX/XXXX Consum...\n", + " My Macys account is due on the first of every ...\n", " \n", " \n", - " 8914\n", - " [ 1.75969116e-02 -2.25022305e-02 -5.70390299e-...\n", - " {\"token_count\":180,\"truncated\":false}\n", + " 7219\n", + " [ 1.00224940e-02 -3.79302073e-03 -3.41785327e-...\n", + " {\"token_count\":26,\"truncated\":false}\n", " \n", - " On XX/XX/21 I sent a letter regarding inaccura...\n", + " Keep getting letters and calls from collection...\n", " \n", " \n", - " 10021\n", - " [ 5.02460636e-02 -5.25112189e-02 -4.12914790e-...\n", - " {\"token_count\":30,\"truncated\":false}\n", + " 7574\n", + " [-0.00149564 -0.06619431 -0.05084481 -0.048579...\n", + " {\"token_count\":129,\"truncated\":false}\n", " \n", - " XX/XX/XXXX and XX/XX/XXXX inaccurate informati...\n", + " On XXXX I was on the XXXX app and there was a ...\n", " \n", " \n", - " 10327\n", - " [-0.00979626 -0.04912931 -0.08654705 -0.021063...\n", - " {\"token_count\":194,\"truncated\":false}\n", + " 8759\n", + " [ 0.01501553 -0.03575936 -0.050562 -0.034884...\n", + " {\"token_count\":501,\"truncated\":false}\n", " \n", - " When I reviewed my credit report, I discovered...\n", + " Obviously I've been a victim of fraud, therefo...\n", " \n", " \n", - " 10345\n", - " [-0.04292191 -0.02636929 -0.06177032 -0.076520...\n", - " {\"token_count\":262,\"truncated\":false}\n", + " 9700\n", + " [ 1.01501048e-02 -2.80565154e-02 -4.05892394e-...\n", + " {\"token_count\":48,\"truncated\":false}\n", " \n", - " U.S. Bank sent two letters containing Visa Deb...\n", + " The following item have not been properly inve...\n", " \n", " \n", - " 10369\n", - " [ 2.16020197e-02 -5.62509745e-02 -5.93873672e-...\n", - " {\"token_count\":77,\"truncated\":false}\n", + " 9822\n", + " [ 2.95880195e-02 1.65440738e-02 -3.33247967e-...\n", + " {\"token_count\":2373,\"truncated\":true}\n", " \n", - " I requested from XXXX that they reverse the la...\n", + " During the housing market crash I went through...\n", " \n", " \n", "\n", @@ -710,86 +699,87 @@ "[10000 rows x 4 columns in total]" ], "text/plain": [ - " text_embedding \\\n", - "545 [ 1.82510037e-02 -1.27867460e-02 -1.57095697e-... \n", - "614 [ 5.40032536e-02 -5.28502129e-02 -5.33268750e-... \n", - "1236 [-5.32836001e-03 -5.84292673e-02 -5.86670786e-... \n", - "1477 [ 3.02605387e-02 -4.37121317e-02 -2.70802993e-... \n", - "2261 [ 2.35723313e-02 -3.73509154e-02 -6.44604117e-... \n", - "2361 [ 1.04440488e-02 -9.37070698e-03 -7.36323372e-... \n", - "2378 [ 3.04989032e-02 -4.08191867e-02 -6.18648790e-... \n", - "3133 [ 0.00152804 -0.04189068 -0.04220504 -0.053740... \n", - "3140 [ 3.11435573e-02 -4.44000624e-02 -2.10917685e-... \n", - "3322 [ 2.75927987e-02 -6.23729872e-03 -3.83295454e-... \n", - "3583 [ 9.20385588e-03 -3.83387171e-02 -6.46291822e-... \n", - "4134 [-7.04960374e-04 -3.52595337e-02 -1.65264793e-... \n", - "4496 [ 3.67735326e-02 1.21120387e-03 -5.20942472e-... \n", - "5260 [ 2.07133405e-02 -1.69602726e-02 -5.07124476e-... \n", - "5400 [ 1.44114876e-02 -2.34710164e-02 -6.58538565e-... \n", - "5425 [ 3.10326386e-02 -2.19427086e-02 -6.56386837e-... \n", - "6014 [ 1.90773793e-02 -2.27493346e-02 -3.27166244e-... \n", - "8192 [ 0.01937891 -0.05466933 -0.06070872 -0.059028... \n", - "8240 [ 4.34123818e-03 -3.40953320e-02 -4.06381376e-... \n", - "8720 [ 0.03133732 -0.03972461 -0.00178199 -0.035876... \n", - "8914 [ 1.75969116e-02 -2.25022305e-02 -5.70390299e-... \n", - "10021 [ 5.02460636e-02 -5.25112189e-02 -4.12914790e-... \n", - "10327 [-0.00979626 -0.04912931 -0.08654705 -0.021063... \n", - "10345 [-0.04292191 -0.02636929 -0.06177032 -0.076520... \n", - "10369 [ 2.16020197e-02 -5.62509745e-02 -5.93873672e-... \n", + " ml_generate_embedding_result \\\n", + "357 [ 1.33585772e-02 -3.76597494e-02 -6.14452176e-... \n", + "428 [ 3.10036819e-04 -3.82593311e-02 -3.41922641e-... \n", + "1319 [ 1.97481886e-02 -1.97448786e-02 -5.13443351e-... \n", + "1993 [ 9.83821880e-03 -6.55664057e-02 -5.46210706e-... \n", + "1997 [ 0.03145148 -0.01011822 -0.02316323 -0.025078... \n", + "2469 [ 4.74590808e-03 -4.56819348e-02 -2.49751769e-... \n", + "2624 [ 3.91883589e-03 -3.26644145e-02 -7.10378587e-... \n", + "2832 [ 8.35181400e-03 -2.91643552e-02 -4.30776961e-... \n", + "3328 [ 2.71253809e-02 -1.77491009e-02 -5.32273464e-... \n", + "3650 [-6.10093866e-03 -5.93599863e-02 -8.04531425e-... \n", + "3860 [ 5.84836192e-02 -2.43354496e-03 -5.57337068e-... \n", + "4464 [ 6.05084226e-02 -3.21578234e-02 -7.51668587e-... \n", + "4470 [ 1.28689921e-02 -3.25881056e-02 -6.53645024e-... \n", + "4567 [-5.49167022e-03 -3.84587422e-02 -8.56091827e-... \n", + "4713 [ 2.68485844e-02 -3.46762352e-02 -4.59849052e-... \n", + "5181 [ 2.05754172e-02 -3.83999050e-02 -9.29225236e-... \n", + "5511 [-0.00217485 -0.04031368 -0.06604777 -0.052006... \n", + "5888 [-8.15972779e-03 -3.46563384e-02 -5.91776446e-... \n", + "6299 [ 4.80043218e-02 -4.13420722e-02 -6.12363108e-... \n", + "7143 [ 4.39200476e-02 -3.04005221e-02 -3.47866341e-... \n", + "7219 [ 1.00224940e-02 -3.79302073e-03 -3.41785327e-... \n", + "7574 [-0.00149564 -0.06619431 -0.05084481 -0.048579... \n", + "8759 [ 0.01501553 -0.03575936 -0.050562 -0.034884... \n", + "9700 [ 1.01501048e-02 -2.80565154e-02 -4.05892394e-... \n", + "9822 [ 2.95880195e-02 1.65440738e-02 -3.33247967e-... \n", "\n", - " statistics ml_embed_text_status \\\n", - "545 {\"token_count\":178,\"truncated\":false} \n", - "614 {\"token_count\":399,\"truncated\":false} \n", - "1236 {\"token_count\":129,\"truncated\":false} \n", - "1477 {\"token_count\":16,\"truncated\":false} \n", - "2261 {\"token_count\":33,\"truncated\":false} \n", - "2361 {\"token_count\":45,\"truncated\":false} \n", - "2378 {\"token_count\":892,\"truncated\":false} \n", - "3133 {\"token_count\":90,\"truncated\":false} \n", - "3140 {\"token_count\":372,\"truncated\":false} \n", - "3322 {\"token_count\":36,\"truncated\":false} \n", - "3583 {\"token_count\":52,\"truncated\":false} \n", - "4134 {\"token_count\":412,\"truncated\":false} \n", - "4496 {\"token_count\":182,\"truncated\":false} \n", - "5260 {\"token_count\":103,\"truncated\":false} \n", - "5400 {\"token_count\":60,\"truncated\":false} \n", - "5425 {\"token_count\":87,\"truncated\":false} \n", - "6014 {\"token_count\":175,\"truncated\":false} \n", - "8192 {\"token_count\":131,\"truncated\":false} \n", - "8240 {\"token_count\":87,\"truncated\":false} \n", - "8720 {\"token_count\":645,\"truncated\":false} \n", - "8914 {\"token_count\":180,\"truncated\":false} \n", - "10021 {\"token_count\":30,\"truncated\":false} \n", - "10327 {\"token_count\":194,\"truncated\":false} \n", - "10345 {\"token_count\":262,\"truncated\":false} \n", - "10369 {\"token_count\":77,\"truncated\":false} \n", + " ml_generate_embedding_statistics ml_generate_embedding_status \\\n", + "357 {\"token_count\":306,\"truncated\":false} \n", + "428 {\"token_count\":134,\"truncated\":false} \n", + "1319 {\"token_count\":215,\"truncated\":false} \n", + "1993 {\"token_count\":536,\"truncated\":false} \n", + "1997 {\"token_count\":123,\"truncated\":false} \n", + "2469 {\"token_count\":60,\"truncated\":false} \n", + "2624 {\"token_count\":254,\"truncated\":false} \n", + "2832 {\"token_count\":79,\"truncated\":false} \n", + "3328 {\"token_count\":156,\"truncated\":false} \n", + "3650 {\"token_count\":175,\"truncated\":false} \n", + "3860 {\"token_count\":1267,\"truncated\":false} \n", + "4464 {\"token_count\":906,\"truncated\":false} \n", + "4470 {\"token_count\":200,\"truncated\":false} \n", + "4567 {\"token_count\":110,\"truncated\":false} \n", + "4713 {\"token_count\":549,\"truncated\":false} \n", + "5181 {\"token_count\":77,\"truncated\":false} \n", + "5511 {\"token_count\":262,\"truncated\":false} \n", + "5888 {\"token_count\":176,\"truncated\":false} \n", + "6299 {\"token_count\":151,\"truncated\":false} \n", + "7143 {\"token_count\":234,\"truncated\":false} \n", + "7219 {\"token_count\":26,\"truncated\":false} \n", + "7574 {\"token_count\":129,\"truncated\":false} \n", + "8759 {\"token_count\":501,\"truncated\":false} \n", + "9700 {\"token_count\":48,\"truncated\":false} \n", + "9822 {\"token_count\":2373,\"truncated\":true} \n", "\n", - " content \n", - "545 My payments have been approximately {$89.00} w... \n", - "614 Hi, I have contacted Trans Union XXXX XXXX abo... \n", - "1236 I have a XXXX XXXX XXXX credit card on my Exp... \n", - "1477 Wrongs information, selling my information to ... \n", - "2261 Please investigate and delete disputed item th... \n", - "2361 By the provisions of the Fair Credit Reporting... \n", - "2378 Since XX/XX/XXXX I have been trying to dispute... \n", - "3133 Out of the blue I received a debt collection n... \n", - "3140 My wife and I have been sending money to XXXX ... \n", - "3322 Phone calls from Convergent Outsourcing XXXX. ... \n", - "3583 I recently received a copy of my credit report... \n", - "4134 I have been sending the creditor what they hav... \n", - "4496 This is my second complaint. Their response to... \n", - "5260 XX/XX/XXXX and XX/XX/XXXX, {$3200.00} contacte... \n", - "5400 Upon checking my XXXX credit report I noticed ... \n", - "5425 Follow up to previous complaint XXXX XXXX XXXX... \n", - "6014 My new XXXX lease was over always paid on time... \n", - "8192 I have no idea where this account cane from. B... \n", - "8240 I TIED TO BUY CAR AT XXXX, THEY GOT APPROVAL F... \n", - "8720 XXXX XXXX XXXX XXXX, NY XXXX XX/XX/XXXX Consum... \n", - "8914 On XX/XX/21 I sent a letter regarding inaccura... \n", - "10021 XX/XX/XXXX and XX/XX/XXXX inaccurate informati... \n", - "10327 When I reviewed my credit report, I discovered... \n", - "10345 U.S. Bank sent two letters containing Visa Deb... \n", - "10369 I requested from XXXX that they reverse the la... \n", + " content \n", + "357 I decided to try XXXX services for my wife and... \n", + "428 XXXX I went to the bank in question ( XXXX XXX... \n", + "1319 I currently have a home loan with my ex husban... \n", + "1993 NOT MY ACCOUNT, NOT AN AUTHORIZED USER {$1800.... \n", + "1997 After a while the payments became harder and h... \n", + "2469 In the course of my student loan, I have been ... \n", + "2624 In accordance with the Fair Credit Reporting A... \n", + "2832 LVNV FUNDING LLC is continually placing a coll... \n", + "3328 On XX/XX/2020 I sent a letter regarding inaccu... \n", + "3650 Over a year and a half ago we started the proc... \n", + "3860 The issue is 26 late payments on me and my wif... \n", + "4464 I purchased as replacement for a lost XXXX XXX... \n", + "4470 in accordance with the Fair Credit Reporting a... \n", + "4567 I have submitted multiple disputes through the... \n", + "4713 While shopping for furniture for my home I ope... \n", + "5181 I had opened a Wells Fargo checking account wi... \n", + "5511 I recently disputed ( see attached letter ) wi... \n", + "5888 XXXX XXXX XXXX XXXX \n", + "I have disputed this acco... \n", + "6299 XXXX ; XXXX and Transunion are reporting ( 30 ... \n", + "7143 My Macys account is due on the first of every ... \n", + "7219 Keep getting letters and calls from collection... \n", + "7574 On XXXX I was on the XXXX app and there was a ... \n", + "8759 Obviously I've been a victim of fraud, therefo... \n", + "9700 The following item have not been properly inve... \n", + "9822 During the housing market crash I went through... \n", "...\n", "\n", "[10000 rows x 4 columns]" @@ -822,19 +812,7 @@ { "data": { "text/html": [ - "Query job c78e1040-2a57-42f6-8fdb-5b9524846259 is DONE. 72.1 MB processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Query job 0986541b-3941-4387-b813-8888f53d149e is DONE. 0 Bytes processed. Open Job" + "Query job b4594edf-80e5-4476-ac06-b799001f4cb0 is DONE. 72.0 MB processed. Open Job" ], "text/plain": [ "" @@ -846,7 +824,7 @@ { "data": { "text/html": [ - "Query job 754aadd2-fee6-495c-acef-506f4e13c062 is DONE. 72.6 MB processed. Open Job" + "Query job 417e806a-2574-4b1b-8276-a95fa2df56e1 is DONE. 72.5 MB processed. Open Job" ], "text/plain": [ "" @@ -876,187 +854,188 @@ " \n", " \n", " \n", - " text_embedding\n", - " statistics\n", - " ml_embed_text_status\n", + " ml_generate_embedding_result\n", + " ml_generate_embedding_statistics\n", + " ml_generate_embedding_status\n", " content\n", " \n", " \n", " \n", " \n", - " 545\n", - " [ 1.82510037e-02 -1.27867460e-02 -1.57095697e-...\n", - " {\"token_count\":178,\"truncated\":false}\n", + " 357\n", + " [ 1.33585772e-02 -3.76597494e-02 -6.14452176e-...\n", + " {\"token_count\":306,\"truncated\":false}\n", " \n", - " My payments have been approximately {$89.00} w...\n", + " I decided to try XXXX services for my wife and...\n", " \n", " \n", - " 614\n", - " [ 5.40032536e-02 -5.28502129e-02 -5.33268750e-...\n", - " {\"token_count\":399,\"truncated\":false}\n", + " 428\n", + " [ 3.10036819e-04 -3.82593311e-02 -3.41922641e-...\n", + " {\"token_count\":134,\"truncated\":false}\n", " \n", - " Hi, I have contacted Trans Union XXXX XXXX abo...\n", + " XXXX I went to the bank in question ( XXXX XXX...\n", " \n", " \n", - " 1236\n", - " [-5.32836001e-03 -5.84292673e-02 -5.86670786e-...\n", - " {\"token_count\":129,\"truncated\":false}\n", + " 1319\n", + " [ 1.97481886e-02 -1.97448786e-02 -5.13443351e-...\n", + " {\"token_count\":215,\"truncated\":false}\n", " \n", - " I have a XXXX XXXX XXXX credit card on my Exp...\n", + " I currently have a home loan with my ex husban...\n", " \n", " \n", - " 1477\n", - " [ 3.02605387e-02 -4.37121317e-02 -2.70802993e-...\n", - " {\"token_count\":16,\"truncated\":false}\n", + " 1993\n", + " [ 9.83821880e-03 -6.55664057e-02 -5.46210706e-...\n", + " {\"token_count\":536,\"truncated\":false}\n", " \n", - " Wrongs information, selling my information to ...\n", + " NOT MY ACCOUNT, NOT AN AUTHORIZED USER {$1800....\n", " \n", " \n", - " 2261\n", - " [ 2.35723313e-02 -3.73509154e-02 -6.44604117e-...\n", - " {\"token_count\":33,\"truncated\":false}\n", + " 1997\n", + " [ 0.03145148 -0.01011822 -0.02316323 -0.025078...\n", + " {\"token_count\":123,\"truncated\":false}\n", " \n", - " Please investigate and delete disputed item th...\n", + " After a while the payments became harder and h...\n", " \n", " \n", - " 2361\n", - " [ 1.04440488e-02 -9.37070698e-03 -7.36323372e-...\n", - " {\"token_count\":45,\"truncated\":false}\n", + " 2469\n", + " [ 4.74590808e-03 -4.56819348e-02 -2.49751769e-...\n", + " {\"token_count\":60,\"truncated\":false}\n", " \n", - " By the provisions of the Fair Credit Reporting...\n", + " In the course of my student loan, I have been ...\n", " \n", " \n", - " 2378\n", - " [ 3.04989032e-02 -4.08191867e-02 -6.18648790e-...\n", - " {\"token_count\":892,\"truncated\":false}\n", + " 2624\n", + " [ 3.91883589e-03 -3.26644145e-02 -7.10378587e-...\n", + " {\"token_count\":254,\"truncated\":false}\n", " \n", - " Since XX/XX/XXXX I have been trying to dispute...\n", + " In accordance with the Fair Credit Reporting A...\n", " \n", " \n", - " 3133\n", - " [ 0.00152804 -0.04189068 -0.04220504 -0.053740...\n", - " {\"token_count\":90,\"truncated\":false}\n", + " 2832\n", + " [ 8.35181400e-03 -2.91643552e-02 -4.30776961e-...\n", + " {\"token_count\":79,\"truncated\":false}\n", " \n", - " Out of the blue I received a debt collection n...\n", + " LVNV FUNDING LLC is continually placing a coll...\n", " \n", " \n", - " 3140\n", - " [ 3.11435573e-02 -4.44000624e-02 -2.10917685e-...\n", - " {\"token_count\":372,\"truncated\":false}\n", + " 3328\n", + " [ 2.71253809e-02 -1.77491009e-02 -5.32273464e-...\n", + " {\"token_count\":156,\"truncated\":false}\n", " \n", - " My wife and I have been sending money to XXXX ...\n", + " On XX/XX/2020 I sent a letter regarding inaccu...\n", " \n", " \n", - " 3322\n", - " [ 2.75927987e-02 -6.23729872e-03 -3.83295454e-...\n", - " {\"token_count\":36,\"truncated\":false}\n", + " 3650\n", + " [-6.10093866e-03 -5.93599863e-02 -8.04531425e-...\n", + " {\"token_count\":175,\"truncated\":false}\n", " \n", - " Phone calls from Convergent Outsourcing XXXX. ...\n", + " Over a year and a half ago we started the proc...\n", " \n", " \n", - " 3583\n", - " [ 9.20385588e-03 -3.83387171e-02 -6.46291822e-...\n", - " {\"token_count\":52,\"truncated\":false}\n", + " 3860\n", + " [ 5.84836192e-02 -2.43354496e-03 -5.57337068e-...\n", + " {\"token_count\":1267,\"truncated\":false}\n", " \n", - " I recently received a copy of my credit report...\n", + " The issue is 26 late payments on me and my wif...\n", " \n", " \n", - " 4134\n", - " [-7.04960374e-04 -3.52595337e-02 -1.65264793e-...\n", - " {\"token_count\":412,\"truncated\":false}\n", + " 4464\n", + " [ 6.05084226e-02 -3.21578234e-02 -7.51668587e-...\n", + " {\"token_count\":906,\"truncated\":false}\n", " \n", - " I have been sending the creditor what they hav...\n", + " I purchased as replacement for a lost XXXX XXX...\n", " \n", " \n", - " 4496\n", - " [ 3.67735326e-02 1.21120387e-03 -5.20942472e-...\n", - " {\"token_count\":182,\"truncated\":false}\n", + " 4470\n", + " [ 1.28689921e-02 -3.25881056e-02 -6.53645024e-...\n", + " {\"token_count\":200,\"truncated\":false}\n", " \n", - " This is my second complaint. Their response to...\n", + " in accordance with the Fair Credit Reporting a...\n", " \n", " \n", - " 5260\n", - " [ 2.07133405e-02 -1.69602726e-02 -5.07124476e-...\n", - " {\"token_count\":103,\"truncated\":false}\n", + " 4567\n", + " [-5.49167022e-03 -3.84587422e-02 -8.56091827e-...\n", + " {\"token_count\":110,\"truncated\":false}\n", " \n", - " XX/XX/XXXX and XX/XX/XXXX, {$3200.00} contacte...\n", + " I have submitted multiple disputes through the...\n", " \n", " \n", - " 5400\n", - " [ 1.44114876e-02 -2.34710164e-02 -6.58538565e-...\n", - " {\"token_count\":60,\"truncated\":false}\n", + " 4713\n", + " [ 2.68485844e-02 -3.46762352e-02 -4.59849052e-...\n", + " {\"token_count\":549,\"truncated\":false}\n", " \n", - " Upon checking my XXXX credit report I noticed ...\n", + " While shopping for furniture for my home I ope...\n", " \n", " \n", - " 5425\n", - " [ 3.10326386e-02 -2.19427086e-02 -6.56386837e-...\n", - " {\"token_count\":87,\"truncated\":false}\n", + " 5181\n", + " [ 2.05754172e-02 -3.83999050e-02 -9.29225236e-...\n", + " {\"token_count\":77,\"truncated\":false}\n", " \n", - " Follow up to previous complaint XXXX XXXX XXXX...\n", + " I had opened a Wells Fargo checking account wi...\n", " \n", " \n", - " 6014\n", - " [ 1.90773793e-02 -2.27493346e-02 -3.27166244e-...\n", - " {\"token_count\":175,\"truncated\":false}\n", + " 5511\n", + " [-0.00217485 -0.04031368 -0.06604777 -0.052006...\n", + " {\"token_count\":262,\"truncated\":false}\n", " \n", - " My new XXXX lease was over always paid on time...\n", + " I recently disputed ( see attached letter ) wi...\n", " \n", " \n", - " 8192\n", - " [ 0.01937891 -0.05466933 -0.06070872 -0.059028...\n", - " {\"token_count\":131,\"truncated\":false}\n", + " 5888\n", + " [-8.15972779e-03 -3.46563384e-02 -5.91776446e-...\n", + " {\"token_count\":176,\"truncated\":false}\n", " \n", - " I have no idea where this account cane from. B...\n", + " XXXX XXXX XXXX XXXX \n", + "I have disputed this acco...\n", " \n", " \n", - " 8240\n", - " [ 4.34123818e-03 -3.40953320e-02 -4.06381376e-...\n", - " {\"token_count\":87,\"truncated\":false}\n", + " 6299\n", + " [ 4.80043218e-02 -4.13420722e-02 -6.12363108e-...\n", + " {\"token_count\":151,\"truncated\":false}\n", " \n", - " I TIED TO BUY CAR AT XXXX, THEY GOT APPROVAL F...\n", + " XXXX ; XXXX and Transunion are reporting ( 30 ...\n", " \n", " \n", - " 8720\n", - " [ 0.03133732 -0.03972461 -0.00178199 -0.035876...\n", - " {\"token_count\":645,\"truncated\":false}\n", + " 7143\n", + " [ 4.39200476e-02 -3.04005221e-02 -3.47866341e-...\n", + " {\"token_count\":234,\"truncated\":false}\n", " \n", - " XXXX XXXX XXXX XXXX, NY XXXX XX/XX/XXXX Consum...\n", + " My Macys account is due on the first of every ...\n", " \n", " \n", - " 8914\n", - " [ 1.75969116e-02 -2.25022305e-02 -5.70390299e-...\n", - " {\"token_count\":180,\"truncated\":false}\n", + " 7219\n", + " [ 1.00224940e-02 -3.79302073e-03 -3.41785327e-...\n", + " {\"token_count\":26,\"truncated\":false}\n", " \n", - " On XX/XX/21 I sent a letter regarding inaccura...\n", + " Keep getting letters and calls from collection...\n", " \n", " \n", - " 10021\n", - " [ 5.02460636e-02 -5.25112189e-02 -4.12914790e-...\n", - " {\"token_count\":30,\"truncated\":false}\n", + " 7574\n", + " [-0.00149564 -0.06619431 -0.05084481 -0.048579...\n", + " {\"token_count\":129,\"truncated\":false}\n", " \n", - " XX/XX/XXXX and XX/XX/XXXX inaccurate informati...\n", + " On XXXX I was on the XXXX app and there was a ...\n", " \n", " \n", - " 10327\n", - " [-0.00979626 -0.04912931 -0.08654705 -0.021063...\n", - " {\"token_count\":194,\"truncated\":false}\n", + " 8759\n", + " [ 0.01501553 -0.03575936 -0.050562 -0.034884...\n", + " {\"token_count\":501,\"truncated\":false}\n", " \n", - " When I reviewed my credit report, I discovered...\n", + " Obviously I've been a victim of fraud, therefo...\n", " \n", " \n", - " 10345\n", - " [-0.04292191 -0.02636929 -0.06177032 -0.076520...\n", - " {\"token_count\":262,\"truncated\":false}\n", + " 9700\n", + " [ 1.01501048e-02 -2.80565154e-02 -4.05892394e-...\n", + " {\"token_count\":48,\"truncated\":false}\n", " \n", - " U.S. Bank sent two letters containing Visa Deb...\n", + " The following item have not been properly inve...\n", " \n", " \n", - " 10369\n", - " [ 2.16020197e-02 -5.62509745e-02 -5.93873672e-...\n", - " {\"token_count\":77,\"truncated\":false}\n", + " 9822\n", + " [ 2.95880195e-02 1.65440738e-02 -3.33247967e-...\n", + " {\"token_count\":2373,\"truncated\":true}\n", " \n", - " I requested from XXXX that they reverse the la...\n", + " During the housing market crash I went through...\n", " \n", " \n", "\n", @@ -1064,86 +1043,87 @@ "[10000 rows x 4 columns in total]" ], "text/plain": [ - " text_embedding \\\n", - "545 [ 1.82510037e-02 -1.27867460e-02 -1.57095697e-... \n", - "614 [ 5.40032536e-02 -5.28502129e-02 -5.33268750e-... \n", - "1236 [-5.32836001e-03 -5.84292673e-02 -5.86670786e-... \n", - "1477 [ 3.02605387e-02 -4.37121317e-02 -2.70802993e-... \n", - "2261 [ 2.35723313e-02 -3.73509154e-02 -6.44604117e-... \n", - "2361 [ 1.04440488e-02 -9.37070698e-03 -7.36323372e-... \n", - "2378 [ 3.04989032e-02 -4.08191867e-02 -6.18648790e-... \n", - "3133 [ 0.00152804 -0.04189068 -0.04220504 -0.053740... \n", - "3140 [ 3.11435573e-02 -4.44000624e-02 -2.10917685e-... \n", - "3322 [ 2.75927987e-02 -6.23729872e-03 -3.83295454e-... \n", - "3583 [ 9.20385588e-03 -3.83387171e-02 -6.46291822e-... \n", - "4134 [-7.04960374e-04 -3.52595337e-02 -1.65264793e-... \n", - "4496 [ 3.67735326e-02 1.21120387e-03 -5.20942472e-... \n", - "5260 [ 2.07133405e-02 -1.69602726e-02 -5.07124476e-... \n", - "5400 [ 1.44114876e-02 -2.34710164e-02 -6.58538565e-... \n", - "5425 [ 3.10326386e-02 -2.19427086e-02 -6.56386837e-... \n", - "6014 [ 1.90773793e-02 -2.27493346e-02 -3.27166244e-... \n", - "8192 [ 0.01937891 -0.05466933 -0.06070872 -0.059028... \n", - "8240 [ 4.34123818e-03 -3.40953320e-02 -4.06381376e-... \n", - "8720 [ 0.03133732 -0.03972461 -0.00178199 -0.035876... \n", - "8914 [ 1.75969116e-02 -2.25022305e-02 -5.70390299e-... \n", - "10021 [ 5.02460636e-02 -5.25112189e-02 -4.12914790e-... \n", - "10327 [-0.00979626 -0.04912931 -0.08654705 -0.021063... \n", - "10345 [-0.04292191 -0.02636929 -0.06177032 -0.076520... \n", - "10369 [ 2.16020197e-02 -5.62509745e-02 -5.93873672e-... \n", + " ml_generate_embedding_result \\\n", + "357 [ 1.33585772e-02 -3.76597494e-02 -6.14452176e-... \n", + "428 [ 3.10036819e-04 -3.82593311e-02 -3.41922641e-... \n", + "1319 [ 1.97481886e-02 -1.97448786e-02 -5.13443351e-... \n", + "1993 [ 9.83821880e-03 -6.55664057e-02 -5.46210706e-... \n", + "1997 [ 0.03145148 -0.01011822 -0.02316323 -0.025078... \n", + "2469 [ 4.74590808e-03 -4.56819348e-02 -2.49751769e-... \n", + "2624 [ 3.91883589e-03 -3.26644145e-02 -7.10378587e-... \n", + "2832 [ 8.35181400e-03 -2.91643552e-02 -4.30776961e-... \n", + "3328 [ 2.71253809e-02 -1.77491009e-02 -5.32273464e-... \n", + "3650 [-6.10093866e-03 -5.93599863e-02 -8.04531425e-... \n", + "3860 [ 5.84836192e-02 -2.43354496e-03 -5.57337068e-... \n", + "4464 [ 6.05084226e-02 -3.21578234e-02 -7.51668587e-... \n", + "4470 [ 1.28689921e-02 -3.25881056e-02 -6.53645024e-... \n", + "4567 [-5.49167022e-03 -3.84587422e-02 -8.56091827e-... \n", + "4713 [ 2.68485844e-02 -3.46762352e-02 -4.59849052e-... \n", + "5181 [ 2.05754172e-02 -3.83999050e-02 -9.29225236e-... \n", + "5511 [-0.00217485 -0.04031368 -0.06604777 -0.052006... \n", + "5888 [-8.15972779e-03 -3.46563384e-02 -5.91776446e-... \n", + "6299 [ 4.80043218e-02 -4.13420722e-02 -6.12363108e-... \n", + "7143 [ 4.39200476e-02 -3.04005221e-02 -3.47866341e-... \n", + "7219 [ 1.00224940e-02 -3.79302073e-03 -3.41785327e-... \n", + "7574 [-0.00149564 -0.06619431 -0.05084481 -0.048579... \n", + "8759 [ 0.01501553 -0.03575936 -0.050562 -0.034884... \n", + "9700 [ 1.01501048e-02 -2.80565154e-02 -4.05892394e-... \n", + "9822 [ 2.95880195e-02 1.65440738e-02 -3.33247967e-... \n", "\n", - " statistics ml_embed_text_status \\\n", - "545 {\"token_count\":178,\"truncated\":false} \n", - "614 {\"token_count\":399,\"truncated\":false} \n", - "1236 {\"token_count\":129,\"truncated\":false} \n", - "1477 {\"token_count\":16,\"truncated\":false} \n", - "2261 {\"token_count\":33,\"truncated\":false} \n", - "2361 {\"token_count\":45,\"truncated\":false} \n", - "2378 {\"token_count\":892,\"truncated\":false} \n", - "3133 {\"token_count\":90,\"truncated\":false} \n", - "3140 {\"token_count\":372,\"truncated\":false} \n", - "3322 {\"token_count\":36,\"truncated\":false} \n", - "3583 {\"token_count\":52,\"truncated\":false} \n", - "4134 {\"token_count\":412,\"truncated\":false} \n", - "4496 {\"token_count\":182,\"truncated\":false} \n", - "5260 {\"token_count\":103,\"truncated\":false} \n", - "5400 {\"token_count\":60,\"truncated\":false} \n", - "5425 {\"token_count\":87,\"truncated\":false} \n", - "6014 {\"token_count\":175,\"truncated\":false} \n", - "8192 {\"token_count\":131,\"truncated\":false} \n", - "8240 {\"token_count\":87,\"truncated\":false} \n", - "8720 {\"token_count\":645,\"truncated\":false} \n", - "8914 {\"token_count\":180,\"truncated\":false} \n", - "10021 {\"token_count\":30,\"truncated\":false} \n", - "10327 {\"token_count\":194,\"truncated\":false} \n", - "10345 {\"token_count\":262,\"truncated\":false} \n", - "10369 {\"token_count\":77,\"truncated\":false} \n", + " ml_generate_embedding_statistics ml_generate_embedding_status \\\n", + "357 {\"token_count\":306,\"truncated\":false} \n", + "428 {\"token_count\":134,\"truncated\":false} \n", + "1319 {\"token_count\":215,\"truncated\":false} \n", + "1993 {\"token_count\":536,\"truncated\":false} \n", + "1997 {\"token_count\":123,\"truncated\":false} \n", + "2469 {\"token_count\":60,\"truncated\":false} \n", + "2624 {\"token_count\":254,\"truncated\":false} \n", + "2832 {\"token_count\":79,\"truncated\":false} \n", + "3328 {\"token_count\":156,\"truncated\":false} \n", + "3650 {\"token_count\":175,\"truncated\":false} \n", + "3860 {\"token_count\":1267,\"truncated\":false} \n", + "4464 {\"token_count\":906,\"truncated\":false} \n", + "4470 {\"token_count\":200,\"truncated\":false} \n", + "4567 {\"token_count\":110,\"truncated\":false} \n", + "4713 {\"token_count\":549,\"truncated\":false} \n", + "5181 {\"token_count\":77,\"truncated\":false} \n", + "5511 {\"token_count\":262,\"truncated\":false} \n", + "5888 {\"token_count\":176,\"truncated\":false} \n", + "6299 {\"token_count\":151,\"truncated\":false} \n", + "7143 {\"token_count\":234,\"truncated\":false} \n", + "7219 {\"token_count\":26,\"truncated\":false} \n", + "7574 {\"token_count\":129,\"truncated\":false} \n", + "8759 {\"token_count\":501,\"truncated\":false} \n", + "9700 {\"token_count\":48,\"truncated\":false} \n", + "9822 {\"token_count\":2373,\"truncated\":true} \n", "\n", - " content \n", - "545 My payments have been approximately {$89.00} w... \n", - "614 Hi, I have contacted Trans Union XXXX XXXX abo... \n", - "1236 I have a XXXX XXXX XXXX credit card on my Exp... \n", - "1477 Wrongs information, selling my information to ... \n", - "2261 Please investigate and delete disputed item th... \n", - "2361 By the provisions of the Fair Credit Reporting... \n", - "2378 Since XX/XX/XXXX I have been trying to dispute... \n", - "3133 Out of the blue I received a debt collection n... \n", - "3140 My wife and I have been sending money to XXXX ... \n", - "3322 Phone calls from Convergent Outsourcing XXXX. ... \n", - "3583 I recently received a copy of my credit report... \n", - "4134 I have been sending the creditor what they hav... \n", - "4496 This is my second complaint. Their response to... \n", - "5260 XX/XX/XXXX and XX/XX/XXXX, {$3200.00} contacte... \n", - "5400 Upon checking my XXXX credit report I noticed ... \n", - "5425 Follow up to previous complaint XXXX XXXX XXXX... \n", - "6014 My new XXXX lease was over always paid on time... \n", - "8192 I have no idea where this account cane from. B... \n", - "8240 I TIED TO BUY CAR AT XXXX, THEY GOT APPROVAL F... \n", - "8720 XXXX XXXX XXXX XXXX, NY XXXX XX/XX/XXXX Consum... \n", - "8914 On XX/XX/21 I sent a letter regarding inaccura... \n", - "10021 XX/XX/XXXX and XX/XX/XXXX inaccurate informati... \n", - "10327 When I reviewed my credit report, I discovered... \n", - "10345 U.S. Bank sent two letters containing Visa Deb... \n", - "10369 I requested from XXXX that they reverse the la... \n", + " content \n", + "357 I decided to try XXXX services for my wife and... \n", + "428 XXXX I went to the bank in question ( XXXX XXX... \n", + "1319 I currently have a home loan with my ex husban... \n", + "1993 NOT MY ACCOUNT, NOT AN AUTHORIZED USER {$1800.... \n", + "1997 After a while the payments became harder and h... \n", + "2469 In the course of my student loan, I have been ... \n", + "2624 In accordance with the Fair Credit Reporting A... \n", + "2832 LVNV FUNDING LLC is continually placing a coll... \n", + "3328 On XX/XX/2020 I sent a letter regarding inaccu... \n", + "3650 Over a year and a half ago we started the proc... \n", + "3860 The issue is 26 late payments on me and my wif... \n", + "4464 I purchased as replacement for a lost XXXX XXX... \n", + "4470 in accordance with the Fair Credit Reporting a... \n", + "4567 I have submitted multiple disputes through the... \n", + "4713 While shopping for furniture for my home I ope... \n", + "5181 I had opened a Wells Fargo checking account wi... \n", + "5511 I recently disputed ( see attached letter ) wi... \n", + "5888 XXXX XXXX XXXX XXXX \n", + "I have disputed this acco... \n", + "6299 XXXX ; XXXX and Transunion are reporting ( 30 ... \n", + "7143 My Macys account is due on the first of every ... \n", + "7219 Keep getting letters and calls from collection... \n", + "7574 On XXXX I was on the XXXX app and there was a ... \n", + "8759 Obviously I've been a victim of fraud, therefo... \n", + "9700 The following item have not been properly inve... \n", + "9822 During the housing market crash I went through... \n", "...\n", "\n", "[10000 rows x 4 columns]" @@ -1156,10 +1136,10 @@ ], "source": [ "successful_rows = (\n", - " (predicted_embeddings[\"ml_embed_text_status\"] == \"\")\n", + " (predicted_embeddings[\"ml_generate_embedding_status\"] == \"\")\n", " # Series.str.len() gives the length of an array.\n", " # See: https://stackoverflow.com/a/41340543/101923\n", - " & (predicted_embeddings[\"text_embedding\"].str.len() != 0)\n", + " & (predicted_embeddings[\"ml_generate_embedding_result\"].str.len() != 0)\n", ")\n", "predicted_embeddings = predicted_embeddings[successful_rows]\n", "predicted_embeddings\n" @@ -1214,7 +1194,7 @@ { "data": { "text/html": [ - "Query job fa4bbc13-3831-4c80-9b59-9939e605ed58 is DONE. 61.7 MB processed. Open Job" + "Query job 18aa46ee-0b10-4912-ae14-87b7e81ee447 is DONE. 61.7 MB processed. Open Job" ], "text/plain": [ "" @@ -1226,7 +1206,7 @@ { "data": { "text/html": [ - "Query job d2d681aa-e49a-4fda-89fd-60cf906d3aec is DONE. 0 Bytes processed. Open Job" + "Query job fd573f97-2424-472a-969d-463f184967d9 is DONE. 0 Bytes processed. Open Job" ], "text/plain": [ "" @@ -1238,7 +1218,7 @@ { "data": { "text/html": [ - "Query job 234bb6be-625c-4c96-baea-c37c33410114 is DONE. 72.7 MB processed. Open Job" + "Query job 9f2e0a3f-d7d6-4fb8-b558-95f39235410d is DONE. 72.7 MB processed. Open Job" ], "text/plain": [ "" @@ -1250,7 +1230,7 @@ { "data": { "text/html": [ - "Query job 285817cb-99d3-426f-82c3-89d36119e8db is DONE. 80.0 kB processed. Open Job" + "Query job 786ababe-7c40-426f-bb39-154329e4c51a is DONE. 80.0 kB processed. Open Job" ], "text/plain": [ "" @@ -1262,7 +1242,7 @@ { "data": { "text/html": [ - "Query job 3a39d2b0-55a1-4922-972a-8806b387f877 is DONE. 73.3 MB processed. Open Job" + "Query job a191fc97-baa6-4c7c-b78f-4365678caa60 is DONE. 73.2 MB processed. Open Job" ], "text/plain": [ "" @@ -1294,57 +1274,57 @@ " \n", " CENTROID_ID\n", " NEAREST_CENTROIDS_DISTANCE\n", - " text_embedding\n", - " statistics\n", - " ml_embed_text_status\n", + " ml_generate_embedding_result\n", + " ml_generate_embedding_statistics\n", + " ml_generate_embedding_status\n", " content\n", " \n", " \n", " \n", " \n", - " 182250\n", + " 1244571\n", " 1\n", - " [{'CENTROID_ID': 1, 'DISTANCE': 0.570560301900...\n", - " [ 4.70298417e-02 -4.08669300e-02 -2.99868709e-...\n", - " {\"token_count\":10,\"truncated\":false}\n", + " [{'CENTROID_ID': 1, 'DISTANCE': 0.414497263076...\n", + " [ 1.10590272e-02 -2.11433582e-02 -5.66212423e-...\n", + " {\"token_count\":100,\"truncated\":false}\n", " \n", - " These are not my accounts. Please remove them.\n", + " Ive disputed two Bankruptcies that still exist...\n", " \n", " \n", - " 3023485\n", + " 744390\n", " 1\n", - " [{'CENTROID_ID': 1, 'DISTANCE': 0.523572693768...\n", - " [ 1.55437263e-02 -1.93240177e-02 -2.48466972e-...\n", - " {\"token_count\":10,\"truncated\":false}\n", + " [{'CENTROID_ID': 1, 'DISTANCE': 0.416584344032...\n", + " [ 4.15011719e-02 -4.50705849e-02 -7.35541508e-...\n", + " {\"token_count\":100,\"truncated\":false}\n", " \n", - " This debt is not mine due to identity theft.\n", + " The XXXX account was settled as a class action...\n", " \n", " \n", - " 407254\n", + " 127514\n", " 1\n", - " [{'CENTROID_ID': 1, 'DISTANCE': 0.515173566816...\n", - " [-0.01293471 -0.01959546 -0.02238463 -0.066214...\n", - " {\"token_count\":10,\"truncated\":false}\n", + " [{'CENTROID_ID': 1, 'DISTANCE': 0.541137734253...\n", + " [ 3.54415141e-02 1.23769706e-02 -2.61783414e-...\n", + " {\"token_count\":100,\"truncated\":false}\n", " \n", - " I do not owe this company money!!!!!\n", + " I have late payments reported on my student lo...\n", " \n", " \n", - " 1509454\n", + " 630563\n", " 1\n", - " [{'CENTROID_ID': 1, 'DISTANCE': 0.645342721754...\n", - " [ 3.21860723e-02 -2.67103072e-02 -4.78175096e-...\n", - " {\"token_count\":10,\"truncated\":false}\n", + " [{'CENTROID_ID': 1, 'DISTANCE': 0.477175150810...\n", + " [ 2.34235693e-02 -4.21241224e-02 -3.90484147e-...\n", + " {\"token_count\":100,\"truncated\":false}\n", " \n", - " VIOLATES HIPPA AND CRA\n", + " A Military Star Credit card, aka Take it Home ...\n", " \n", " \n", - " 2357848\n", + " 2651231\n", " 1\n", - " [{'CENTROID_ID': 1, 'DISTANCE': 0.519872186251...\n", - " [-1.88122243e-02 -2.68064123e-02 -4.69480827e-...\n", - " {\"token_count\":10,\"truncated\":false}\n", + " [{'CENTROID_ID': 1, 'DISTANCE': 0.489760037964...\n", + " [ 2.64898203e-02 -5.62610961e-02 -5.82714193e-...\n", + " {\"token_count\":101,\"truncated\":false}\n", " \n", - " Receive numerous phone calls. I have no debt.\n", + " My mortgage is with Bank of America. I filed C...\n", " \n", " \n", "\n", @@ -1352,32 +1332,32 @@ ], "text/plain": [ " CENTROID_ID NEAREST_CENTROIDS_DISTANCE \\\n", - "182250 1 [{'CENTROID_ID': 1, 'DISTANCE': 0.570560301900... \n", - "3023485 1 [{'CENTROID_ID': 1, 'DISTANCE': 0.523572693768... \n", - "407254 1 [{'CENTROID_ID': 1, 'DISTANCE': 0.515173566816... \n", - "1509454 1 [{'CENTROID_ID': 1, 'DISTANCE': 0.645342721754... \n", - "2357848 1 [{'CENTROID_ID': 1, 'DISTANCE': 0.519872186251... \n", + "1244571 1 [{'CENTROID_ID': 1, 'DISTANCE': 0.414497263076... \n", + "744390 1 [{'CENTROID_ID': 1, 'DISTANCE': 0.416584344032... \n", + "127514 1 [{'CENTROID_ID': 1, 'DISTANCE': 0.541137734253... \n", + "630563 1 [{'CENTROID_ID': 1, 'DISTANCE': 0.477175150810... \n", + "2651231 1 [{'CENTROID_ID': 1, 'DISTANCE': 0.489760037964... \n", "\n", - " text_embedding \\\n", - "182250 [ 4.70298417e-02 -4.08669300e-02 -2.99868709e-... \n", - "3023485 [ 1.55437263e-02 -1.93240177e-02 -2.48466972e-... \n", - "407254 [-0.01293471 -0.01959546 -0.02238463 -0.066214... \n", - "1509454 [ 3.21860723e-02 -2.67103072e-02 -4.78175096e-... \n", - "2357848 [-1.88122243e-02 -2.68064123e-02 -4.69480827e-... \n", + " ml_generate_embedding_result \\\n", + "1244571 [ 1.10590272e-02 -2.11433582e-02 -5.66212423e-... \n", + "744390 [ 4.15011719e-02 -4.50705849e-02 -7.35541508e-... \n", + "127514 [ 3.54415141e-02 1.23769706e-02 -2.61783414e-... \n", + "630563 [ 2.34235693e-02 -4.21241224e-02 -3.90484147e-... \n", + "2651231 [ 2.64898203e-02 -5.62610961e-02 -5.82714193e-... \n", "\n", - " statistics ml_embed_text_status \\\n", - "182250 {\"token_count\":10,\"truncated\":false} \n", - "3023485 {\"token_count\":10,\"truncated\":false} \n", - "407254 {\"token_count\":10,\"truncated\":false} \n", - "1509454 {\"token_count\":10,\"truncated\":false} \n", - "2357848 {\"token_count\":10,\"truncated\":false} \n", + " ml_generate_embedding_statistics ml_generate_embedding_status \\\n", + "1244571 {\"token_count\":100,\"truncated\":false} \n", + "744390 {\"token_count\":100,\"truncated\":false} \n", + "127514 {\"token_count\":100,\"truncated\":false} \n", + "630563 {\"token_count\":100,\"truncated\":false} \n", + "2651231 {\"token_count\":101,\"truncated\":false} \n", "\n", - " content \n", - "182250 These are not my accounts. Please remove them. \n", - "3023485 This debt is not mine due to identity theft. \n", - "407254 I do not owe this company money!!!!! \n", - "1509454 VIOLATES HIPPA AND CRA \n", - "2357848 Receive numerous phone calls. I have no debt. " + " content \n", + "1244571 Ive disputed two Bankruptcies that still exist... \n", + "744390 The XXXX account was settled as a class action... \n", + "127514 I have late payments reported on my student lo... \n", + "630563 A Military Star Credit card, aka Take it Home ... \n", + "2651231 My mortgage is with Bank of America. I filed C... " ] }, "execution_count": 13, @@ -1387,7 +1367,7 @@ ], "source": [ "# Use KMeans clustering to calculate our groups. Will take ~3 minutes.\n", - "cluster_model.fit(predicted_embeddings[[\"text_embedding\"]])\n", + "cluster_model.fit(predicted_embeddings[[\"ml_generate_embedding_result\"]])\n", "clustered_result = cluster_model.predict(predicted_embeddings)\n", "# Notice the CENTROID_ID column, which is the ID number of the group that\n", "# each complaint belongs to.\n", @@ -1430,7 +1410,7 @@ { "data": { "text/html": [ - "Query job 85ead687-4ba9-44bf-88da-23a066f45960 is DONE. 10.7 MB processed. Open Job" + "Query job 8bfc647f-b9e5-40a2-816c-d12e8f81bea3 is DONE. 10.6 MB processed. Open Job" ], "text/plain": [ "" @@ -1442,7 +1422,7 @@ { "data": { "text/html": [ - "Query job 68ef20cd-220d-40a9-bb42-63ed3d6f5d3f is DONE. 10.7 MB processed. Open Job" + "Query job 6f834214-9cc3-4577-bb2d-980ba05df817 is DONE. 10.6 MB processed. Open Job" ], "text/plain": [ "" @@ -1478,42 +1458,62 @@ "output_type": "stream", "text": [ "comment list 1:\n", - "1. Wrongs information, selling my information to third party. Incorrect reporting\n", - "2. I TIED TO BUY CAR AT XXXX, THEY GOT APPROVAL FROM XXXX XXXX XXXX XXXX WHICH ENDED A A LIE. THEY ALSO, PULLED MANY OTHERS I DID NT AGREED TO. SOLF PULLS ONLY\n", - "3. XX/XX/XXXX and XX/XX/XXXX inaccurate information reported 30 days late.\n", - "4. Im working on myCredit and I see a couple of inquiries that I have no idea where they came from.\n", - "5. I request a copy of all dispute results and documentary evidence from XXXX, and XXXX mailed to me\n", + "1. I currently have a home loan with my ex husband with PHH Mortgages. We filed for divorce and in the divorce decree he became liable for the home and paying the payments. He ended up missing XXXX payments which effected my credit fairly substaintailly. when I became aware of the late payments, I ensured that the account was up to date and have since. I presented to them that I have the legal documents that he is obligated to make the payments each month and that I am not responisble for the payment. I asked them to remove the XXXX dings on my credit and they would not. I offered to present the paperwork and they still would not. The home is now being sold. I even filed with XXXX as a discrepency and they would not remove it. I would have never let these become a late payment. I was not even notified as they had all of his information in the file.\n", + "2. In the course of my student loan, I have been making payments and I feel that the payments haven't been added to the debt, the company stated that I am delinquent over 180 and my payments are auto pay. This has had a negative impact on my credit score.\n", + "3. The issue is 26 late payments on me and my wife 's credit reports due to a system error on a joint mortgage account that was always paid on time using autopay. ( will attach docs to support this ). \n", "\n", - "comment list 2:\n", - "1. My wife and I have been sending money to XXXX via my brother-in-law to finish a building project we have been working on since XXXX with target date of completion by XX/XX/XXXX. In XXXX XXXX my brother-in-law in was contacted by his bank to confirm he was not defrauding my wife. My brother-in-law confirmed he was helping to handle the building project by organizing and paying the workers. In XXXX XXXX Bank of America reach out to my wife to update her profile to avoid account restrictions. My wife 's account was eventually restricted until she called and confirmed her employment and other personal information. My wife 's full account activities were then restored and we continued sending wire transfers to XXXX via her checking account. \n", - "Then I received a letter dated XXXX XXXX XXXX from Bank of America stating the money market account I share with my wife which has been opened since XXXX will be will be restricted from use in 21 days and closed in 30 days with no reason. I strongly believe this is a result of the legal international wires because there was no reason to close the Savings account which had with hardly any activity. \n", - "I agree that Bank of America has a right to close accounts but I do not agree with Bank of America closing accounts because of international transactions unless they can prove fraud, criminal activity or support for terrorism, this is discriminatory towards foreign nationals. How are foreign nationals suppose to make investments or support their family/community if they are excluded from the banking system?\n", - "2. XXXX XXXX XXXX XXXX, NY XXXX XX/XX/XXXX Consumer Financial Protection Bureau XXXX XXXX XXXX XXXX, IA XXXX Dear Sir or Madam : In XX/XX/XXXX Out of the blue JP Morgan Chase arbitrarily closed my account. This was after my mother is a XXXX survivor who is over XXXX years old and for whom I have a general power of attorney and take care of her bill paying was questioned about a transaction. She is also XXXX XXXX. \n", + "This is an ongoing nightmare me and my wife are going through over the past 3 years. \n", + "Sent many dispute letters to the creditor and to the 3 bureaus, was promised multiple times that all late payments will be removed, we also has a letter from the bank stating we were never late on this account, also have a recording of a phone call with bank 's permission were the representative admits there was a system error and promised again that all late payments will be deleted from both of our credit reports. \n", + "As of today, for an unknown reason XXXX reports 6x30 days late payments, XXXX reports 24 lates, and Transunion 23 lates. \n", + "\n", + "We have always paid our mortgage on time for many years, enrolled in autopay and making 2 payments per month. Our mortgage is currently with XXXX XXXX, XXXX XXXX is the mortgage servicer who's collecting from us and disbursing payments to XXXX XXXX. \n", + "\n", + "I will attach here our mortgage transaction history confirming payments have been made on time, letter from the mortgage servicer XXXX XXXX XXXX XXXX confirming we were never late, copy of a page from my credit report showing all the late payments, and a few bank statements showing payment made on time while showing as late on our credit reports. ( XXXX & XXXX XXXX ) PLEASE HELP us to resolve this issue and have all late payments on this account removed from XXXX & XXXX XXXX. \n", + "\n", + "Her is a small portion of our previous communication with XXXX and their response : XX/XX/XXXX : we spoke with XXXX, XXXX stated that the funds were misappropriated and went to the wrong account. Said he'll contact their Tax Dep ' and get back to me and never did. \n", "\n", - "I have reason to belief that a mentally disturbed family member for whom I have an order of protection initiated this situation. This individual has ben harassing me and other members of my family for a considerable amount of time. \n", + "XX/XX/XXXX : I spoke in length with XXXX from XXXX XXXX & XXXX from XXXX at XXXX who opened an investigation. Their supervisor said he made all the necessary changes. The next month, the money was withdrawn from my account on time and i received again a late fee and 30 days late on my credit report. \n", "\n", - "The bank initially was satisfied with her response. However within 2 days they closed the account of a XXXX year old XXXX XXXX person. \n", + "XX/XX/XXXX Spoke to XXXX who sent me to XXXX who sent me to XXXX XXXX from Escalation department, she promised the issue was fixed and late payments will be removed in up to 30 days and she will email me a deletion letter. Nothing was sent! and i called and wrote 5 emails to her and never got a response. \n", "\n", - "Soon after for no reason my account was closed as well. I tried to reach out to the corporate offices of Chase and make great effort to find out what happened and to restore my account as well as my mothers but I was unsuccessful. In addition the people I spoke to were not only unhelpful bu exceedingly rude. \n", + "XX/XX/XXXX spoke with XXXX to follow up with XXXX XXXX, no response. \n", "\n", - "I should add that I have had an account with Chase since XX/XX/XXXX and took care of my ailing father before he passed away for over 25 years as well. I am now taking care of my mother for over 28 years. \n", + "XX/XX/XXXX spoke with XXXX at XXXX, she said that the transaction history is our proof that issue was corrected and she'll submit a request to delete the late payments as the system does not show that previous request was made. \n", "\n", - "I went so far as contacting a prominent Television reporter who was interested in doing a report on what happened. \n", + "XX/XX/XXXX XXXX sent a letter stating that we had a shortage of {$5300.00}. Again, upon checking my bank account all monthly payments were made on time. To avoid further issues i sent a check for {$5300.00} on XX/XX/XXXX. \n", "\n", - "I have since managed to open an account at another bank but this week I had reason to go to a branch of Chase regarding another issue and a manager using my That is a very serious unsubstantiated accusation and given this information I have no choice but to submit this complaint. \n", + "XX/XX/XXXX following many joint calls with XXXX XXXX i received a letter from XXXX stating that my credit might have been affected due to processing error and that they sent XXXX XXXX a letter requesting a removal of all late payments. \n", "\n", - "I have no interest in having an account again at a disreputable bank like Chase but I can not and will not accept or tolerate a derogatory accusation be associated with my name. \n", + "XX/XX/XXXX spoke with XXXX XXXX again.. \n", + "XX/XX/XXXX spoke with XXXX at XXXX I have many more... \n", "\n", - "I hope that my complaint will hAve the desired effect of removing this derogatory unsubstantiated accusation be removed from my name. However. I will not let this unfair matter stand and Chase ought to know that I have already retained an attorney and will if necessary hold Chase responsible and liable all damage i have incurred now And in the future Enclosed, please find the letter from Chase stating that they were closing my mothers account and a similar letter was received by me too. \n", + "*** XX/XX/XXXX : SPOKE WITH XXXX XXXX ( resolution team ) at XXXX. She said they will delete the late payments from both reports ( XXXX & XXXX XXXX and will call me to follow up on XX/XX/XXXX. She also emailed me the payment activity on the account. XXXX I received the payment history but NO CALL OR RESOLUTION. \n", "\n", - "Also please find a letter from her Doctor stating that she is XXXX XXXX. \n", + "XX/XX/XXXX : SPOKE WITH XXXX FROM XXXX AND XXXX FROM XXXX XXXX ON A XXXX WAY CALL at XXXX, XXXX PERMISSION TO RECORD THE CALL, SHE AGREED, we went over all the late payments, she said she sees the error and promised that this time it will be resolved and get deleted from our credit reports. Again, nothing was resolved and we never heard back from anyone.\n", + "4. XXXX ; XXXX and Transunion are reporting ( 30 ) plus days late on the XXXX XXXX partial account number XXXX. ( Please see page 3 of the attached credit report. ) This account was paid in XXXX, 2019 and the lates are reporting in XXXX, 2019. Please keep in mind that it is impossible to have late payments on an account that was paid off a month prior. This incorrect reporting is harming my credit score and this line item need to be removed from my credit report. I have contacted the ( 3 ) bureaus to fix this, however I have been unsuccessful.\n", + "5. My Macys account is due on the first of every month. Since I have had the card I have paid on the XXXX PRIOR to the due date. And have paid over the amount due. In XXXX my XXXX XXXX auto pay did not come out of my account and rather than calling me - on the XXXX of XXXX just 5 days late they cut my credit off and shut me out of my account so I can not even see my credit profile - I have made the payment and they still are locking me out - please look into this - you will see that is what happened and they are stating in a letter it is becasue my XXXX report shows a seriuos derogorty item which it does not and I have submitted a complaint with them as well. Macys has been the worst credit experience of my LIFE and I did read the reviews but thought it would be different for me I guess? \n", + "thank you for your help.\n", "\n", - "Thank you. \n", + "comment list 2:\n", + "1. I decided to try XXXX services for my wife and I so I purchased phones for both of us. After a day or two of trial we felt unsatisfied so we headed back to the store and we returned all items. We got charged with restocking fees and taxes. Later on I got a bill in the mail in which I was being charged {$1200.00} for the returned items. After several attempts of arguing for about XXXX months about whether I owed XXXX or not I was dismissed of such charges, but a month after I was charged by a collecting company called ERC for {$61.00}. I asked them to explain such charges weather if they were fees or taxes and they we unable to disclose information. Therefore, I asked them to send me a bill in the mail with details about the charges, as well as a dispute package and they told me they would send me a bill. About the dispute part, they said that I needed to call XXXX to discuss the charges with them but XXXX said that I had to discuss this with the collecting company. I never received a detailed statement neither a chanse to defend my self about such charges, I checked my credit score and found a red flag in it because of this. \n", + "\n", + "I am now hoping you may help me with this case. \n", + "\n", + "Thanks :\n", + "2. Over a year and a half ago we started the process of buying a home. Our mortgage guy sent us to a credit repair co. They got the collection account from Weltman , Weinberg & Reis taken off my credit, because it was unverifiable. Now it is back on my credit. I have credit reports showing the trade line on and then off and now today it is currently on my report. When I called to verify the account with WW & R they sent me a heavily redacted letter verifying absolutely nothing. I would like this unverifiable account taken off my credit and removed permanently. This should not be a loan I have to pay for if there is no verification that it is my debt. Attached are the credit reports and the letter of verification that was sent to me.\n", + "3. I recently disputed ( see attached letter ) with Receivable Management Services an account entry that they placed on my credit report without providing a dunning letter or any correspondence that would have allowed me 30 days to dispute the validity of the alleged debt. To date, I have not received any communication from them. They are blatantly violating my rights by reporting this inaccurate, erroneous, unverifiable entry.\n", + "\n", + "Additionally, this account entry does not reflect a payment history which should be included on any entry that is reflected on my credit report. In my previous communication to them, I specifically requested that they provide an agreement that states their authority to collect on the alleged debt, agreement with signature of the alleged debtor wherein he/she agreed to pay the creditor, alleged account number, date this alleged debt became payable, original delinquency date, and to date to no avail. \n", "\n", - "XXXX XXXX\n", - "3. U.S. Bank sent two letters containing Visa Debit Cards to our address on XX/XX/2021. One Visa Debit Card is in the name of XXXX XXXX and one Visa Debit Card is in the name of XXXX XXXX. These cards supposedly link to existing checking accounts at U.S. Bank. However : ( 1 ) Neither of us have existing checking accounts at U.S. Bank, ( 2 ) Neither of us solicited a bank account at U.S. Bank, and ( 3 ) Neither of us solicited a Visa Debit Card. We have attempted to call U.S. Bank at the phone numbers provided in the letters but are only able to access an automated system which will not proceed without us establishing accounts and activating these cards. We are concerned here that one of two things has happened : either ( 1 ) we are victims of identity theft and some third party is trying to establish accounts in our name, or ( 2 ) U.S. Bank is engaged in bank fraud. In either case, we request the assistance of the Consumer Financial Protection Bureau. Thank you.\n", - "4. I contacted my bank over 3 times about this amount, the first two times I spoke to gentleman that agreed with me that I didnt get back a certain amount of dollars back, I did the math and they refuse to see that I do not owe this amount because I never had it in the first place. I wrote out all my charges and connected it to the charges made back from the consumer and I was missing XXXX, I called XXXX they said they gave it all back which is not their fault because they showed me proof. Along the lines Capital One does not want to take responsibility for the missing money. I have wrote everything out and then its not adding up, they keep saying that they did a charge back twice which is incorrect. My balance was at XXXX before I made this purchase and it shouldve been returned back to XXXX because I return all the items and nothing is in my possession. I have proof that I returned everything.\n", - "5. CB INDIGO ( Bank ) XX/XX/2022 I just recently got off the phone with the company and they wont put in a request of removal of a fraudulent hard inquiry from Insigo Mastercard to XXXX. They dont even have my information on file, I called 3 times most of them are lazy and was giving me a hard time.\n", + "As such, since they have refused to respond to my request and not provide any documentation to substantiate their allegations, coupled with the fact that they did not provide me a dunning letter is grounds for this erroneous, inaccurate, unverifiable entry to be deleted from my credit report.\n", + "4. I accepted service from XXXX XXXX XXXX. The company did not inform me that internet was required. They also told me that the agreement was at will without penalty. They never addressed my needs as a customer. My bill is only {$230.00}. They placed false information regarding my bill with a collection agency who has placed information on my credit report without contacting me or giving me an opportunity to dispute the validity of the debt. The debt is not valid. The actions are unlawful and I am requesting that the actions of this collection agency be reported to the Federal Trade Commission.\n", + "5. I have continued to submit an investigation for a Bankruptcy place on my credit report. I have been trying to get this removed because it was place on my credit report in error and inaccurate. ALL THREE CREDIT BUREAUS have continue to ignore the information proving this was place in error and fail to properly investigate the dispute I have place in their office. \n", + "\n", + "1. They say they have verified this dispute with XXXX but I have a letter from XXXX stating this was removed because they were unable to verify the accuracy of the bankruptcy. I received this letter on XX/XX/XXXX. XXXX just finished an investigation on XX/XX/2019 stating the verified this with XXXX. \n", + "2.Experian Open the dispute on XX/XX/2019 and closed it on XX/XX/19 stated they verified with XXXX and the Bankruptcy court and I have a letter From XXXX stating they could not verify the accuracy of this dispute. I also, have a letter from the court house stating they do not verify information with the credit bureaus How could be this be on my XXXX file when XXXX has removed this item. \n", + "3. XXXX open and investigation XX/XX/2019 and closed it XX/XX/2019 No way they properly investigation I have submitted all information to dispute the inaccurate information. Please do a proper investigation. \n", + "\n", + "XXXX, Experian, and XXXX please do a proper investigation under 611 of the FCRA thank you very much I have attached the letter proving this this is not on my XXXX consumer report and a letter form the court house stating they do not report information to the credit bureaus from the XXXX XXXX XXXX, Clerk of Court United State Bankruptcy Court on dated XX/XX/2019 I have summited it to the credit bureaus to be ignored. I have as for a description of my investigation by section 611 of the FCRA and the information from the investigation is inaccurate.\n", "\n" ] } @@ -1547,41 +1547,61 @@ "text": [ "Please highlight the most obvious difference between the two lists of comments:\n", "comment list 1:\n", - "1. Wrongs information, selling my information to third party. Incorrect reporting\n", - "2. I TIED TO BUY CAR AT XXXX, THEY GOT APPROVAL FROM XXXX XXXX XXXX XXXX WHICH ENDED A A LIE. THEY ALSO, PULLED MANY OTHERS I DID NT AGREED TO. SOLF PULLS ONLY\n", - "3. XX/XX/XXXX and XX/XX/XXXX inaccurate information reported 30 days late.\n", - "4. Im working on myCredit and I see a couple of inquiries that I have no idea where they came from.\n", - "5. I request a copy of all dispute results and documentary evidence from XXXX, and XXXX mailed to me\n", - "comment list 2:\n", - "1. My wife and I have been sending money to XXXX via my brother-in-law to finish a building project we have been working on since XXXX with target date of completion by XX/XX/XXXX. In XXXX XXXX my brother-in-law in was contacted by his bank to confirm he was not defrauding my wife. My brother-in-law confirmed he was helping to handle the building project by organizing and paying the workers. In XXXX XXXX Bank of America reach out to my wife to update her profile to avoid account restrictions. My wife 's account was eventually restricted until she called and confirmed her employment and other personal information. My wife 's full account activities were then restored and we continued sending wire transfers to XXXX via her checking account. \n", - "Then I received a letter dated XXXX XXXX XXXX from Bank of America stating the money market account I share with my wife which has been opened since XXXX will be will be restricted from use in 21 days and closed in 30 days with no reason. I strongly believe this is a result of the legal international wires because there was no reason to close the Savings account which had with hardly any activity. \n", - "I agree that Bank of America has a right to close accounts but I do not agree with Bank of America closing accounts because of international transactions unless they can prove fraud, criminal activity or support for terrorism, this is discriminatory towards foreign nationals. How are foreign nationals suppose to make investments or support their family/community if they are excluded from the banking system?\n", - "2. XXXX XXXX XXXX XXXX, NY XXXX XX/XX/XXXX Consumer Financial Protection Bureau XXXX XXXX XXXX XXXX, IA XXXX Dear Sir or Madam : In XX/XX/XXXX Out of the blue JP Morgan Chase arbitrarily closed my account. This was after my mother is a XXXX survivor who is over XXXX years old and for whom I have a general power of attorney and take care of her bill paying was questioned about a transaction. She is also XXXX XXXX. \n", + "1. I currently have a home loan with my ex husband with PHH Mortgages. We filed for divorce and in the divorce decree he became liable for the home and paying the payments. He ended up missing XXXX payments which effected my credit fairly substaintailly. when I became aware of the late payments, I ensured that the account was up to date and have since. I presented to them that I have the legal documents that he is obligated to make the payments each month and that I am not responisble for the payment. I asked them to remove the XXXX dings on my credit and they would not. I offered to present the paperwork and they still would not. The home is now being sold. I even filed with XXXX as a discrepency and they would not remove it. I would have never let these become a late payment. I was not even notified as they had all of his information in the file.\n", + "2. In the course of my student loan, I have been making payments and I feel that the payments haven't been added to the debt, the company stated that I am delinquent over 180 and my payments are auto pay. This has had a negative impact on my credit score.\n", + "3. The issue is 26 late payments on me and my wife 's credit reports due to a system error on a joint mortgage account that was always paid on time using autopay. ( will attach docs to support this ). \n", + "\n", + "This is an ongoing nightmare me and my wife are going through over the past 3 years. \n", + "Sent many dispute letters to the creditor and to the 3 bureaus, was promised multiple times that all late payments will be removed, we also has a letter from the bank stating we were never late on this account, also have a recording of a phone call with bank 's permission were the representative admits there was a system error and promised again that all late payments will be deleted from both of our credit reports. \n", + "As of today, for an unknown reason XXXX reports 6x30 days late payments, XXXX reports 24 lates, and Transunion 23 lates. \n", "\n", - "I have reason to belief that a mentally disturbed family member for whom I have an order of protection initiated this situation. This individual has ben harassing me and other members of my family for a considerable amount of time. \n", + "We have always paid our mortgage on time for many years, enrolled in autopay and making 2 payments per month. Our mortgage is currently with XXXX XXXX, XXXX XXXX is the mortgage servicer who's collecting from us and disbursing payments to XXXX XXXX. \n", "\n", - "The bank initially was satisfied with her response. However within 2 days they closed the account of a XXXX year old XXXX XXXX person. \n", + "I will attach here our mortgage transaction history confirming payments have been made on time, letter from the mortgage servicer XXXX XXXX XXXX XXXX confirming we were never late, copy of a page from my credit report showing all the late payments, and a few bank statements showing payment made on time while showing as late on our credit reports. ( XXXX & XXXX XXXX ) PLEASE HELP us to resolve this issue and have all late payments on this account removed from XXXX & XXXX XXXX. \n", "\n", - "Soon after for no reason my account was closed as well. I tried to reach out to the corporate offices of Chase and make great effort to find out what happened and to restore my account as well as my mothers but I was unsuccessful. In addition the people I spoke to were not only unhelpful bu exceedingly rude. \n", + "Her is a small portion of our previous communication with XXXX and their response : XX/XX/XXXX : we spoke with XXXX, XXXX stated that the funds were misappropriated and went to the wrong account. Said he'll contact their Tax Dep ' and get back to me and never did. \n", "\n", - "I should add that I have had an account with Chase since XX/XX/XXXX and took care of my ailing father before he passed away for over 25 years as well. I am now taking care of my mother for over 28 years. \n", + "XX/XX/XXXX : I spoke in length with XXXX from XXXX XXXX & XXXX from XXXX at XXXX who opened an investigation. Their supervisor said he made all the necessary changes. The next month, the money was withdrawn from my account on time and i received again a late fee and 30 days late on my credit report. \n", "\n", - "I went so far as contacting a prominent Television reporter who was interested in doing a report on what happened. \n", + "XX/XX/XXXX Spoke to XXXX who sent me to XXXX who sent me to XXXX XXXX from Escalation department, she promised the issue was fixed and late payments will be removed in up to 30 days and she will email me a deletion letter. Nothing was sent! and i called and wrote 5 emails to her and never got a response. \n", + "\n", + "XX/XX/XXXX spoke with XXXX to follow up with XXXX XXXX, no response. \n", + "\n", + "XX/XX/XXXX spoke with XXXX at XXXX, she said that the transaction history is our proof that issue was corrected and she'll submit a request to delete the late payments as the system does not show that previous request was made. \n", + "\n", + "XX/XX/XXXX XXXX sent a letter stating that we had a shortage of {$5300.00}. Again, upon checking my bank account all monthly payments were made on time. To avoid further issues i sent a check for {$5300.00} on XX/XX/XXXX. \n", + "\n", + "XX/XX/XXXX following many joint calls with XXXX XXXX i received a letter from XXXX stating that my credit might have been affected due to processing error and that they sent XXXX XXXX a letter requesting a removal of all late payments. \n", + "\n", + "XX/XX/XXXX spoke with XXXX XXXX again.. \n", + "XX/XX/XXXX spoke with XXXX at XXXX I have many more... \n", + "\n", + "*** XX/XX/XXXX : SPOKE WITH XXXX XXXX ( resolution team ) at XXXX. She said they will delete the late payments from both reports ( XXXX & XXXX XXXX and will call me to follow up on XX/XX/XXXX. She also emailed me the payment activity on the account. XXXX I received the payment history but NO CALL OR RESOLUTION. \n", + "\n", + "XX/XX/XXXX : SPOKE WITH XXXX FROM XXXX AND XXXX FROM XXXX XXXX ON A XXXX WAY CALL at XXXX, XXXX PERMISSION TO RECORD THE CALL, SHE AGREED, we went over all the late payments, she said she sees the error and promised that this time it will be resolved and get deleted from our credit reports. Again, nothing was resolved and we never heard back from anyone.\n", + "4. XXXX ; XXXX and Transunion are reporting ( 30 ) plus days late on the XXXX XXXX partial account number XXXX. ( Please see page 3 of the attached credit report. ) This account was paid in XXXX, 2019 and the lates are reporting in XXXX, 2019. Please keep in mind that it is impossible to have late payments on an account that was paid off a month prior. This incorrect reporting is harming my credit score and this line item need to be removed from my credit report. I have contacted the ( 3 ) bureaus to fix this, however I have been unsuccessful.\n", + "5. My Macys account is due on the first of every month. Since I have had the card I have paid on the XXXX PRIOR to the due date. And have paid over the amount due. In XXXX my XXXX XXXX auto pay did not come out of my account and rather than calling me - on the XXXX of XXXX just 5 days late they cut my credit off and shut me out of my account so I can not even see my credit profile - I have made the payment and they still are locking me out - please look into this - you will see that is what happened and they are stating in a letter it is becasue my XXXX report shows a seriuos derogorty item which it does not and I have submitted a complaint with them as well. Macys has been the worst credit experience of my LIFE and I did read the reviews but thought it would be different for me I guess? \n", + "thank you for your help.\n", + "comment list 2:\n", + "1. I decided to try XXXX services for my wife and I so I purchased phones for both of us. After a day or two of trial we felt unsatisfied so we headed back to the store and we returned all items. We got charged with restocking fees and taxes. Later on I got a bill in the mail in which I was being charged {$1200.00} for the returned items. After several attempts of arguing for about XXXX months about whether I owed XXXX or not I was dismissed of such charges, but a month after I was charged by a collecting company called ERC for {$61.00}. I asked them to explain such charges weather if they were fees or taxes and they we unable to disclose information. Therefore, I asked them to send me a bill in the mail with details about the charges, as well as a dispute package and they told me they would send me a bill. About the dispute part, they said that I needed to call XXXX to discuss the charges with them but XXXX said that I had to discuss this with the collecting company. I never received a detailed statement neither a chanse to defend my self about such charges, I checked my credit score and found a red flag in it because of this. \n", "\n", - "I have since managed to open an account at another bank but this week I had reason to go to a branch of Chase regarding another issue and a manager using my That is a very serious unsubstantiated accusation and given this information I have no choice but to submit this complaint. \n", + "I am now hoping you may help me with this case. \n", "\n", - "I have no interest in having an account again at a disreputable bank like Chase but I can not and will not accept or tolerate a derogatory accusation be associated with my name. \n", + "Thanks :\n", + "2. Over a year and a half ago we started the process of buying a home. Our mortgage guy sent us to a credit repair co. They got the collection account from Weltman , Weinberg & Reis taken off my credit, because it was unverifiable. Now it is back on my credit. I have credit reports showing the trade line on and then off and now today it is currently on my report. When I called to verify the account with WW & R they sent me a heavily redacted letter verifying absolutely nothing. I would like this unverifiable account taken off my credit and removed permanently. This should not be a loan I have to pay for if there is no verification that it is my debt. Attached are the credit reports and the letter of verification that was sent to me.\n", + "3. I recently disputed ( see attached letter ) with Receivable Management Services an account entry that they placed on my credit report without providing a dunning letter or any correspondence that would have allowed me 30 days to dispute the validity of the alleged debt. To date, I have not received any communication from them. They are blatantly violating my rights by reporting this inaccurate, erroneous, unverifiable entry.\n", "\n", - "I hope that my complaint will hAve the desired effect of removing this derogatory unsubstantiated accusation be removed from my name. However. I will not let this unfair matter stand and Chase ought to know that I have already retained an attorney and will if necessary hold Chase responsible and liable all damage i have incurred now And in the future Enclosed, please find the letter from Chase stating that they were closing my mothers account and a similar letter was received by me too. \n", + "Additionally, this account entry does not reflect a payment history which should be included on any entry that is reflected on my credit report. In my previous communication to them, I specifically requested that they provide an agreement that states their authority to collect on the alleged debt, agreement with signature of the alleged debtor wherein he/she agreed to pay the creditor, alleged account number, date this alleged debt became payable, original delinquency date, and to date to no avail. \n", "\n", - "Also please find a letter from her Doctor stating that she is XXXX XXXX. \n", + "As such, since they have refused to respond to my request and not provide any documentation to substantiate their allegations, coupled with the fact that they did not provide me a dunning letter is grounds for this erroneous, inaccurate, unverifiable entry to be deleted from my credit report.\n", + "4. I accepted service from XXXX XXXX XXXX. The company did not inform me that internet was required. They also told me that the agreement was at will without penalty. They never addressed my needs as a customer. My bill is only {$230.00}. They placed false information regarding my bill with a collection agency who has placed information on my credit report without contacting me or giving me an opportunity to dispute the validity of the debt. The debt is not valid. The actions are unlawful and I am requesting that the actions of this collection agency be reported to the Federal Trade Commission.\n", + "5. I have continued to submit an investigation for a Bankruptcy place on my credit report. I have been trying to get this removed because it was place on my credit report in error and inaccurate. ALL THREE CREDIT BUREAUS have continue to ignore the information proving this was place in error and fail to properly investigate the dispute I have place in their office. \n", "\n", - "Thank you. \n", + "1. They say they have verified this dispute with XXXX but I have a letter from XXXX stating this was removed because they were unable to verify the accuracy of the bankruptcy. I received this letter on XX/XX/XXXX. XXXX just finished an investigation on XX/XX/2019 stating the verified this with XXXX. \n", + "2.Experian Open the dispute on XX/XX/2019 and closed it on XX/XX/19 stated they verified with XXXX and the Bankruptcy court and I have a letter From XXXX stating they could not verify the accuracy of this dispute. I also, have a letter from the court house stating they do not verify information with the credit bureaus How could be this be on my XXXX file when XXXX has removed this item. \n", + "3. XXXX open and investigation XX/XX/2019 and closed it XX/XX/2019 No way they properly investigation I have submitted all information to dispute the inaccurate information. Please do a proper investigation. \n", "\n", - "XXXX XXXX\n", - "3. U.S. Bank sent two letters containing Visa Debit Cards to our address on XX/XX/2021. One Visa Debit Card is in the name of XXXX XXXX and one Visa Debit Card is in the name of XXXX XXXX. These cards supposedly link to existing checking accounts at U.S. Bank. However : ( 1 ) Neither of us have existing checking accounts at U.S. Bank, ( 2 ) Neither of us solicited a bank account at U.S. Bank, and ( 3 ) Neither of us solicited a Visa Debit Card. We have attempted to call U.S. Bank at the phone numbers provided in the letters but are only able to access an automated system which will not proceed without us establishing accounts and activating these cards. We are concerned here that one of two things has happened : either ( 1 ) we are victims of identity theft and some third party is trying to establish accounts in our name, or ( 2 ) U.S. Bank is engaged in bank fraud. In either case, we request the assistance of the Consumer Financial Protection Bureau. Thank you.\n", - "4. I contacted my bank over 3 times about this amount, the first two times I spoke to gentleman that agreed with me that I didnt get back a certain amount of dollars back, I did the math and they refuse to see that I do not owe this amount because I never had it in the first place. I wrote out all my charges and connected it to the charges made back from the consumer and I was missing XXXX, I called XXXX they said they gave it all back which is not their fault because they showed me proof. Along the lines Capital One does not want to take responsibility for the missing money. I have wrote everything out and then its not adding up, they keep saying that they did a charge back twice which is incorrect. My balance was at XXXX before I made this purchase and it shouldve been returned back to XXXX because I return all the items and nothing is in my possession. I have proof that I returned everything.\n", - "5. CB INDIGO ( Bank ) XX/XX/2022 I just recently got off the phone with the company and they wont put in a request of removal of a fraudulent hard inquiry from Insigo Mastercard to XXXX. They dont even have my information on file, I called 3 times most of them are lazy and was giving me a hard time.\n", + "XXXX, Experian, and XXXX please do a proper investigation under 611 of the FCRA thank you very much I have attached the letter proving this this is not on my XXXX consumer report and a letter form the court house stating they do not report information to the credit bureaus from the XXXX XXXX XXXX, Clerk of Court United State Bankruptcy Court on dated XX/XX/2019 I have summited it to the credit bureaus to be ignored. I have as for a description of my investigation by section 611 of the FCRA and the information from the investigation is inaccurate.\n", "\n" ] } @@ -1613,7 +1633,7 @@ { "data": { "text/html": [ - "Query job a7ce86a7-3a18-47b9-a46f-98dbe6a5a339 is DONE. 0 Bytes processed. Open Job" + "Query job a069b4a5-5238-4ca8-a6c0-d48781d00f6c is DONE. 0 Bytes processed. Open Job" ], "text/plain": [ "" @@ -1651,7 +1671,7 @@ { "data": { "text/html": [ - "Query job d568c03d-6bbd-4c3e-b087-563b7f5135ed is DONE. 0 Bytes processed. Open Job" + "Query job 63f6e1d0-b0dc-4f5c-a001-5889c28162c5 is DONE. 0 Bytes processed. Open Job" ], "text/plain": [ "" @@ -1663,7 +1683,7 @@ { "data": { "text/html": [ - "Query job 17eaa806-51a4-4ee9-b219-75455d0095a7 is DONE. 8 Bytes processed. Open Job" + "Query job c1c9e28b-ba6d-4485-b892-0bf2428f927c is DONE. 8 Bytes processed. Open Job" ], "text/plain": [ "" @@ -1675,7 +1695,7 @@ { "data": { "text/html": [ - "Query job e6d40ded-691d-4523-94ea-dd8202bd0220 is DONE. 2 Bytes processed. Open Job" + "Query job 67402b3c-eee4-4fe4-aeaf-fb27606ecde7 is DONE. 2 Bytes processed. Open Job" ], "text/plain": [ "" @@ -1687,7 +1707,7 @@ { "data": { "text/html": [ - "Query job 200f0b88-7b6d-417b-a181-a98138e3bc95 is DONE. 193 Bytes processed. Open Job" + "Query job 83166900-0787-4a6d-b822-c3be87990e35 is DONE. 328 Bytes processed. Open Job" ], "text/plain": [ "" @@ -1699,7 +1719,7 @@ { "data": { "text/plain": [ - "'The most obvious difference between the two lists of comments is that list 1 is related to credit reporting disputes and list 2 is a collection of general consumer banking complaints.'" + "'The most obvious difference between the two lists of comments is that the first list contains comments about credit report issues related to mortgages and loans, while the second list contains comments about credit report issues related to other types of debts, such as cell phone bills, collections, and bankruptcies.'" ] }, "execution_count": 19, @@ -1753,7 +1773,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.1" + "version": "3.10.13" } }, "nbformat": 4, From 9e741543ee978a33101a73f7ff1a8ef8925abbd1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Mon, 1 Apr 2024 11:12:38 -0500 Subject: [PATCH 3/4] Revert "feat: Support max_columns in repr and make repr more efficient (#515)" (#554) This reverts commit 54e49cff89bd329852a823cd5cf5c5b41b7f9e32. --- bigframes/core/blocks.py | 42 +++++++-------------- bigframes/core/indexes/index.py | 10 ++--- bigframes/dataframe.py | 66 ++++++++++++++++++++------------- bigframes/series.py | 9 +++-- bigframes/session/__init__.py | 8 +--- 5 files changed, 65 insertions(+), 70 deletions(-) diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py index aab8b1ad4d..11899eef11 100644 --- a/bigframes/core/blocks.py +++ b/bigframes/core/blocks.py @@ -467,23 +467,6 @@ def to_pandas_batches(self): self._copy_index_to_pandas(df) yield df - def download_pandas_preview( - self, max_rows: int - ) -> Tuple[pd.DataFrame, bigquery.QueryJob]: - """Download one page of results and return the query job.""" - dtypes = dict(zip(self.index_columns, self.index.dtypes)) - dtypes.update(zip(self.value_columns, self.dtypes)) - results_iterator, query_job = self.session._execute( - self.expr, sorted=True, max_results=max_rows - ) - arrow_results_iterator = results_iterator.to_arrow_iterable() - arrow_table = next(arrow_results_iterator) - downloaded_df = bigframes.session._io.pandas.arrow_to_pandas( - arrow_table, dtypes - ) - self._copy_index_to_pandas(downloaded_df) - return downloaded_df, query_job - def _copy_index_to_pandas(self, df: pd.DataFrame): """Set the index on pandas DataFrame to match this block. @@ -1314,25 +1297,26 @@ def _forward_slice(self, start: int = 0, stop=None, step: int = 1): # queries. @functools.cache def retrieve_repr_request_results( - self, max_results: int, max_columns: int - ) -> Tuple[pd.DataFrame, Tuple[int, int], bigquery.QueryJob]: + self, max_results: int + ) -> Tuple[pd.DataFrame, int, bigquery.QueryJob]: """ Retrieves a pandas dataframe containing only max_results many rows for use with printing methods. - Returns a tuple of the dataframe preview for printing and the overall number - of rows and columns of the table, as well as the query job used. + Returns a tuple of the dataframe and the overall number of rows of the query. """ - pandas_df, query_job = self.download_pandas_preview(max_results) - row_count = self.session._get_table_row_count(query_job.destination) - column_count = len(self.value_columns) - - formatted_df = pandas_df.set_axis(self.column_labels, axis=1) + # TODO(swast): Select a subset of columns if max_columns is less than the + # number of columns in the schema. + count = self.shape[0] + if count > max_results: + head_block = self.slice(0, max_results) + else: + head_block = self + computed_df, query_job = head_block.to_pandas() + formatted_df = computed_df.set_axis(self.column_labels, axis=1) # we reset the axis and substitute the bf index name for the default formatted_df.index.name = self.index.name - # limit column count - formatted_df = formatted_df.iloc[:, 0:max_columns] - return formatted_df, (row_count, column_count), query_job + return formatted_df, count, query_job def promote_offsets(self, label: Label = None) -> typing.Tuple[Block, str]: result_id = guid.generate_guid() diff --git a/bigframes/core/indexes/index.py b/bigframes/core/indexes/index.py index 48988aaffe..c818b68711 100644 --- a/bigframes/core/indexes/index.py +++ b/bigframes/core/indexes/index.py @@ -205,17 +205,17 @@ def query_job(self) -> Optional[bigquery.QueryJob]: return self._query_job def __repr__(self) -> str: + # TODO(swast): Add a timeout here? If the query is taking a long time, + # maybe we just print the job metadata that we have so far? + # TODO(swast): Avoid downloading the whole series by using job + # metadata, like we do with DataFrame. opts = bigframes.options.display max_results = opts.max_rows - max_columns = opts.max_columns if opts.repr_mode == "deferred": return formatter.repr_query_job(self.query_job) - pandas_df, _, query_job = self._block.retrieve_repr_request_results( - max_results, max_columns - ) + pandas_df, _, query_job = self._block.retrieve_repr_request_results(max_results) self._query_job = query_job - return repr(pandas_df.index) def copy(self, name: Optional[Hashable] = None): diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 1df78dd4cd..066b082490 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -579,16 +579,28 @@ def __setattr__(self, key: str, value): object.__setattr__(self, key, value) def __repr__(self) -> str: - """Converts a DataFrame to a string using pandas dataframe __repr__. + """Converts a DataFrame to a string. Calls to_pandas. - Only represents the first `bigframes.options.display.max_rows` - and `bigframes.options.display.max_columns`. + Only represents the first `bigframes.options.display.max_rows`. """ - if bigframes.options.display.repr_mode == "deferred": + opts = bigframes.options.display + max_results = opts.max_rows + if opts.repr_mode == "deferred": return formatter.repr_query_job(self.query_job) - pandas_df, shape = self._perform_repr_request() - with display_options.pandas_repr(bigframes.options.display): + self._cached() + # TODO(swast): pass max_columns and get the true column count back. Maybe + # get 1 more column than we have requested so that pandas can add the + # ... for us? + pandas_df, row_count, query_job = self._block.retrieve_repr_request_results( + max_results + ) + + self._set_internal_query_job(query_job) + + column_count = len(pandas_df.columns) + + with display_options.pandas_repr(opts): repr_string = repr(pandas_df) # Modify the end of the string to reflect count. @@ -596,40 +608,42 @@ def __repr__(self) -> str: pattern = re.compile("\\[[0-9]+ rows x [0-9]+ columns\\]") if pattern.match(lines[-1]): lines = lines[:-2] - if shape[0] > len(lines) - 1: + + if row_count > len(lines) - 1: lines.append("...") + lines.append("") - lines.append(f"[{shape[0]} rows x {shape[1]} columns]") + lines.append(f"[{row_count} rows x {column_count} columns]") return "\n".join(lines) - def _perform_repr_request(self) -> Tuple[pandas.DataFrame, Tuple[int, int]]: - max_results = bigframes.options.display.max_rows - max_columns = bigframes.options.display.max_columns - self._cached() - pandas_df, shape, query_job = self._block.retrieve_repr_request_results( - max_results, max_columns - ) - self._set_internal_query_job(query_job) - return pandas_df, shape - def _repr_html_(self) -> str: """ Returns an html string primarily for use by notebooks for displaying - a representation of the DataFrame. Displays at most the number of rows - and columns given by `bigframes.options.display.max_rows` and - `bigframes.options.display.max_columns`. + a representation of the DataFrame. Displays 20 rows by default since + many notebooks are not configured for large tables. """ - - if bigframes.options.display.repr_mode == "deferred": + opts = bigframes.options.display + max_results = bigframes.options.display.max_rows + if opts.repr_mode == "deferred": return formatter.repr_query_job_html(self.query_job) - pandas_df, shape = self._perform_repr_request() + self._cached() + # TODO(swast): pass max_columns and get the true column count back. Maybe + # get 1 more column than we have requested so that pandas can add the + # ... for us? + pandas_df, row_count, query_job = self._block.retrieve_repr_request_results( + max_results + ) + + self._set_internal_query_job(query_job) + + column_count = len(pandas_df.columns) - with display_options.pandas_repr(bigframes.options.display): + with display_options.pandas_repr(opts): # _repr_html_ stub is missing so mypy thinks it's a Series. Ignore mypy. html_string = pandas_df._repr_html_() # type:ignore - html_string += f"[{shape[0]} rows x {shape[1]} columns in total]" + html_string += f"[{row_count} rows x {column_count} columns in total]" return html_string def __setitem__(self, key: str, value: SingleItemValue): diff --git a/bigframes/series.py b/bigframes/series.py index f1ac89f514..e7b358c2fe 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -272,16 +272,17 @@ def reset_index( return bigframes.dataframe.DataFrame(block) def __repr__(self) -> str: + # TODO(swast): Add a timeout here? If the query is taking a long time, + # maybe we just print the job metadata that we have so far? + # TODO(swast): Avoid downloading the whole series by using job + # metadata, like we do with DataFrame. opts = bigframes.options.display max_results = opts.max_rows - max_columns = opts.max_columns if opts.repr_mode == "deferred": return formatter.repr_query_job(self.query_job) self._cached() - pandas_df, _, query_job = self._block.retrieve_repr_request_results( - max_results, max_columns - ) + pandas_df, _, query_job = self._block.retrieve_repr_request_results(max_results) self._set_internal_query_job(query_job) return repr(pandas_df.iloc[:, 0]) diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index 6573934f94..ac266da3bd 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -1832,7 +1832,6 @@ def _execute( sorted: bool = True, dry_run=False, col_id_overrides: Mapping[str, str] = {}, - max_results: Optional[int] = None, ) -> tuple[bigquery.table.RowIterator, bigquery.QueryJob]: sql = self._to_sql( array_value, sorted=sorted, col_id_overrides=col_id_overrides @@ -1842,7 +1841,8 @@ def _execute( else: job_config.dry_run = dry_run return self._start_query( - sql=sql, job_config=job_config, max_results=max_results + sql=sql, + job_config=job_config, ) def _peek( @@ -1887,10 +1887,6 @@ def _get_table_size(self, destination_table): table = self.bqclient.get_table(destination_table) return table.num_bytes - def _get_table_row_count(self, destination_table) -> int: - table = self.bqclient.get_table(destination_table) - return table.num_rows - def _rows_to_dataframe( self, row_iterator: bigquery.table.RowIterator, dtypes: Dict ) -> pandas.DataFrame: From 9ac4ed8a374b757b5b19eaa64e8ec4739866c9bd Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Mon, 1 Apr 2024 16:24:29 +0000 Subject: [PATCH 4/4] chore: fix typo `mertics` to `metrics` (#549) * chore: fix typo `mertics` to `metrics` * revert unintended change --- bigframes/ml/metrics/_metrics.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/bigframes/ml/metrics/_metrics.py b/bigframes/ml/metrics/_metrics.py index e8c7400f35..ee86798b33 100644 --- a/bigframes/ml/metrics/_metrics.py +++ b/bigframes/ml/metrics/_metrics.py @@ -19,8 +19,8 @@ import typing from typing import Tuple, Union -import bigframes_vendored.sklearn.metrics._classification as vendored_mertics_classification -import bigframes_vendored.sklearn.metrics._ranking as vendored_mertics_ranking +import bigframes_vendored.sklearn.metrics._classification as vendored_metrics_classification +import bigframes_vendored.sklearn.metrics._ranking as vendored_metrics_ranking import bigframes_vendored.sklearn.metrics._regression as vendored_metrics_regression import numpy as np import pandas as pd @@ -79,7 +79,7 @@ def accuracy_score( return score.sum() -accuracy_score.__doc__ = inspect.getdoc(vendored_mertics_classification.accuracy_score) +accuracy_score.__doc__ = inspect.getdoc(vendored_metrics_classification.accuracy_score) def roc_curve( @@ -149,7 +149,7 @@ def roc_curve( ) -roc_curve.__doc__ = inspect.getdoc(vendored_mertics_ranking.roc_curve) +roc_curve.__doc__ = inspect.getdoc(vendored_metrics_ranking.roc_curve) def roc_auc_score( @@ -171,7 +171,7 @@ def roc_auc_score( return (width_diff * height_avg).sum() -roc_auc_score.__doc__ = inspect.getdoc(vendored_mertics_ranking.roc_auc_score) +roc_auc_score.__doc__ = inspect.getdoc(vendored_metrics_ranking.roc_auc_score) def auc( @@ -185,7 +185,7 @@ def auc( return auc -auc.__doc__ = inspect.getdoc(vendored_mertics_ranking.auc) +auc.__doc__ = inspect.getdoc(vendored_metrics_ranking.auc) def confusion_matrix( @@ -223,7 +223,7 @@ def confusion_matrix( confusion_matrix.__doc__ = inspect.getdoc( - vendored_mertics_classification.confusion_matrix + vendored_metrics_classification.confusion_matrix ) @@ -261,7 +261,7 @@ def recall_score( return recall_score -recall_score.__doc__ = inspect.getdoc(vendored_mertics_classification.recall_score) +recall_score.__doc__ = inspect.getdoc(vendored_metrics_classification.recall_score) def precision_score( @@ -299,7 +299,7 @@ def precision_score( precision_score.__doc__ = inspect.getdoc( - vendored_mertics_classification.precision_score + vendored_metrics_classification.precision_score ) @@ -334,4 +334,4 @@ def f1_score( return f1_score -f1_score.__doc__ = inspect.getdoc(vendored_mertics_classification.f1_score) +f1_score.__doc__ = inspect.getdoc(vendored_metrics_classification.f1_score)