diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index e4d1e54e33a96..9a03948a72487 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -44,7 +44,7 @@ If you are adding an issue, please try to keep it focused on a single, modular b If two issues are related, or blocking, please link them rather than combining them. We will try to keep these issues as up to date as possible, though -with the rapid rate of develop in this field some may get out of date. +with the rapid rate of development in this field some may get out of date. If you notice this happening, please let us know. ### 🙋Getting Help @@ -87,7 +87,7 @@ This will install all requirements for running the package, examples, linting, f ❗Note: If during installation you receive a `WheelFileValidationError` for `debugpy`, please make sure you are running Poetry v1.5.1. This bug was present in older versions of Poetry (e.g. 1.4.1) and has been resolved in newer releases. If you are still seeing this bug on v1.5.1, you may also try disabling "modern installation" (`poetry config installer.modern-installation false`) and re-installing requirements. See [this `debugpy` issue](https://github.com/microsoft/debugpy/issues/1246) for more details. -Now, you should be able to run the common tasks in the following section. To double check, run `make test`, all tests should pass. If they don't you may need to pip install additional dependencies, such as `numexpr` and `openapi_schema_pydantic`. +Now assuming `make` and `pytest` are installed, you should be able to run the common tasks in the following section. To double check, run `make test` under `libs/langchain`, all tests should pass. If they don't, you may need to pip install additional dependencies, such as `numexpr` and `openapi_schema_pydantic`. ## ✅ Common Tasks @@ -134,7 +134,7 @@ We recognize linting can be annoying - if you do not want to do it, please conta ### Spellcheck Spellchecking for this project is done via [codespell](https://github.com/codespell-project/codespell). -Note that `codespell` finds common typos, so could have false-positive (correctly spelled but rarely used) and false-negatives (not finding misspelled) words. +Note that `codespell` finds common typos, so it could have false-positive (correctly spelled but rarely used) and false-negatives (not finding misspelled) words. To check spelling for this project: diff --git a/docs/extras/integrations/chat_loaders/facebook.ipynb b/docs/extras/integrations/chat_loaders/facebook.ipynb index ca4ddce0eae3b..ab0a41ebe073f 100644 --- a/docs/extras/integrations/chat_loaders/facebook.ipynb +++ b/docs/extras/integrations/chat_loaders/facebook.ipynb @@ -571,7 +571,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.2" + "version": "3.10.1" } }, "nbformat": 4, diff --git a/docs/extras/integrations/chat_loaders/gmail.ipynb b/docs/extras/integrations/chat_loaders/gmail.ipynb new file mode 100644 index 0000000000000..30e86d67b9c6e --- /dev/null +++ b/docs/extras/integrations/chat_loaders/gmail.ipynb @@ -0,0 +1,179 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "b3d1705d", + "metadata": {}, + "source": [ + "# GMail\n", + "\n", + "This notebook shows how to load data from GMail. There are many ways you could want to load data from GMail. This loader is currently fairly opinionated in how to do so: it first looks for all messages that you have sent. It then looks for messages where you are responding to a previous email. 
It then fetches that previous email, and creates a training example of that email, followed by your email.\n", + "\n", + "Note that there are clear limitations here. For example, all examples created are only looking at the previous email for context.\n", + "\n", + "To use:\n", + "\n", + "- Set up a Google Developer Account: Go to the Google Developer Console, create a project, and enable the Gmail API for that project. This will give you a credentials.json file that you'll need later.\n", + "\n", + "- Install the Google Client Library: Run the following command to install the Google Client Library:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "84578039", + "metadata": {}, + "outputs": [], + "source": [ + "!pip install --upgrade google-auth google-auth-oauthlib google-auth-httplib2 google-api-python-client" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "be18f796", + "metadata": {}, + "outputs": [], + "source": [ + "import os.path\n", + "import base64\n", + "import json\n", + "import re\n", + "import time\n", + "from google.auth.transport.requests import Request\n", + "from google.oauth2.credentials import Credentials\n", + "from google_auth_oauthlib.flow import InstalledAppFlow\n", + "from googleapiclient.discovery import build\n", + "import logging\n", + "import requests\n", + "\n", + "SCOPES = ['https://www.googleapis.com/auth/gmail.readonly']\n", + "\n", + "\n", + "creds = None\n", + "# The file token.json stores the user's access and refresh tokens, and is\n", + "# created automatically when the authorization flow completes for the first\n", + "# time.\n", + "if os.path.exists('email_token.json'):\n", + " creds = Credentials.from_authorized_user_file('email_token.json', SCOPES)\n", + "# If there are no (valid) credentials available, let the user log in.\n", + "if not creds or not creds.valid:\n", + " if creds and creds.expired and creds.refresh_token:\n", + " creds.refresh(Request())\n", + " else:\n", + " flow = InstalledAppFlow.from_client_secrets_file( \n", + " # your creds file here. 
Please create json file as here https://cloud.google.com/docs/authentication/getting-started\n", + " 'creds.json', SCOPES)\n", + " creds = flow.run_local_server(port=0)\n", + " # Save the credentials for the next run\n", + " with open('email_token.json', 'w') as token:\n", + " token.write(creds.to_json())" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "a2793ba0", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.chat_loaders.gmail import GMailLoader" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "2154597f", + "metadata": {}, + "outputs": [], + "source": [ + "loader = GMailLoader(creds=creds, n=3)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "0b7d11bd", + "metadata": {}, + "outputs": [], + "source": [ + "data = loader.load()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "74764bc7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Sometimes there can be errors which we silently ignore\n", + "len(data)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "d9360a85", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.chat_loaders.utils import (\n", + " map_ai_messages,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "a9646f7a", + "metadata": {}, + "outputs": [], + "source": [ + "# This makes messages sent by hchase@langchain.com the AI Messages\n", + "# This means you will train an LLM to predict as if it's responding as hchase\n", + "training_data = list(map_ai_messages(data, sender=\"Harrison Chase \"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d1a182f0", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.1" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/extras/integrations/llms/llamacpp.ipynb b/docs/extras/integrations/llms/llamacpp.ipynb index 68c8680b05077..b40001d8e02ea 100644 --- a/docs/extras/integrations/llms/llamacpp.ipynb +++ b/docs/extras/integrations/llms/llamacpp.ipynb @@ -171,7 +171,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 1, "metadata": { "tags": [] }, @@ -192,7 +192,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 2, "metadata": { "tags": [] }, @@ -207,7 +207,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 3, "metadata": { "tags": [] }, @@ -397,16 +397,96 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama.cpp: loading model from /Users/rlm/Desktop/Code/llama.cpp/llama-2-13b-chat.ggmlv3.q4_0.bin\n", + "llama_model_load_internal: format = ggjt v3 (latest)\n", + "llama_model_load_internal: n_vocab = 32000\n", + "llama_model_load_internal: n_ctx = 512\n", + "llama_model_load_internal: n_embd = 5120\n", + "llama_model_load_internal: n_mult = 256\n", + "llama_model_load_internal: n_head = 40\n", + 
"llama_model_load_internal: n_head_kv = 40\n", + "llama_model_load_internal: n_layer = 40\n", + "llama_model_load_internal: n_rot = 128\n", + "llama_model_load_internal: n_gqa = 1\n", + "llama_model_load_internal: rnorm_eps = 5.0e-06\n", + "llama_model_load_internal: n_ff = 13824\n", + "llama_model_load_internal: freq_base = 10000.0\n", + "llama_model_load_internal: freq_scale = 1\n", + "llama_model_load_internal: ftype = 2 (mostly Q4_0)\n", + "llama_model_load_internal: model size = 13B\n", + "llama_model_load_internal: ggml ctx size = 0.11 MB\n", + "llama_model_load_internal: mem required = 6983.72 MB (+ 400.00 MB per state)\n", + "llama_new_context_with_model: kv self size = 400.00 MB\n", + "ggml_metal_init: allocating\n", + "ggml_metal_init: loading '/Users/rlm/miniforge3/envs/llama2/lib/python3.9/site-packages/llama_cpp/ggml-metal.metal'\n", + "ggml_metal_init: loaded kernel_add 0x1405ed6b0\n", + "ggml_metal_init: loaded kernel_add_row 0x1405eee00\n", + "ggml_metal_init: loaded kernel_mul 0x1405ee650\n", + "ggml_metal_init: loaded kernel_mul_row 0x1405eda20\n", + "ggml_metal_init: loaded kernel_scale 0x121fc1d80\n", + "ggml_metal_init: loaded kernel_silu 0x121fc1fe0\n", + "ggml_metal_init: loaded kernel_relu 0x121fc2240\n", + "ggml_metal_init: loaded kernel_gelu 0x121fc24e0\n", + "ggml_metal_init: loaded kernel_soft_max 0x121fc2950\n", + "ggml_metal_init: loaded kernel_diag_mask_inf 0x121fc2d60\n", + "ggml_metal_init: loaded kernel_get_rows_f16 0x121fc3160\n", + "ggml_metal_init: loaded kernel_get_rows_q4_0 0x121fc3a20\n", + "ggml_metal_init: loaded kernel_get_rows_q4_1 0x121fc4170\n", + "ggml_metal_init: loaded kernel_get_rows_q2_K 0x121fc4890\n", + "ggml_metal_init: loaded kernel_get_rows_q3_K 0x121fc5010\n", + "ggml_metal_init: loaded kernel_get_rows_q4_K 0x121fc5750\n", + "ggml_metal_init: loaded kernel_get_rows_q5_K 0x121fc5e90\n", + "ggml_metal_init: loaded kernel_get_rows_q6_K 0x121fc65d0\n", + "ggml_metal_init: loaded kernel_rms_norm 0x121fc6d20\n", + "ggml_metal_init: loaded kernel_norm 0x121fc7460\n", + "ggml_metal_init: loaded kernel_mul_mat_f16_f32 0x121fc7dd0\n", + "ggml_metal_init: loaded kernel_mul_mat_q4_0_f32 0x121fc8610\n", + "ggml_metal_init: loaded kernel_mul_mat_q4_1_f32 0x121fc8e50\n", + "ggml_metal_init: loaded kernel_mul_mat_q2_K_f32 0x1405edc80\n", + "ggml_metal_init: loaded kernel_mul_mat_q3_K_f32 0x1405efdc0\n", + "ggml_metal_init: loaded kernel_mul_mat_q4_K_f32 0x140306f30\n", + "ggml_metal_init: loaded kernel_mul_mat_q5_K_f32 0x1403073d0\n", + "ggml_metal_init: loaded kernel_mul_mat_q6_K_f32 0x140307aa0\n", + "ggml_metal_init: loaded kernel_mul_mm_f16_f32 0x140307f80\n", + "ggml_metal_init: loaded kernel_mul_mm_q4_0_f32 0x140308460\n", + "ggml_metal_init: loaded kernel_mul_mm_q4_1_f32 0x140308940\n", + "ggml_metal_init: loaded kernel_mul_mm_q2_K_f32 0x140308e20\n", + "ggml_metal_init: loaded kernel_mul_mm_q3_K_f32 0x140309300\n", + "ggml_metal_init: loaded kernel_mul_mm_q4_K_f32 0x1403097e0\n", + "ggml_metal_init: loaded kernel_mul_mm_q5_K_f32 0x140309cc0\n", + "ggml_metal_init: loaded kernel_mul_mm_q6_K_f32 0x14030a1a0\n", + "ggml_metal_init: loaded kernel_rope 0x14030a400\n", + "ggml_metal_init: loaded kernel_alibi_f32 0x14030aa00\n", + "ggml_metal_init: loaded kernel_cpy_f32_f16 0x14030afd0\n", + "ggml_metal_init: loaded kernel_cpy_f32_f32 0x14030b5a0\n", + "ggml_metal_init: loaded kernel_cpy_f16_f16 0x14030bb70\n", + "ggml_metal_init: recommendedMaxWorkingSetSize = 21845.34 MB\n", + "ggml_metal_init: hasUnifiedMemory = true\n", + "ggml_metal_init: 
maxTransferRate = built-in GPU\n", + "llama_new_context_with_model: compute buffer total size = 91.35 MB\n", + "llama_new_context_with_model: max tensor size = 87.89 MB\n", + "ggml_metal_add_buffer: allocated 'data ' buffer, size = 6984.06 MB, ( 6984.50 / 21845.34)\n", + "ggml_metal_add_buffer: allocated 'eval ' buffer, size = 1.36 MB, ( 6985.86 / 21845.34)\n", + "ggml_metal_add_buffer: allocated 'kv ' buffer, size = 402.00 MB, ( 7387.86 / 21845.34)\n", + "ggml_metal_add_buffer: allocated 'alloc ' buffer, size = 90.02 MB, ( 7477.88 / 21845.34)\n", + "AVX = 0 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 | \n" + ] + } + ], "source": [ "n_gpu_layers = 40 # Change this value based on your model and your GPU VRAM pool.\n", "n_batch = 512 # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.\n", "\n", "# Make sure the model path is correct for your system!\n", "llm = LlamaCpp(\n", - " model_path=\"./ggml-model-q4_0.bin\",\n", + " model_path=\"/Users/rlm/Desktop/Code/llama.cpp/llama-2-13b-chat.ggmlv3.q4_0.bin\",\n", " n_gpu_layers=n_gpu_layers,\n", " n_batch=n_batch,\n", " callback_manager=callback_manager,\n", @@ -416,36 +496,20 @@ }, { "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "llm_chain = LLMChain(prompt=prompt, llm=llm)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, + "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - " We are looking for an NFL team that won the Super Bowl when Justin Bieber (born March 1, 1994) was born. \n", - "\n", - "First, let's look up which year is closest to when Justin Bieber was born:\n", "\n", - "* The year before he was born: 1993\n", - "* The year of his birth: 1994\n", - "* The year after he was born: 1995\n", "\n", - "We want to know what NFL team won the Super Bowl in the year that is closest to when Justin Bieber was born. Therefore, we should look up the NFL team that won the Super Bowl in either 1993 or 1994.\n", + "Justin Bieber was born on March 1, 1994. The Super Bowl is played at the end of the NFL season which runs from September to February.\n", "\n", - "Now let's find out which NFL team did win the Super Bowl in either of those years:\n", + "In 1994, the NFL season ended with Super Bowl XXVIII which was played on January 28th, 1994.\n", "\n", - "* In 1993, the San Francisco 49ers won the Super Bowl against the Dallas Cowboys by a score of 20-16.\n", - "* In 1994, the San Francisco 49ers won the Super Bowl again, this time against the San Diego Chargers by a score of 49-26.\n" + "So, there was no Super Bowl in the year Justin Bieber was born. The Super Bowl has only been around since 1967 and is played annually between the champions of the National Football Conference (NFC) and the American Football Conference (AFC)." 
] }, { @@ -453,25 +517,27 @@ "output_type": "stream", "text": [ "\n", - "llama_print_timings: load time = 238.10 ms\n", - "llama_print_timings: sample time = 84.23 ms / 256 runs ( 0.33 ms per token)\n", - "llama_print_timings: prompt eval time = 238.04 ms / 49 tokens ( 4.86 ms per token)\n", - "llama_print_timings: eval time = 10391.96 ms / 255 runs ( 40.75 ms per token)\n", - "llama_print_timings: total time = 15664.80 ms\n" + "llama_print_timings: load time = 427.90 ms\n", + "llama_print_timings: sample time = 98.36 ms / 133 runs ( 0.74 ms per token, 1352.18 tokens per second)\n", + "llama_print_timings: prompt eval time = 427.83 ms / 45 tokens ( 9.51 ms per token, 105.18 tokens per second)\n", + "llama_print_timings: eval time = 3687.12 ms / 132 runs ( 27.93 ms per token, 35.80 tokens per second)\n", + "llama_print_timings: total time = 4401.84 ms\n" ] }, { "data": { "text/plain": [ - "\" We are looking for an NFL team that won the Super Bowl when Justin Bieber (born March 1, 1994) was born. \\n\\nFirst, let's look up which year is closest to when Justin Bieber was born:\\n\\n* The year before he was born: 1993\\n* The year of his birth: 1994\\n* The year after he was born: 1995\\n\\nWe want to know what NFL team won the Super Bowl in the year that is closest to when Justin Bieber was born. Therefore, we should look up the NFL team that won the Super Bowl in either 1993 or 1994.\\n\\nNow let's find out which NFL team did win the Super Bowl in either of those years:\\n\\n* In 1993, the San Francisco 49ers won the Super Bowl against the Dallas Cowboys by a score of 20-16.\\n* In 1994, the San Francisco 49ers won the Super Bowl again, this time against the San Diego Chargers by a score of 49-26.\\n\"" + "'\\n\\nJustin Bieber was born on March 1, 1994. The Super Bowl is played at the end of the NFL season which runs from September to February.\\n\\nIn 1994, the NFL season ended with Super Bowl XXVIII which was played on January 28th, 1994.\\n\\nSo, there was no Super Bowl in the year Justin Bieber was born. 
The Super Bowl has only been around since 1967 and is played annually between the champions of the National Football Conference (NFC) and the American Football Conference (AFC).'" ] }, - "execution_count": 8, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ + "llm_chain = LLMChain(prompt=prompt, llm=llm)\n", + "\n", "question = \"What NFL team won the Super Bowl in the year Justin Bieber was born?\"\n", "\n", "llm_chain.run(question)" @@ -497,16 +563,96 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama.cpp: loading model from /Users/rlm/Desktop/Code/llama.cpp/llama-2-13b-chat.ggmlv3.q4_0.bin\n", + "llama_model_load_internal: format = ggjt v3 (latest)\n", + "llama_model_load_internal: n_vocab = 32000\n", + "llama_model_load_internal: n_ctx = 512\n", + "llama_model_load_internal: n_embd = 5120\n", + "llama_model_load_internal: n_mult = 256\n", + "llama_model_load_internal: n_head = 40\n", + "llama_model_load_internal: n_head_kv = 40\n", + "llama_model_load_internal: n_layer = 40\n", + "llama_model_load_internal: n_rot = 128\n", + "llama_model_load_internal: n_gqa = 1\n", + "llama_model_load_internal: rnorm_eps = 5.0e-06\n", + "llama_model_load_internal: n_ff = 13824\n", + "llama_model_load_internal: freq_base = 10000.0\n", + "llama_model_load_internal: freq_scale = 1\n", + "llama_model_load_internal: ftype = 2 (mostly Q4_0)\n", + "llama_model_load_internal: model size = 13B\n", + "llama_model_load_internal: ggml ctx size = 0.11 MB\n", + "llama_model_load_internal: mem required = 6983.72 MB (+ 400.00 MB per state)\n", + "llama_new_context_with_model: kv self size = 400.00 MB\n", + "ggml_metal_init: allocating\n", + "ggml_metal_init: loading '/Users/rlm/miniforge3/envs/llama2/lib/python3.9/site-packages/llama_cpp/ggml-metal.metal'\n", + "ggml_metal_init: loaded kernel_add 0x113b42480\n", + "ggml_metal_init: loaded kernel_add_row 0x113b44210\n", + "ggml_metal_init: loaded kernel_mul 0x113b43a80\n", + "ggml_metal_init: loaded kernel_mul_row 0x113b44880\n", + "ggml_metal_init: loaded kernel_scale 0x113b45010\n", + "ggml_metal_init: loaded kernel_silu 0x113b45650\n", + "ggml_metal_init: loaded kernel_relu 0x113b427f0\n", + "ggml_metal_init: loaded kernel_gelu 0x113b46300\n", + "ggml_metal_init: loaded kernel_soft_max 0x113b46980\n", + "ggml_metal_init: loaded kernel_diag_mask_inf 0x113b46e20\n", + "ggml_metal_init: loaded kernel_get_rows_f16 0x113b47860\n", + "ggml_metal_init: loaded kernel_get_rows_q4_0 0x113b48010\n", + "ggml_metal_init: loaded kernel_get_rows_q4_1 0x113b48880\n", + "ggml_metal_init: loaded kernel_get_rows_q2_K 0x113b48f70\n", + "ggml_metal_init: loaded kernel_get_rows_q3_K 0x113b49e00\n", + "ggml_metal_init: loaded kernel_get_rows_q4_K 0x113b4a530\n", + "ggml_metal_init: loaded kernel_get_rows_q5_K 0x113b4ac70\n", + "ggml_metal_init: loaded kernel_get_rows_q6_K 0x113b4b3b0\n", + "ggml_metal_init: loaded kernel_rms_norm 0x113b4bb00\n", + "ggml_metal_init: loaded kernel_norm 0x113b4c1a0\n", + "ggml_metal_init: loaded kernel_mul_mat_f16_f32 0x113b4cba0\n", + "ggml_metal_init: loaded kernel_mul_mat_q4_0_f32 0x113b4d360\n", + "ggml_metal_init: loaded kernel_mul_mat_q4_1_f32 0x113b4dba0\n", + "ggml_metal_init: loaded kernel_mul_mat_q2_K_f32 0x113b4e560\n", + "ggml_metal_init: loaded kernel_mul_mat_q3_K_f32 0x113b4ed10\n", + "ggml_metal_init: loaded kernel_mul_mat_q4_K_f32 0x113b4f580\n", + 
"ggml_metal_init: loaded kernel_mul_mat_q5_K_f32 0x113b4fdc0\n", + "ggml_metal_init: loaded kernel_mul_mat_q6_K_f32 0x113b50740\n", + "ggml_metal_init: loaded kernel_mul_mm_f16_f32 0x113b51250\n", + "ggml_metal_init: loaded kernel_mul_mm_q4_0_f32 0x113b51a80\n", + "ggml_metal_init: loaded kernel_mul_mm_q4_1_f32 0x113b522b0\n", + "ggml_metal_init: loaded kernel_mul_mm_q2_K_f32 0x113b52ae0\n", + "ggml_metal_init: loaded kernel_mul_mm_q3_K_f32 0x113b53310\n", + "ggml_metal_init: loaded kernel_mul_mm_q4_K_f32 0x113b53b40\n", + "ggml_metal_init: loaded kernel_mul_mm_q5_K_f32 0x113b54370\n", + "ggml_metal_init: loaded kernel_mul_mm_q6_K_f32 0x113b54ba0\n", + "ggml_metal_init: loaded kernel_rope 0x113b551a0\n", + "ggml_metal_init: loaded kernel_alibi_f32 0x113b55b10\n", + "ggml_metal_init: loaded kernel_cpy_f32_f16 0x113b56450\n", + "ggml_metal_init: loaded kernel_cpy_f32_f32 0x113b56dc0\n", + "ggml_metal_init: loaded kernel_cpy_f16_f16 0x113b576b0\n", + "ggml_metal_init: recommendedMaxWorkingSetSize = 21845.34 MB\n", + "ggml_metal_init: hasUnifiedMemory = true\n", + "ggml_metal_init: maxTransferRate = built-in GPU\n", + "llama_new_context_with_model: compute buffer total size = 91.35 MB\n", + "llama_new_context_with_model: max tensor size = 87.89 MB\n", + "ggml_metal_add_buffer: allocated 'data ' buffer, size = 6984.06 MB, ( 6984.50 / 21845.34)\n", + "ggml_metal_add_buffer: allocated 'eval ' buffer, size = 1.36 MB, ( 6985.86 / 21845.34)\n", + "ggml_metal_add_buffer: allocated 'kv ' buffer, size = 402.00 MB, ( 7387.86 / 21845.34)\n", + "ggml_metal_add_buffer: allocated 'alloc ' buffer, size = 90.02 MB, ( 7477.88 / 21845.34)AVX = 0 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 | \n", + "\n" + ] + } + ], "source": [ "n_gpu_layers = 1 # Metal set to 1 is enough.\n", "n_batch = 512 # Should be between 1 and n_ctx, consider the amount of RAM of your Apple Silicon Chip.\n", "\n", "# Make sure the model path is correct for your system!\n", "llm = LlamaCpp(\n", - " model_path=\"./ggml-model-q4_0.bin\",\n", + " model_path=\"/Users/rlm/Desktop/Code/llama.cpp/llama-2-13b-chat.ggmlv3.q4_0.bin\",\n", " n_gpu_layers=n_gpu_layers,\n", " n_batch=n_batch,\n", " f16_kv=True, # MUST set to True, otherwise you will run into problem after a couple of calls\n", @@ -531,6 +677,349 @@ "\n", "For the first call to the LLM, the performance may be slow due to the model compilation in Metal GPU." ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Grammars\n", + "\n", + "\n", + "We can specify [grammars](https://github.com/ggerganov/llama.cpp/blob/master/grammars/README.md) to constrain model outputs.\n", + "\n", + "Supply the path to the specifed `json.gbnf` file." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama.cpp: loading model from /Users/rlm/Desktop/Code/llama.cpp/llama-2-13b-chat.ggmlv3.q4_0.bin\n", + "llama_model_load_internal: format = ggjt v3 (latest)\n", + "llama_model_load_internal: n_vocab = 32000\n", + "llama_model_load_internal: n_ctx = 512\n", + "llama_model_load_internal: n_embd = 5120\n", + "llama_model_load_internal: n_mult = 256\n", + "llama_model_load_internal: n_head = 40\n", + "llama_model_load_internal: n_head_kv = 40\n", + "llama_model_load_internal: n_layer = 40\n", + "llama_model_load_internal: n_rot = 128\n", + "llama_model_load_internal: n_gqa = 1\n", + "llama_model_load_internal: rnorm_eps = 5.0e-06\n", + "llama_model_load_internal: n_ff = 13824\n", + "llama_model_load_internal: freq_base = 10000.0\n", + "llama_model_load_internal: freq_scale = 1\n", + "llama_model_load_internal: ftype = 2 (mostly Q4_0)\n", + "llama_model_load_internal: model size = 13B\n", + "llama_model_load_internal: ggml ctx size = 0.11 MB\n", + "llama_model_load_internal: mem required = 6983.72 MB (+ 400.00 MB per state)\n", + "llama_new_context_with_model: kv self size = 400.00 MB\n", + "ggml_metal_init: allocating\n", + "ggml_metal_init: loading '/Users/rlm/miniforge3/envs/llama2/lib/python3.9/site-packages/llama_cpp/ggml-metal.metal'\n", + "ggml_metal_init: loaded kernel_add 0x1516fb530\n", + "ggml_metal_init: loaded kernel_add_row 0x1516fb790\n", + "ggml_metal_init: loaded kernel_mul 0x1516fb9f0\n", + "ggml_metal_init: loaded kernel_mul_row 0x1516fbc50\n", + "ggml_metal_init: loaded kernel_scale 0x1516fbeb0\n", + "ggml_metal_init: loaded kernel_silu 0x1516fc110\n", + "ggml_metal_init: loaded kernel_relu 0x1516fc370\n", + "ggml_metal_init: loaded kernel_gelu 0x1516fc5d0\n", + "ggml_metal_init: loaded kernel_soft_max 0x1516fc830\n", + "ggml_metal_init: loaded kernel_diag_mask_inf 0x1516fca90\n", + "ggml_metal_init: loaded kernel_get_rows_f16 0x1516fccf0\n", + "ggml_metal_init: loaded kernel_get_rows_q4_0 0x1516fcf50\n", + "ggml_metal_init: loaded kernel_get_rows_q4_1 0x1516fd1b0\n", + "ggml_metal_init: loaded kernel_get_rows_q2_K 0x1516fd410\n", + "ggml_metal_init: loaded kernel_get_rows_q3_K 0x1516fd670\n", + "ggml_metal_init: loaded kernel_get_rows_q4_K 0x1516fd8d0\n", + "ggml_metal_init: loaded kernel_get_rows_q5_K 0x1516fdb30\n", + "ggml_metal_init: loaded kernel_get_rows_q6_K 0x1516fdd90\n", + "ggml_metal_init: loaded kernel_rms_norm 0x1516fdff0\n", + "ggml_metal_init: loaded kernel_norm 0x1516fe250\n", + "ggml_metal_init: loaded kernel_mul_mat_f16_f32 0x1516fe4b0\n", + "ggml_metal_init: loaded kernel_mul_mat_q4_0_f32 0x1516fe710\n", + "ggml_metal_init: loaded kernel_mul_mat_q4_1_f32 0x1516fe970\n", + "ggml_metal_init: loaded kernel_mul_mat_q2_K_f32 0x1516febd0\n", + "ggml_metal_init: loaded kernel_mul_mat_q3_K_f32 0x1516fee30\n", + "ggml_metal_init: loaded kernel_mul_mat_q4_K_f32 0x1516ff090\n", + "ggml_metal_init: loaded kernel_mul_mat_q5_K_f32 0x1516ff2f0\n", + "ggml_metal_init: loaded kernel_mul_mat_q6_K_f32 0x1516ff550\n", + "ggml_metal_init: loaded kernel_mul_mm_f16_f32 0x1516ff7b0\n", + "ggml_metal_init: loaded kernel_mul_mm_q4_0_f32 0x121fce650\n", + "ggml_metal_init: loaded kernel_mul_mm_q4_1_f32 0x121fcdce0\n", + "ggml_metal_init: loaded kernel_mul_mm_q2_K_f32 0x121fceab0\n", + "ggml_metal_init: loaded kernel_mul_mm_q3_K_f32 0x121fced10\n", + "ggml_metal_init: loaded kernel_mul_mm_q4_K_f32 0x121fcef70\n", + 
"ggml_metal_init: loaded kernel_mul_mm_q5_K_f32 0x121fcf1d0\n", + "ggml_metal_init: loaded kernel_mul_mm_q6_K_f32 0x121fcf430\n", + "ggml_metal_init: loaded kernel_rope 0x121fcf690\n", + "ggml_metal_init: loaded kernel_alibi_f32 0x121fcf8f0\n", + "ggml_metal_init: loaded kernel_cpy_f32_f16 0x121fcfb50\n", + "ggml_metal_init: loaded kernel_cpy_f32_f32 0x121fcfdb0\n", + "ggml_metal_init: loaded kernel_cpy_f16_f16 0x121fd0010\n", + "ggml_metal_init: recommendedMaxWorkingSetSize = 21845.34 MB\n", + "ggml_metal_init: hasUnifiedMemory = true\n", + "ggml_metal_init: maxTransferRate = built-in GPU\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "root ::= object \n", + "object ::= [{] ws object_11 [}] \n", + "value ::= object | array | string | number | boolean | [n] [u] [l] [l] \n", + "array ::= [[] ws array_15 []] \n", + "string ::= [\"] string_18 [\"] ws \n", + "number ::= number_19 number_20 ws \n", + "boolean ::= boolean_21 ws \n", + "ws ::= ws_23 \n", + "object_8 ::= string [:] ws value object_10 \n", + "object_9 ::= [,] ws string [:] ws value \n", + "object_10 ::= object_9 object_10 | \n", + "object_11 ::= object_8 | \n", + "array_12 ::= value array_14 \n", + "array_13 ::= [,] ws value \n", + "array_14 ::= array_13 array_14 | \n", + "array_15 ::= array_12 | \n", + "string_16 ::= [^\"\\] | [\\] string_17 \n", + "string_17 ::= [\"\\/bfnrt] | [u] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] \n", + "string_18 ::= string_16 string_18 | \n", + "number_19 ::= [-] | \n", + "number_20 ::= [0-9] number_20 | [0-9] \n", + "boolean_21 ::= [t] [r] [u] [e] | [f] [a] [l] [s] [e] \n", + "ws_22 ::= [ ] ws \n", + "ws_23 ::= ws_22 | \n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_new_context_with_model: compute buffer total size = 91.35 MB\n", + "llama_new_context_with_model: max tensor size = 87.89 MB\n", + "ggml_metal_add_buffer: allocated 'data ' buffer, size = 6984.06 MB, (14468.72 / 21845.34)\n", + "ggml_metal_add_buffer: allocated 'eval ' buffer, size = 1.36 MB, (14470.08 / 21845.34)\n", + "ggml_metal_add_buffer: allocated 'kv ' buffer, size = 402.00 MB, (14872.08 / 21845.34)\n", + "ggml_metal_add_buffer: allocated 'alloc ' buffer, size = 90.02 MB, (14962.09 / 21845.34)\n", + "AVX = 0 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 | \n", + "from_string grammar:\n", + "\n" + ] + } + ], + "source": [ + "n_gpu_layers = 1 \n", + "n_batch = 512\n", + "llm = LlamaCpp(\n", + " model_path=\"/Users/rlm/Desktop/Code/llama.cpp/llama-2-13b-chat.ggmlv3.q4_0.bin\",\n", + " n_gpu_layers=n_gpu_layers,\n", + " n_batch=n_batch,\n", + " f16_kv=True, # MUST set to True, otherwise you will run into problem after a couple of calls\n", + " callback_manager=callback_manager,\n", + " verbose=True,\n", + " grammar_path=\"/Users/rlm/Desktop/Code/langchain-main/langchain/libs/langchain/langchain/llms/grammars/json.gbnf\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Error in LangChainTracer.on_llm_start callback: ctypes objects containing pointers cannot be pickled\n", + "Exception ignored in: \n", + "Traceback (most recent call last):\n", + " File \"/Users/rlm/miniforge3/envs/llama2/lib/python3.9/site-packages/llama_cpp/llama_grammar.py\", line 46, in __del__\n", + " if self.grammar is not None:\n", + "AttributeError: 
'LlamaGrammar' object has no attribute 'grammar'\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\"name\": \"John Doe\", \"age\": 30, \"gender\": \"male\"}" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "llama_print_timings: load time = 317.62 ms\n", + "llama_print_timings: sample time = 141.83 ms / 22 runs ( 6.45 ms per token, 155.11 tokens per second)\n", + "llama_print_timings: prompt eval time = 316.89 ms / 9 tokens ( 35.21 ms per token, 28.40 tokens per second)\n", + "llama_print_timings: eval time = 575.93 ms / 21 runs ( 27.43 ms per token, 36.46 tokens per second)\n", + "llama_print_timings: total time = 1087.31 ms\n", + "Error in LangChainTracer.on_llm_end callback: ctypes objects containing pointers cannot be pickled\n", + "Exception ignored in: \n", + "Traceback (most recent call last):\n", + " File \"/Users/rlm/miniforge3/envs/llama2/lib/python3.9/site-packages/llama_cpp/llama_grammar.py\", line 46, in __del__\n", + " if self.grammar is not None:\n", + "AttributeError: 'LlamaGrammar' object has no attribute 'grammar'\n" + ] + } + ], + "source": [ + "result=llm(\"Describe a person in JSON format:\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'John Doe'" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "eval(result)[\"name\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can also try `list.gbnf`." + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama.cpp: loading model from /home/eryk/deepsense/llama-2-7b.ggmlv3.q4_0.bin\n", + "llama_model_load_internal: format = ggjt v3 (latest)\n", + "llama_model_load_internal: n_vocab = 32000\n", + "llama_model_load_internal: n_ctx = 512\n", + "llama_model_load_internal: n_embd = 4096\n", + "llama_model_load_internal: n_mult = 256\n", + "llama_model_load_internal: n_head = 32\n", + "llama_model_load_internal: n_head_kv = 32\n", + "llama_model_load_internal: n_layer = 32\n", + "llama_model_load_internal: n_rot = 128\n", + "llama_model_load_internal: n_gqa = 1\n", + "llama_model_load_internal: rnorm_eps = 5.0e-06\n", + "llama_model_load_internal: n_ff = 11008\n", + "llama_model_load_internal: freq_base = 10000.0\n", + "llama_model_load_internal: freq_scale = 1\n", + "llama_model_load_internal: ftype = 2 (mostly Q4_0)\n", + "llama_model_load_internal: model size = 7B\n", + "llama_model_load_internal: ggml ctx size = 0.08 MB\n", + "llama_model_load_internal: mem required = 3615.73 MB (+ 256.00 MB per state)\n", + "llama_new_context_with_model: kv self size = 256.00 MB\n", + "AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | VSX = 0 | \n", + "llama_new_context_with_model: compute buffer total size = 71.84 MB\n", + "from_string grammar:\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "root ::= [[] items []] EOF \n", + "items ::= item items_7 \n", + "EOF ::= [] \n", + "item ::= string \n", + "items_4 ::= [,] items_6 item \n", + "ws ::= [ ] \n", + "items_6 ::= ws items_6 | \n", + "items_7 ::= items_4 items_7 | \n", + "string ::= [\"] word string_12 [\"] string_13 \n", + "word ::= word_14 \n", + "string_10 ::= string_11 word \n", + "string_11 ::= 
ws string_11 | ws \n", + "string_12 ::= string_10 string_12 | \n", + "string_13 ::= ws string_13 | \n", + "word_14 ::= [a-zA-Z] word_14 | [a-zA-Z] \n" + ] + } + ], + "source": [ + "n_gpu_layers = 1 \n", + "n_batch = 512\n", + "llm = LlamaCpp(\n", + " model_path=\"/home/eryk/deepsense/llama-2-7b.ggmlv3.q4_0.bin\",\n", + " n_gpu_layers=n_gpu_layers,\n", + " n_batch=n_batch,\n", + " f16_kv=True, # MUST set to True, otherwise you will run into problem after a couple of calls\n", + " callback_manager=callback_manager,\n", + " verbose=True,\n", + " grammar_path=\"/home/eryk/deepsense/langchain/libs/langchain/langchain/llms/grammars/list.gbnf\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[\"Jane Eyre\" , \"Sense and Sensibility\" , \"A Tale of Two Cities\"]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "llama_print_timings: load time = 1079.21 ms\n", + "llama_print_timings: sample time = 225.57 ms / 29 runs ( 7.78 ms per token, 128.56 tokens per second)\n", + "llama_print_timings: prompt eval time = 1078.34 ms / 11 tokens ( 98.03 ms per token, 10.20 tokens per second)\n", + "llama_print_timings: eval time = 4389.99 ms / 28 runs ( 156.79 ms per token, 6.38 tokens per second)\n", + "llama_print_timings: total time = 5807.84 ms\n" + ] + } + ], + "source": [ + "result=llm(\"List of top-3 my favourite books:\")" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['Jane Eyre', 'Sense and Sensibility', 'A Tale of Two Cities']" + ] + }, + "execution_count": 85, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "eval(result)" + ] } ], "metadata": { @@ -549,7 +1038,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.9" + "version": "3.9.16" } }, "nbformat": 4, diff --git a/docs/extras/use_cases/more/graph/graph_falkordb_qa.ipynb b/docs/extras/use_cases/more/graph/graph_falkordb_qa.ipynb new file mode 100644 index 0000000000000..c43e232975028 --- /dev/null +++ b/docs/extras/use_cases/more/graph/graph_falkordb_qa.ipynb @@ -0,0 +1,154 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# FalkorDBQAChain" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This notebook shows how to use LLMs to provide a natural language interface to FalkorDB database.\n", + "\n", + "FalkorDB is a low latency property graph database management system. You can simply run its docker locally:\n", + "\n", + "```bash\n", + "docker run -p 6379:6379 -it --rm falkordb/falkordb:edge\n", + "```\n", + "\n", + "Once launched, you can simply start creating a database on the local machine and connect to it." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.chat_models import ChatOpenAI\n", + "from langchain.graphs import FalkorDBGraph\n", + "from langchain.chains import FalkorDBQAChain" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "graph = FalkorDBGraph(database=\"movies\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "graph.query(\n", + " \"\"\"\n", + "MERGE (m:Movie {name:\"Top Gun\"})\n", + "WITH m\n", + "UNWIND [\"Tom Cruise\", \"Val Kilmer\", \"Anthony Edwards\", \"Meg Ryan\"] AS actor\n", + "MERGE (a:Actor {name:actor})\n", + "MERGE (a)-[:ACTED_IN]->(m)\n", + "\"\"\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "graph.refresh_schema()\n", + "import os\n", + "os.environ['OPENAI_API_KEY']='API_KEY_HERE'\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "chain = FalkorDBQAChain.from_llm(\n", + " ChatOpenAI(temperature=0), graph=graph, verbose=True\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new FalkorDBQAChain chain...\u001b[0m\n", + "Generated Cypher:\n", + "\u001b[32;1m\u001b[1;3mMATCH (:Movie {title: 'Top Gun'})<-[:ACTED_IN]-(actor:Person)\n", + "RETURN actor.name AS output\u001b[0m\n", + "Full Context:\n", + "\u001b[32;1m\u001b[1;3m[]\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "'The actor who played in Top Gun is Tom Cruise.'" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chain.run(\"Who played in Top Gun?\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/libs/experimental/langchain_experimental/sql/base.py b/libs/experimental/langchain_experimental/sql/base.py index fcc664a75d0a2..5773e45561553 100644 --- a/libs/experimental/langchain_experimental/sql/base.py +++ b/libs/experimental/langchain_experimental/sql/base.py @@ -29,6 +29,15 @@ class SQLDatabaseChain(Chain): from langchain import OpenAI, SQLDatabase db = SQLDatabase(...) db_chain = SQLDatabaseChain.from_llm(OpenAI(), db) + + *Security note*: Make sure that the database connection uses credentials + that are narrowly-scoped to only include the permissions this chain needs. + Failure to do so may result in data corruption or loss, since this chain may + attempt commands like `DROP TABLE` or `INSERT` if appropriately prompted. + The best way to guard against such negative outcomes is to (as appropriate) + limit the permissions granted to the credentials used with this chain. 
+ This issue shows an example negative outcome if these steps are not taken: + https://github.com/langchain-ai/langchain/issues/5923 """ llm_chain: LLMChain @@ -49,7 +58,7 @@ class SQLDatabaseChain(Chain): return_direct: bool = False """Whether or not to return the result of querying the SQL table directly.""" use_query_checker: bool = False - """Whether or not the query checker tool should be used to attempt + """Whether or not the query checker tool should be used to attempt to fix the initial SQL from the LLM.""" query_checker_prompt: Optional[BasePromptTemplate] = None """The prompt template that should be used by the query checker""" @@ -197,6 +206,17 @@ def from_llm( prompt: Optional[BasePromptTemplate] = None, **kwargs: Any, ) -> SQLDatabaseChain: + """Create a SQLDatabaseChain from an LLM and a database connection. + + *Security note*: Make sure that the database connection uses credentials + that are narrowly-scoped to only include the permissions this chain needs. + Failure to do so may result in data corruption or loss, since this chain may + attempt commands like `DROP TABLE` or `INSERT` if appropriately prompted. + The best way to guard against such negative outcomes is to (as appropriate) + limit the permissions granted to the credentials used with this chain. + This issue shows an example negative outcome if these steps are not taken: + https://github.com/langchain-ai/langchain/issues/5923 + """ prompt = prompt or SQL_PROMPTS.get(db.dialect, PROMPT) llm_chain = LLMChain(llm=llm, prompt=prompt) return cls(llm_chain=llm_chain, database=db, **kwargs) diff --git a/libs/langchain/langchain/callbacks/tracers/evaluation.py b/libs/langchain/langchain/callbacks/tracers/evaluation.py index 877836b0a5c75..5b178c84e6ecc 100644 --- a/libs/langchain/langchain/callbacks/tracers/evaluation.py +++ b/libs/langchain/langchain/callbacks/tracers/evaluation.py @@ -3,10 +3,11 @@ import logging from concurrent.futures import Future, ThreadPoolExecutor, wait -from typing import Any, List, Optional, Sequence, Set, Union +from typing import Any, Dict, List, Optional, Sequence, Set, Union from uuid import UUID -from langsmith import Client, RunEvaluator +import langsmith +from langsmith import schemas as langsmith_schemas from langchain.callbacks.manager import tracing_v2_enabled from langchain.callbacks.tracers.base import BaseTracer @@ -62,13 +63,13 @@ class EvaluatorCallbackHandler(BaseTracer): The LangSmith project name to be organize eval chain runs under. """ - name: str = "evaluator_callback_handler" + name = "evaluator_callback_handler" def __init__( self, - evaluators: Sequence[RunEvaluator], + evaluators: Sequence[langsmith.RunEvaluator], max_workers: Optional[int] = None, - client: Optional[Client] = None, + client: Optional[langsmith.Client] = None, example_id: Optional[Union[UUID, str]] = None, skip_unfinished: bool = True, project_name: Optional[str] = "evaluators", @@ -86,10 +87,11 @@ def __init__( self.futures: Set[Future] = set() self.skip_unfinished = skip_unfinished self.project_name = project_name + self.logged_feedback: Dict[str, List[langsmith_schemas.Feedback]] = {} global _TRACERS _TRACERS.append(self) - def _evaluate_in_project(self, run: Run, evaluator: RunEvaluator) -> None: + def _evaluate_in_project(self, run: Run, evaluator: langsmith.RunEvaluator) -> None: """Evaluate the run in the project. 
Parameters @@ -102,11 +104,11 @@ def _evaluate_in_project(self, run: Run, evaluator: RunEvaluator) -> None: """ try: if self.project_name is None: - self.client.evaluate_run(run, evaluator) + feedback = self.client.evaluate_run(run, evaluator) with tracing_v2_enabled( project_name=self.project_name, tags=["eval"], client=self.client ): - self.client.evaluate_run(run, evaluator) + feedback = self.client.evaluate_run(run, evaluator) except Exception as e: logger.error( f"Error evaluating run {run.id} with " @@ -114,6 +116,8 @@ def _evaluate_in_project(self, run: Run, evaluator: RunEvaluator) -> None: exc_info=True, ) raise e + example_id = str(run.reference_example_id) + self.logged_feedback.setdefault(example_id, []).append(feedback) def _persist_run(self, run: Run) -> None: """Run the evaluator on the run. diff --git a/libs/langchain/langchain/chains/__init__.py b/libs/langchain/langchain/chains/__init__.py index 8564d6851a3e2..4bb5242729b93 100644 --- a/libs/langchain/langchain/chains/__init__.py +++ b/libs/langchain/langchain/chains/__init__.py @@ -36,6 +36,7 @@ from langchain.chains.graph_qa.arangodb import ArangoGraphQAChain from langchain.chains.graph_qa.base import GraphQAChain from langchain.chains.graph_qa.cypher import GraphCypherQAChain +from langchain.chains.graph_qa.falkordb import FalkorDBQAChain from langchain.chains.graph_qa.hugegraph import HugeGraphQAChain from langchain.chains.graph_qa.kuzu import KuzuQAChain from langchain.chains.graph_qa.nebulagraph import NebulaGraphQAChain @@ -85,6 +86,7 @@ "ConstitutionalChain", "ConversationChain", "ConversationalRetrievalChain", + "FalkorDBQAChain", "FlareChain", "GraphCypherQAChain", "GraphQAChain", diff --git a/libs/langchain/langchain/chains/graph_qa/falkordb.py b/libs/langchain/langchain/chains/graph_qa/falkordb.py new file mode 100644 index 0000000000000..b5ad8da4876b9 --- /dev/null +++ b/libs/langchain/langchain/chains/graph_qa/falkordb.py @@ -0,0 +1,141 @@ +"""Question answering over a graph.""" +from __future__ import annotations + +import re +from typing import Any, Dict, List, Optional + +from langchain.base_language import BaseLanguageModel +from langchain.callbacks.manager import CallbackManagerForChainRun +from langchain.chains.base import Chain +from langchain.chains.graph_qa.prompts import CYPHER_GENERATION_PROMPT, CYPHER_QA_PROMPT +from langchain.chains.llm import LLMChain +from langchain.graphs import FalkorDBGraph +from langchain.pydantic_v1 import Field +from langchain.schema import BasePromptTemplate + +INTERMEDIATE_STEPS_KEY = "intermediate_steps" + + +def extract_cypher(text: str) -> str: + """ + Extract Cypher code from a text. + Args: + text: Text to extract Cypher code from. + + Returns: + Cypher code extracted from the text. 
+ """ + # The pattern to find Cypher code enclosed in triple backticks + pattern = r"```(.*?)```" + + # Find all matches in the input text + matches = re.findall(pattern, text, re.DOTALL) + + return matches[0] if matches else text + + +class FalkorDBQAChain(Chain): + """Chain for question-answering against a graph by generating Cypher statements.""" + + graph: FalkorDBGraph = Field(exclude=True) + cypher_generation_chain: LLMChain + qa_chain: LLMChain + input_key: str = "query" #: :meta private: + output_key: str = "result" #: :meta private: + top_k: int = 10 + """Number of results to return from the query""" + return_intermediate_steps: bool = False + """Whether or not to return the intermediate steps along with the final answer.""" + return_direct: bool = False + """Whether or not to return the result of querying the graph directly.""" + + @property + def input_keys(self) -> List[str]: + """Return the input keys. + + :meta private: + """ + return [self.input_key] + + @property + def output_keys(self) -> List[str]: + """Return the output keys. + + :meta private: + """ + _output_keys = [self.output_key] + return _output_keys + + @property + def _chain_type(self) -> str: + return "graph_cypher_chain" + + @classmethod + def from_llm( + cls, + llm: BaseLanguageModel, + *, + qa_prompt: BasePromptTemplate = CYPHER_QA_PROMPT, + cypher_prompt: BasePromptTemplate = CYPHER_GENERATION_PROMPT, + **kwargs: Any, + ) -> FalkorDBQAChain: + """Initialize from LLM.""" + qa_chain = LLMChain(llm=llm, prompt=qa_prompt) + cypher_generation_chain = LLMChain(llm=llm, prompt=cypher_prompt) + + return cls( + qa_chain=qa_chain, + cypher_generation_chain=cypher_generation_chain, + **kwargs, + ) + + def _call( + self, + inputs: Dict[str, Any], + run_manager: Optional[CallbackManagerForChainRun] = None, + ) -> Dict[str, Any]: + """Generate Cypher statement, use it to look up in db and answer question.""" + _run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager() + callbacks = _run_manager.get_child() + question = inputs[self.input_key] + + intermediate_steps: List = [] + + generated_cypher = self.cypher_generation_chain.run( + {"question": question, "schema": self.graph.schema}, callbacks=callbacks + ) + + # Extract Cypher code if it is wrapped in backticks + generated_cypher = extract_cypher(generated_cypher) + + _run_manager.on_text("Generated Cypher:", end="\n", verbose=self.verbose) + _run_manager.on_text( + generated_cypher, color="green", end="\n", verbose=self.verbose + ) + + intermediate_steps.append({"query": generated_cypher}) + + # Retrieve and limit the number of results + context = self.graph.query(generated_cypher)[: self.top_k] + + if self.return_direct: + final_result = context + else: + _run_manager.on_text("Full Context:", end="\n", verbose=self.verbose) + _run_manager.on_text( + str(context), color="green", end="\n", verbose=self.verbose + ) + + intermediate_steps.append({"context": context}) + + result = self.qa_chain( + {"question": question, "context": context}, + callbacks=callbacks, + ) + final_result = result[self.qa_chain.output_key] + + chain_result: Dict[str, Any] = {self.output_key: final_result} + if self.return_intermediate_steps: + chain_result[INTERMEDIATE_STEPS_KEY] = intermediate_steps + + return chain_result diff --git a/libs/langchain/langchain/chat_loaders/gmail.py b/libs/langchain/langchain/chat_loaders/gmail.py new file mode 100644 index 0000000000000..4e88accdeea0d --- /dev/null +++ b/libs/langchain/langchain/chat_loaders/gmail.py @@ -0,0 +1,110 @@ +import 
base64 +import re +from typing import Any, Iterator + +from langchain.chat_loaders.base import BaseChatLoader, ChatSession +from langchain.schema.messages import HumanMessage + + +def _extract_email_content(msg: Any) -> HumanMessage: + from_email = None + for values in msg["payload"]["headers"]: + name = values["name"] + if name == "From": + from_email = values["value"] + if from_email is None: + raise ValueError + for part in msg["payload"]["parts"]: + if part["mimeType"] == "text/plain": + data = part["body"]["data"] + data = base64.urlsafe_b64decode(data).decode("utf-8") + # Regular expression to split the email body at the first + # occurrence of a line that starts with "On ... wrote:" + pattern = re.compile(r"\r\nOn .+(\r\n)*wrote:\r\n") + # Split the email body and extract the first part + newest_response = re.split(pattern, data)[0] + message = HumanMessage( + content=newest_response, additional_kwargs={"sender": from_email} + ) + return message + raise ValueError + + +def _get_message_data(service: Any, message: Any) -> ChatSession: + msg = service.users().messages().get(userId="me", id=message["id"]).execute() + message_content = _extract_email_content(msg) + in_reply_to = None + email_data = msg["payload"]["headers"] + for values in email_data: + name = values["name"] + if name == "In-Reply-To": + in_reply_to = values["value"] + if in_reply_to is None: + raise ValueError + + thread_id = msg["threadId"] + + thread = service.users().threads().get(userId="me", id=thread_id).execute() + messages = thread["messages"] + + response_email = None + for message in messages: + email_data = message["payload"]["headers"] + for values in email_data: + if values["name"] == "Message-ID": + message_id = values["value"] + if message_id == in_reply_to: + response_email = message + if response_email is None: + raise ValueError + starter_content = _extract_email_content(response_email) + return ChatSession(messages=[starter_content, message_content]) + + +class GMailLoader(BaseChatLoader): + """This loader goes over how to load data from GMail. + + There are many ways you could want to load data from GMail. + This loader is currently fairly opinionated in how to do so. + The way it does it is it first looks for all messages that you have sent. + It then looks for messages where you are responding to a previous email. + It then fetches that previous email, and creates a training example + of that email, followed by your email. + + Note that there are clear limitations here. For example, + all examples created are only looking at the previous email for context. + + To use: + + - Set up a Google Developer Account: + Go to the Google Developer Console, create a project, + and enable the Gmail API for that project. + This will give you a credentials.json file that you'll need later. 
+ """ + + def __init__(self, creds: Any, n: int = 100, raise_error: bool = False) -> None: + super().__init__() + self.creds = creds + self.n = n + self.raise_error = raise_error + + def lazy_load(self) -> Iterator[ChatSession]: + from googleapiclient.discovery import build + + service = build("gmail", "v1", credentials=self.creds) + results = ( + service.users() + .messages() + .list(userId="me", labelIds=["SENT"], maxResults=self.n) + .execute() + ) + messages = results.get("messages", []) + for message in messages: + try: + yield _get_message_data(service, message) + except Exception as e: + # TODO: handle errors better + if self.raise_error: + raise e + else: + pass diff --git a/libs/langchain/langchain/graphs/__init__.py b/libs/langchain/langchain/graphs/__init__.py index 9699750db2fa4..b2a43168c9a70 100644 --- a/libs/langchain/langchain/graphs/__init__.py +++ b/libs/langchain/langchain/graphs/__init__.py @@ -1,6 +1,7 @@ """**Graphs** provide a natural language interface to graph databases.""" from langchain.graphs.arangodb_graph import ArangoGraph +from langchain.graphs.falkordb_graph import FalkorDBGraph from langchain.graphs.hugegraph import HugeGraph from langchain.graphs.kuzu_graph import KuzuGraph from langchain.graphs.memgraph_graph import MemgraphGraph @@ -20,4 +21,5 @@ "HugeGraph", "RdfGraph", "ArangoGraph", + "FalkorDBGraph", ] diff --git a/libs/langchain/langchain/graphs/falkordb_graph.py b/libs/langchain/langchain/graphs/falkordb_graph.py new file mode 100644 index 0000000000000..189189f641445 --- /dev/null +++ b/libs/langchain/langchain/graphs/falkordb_graph.py @@ -0,0 +1,67 @@ +from typing import Any, Dict, List + +node_properties_query = """ +MATCH (n) +UNWIND labels(n) as l +UNWIND keys(n) as p +RETURN {label:l, properties: collect(distinct p)} AS output +""" + +rel_properties_query = """ +MATCH ()-[r]->() +UNWIND keys(r) as p +RETURN {type:type(r), properties: collect(distinct p)} AS output +""" + +rel_query = """ +MATCH (n)-[r]->(m) +WITH labels(n)[0] AS src, labels(m)[0] AS dst, type(r) AS type +RETURN DISTINCT "(:" + src + ")-[:" + type + "]->(:" + dst + ")" AS output +""" + + +class FalkorDBGraph: + """FalkorDB wrapper for graph operations.""" + + def __init__( + self, database: str, host: str = "localhost", port: int = 6379 + ) -> None: + """Create a new FalkorDB graph wrapper instance.""" + try: + import redis + from redis.commands.graph import Graph + except ImportError: + raise ImportError( + "Could not import redis python package. " + "Please install it with `pip install redis`." + ) + + self._driver = redis.Redis(host=host, port=port) + self._graph = Graph(self._driver, database) + + try: + self.refresh_schema() + except Exception as e: + raise ValueError(f"Could not refresh schema. 
Error: {e}") + + @property + def get_schema(self) -> str: + """Returns the schema of the FalkorDB database""" + return self.schema + + def refresh_schema(self) -> None: + """Refreshes the schema of the FalkorDB database""" + self.schema = ( + f"Node properties: {node_properties_query}\n" + f"Relationships properties: {rel_properties_query}\n" + f"Relationships: {rel_query}\n" + ) + + def query(self, query: str, params: dict = {}) -> List[Dict[str, Any]]: + """Query FalkorDB database.""" + + try: + data = self._graph.query(query, params) + return data.result_set + except Exception as e: + raise ValueError("Generated Cypher Statement is not valid\n" f"{e}") diff --git a/libs/langchain/langchain/llms/grammars/json.gbnf b/libs/langchain/langchain/llms/grammars/json.gbnf new file mode 100644 index 0000000000000..61bd2b2e65bf9 --- /dev/null +++ b/libs/langchain/langchain/llms/grammars/json.gbnf @@ -0,0 +1,29 @@ +# Grammar for subset of JSON - doesn't support full string or number syntax + +root ::= object +value ::= object | array | string | number | boolean | "null" + +object ::= + "{" ws ( + string ":" ws value + ("," ws string ":" ws value)* + )? "}" + +array ::= + "[" ws ( + value + ("," ws value)* + )? "]" + +string ::= + "\"" ( + [^"\\] | + "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes + )* "\"" ws + +# Only plain integers currently +number ::= "-"? [0-9]+ ws +boolean ::= ("true" | "false") ws + +# Optional space: by convention, applied in this grammar after literal chars when allowed +ws ::= ([ \t\n] ws)? \ No newline at end of file diff --git a/libs/langchain/langchain/llms/grammars/list.gbnf b/libs/langchain/langchain/llms/grammars/list.gbnf new file mode 100644 index 0000000000000..30ea6e0c8499d --- /dev/null +++ b/libs/langchain/langchain/llms/grammars/list.gbnf @@ -0,0 +1,14 @@ +root ::= "[" items "]" EOF + +items ::= item ("," ws* item)* + +item ::= string + +string ::= + "\"" word (ws+ word)* "\"" ws* + +word ::= [a-zA-Z]+ + +ws ::= " " + +EOF ::= "\n" \ No newline at end of file diff --git a/libs/langchain/langchain/llms/llamacpp.py b/libs/langchain/langchain/llms/llamacpp.py index d5af66dc4c942..0e4b7e8f635aa 100644 --- a/libs/langchain/langchain/llms/llamacpp.py +++ b/libs/langchain/langchain/llms/llamacpp.py @@ -1,5 +1,8 @@ +from __future__ import annotations + import logging -from typing import Any, Dict, Iterator, List, Optional +from pathlib import Path +from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional, Union from langchain.callbacks.manager import CallbackManagerForLLMRun from langchain.llms.base import LLM @@ -8,6 +11,9 @@ from langchain.utils import get_pydantic_field_names from langchain.utils.utils import build_extra_kwargs +if TYPE_CHECKING: + from llama_cpp import LlamaGrammar + logger = logging.getLogger(__name__) @@ -113,12 +119,35 @@ class LlamaCpp(LLM): streaming: bool = True """Whether to stream the results, token by token.""" + grammar_path: Optional[Union[str, Path]] = None + """ + grammar_path: Path to the .gbnf file that defines formal grammars + for constraining model outputs. For instance, the grammar can be used + to force the model to generate valid JSON or to speak exclusively in emojis. At most + one of grammar_path and grammar should be passed in. + """ + grammar: Optional[Union[str, LlamaGrammar]] = None + """ + grammar: formal grammar for constraining model outputs. For instance, the grammar + can be used to force the model to generate valid JSON or to speak exclusively in + emojis. 
At most one of grammar_path and grammar should be passed in. + """ + verbose: bool = True """Print verbose output to stderr.""" @root_validator() def validate_environment(cls, values: Dict) -> Dict: """Validate that llama-cpp-python library is installed.""" + try: + from llama_cpp import Llama, LlamaGrammar + except ImportError: + raise ImportError( + "Could not import llama-cpp-python library. " + "Please install the llama-cpp-python library to " + "use this embedding model: pip install llama-cpp-python" + ) + model_path = values["model_path"] model_param_names = [ "rope_freq_scale", @@ -146,21 +175,26 @@ def validate_environment(cls, values: Dict) -> Dict: model_params.update(values["model_kwargs"]) try: - from llama_cpp import Llama - values["client"] = Llama(model_path, **model_params) - except ImportError: - raise ImportError( - "Could not import llama-cpp-python library. " - "Please install the llama-cpp-python library to " - "use this embedding model: pip install llama-cpp-python" - ) except Exception as e: raise ValueError( f"Could not load Llama model from path: {model_path}. " f"Received error {e}" ) + if values["grammar"] and values["grammar_path"]: + grammar = values["grammar"] + grammar_path = values["grammar_path"] + raise ValueError( + "Can only pass in one of grammar and grammar_path. Received " + f"{grammar=} and {grammar_path=}." + ) + elif isinstance(values["grammar"], str): + values["grammar"] = LlamaGrammar.from_string(values["grammar"]) + elif values["grammar_path"]: + values["grammar"] = LlamaGrammar.from_file(values["grammar_path"]) + else: + pass return values @root_validator(pre=True) @@ -176,7 +210,7 @@ def build_model_kwargs(cls, values: Dict[str, Any]) -> Dict[str, Any]: @property def _default_params(self) -> Dict[str, Any]: """Get the default parameters for calling llama_cpp.""" - return { + params = { "suffix": self.suffix, "max_tokens": self.max_tokens, "temperature": self.temperature, @@ -187,6 +221,9 @@ def _default_params(self) -> Dict[str, Any]: "repeat_penalty": self.repeat_penalty, "top_k": self.top_k, } + if self.grammar: + params["grammar"] = self.grammar + return params @property def _identifying_params(self) -> Dict[str, Any]: @@ -252,7 +289,10 @@ def _call( # and return the combined strings from the first choices's text: combined_text_output = "" for chunk in self._stream( - prompt=prompt, stop=stop, run_manager=run_manager, **kwargs + prompt=prompt, + stop=stop, + run_manager=run_manager, + **kwargs, ): combined_text_output += chunk.text return combined_text_output diff --git a/libs/langchain/langchain/prompts/chat.py b/libs/langchain/langchain/prompts/chat.py index 7e7e5809f31f9..59fcb30c868c5 100644 --- a/libs/langchain/langchain/prompts/chat.py +++ b/libs/langchain/langchain/prompts/chat.py @@ -674,18 +674,18 @@ def _create_template_from_message_type( Returns: a message prompt template of the appropriate type. """ - if message_type == "human": + if message_type in ("human", "user"): message: BaseMessagePromptTemplate = HumanMessagePromptTemplate.from_template( template ) - elif message_type == "ai": + elif message_type in ("ai", "assistant"): message = AIMessagePromptTemplate.from_template(template) elif message_type == "system": message = SystemMessagePromptTemplate.from_template(template) else: raise ValueError( - f"Unexpected message type: {message_type}. Use one of 'human', 'ai', " - f"or 'system'." + f"Unexpected message type: {message_type}. Use one of 'human'," + f" 'user', 'ai', 'assistant', or 'system'." 
) return message diff --git a/libs/langchain/langchain/smith/evaluation/runner_utils.py b/libs/langchain/langchain/smith/evaluation/runner_utils.py index 9e06fcd65f49f..438bc7914006d 100644 --- a/libs/langchain/langchain/smith/evaluation/runner_utils.py +++ b/libs/langchain/langchain/smith/evaluation/runner_utils.py @@ -11,6 +11,7 @@ import warnings from enum import Enum from typing import ( + TYPE_CHECKING, Any, Callable, Coroutine, @@ -44,6 +45,9 @@ from langchain.smith.evaluation.config import EvalConfig, RunEvalConfig from langchain.smith.evaluation.string_run_evaluator import StringRunEvaluatorChain +if TYPE_CHECKING: + import pandas as pd + logger = logging.getLogger(__name__) MODEL_OR_CHAIN_FACTORY = Union[ @@ -63,6 +67,31 @@ class InputFormatError(Exception): ## Shared Utilities +class TestResult(dict): + """A dictionary of the results of a single test run.""" + + def to_dataframe(self) -> pd.DataFrame: + """Convert the results to a dataframe.""" + try: + import pandas as pd + except ImportError as e: + raise ImportError( + "Pandas is required to convert the results to a dataframe." + " to install pandas, run `pip install pandas`." + ) from e + + indices = [] + records = [] + for example_id, result in self["results"].items(): + feedback = result["feedback"] + records.append( + {**{f.key: f.score for f in feedback}, "output": result["output"]} + ) + indices.append(example_id) + + return pd.DataFrame(records, index=indices) + + def _get_eval_project_url(api_url: str, project_id: str) -> str: """Get the project url from the api url.""" parsed = urlparse(api_url) @@ -667,7 +696,7 @@ async def _arun_llm_or_chain( tags: Optional[List[str]] = None, callbacks: Optional[List[BaseCallbackHandler]] = None, input_mapper: Optional[Callable[[Dict], Any]] = None, -) -> Union[List[dict], List[str], List[LLMResult], List[ChatResult]]: +) -> Union[dict, str, LLMResult, ChatResult]: """Asynchronously run the Chain or language model. Args: @@ -689,10 +718,10 @@ async def _arun_llm_or_chain( tracer.example_id = example.id else: previous_example_ids = None - outputs = [] chain_or_llm = ( "LLM" if isinstance(llm_or_chain_factory, BaseLanguageModel) else "Chain" ) + result = None try: if isinstance(llm_or_chain_factory, BaseLanguageModel): output: Any = await _arun_llm( @@ -711,15 +740,15 @@ async def _arun_llm_or_chain( callbacks=callbacks, input_mapper=input_mapper, ) - outputs.append(output) + result = output except Exception as e: logger.warning(f"{chain_or_llm} failed for example {example.id}. 
Error: {e}") - outputs.append({"Error": str(e)}) + result = {"Error": str(e)} if callbacks and previous_example_ids: for example_id, tracer in zip(previous_example_ids, callbacks): if hasattr(tracer, "example_id"): tracer.example_id = example_id - return outputs + return result async def _gather_with_concurrency( @@ -856,7 +885,7 @@ async def _arun_on_examples( wrapped_model, examples, evaluation, data_type ) examples = _validate_example_inputs(examples, wrapped_model, input_mapper) - results: Dict[str, List[Any]] = {} + results: Dict[str, dict] = {} async def process_example( example: Example, callbacks: List[BaseCallbackHandler], job_state: dict @@ -869,7 +898,7 @@ async def process_example( callbacks=callbacks, input_mapper=input_mapper, ) - results[str(example.id)] = result + results[str(example.id)] = {"output": result} job_state["num_processed"] += 1 if verbose: print( @@ -890,8 +919,14 @@ async def process_example( ), *(functools.partial(process_example, e) for e in examples), ) + all_feedback = {} for handler in evaluation_handlers: handler.wait_for_futures() + all_feedback.update(handler.logged_feedback) + # join the results and feedback on the example id + for example_id, output_dict in results.items(): + feedback = all_feedback.get(example_id, []) + output_dict["feedback"] = feedback return results @@ -978,7 +1013,7 @@ def _run_llm_or_chain( tags: Optional[List[str]] = None, callbacks: Optional[List[BaseCallbackHandler]] = None, input_mapper: Optional[Callable[[Dict], Any]] = None, -) -> Union[List[dict], List[str], List[LLMResult], List[ChatResult]]: +) -> Union[dict, str, LLMResult, ChatResult]: """ Run the Chain or language model synchronously. @@ -1001,10 +1036,10 @@ def _run_llm_or_chain( tracer.example_id = example.id else: previous_example_ids = None - outputs = [] chain_or_llm = ( "LLM" if isinstance(llm_or_chain_factory, BaseLanguageModel) else "Chain" ) + result = None try: if isinstance(llm_or_chain_factory, BaseLanguageModel): output: Any = _run_llm( @@ -1023,18 +1058,18 @@ def _run_llm_or_chain( tags=tags, input_mapper=input_mapper, ) - outputs.append(output) + result = output except Exception as e: logger.warning( f"{chain_or_llm} failed for example {example.id} with inputs:" f" {example.inputs}.\nError: {e}", ) - outputs.append({"Error": str(e)}) + result = {"Error": str(e)} if callbacks and previous_example_ids: for example_id, tracer in zip(previous_example_ids, callbacks): if hasattr(tracer, "example_id"): tracer.example_id = example_id - return outputs + return result def _run_on_examples( @@ -1075,7 +1110,7 @@ def _run_on_examples( Returns: A dictionary mapping example ids to the model outputs. 
""" - results: Dict[str, Any] = {} + results: Dict[str, dict] = {} wrapped_model = _wrap_in_chain_factory(llm_or_chain_factory) project_name = _get_project_name(project_name, wrapped_model) tracer = LangChainTracer( @@ -1085,11 +1120,11 @@ def _run_on_examples( wrapped_model, examples, evaluation, data_type ) examples = _validate_example_inputs(examples, wrapped_model, input_mapper) - evalution_handler = EvaluatorCallbackHandler( + evaluation_handler = EvaluatorCallbackHandler( evaluators=run_evaluators or [], client=client, ) - callbacks: List[BaseCallbackHandler] = [tracer, evalution_handler] + callbacks: List[BaseCallbackHandler] = [tracer, evaluation_handler] for i, example in enumerate(examples): result = _run_llm_or_chain( example, @@ -1100,9 +1135,14 @@ def _run_on_examples( ) if verbose: print(f"{i+1} processed", flush=True, end="\r") - results[str(example.id)] = result + results[str(example.id)] = {"output": result} tracer.wait_for_futures() - evalution_handler.wait_for_futures() + evaluation_handler.wait_for_futures() + all_feedback = evaluation_handler.logged_feedback + # join the results and feedback on the example id + for example_id, output_dict in results.items(): + feedback = all_feedback.get(example_id, []) + output_dict["feedback"] = feedback return results @@ -1276,10 +1316,10 @@ def _evaluate_strings(self, prediction, reference=None, input=None, **kwargs) -> input_mapper=input_mapper, data_type=dataset.data_type, ) - return { - "project_name": project_name, - "results": results, - } + return TestResult( + project_name=project_name, + results=results, + ) def _handle_coroutine(coro: Coroutine) -> Any: @@ -1461,7 +1501,7 @@ def _evaluate_strings(self, prediction, reference=None, input=None, **kwargs) -> data_type=dataset.data_type, ) results = _handle_coroutine(coro) - return { - "project_name": project_name, - "results": results, - } + return TestResult( + project_name=project_name, + results=results, + ) diff --git a/libs/langchain/langchain/vectorstores/qdrant.py b/libs/langchain/langchain/vectorstores/qdrant.py index cdc5bea8efbd5..0be0766f31143 100644 --- a/libs/langchain/langchain/vectorstores/qdrant.py +++ b/libs/langchain/langchain/vectorstores/qdrant.py @@ -1298,7 +1298,7 @@ def from_texts( embeddings = OpenAIEmbeddings() qdrant = Qdrant.from_texts(texts, embeddings, "localhost") """ - qdrant = cls._construct_instance( + qdrant = cls.construct_instance( texts, embedding, location, @@ -1474,7 +1474,7 @@ async def afrom_texts( embeddings = OpenAIEmbeddings() qdrant = await Qdrant.afrom_texts(texts, embeddings, "localhost") """ - qdrant = await cls._aconstruct_instance( + qdrant = await cls.aconstruct_instance( texts, embedding, location, @@ -1510,7 +1510,7 @@ async def afrom_texts( return qdrant @classmethod - def _construct_instance( + def construct_instance( cls: Type[Qdrant], texts: List[str], embedding: Embeddings, @@ -1676,7 +1676,7 @@ def _construct_instance( return qdrant @classmethod - async def _aconstruct_instance( + async def aconstruct_instance( cls: Type[Qdrant], texts: List[str], embedding: Embeddings, diff --git a/libs/langchain/pyproject.toml b/libs/langchain/pyproject.toml index 3a5f392a9a4cc..795b5c4a53869 100644 --- a/libs/langchain/pyproject.toml +++ b/libs/langchain/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "langchain" -version = "0.0.274" +version = "0.0.275" description = "Building applications with LLMs through composability" authors = [] license = "MIT" diff --git a/libs/langchain/tests/integration_tests/graphs/test_falkordb.py 
b/libs/langchain/tests/integration_tests/graphs/test_falkordb.py new file mode 100644 index 0000000000000..de6c77a49317b --- /dev/null +++ b/libs/langchain/tests/integration_tests/graphs/test_falkordb.py @@ -0,0 +1,34 @@ +import unittest +from typing import Any +from unittest.mock import MagicMock, patch + +from langchain.graphs import FalkorDBGraph + + +class TestFalkorDB(unittest.TestCase): + def setUp(self) -> None: + self.host = "localhost" + self.graph = "test_falkordb" + self.port = 6379 + + @patch("redis.Redis") + def test_init(self, mock_client: Any) -> None: + mock_client.return_value = MagicMock() + FalkorDBGraph(database=self.graph, host=self.host, port=self.port) + + @patch("redis.Redis") + def test_execute(self, mock_client: Any) -> None: + mock_client.return_value = MagicMock() + graph = FalkorDBGraph(database=self.graph, host=self.host, port=self.port) + + query = "RETURN 1" + result = graph.query(query) + self.assertIsInstance(result, MagicMock) + + @patch("redis.Redis") + def test_refresh_schema(self, mock_client: Any) -> None: + mock_client.return_value = MagicMock() + graph = FalkorDBGraph(database=self.graph, host=self.host, port=self.port) + + graph.refresh_schema() + self.assertNotEqual(graph.get_schema, "") diff --git a/libs/langchain/tests/unit_tests/smith/evaluation/test_runner_utils.py b/libs/langchain/tests/unit_tests/smith/evaluation/test_runner_utils.py index 5c34f9032faf1..914958031dcf1 100644 --- a/libs/langchain/tests/unit_tests/smith/evaluation/test_runner_utils.py +++ b/libs/langchain/tests/unit_tests/smith/evaluation/test_runner_utils.py @@ -182,14 +182,12 @@ def input_mapper(inputs: dict) -> dict: return {"the right input": inputs["the wrong input"]} result = _run_llm_or_chain(example, lambda: mock_chain, input_mapper=input_mapper) - assert len(result) == 1 - assert result[0] == {"output": "2", "the right input": "1"} + assert result == {"output": "2", "the right input": "1"} bad_result = _run_llm_or_chain( example, lambda: mock_chain, ) - assert len(bad_result) == 1 - assert "Error" in bad_result[0] + assert "Error" in bad_result # Try with LLM def llm_input_mapper(inputs: dict) -> str: @@ -197,9 +195,7 @@ def llm_input_mapper(inputs: dict) -> str: return "the right input" mock_llm = FakeLLM(queries={"the right input": "somenumber"}) - result = _run_llm_or_chain(example, mock_llm, input_mapper=llm_input_mapper) - assert len(result) == 1 - llm_result = result[0] + llm_result = _run_llm_or_chain(example, mock_llm, input_mapper=llm_input_mapper) assert isinstance(llm_result, str) assert llm_result == "somenumber" @@ -300,8 +296,8 @@ async def mock_arun_chain( tags: Optional[List[str]] = None, callbacks: Optional[Any] = None, **kwargs: Any, - ) -> List[Dict[str, Any]]: - return [{"result": f"Result for example {example.id}"}] + ) -> Dict[str, Any]: + return {"result": f"Result for example {example.id}"} def mock_create_project(*args: Any, **kwargs: Any) -> Any: proj = mock.MagicMock() @@ -328,9 +324,10 @@ def mock_create_project(*args: Any, **kwargs: Any) -> Any: ) expected = { - uuid_: [ - {"result": f"Result for example {uuid.UUID(uuid_)}"} for _ in range(1) - ] + uuid_: { + "output": {"result": f"Result for example {uuid.UUID(uuid_)}"}, + "feedback": [], + } for uuid_ in uuids } assert results["results"] == expected
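The chat-loader hunk above adds a Gmail loader that walks your sent mail and yields one `ChatSession` per reply it can pair with the email being answered. A minimal usage sketch follows; the import path `langchain.chat_loaders.gmail.GMailLoader` and the token file name are assumptions (the class name is not visible in this hunk), and `email_token.json` is the OAuth token produced by the authorization flow.

```python
# Sketch only: the import path and token file name are assumptions.
from google.oauth2.credentials import Credentials
from langchain.chat_loaders.gmail import GMailLoader  # assumed export location

# Reuse the OAuth token written by the Gmail authorization flow.
creds = Credentials.from_authorized_user_file(
    "email_token.json", ["https://www.googleapis.com/auth/gmail.readonly"]
)

# n caps how many sent messages are scanned; raise_error=False skips
# messages that fail to parse instead of aborting the whole load.
loader = GMailLoader(creds=creds, n=10, raise_error=False)
sessions = list(loader.lazy_load())  # one ChatSession per sent reply found
```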
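The new `FalkorDBGraph` wrapper exposes a constructor, `query()`, `refresh_schema()`, and a `get_schema` property, as shown in the `falkordb_graph.py` hunk. A minimal sketch of using it, assuming a FalkorDB server is reachable on `localhost:6379`, `redis` is installed, and the database name and seed data are placeholders:

```python
from langchain.graphs import FalkorDBGraph

# Connects via redis and opens (or creates) the named graph.
graph = FalkorDBGraph(database="movies", host="localhost", port=6379)

# query() returns the raw result-set rows from the graph engine.
graph.query("CREATE (:Movie {title: 'Inception', year: 2010})")
rows = graph.query("MATCH (m:Movie) RETURN m.title, m.year")
print(rows)  # e.g. [['Inception', 2010]] for the sample data above

# refresh_schema() rebuilds the schema string exposed via get_schema.
graph.refresh_schema()
print(graph.get_schema)
```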
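The `llamacpp.py` hunk adds `grammar` / `grammar_path` fields (at most one may be set) so sampling can be constrained by a GBNF grammar such as the bundled `json.gbnf`. A minimal sketch, assuming `llama-cpp-python` with grammar support is installed; the model file path is a placeholder and the grammar path is relative to the repository root:

```python
from langchain.llms import LlamaCpp

llm = LlamaCpp(
    model_path="./models/llama-2-7b.Q4_K_M.gguf",  # placeholder local model
    # Constrain generation to the bundled JSON grammar; passing a raw grammar
    # string via `grammar=` instead would be compiled with LlamaGrammar.from_string.
    grammar_path="libs/langchain/langchain/llms/grammars/json.gbnf",
    temperature=0.0,
)

# Output is forced to match the grammar, i.e. a (subset-of-)JSON object.
print(llm("Describe a person as JSON:"))
```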
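The `prompts/chat.py` hunk makes `"user"` and `"assistant"` accepted aliases for `"human"` and `"ai"` when building message templates from role strings. A short sketch using the `(role, template)` tuple form of `ChatPromptTemplate.from_messages`, which routes through the updated helper:

```python
from langchain.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a terse assistant."),
        ("user", "{question}"),        # alias for "human"
        ("assistant", "Answer: "),     # alias for "ai"
    ]
)

print(prompt.format_messages(question="What is 2 + 2?"))
```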
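Finally, the `runner_utils.py` hunks change each per-example result from a one-element list to a single `{"output": ..., "feedback": [...]}` entry and wrap the top-level return value in the new `TestResult` dict, which adds `to_dataframe()` (requires pandas). A sketch of consuming this, assuming the wrapped return comes from `langchain.smith.run_on_dataset` (the enclosing function names are not visible in the hunks), a LangSmith dataset named `"my-dataset"` with a `question` input key, and a placeholder chain factory:

```python
from langsmith import Client
from langchain.chains import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.smith import RunEvalConfig, run_on_dataset

def make_chain():
    # Fresh chain per example; the prompt and model here are placeholders.
    return LLMChain(
        llm=ChatOpenAI(temperature=0),
        prompt=PromptTemplate.from_template("Answer the question: {question}"),
    )

client = Client()
test_result = run_on_dataset(
    client,
    dataset_name="my-dataset",
    llm_or_chain_factory=make_chain,
    evaluation=RunEvalConfig(evaluators=["qa"]),
)

# Each entry now holds one run output plus its evaluator feedback,
# instead of the previous single-element lists.
for example_id, entry in test_result["results"].items():
    print(example_id, entry["output"], entry["feedback"])

# One row per example: a column per feedback key plus an "output" column.
df = test_result.to_dataframe()
```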