updated Makefile (#153 )

Cleaned up `makefile`
Update evaluators (#157 )
2026-07-01 22:34:02 -04:00 · 2023-12-20 09:24:06 -05:00 · 2023-12-19 17:30:24 -05:00 · 2023-12-19 13:39:08 -08:00 · 2023-12-19 15:01:38 -05:00 · 2023-12-19 14:57:37 -05:00
159 changed files with 13379 additions and 874 deletions
@@ -114,7 +114,7 @@ jobs:
        shell: bash
        run: |
          echo "Attempting to build docs..."
-          make build_docs
+          make docs_build
  test_datasets:
    timeout-minutes: 5
    runs-on: ubuntu-latest
@@ -34,7 +34,7 @@ jobs:
      - name: Sphinx build
        shell: bash
        run: |
-          make build_docs
+          make docs_build
      - name: Publish Docs
        uses: peaceiris/actions-gh-pages@v3
        with:
@@ -3,32 +3,7 @@
 # Default target executed when no arguments are given to make.
 all: help

-######################
-# TESTING AND COVERAGE
-######################
-
-# Define a variable for the test file path.
-TEST_FILE ?= tests/unit_tests/
-
-test:
-	poetry run pytest --disable-socket --allow-unix-socket $(TEST_FILE)
-
-test_watch:
-	poetry run ptw . -- $(TEST_FILE)
-
-build_docs:
-	# Copy README.md to docs/index.md
-	cp README.md ./docs/source/index.md
-	# Append to the table of contents the contents of the file
-	cat ./docs/source/toc.segment >> ./docs/source/index.md
-	poetry run sphinx-build "./docs/source" "./docs/build"
-
-clean_docs:
-	rm -rf ./docs/build
-
-######################
-# LINTING AND FORMATTING
-######################
+# LINTING AND FORMATTING:

 # Define a variable for Python and notebook files.
 lint format: PYTHON_FILES=.
@@ -48,19 +23,45 @@ spell_check:
 spell_fix:
 	poetry run codespell --toml pyproject.toml -w

-######################
-# HELP
-######################

+# TESTING AND COVERAGE:
+
+# Define a variable for the test file path.
+TEST_FILE ?= tests/unit_tests/
+
+test:
+	poetry run pytest --disable-socket --allow-unix-socket $(TEST_FILE)
+
+test_watch:
+	poetry run ptw . -- $(TEST_FILE)
+
+
+# DOCUMENTATION:
+
+docs_clean:
+	rm -rf ./docs/build
+
+docs_build:
+	# Copy README.md to docs/index.md
+	cp README.md ./docs/source/index.md
+	# Append to the table of contents the contents of the file
+	cat ./docs/source/toc.segment >> ./docs/source/index.md
+	poetry run sphinx-build "./docs/source" "./docs/build"
+
+
+# HELP:
 help:
-	@echo '===================='
-	@echo '-- LINTING --'
-	@echo 'format                       - run code formatters'
-	@echo 'lint                         - run linters'
-	@echo 'spell_check                 	- run codespell on the project'
-	@echo 'spell_fix                		- run codespell on the project and fix the errors'
-	@echo '-- TESTS --'
-	@echo 'coverage                     - run unit tests and generate coverage report'
-	@echo 'test                         - run unit tests'
-	@echo 'test TEST_FILE=<test_file>   - run all tests in file'
-	@echo '-- DOCUMENTATION tasks are from the top-level Makefile --'
+	@echo ''
+	@echo 'LINTING:'
+	@echo '  format             - run code formatters'
+	@echo '  lint               - run linters'
+	@echo '  spell_check        - run codespell'
+	@echo '  spell_fix          - run codespell and fix the errors'
+	@echo 'TESTS:'
+	@echo '  test               - run unit tests'
+	@echo '  test TEST_FILE=<test_file>   - run tests in <test_file>'
+	@echo '  coverage           - run unit tests and generate coverage report'
+	@echo 'DOCUMENTATION:'
+	@echo '  docs_clean         - delete the docs/build directory'
+	@echo '  docs_build         - build the documentation'
+	@echo ''
@@ -1,6 +1,4 @@
-🚧 Under Active Development 🚧
-
-# 🦜💪 LangChain Benchmarks
+# 🦜💯 LangChain Benchmarks

 [![Release Notes](https://img.shields.io/github/release/langchain-ai/langchain-benchmarks)](https://github.com/langchain-ai/langchain-benchmarks/releases)
 [![CI](https://github.com/langchain-ai/langchain-benchmarks/actions/workflows/ci.yml/badge.svg)](https://github.com/langchain-ai/langchain-benchmarks/actions/workflows/ci.yml)
@@ -35,7 +33,7 @@ pip install -U langchain-benchmarks
 All the benchmarks come with an associated benchmark dataset stored in [LangSmith](https://smith.langchain.com). To take advantage of the eval and debugging experience, [sign up](https://smith.langchain.com), and set your API key in your environment:

 ```bash
-export LANGCHAIN_API_KEY=sk-...
+export LANGCHAIN_API_KEY=ls_...
 ```

 ## Repo Structure
@@ -1,8 +1,7 @@
 from chat_langchain.chain import chain
 from fastapi import FastAPI
-from openai_functions_agent import agent_executor as openai_functions_agent_chain
-
 from langserve import add_routes
+from openai_functions_agent import agent_executor as openai_functions_agent_chain

 app = FastAPI()

@@ -1 +1,3 @@
 chromadb/
+index.md
+Untitled.ipynb
@@ -1,225 +1,226 @@
 {
-    "cells": [
-        {
-            "cell_type": "markdown",
-            "id": "033684fb-65b2-4586-a959-68c614741ca2",
-            "metadata": {},
-            "source": [
-                "# Datasets\n",
-                "[![Open In Collab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/langchain-ai/langchain-benchmarks/blob/main/docs/source/notebooks/datasets.ipynb)\n",
-                "\n",
-                "Here, we'll see how to work with LangSmith datasets."
-            ]
-        },
-        {
-            "cell_type": "code",
-            "execution_count": null,
-            "metadata": {},
-            "outputs": [],
-            "source": [
-                "%pip install -U langchain-benchmarks"
-            ]
-        },
-        {
-            "cell_type": "code",
-            "execution_count": 1,
-            "id": "6d272fbf-710e-4a49-a0da-67e010541905",
-            "metadata": {
-                "tags": []
-            },
-            "outputs": [],
-            "source": [
-                "from langchain_benchmarks import clone_public_dataset, download_public_dataset"
-            ]
-        },
-        {
-            "cell_type": "markdown",
-            "id": "18ee0f96-e5c4-4ae9-aebf-7d8b88c51662",
-            "metadata": {},
-            "source": [
-                "Let's first download the dataset to the local file system"
-            ]
-        },
-        {
-            "cell_type": "code",
-            "execution_count": 2,
-            "id": "58b94f6d-0c91-4361-9b22-f758ffaa150a",
-            "metadata": {
-                "tags": []
-            },
-            "outputs": [
-                {
-                    "name": "stdout",
-                    "output_type": "stream",
-                    "text": [
-                        "Fetching examples...\n"
-                    ]
-                },
-                {
-                    "data": {
-                        "application/vnd.jupyter.widget-view+json": {
-                            "model_id": "5a2fad8c0c3549ec96a3b38fe8a002b0",
-                            "version_major": 2,
-                            "version_minor": 0
-                        },
-                        "text/plain": [
-                            "  0%|          | 0/21 [00:00<?, ?it/s]"
-                        ]
-                    },
-                    "metadata": {},
-                    "output_type": "display_data"
-                },
-                {
-                    "name": "stdout",
-                    "output_type": "stream",
-                    "text": [
-                        "Done fetching examples.\n"
-                    ]
-                }
-            ],
-            "source": [
-                "download_public_dataset(\n",
-                "    \"https://smith.langchain.com/public/452ccafc-18e1-4314-885b-edd735f17b9d/examples\"\n",
-                ")"
-            ]
-        },
-        {
-            "cell_type": "markdown",
-            "id": "841db832-b0d3-4fd1-8531-1154ec9b3caa",
-            "metadata": {},
-            "source": [
-                "we can take a look at the first two examples"
-            ]
-        },
-        {
-            "cell_type": "code",
-            "execution_count": 3,
-            "id": "664e90fc-af84-4c5f-a3dd-5d9ffe649650",
-            "metadata": {
-                "tags": []
-            },
-            "outputs": [
-                {
-                    "name": "stdout",
-                    "output_type": "stream",
-                    "text": [
-                        "[\n",
-                        "  {\n",
-                        "    \"created_at\": \"2023-11-15T15:26:53.511629\",\n",
-                        "    \"dataset_id\": \"9f73165c-d333-4d14-8f59-bd7eede5db08\",\n",
-                        "    \"id\": \"0703a989-2693-4039-a1f6-7281fc1b4cb0\",\n",
-                        "    \"inputs\": {\n",
-                        "      \"question\": \"do bob and alice live in the same city?\"\n",
-                        "    },\n",
-                        "    \"modified_at\": \"2023-11-15T15:26:53.511629\",\n",
-                        "    \"outputs\": {\n",
-                        "      \"expected_steps\": [\n",
-                        "        \"find_users_by_name\",\n",
-                        "        \"get_user_location\",\n",
-                        "        \"get_city_for_location\",\n",
-                        "        \"get_user_location\",\n",
-                        "        \"get_city_for_location\"\n",
-                        "      ],\n",
-                        "      \"order_matters\": false,\n",
-                        "      \"reference\": \"no\"\n",
-                        "    },\n",
-                        "    \"runs\": []\n",
-                        "  },\n",
-                        "  {\n",
-                        "    \"created_at\": \"2023-11-15T15:26:53.491359\",\n",
-                        "    \"dataset_id\": \"9f73165c-d333-4d14-8f59-bd7eede5db08\",\n",
-                        "    \"id\": \"b258b95a-9524-4da7-b758-c5481109322d\",\n",
-                        "    \"inputs\": {\n",
-                        "      \"question\": \"Is it likely that Donna is outside with an umbrella at this time?\"\n",
-                        "    },\n",
-                        "    \"modified_at\": \"2023-11-15T15:26:53.491359\",\n",
-                        "    \"outputs\": {\n",
-                        "      \"expected_steps\": [\n",
-                        "        \"find_users_by_name\",\n",
-                        "        \"get_user_location\",\n",
-                        "        \"get_current_time_for_location\",\n",
-                        "        \"get_current_weather_for_location\"\n",
-                        "      ],\n",
-                        "      \"order_matters\": false,\n",
-                        "      \"reference\": \"yes\"\n",
-                        "    },\n",
-                        "    \"runs\": []\n",
-                        "  }\n",
-                        "]\n"
-                    ]
-                }
-            ],
-            "source": [
-                "import json\n",
-                "\n",
-                "with open(\"./e95d45da-aaa3-44b3-ba2b-7c15ff6e46f5.json\", \"r\", encoding=\"utf-8\") as f:\n",
-                "    print(json.dumps(json.load(f)[:2], indent=2, sort_keys=True))"
-            ]
-        },
-        {
-            "cell_type": "markdown",
-            "id": "2c6cf01f-466b-406d-b4c7-2395747780fd",
-            "metadata": {},
-            "source": [
-                "We can also clone the dataset to our local tenant"
-            ]
-        },
-        {
-            "cell_type": "markdown",
-            "id": "e4dea4df-2f1c-436b-a71c-49ffb2295ccc",
-            "metadata": {},
-            "source": [
-                "Executing this command will clone the dataset to your own LangSmith tenant. \n",
-                "For this to work you must have a [LangSmith account](https://smith.langchain.com/) set up."
-            ]
-        },
-        {
-            "cell_type": "code",
-            "execution_count": null,
-            "metadata": {},
-            "outputs": [],
-            "source": [
-                "import os\n",
-                "\n",
-                "# Get from https://smith.langchain.com/settings\n",
-                "os.environ[\"LANGCHAIN_API_KEY\"] = \"ls_...\""
-            ]
-        },
-        {
-            "cell_type": "code",
-            "execution_count": null,
-            "id": "18d0b905-2a6a-4752-a7cb-8653bd9049e3",
-            "metadata": {
-                "tags": []
-            },
-            "outputs": [],
-            "source": [
-                "clone_public_dataset(\n",
-                "    \"https://smith.langchain.com/public/452ccafc-18e1-4314-885b-edd735f17b9d/examples\",\n",
-                "    dataset_name=\"Agent Dataset\",\n",
-                ")"
-            ]
-        }
-    ],
-    "metadata": {
-        "kernelspec": {
-            "display_name": "Python 3 (ipykernel)",
-            "language": "python",
-            "name": "python3"
-        },
-        "language_info": {
-            "codemirror_mode": {
-                "name": "ipython",
-                "version": 3
-            },
-            "file_extension": ".py",
-            "mimetype": "text/x-python",
-            "name": "python",
-            "nbconvert_exporter": "python",
-            "pygments_lexer": "ipython3",
-            "version": "3.11.2"
-        }
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "033684fb-65b2-4586-a959-68c614741ca2",
+   "metadata": {},
+   "source": [
+    "# Datasets\n",
+    "\n",
+    "Here, we'll see how to work with LangSmith datasets."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "474292e6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%pip install -U langchain-benchmarks"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "6d272fbf-710e-4a49-a0da-67e010541905",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "from langchain_benchmarks import clone_public_dataset, download_public_dataset"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "18ee0f96-e5c4-4ae9-aebf-7d8b88c51662",
+   "metadata": {},
+   "source": [
+    "Let's first download the dataset to the local file system"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "58b94f6d-0c91-4361-9b22-f758ffaa150a",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Fetching examples...\n"
+     ]
    },
-    "nbformat": 4,
-    "nbformat_minor": 5
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "5a2fad8c0c3549ec96a3b38fe8a002b0",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/21 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Done fetching examples.\n"
+     ]
+    }
+   ],
+   "source": [
+    "download_public_dataset(\n",
+    "    \"https://smith.langchain.com/public/452ccafc-18e1-4314-885b-edd735f17b9d/examples\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "841db832-b0d3-4fd1-8531-1154ec9b3caa",
+   "metadata": {},
+   "source": [
+    "We can take a look at the first two examples"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "664e90fc-af84-4c5f-a3dd-5d9ffe649650",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[\n",
+      "  {\n",
+      "    \"created_at\": \"2023-11-15T15:26:53.511629\",\n",
+      "    \"dataset_id\": \"9f73165c-d333-4d14-8f59-bd7eede5db08\",\n",
+      "    \"id\": \"0703a989-2693-4039-a1f6-7281fc1b4cb0\",\n",
+      "    \"inputs\": {\n",
+      "      \"question\": \"do bob and alice live in the same city?\"\n",
+      "    },\n",
+      "    \"modified_at\": \"2023-11-15T15:26:53.511629\",\n",
+      "    \"outputs\": {\n",
+      "      \"expected_steps\": [\n",
+      "        \"find_users_by_name\",\n",
+      "        \"get_user_location\",\n",
+      "        \"get_city_for_location\",\n",
+      "        \"get_user_location\",\n",
+      "        \"get_city_for_location\"\n",
+      "      ],\n",
+      "      \"order_matters\": false,\n",
+      "      \"reference\": \"no\"\n",
+      "    },\n",
+      "    \"runs\": []\n",
+      "  },\n",
+      "  {\n",
+      "    \"created_at\": \"2023-11-15T15:26:53.491359\",\n",
+      "    \"dataset_id\": \"9f73165c-d333-4d14-8f59-bd7eede5db08\",\n",
+      "    \"id\": \"b258b95a-9524-4da7-b758-c5481109322d\",\n",
+      "    \"inputs\": {\n",
+      "      \"question\": \"Is it likely that Donna is outside with an umbrella at this time?\"\n",
+      "    },\n",
+      "    \"modified_at\": \"2023-11-15T15:26:53.491359\",\n",
+      "    \"outputs\": {\n",
+      "      \"expected_steps\": [\n",
+      "        \"find_users_by_name\",\n",
+      "        \"get_user_location\",\n",
+      "        \"get_current_time_for_location\",\n",
+      "        \"get_current_weather_for_location\"\n",
+      "      ],\n",
+      "      \"order_matters\": false,\n",
+      "      \"reference\": \"yes\"\n",
+      "    },\n",
+      "    \"runs\": []\n",
+      "  }\n",
+      "]\n"
+     ]
+    }
+   ],
+   "source": [
+    "import json\n",
+    "\n",
+    "with open(\"./e95d45da-aaa3-44b3-ba2b-7c15ff6e46f5.json\", \"r\", encoding=\"utf-8\") as f:\n",
+    "    print(json.dumps(json.load(f)[:2], indent=2, sort_keys=True))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2c6cf01f-466b-406d-b4c7-2395747780fd",
+   "metadata": {},
+   "source": [
+    "We can also clone the dataset to our local tenant"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e4dea4df-2f1c-436b-a71c-49ffb2295ccc",
+   "metadata": {},
+   "source": [
+    "Executing this command will clone the dataset to your own LangSmith tenant. \n",
+    "For this to work you must have a [LangSmith account](https://smith.langchain.com/) set up."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7eb38ea6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "\n",
+    "# Get from https://smith.langchain.com/settings\n",
+    "os.environ[\"LANGCHAIN_API_KEY\"] = \"ls_...\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "18d0b905-2a6a-4752-a7cb-8653bd9049e3",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "clone_public_dataset(\n",
+    "    \"https://smith.langchain.com/public/452ccafc-18e1-4314-885b-edd735f17b9d/examples\",\n",
+    "    dataset_name=\"Agent Dataset\",\n",
+    ")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
 }
@@ -688,8 +688,6 @@
   },
   "outputs": [],
   "source": [
-    "import pandas as pd\n",
-    "\n",
    "df = test_run.to_dataframe().join(claude_test_run.to_dataframe(), rsuffix=\"_claude\")"
   ]
  },
@@ -14,7 +14,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 1,
   "id": "86912590-a90a-4351-8ab4-89192cdee1e7",
   "metadata": {},
   "outputs": [
@@ -26,19 +26,24 @@
       "<tr><th>Name            </th><th>Type          </th><th>Dataset ID                                                                                                                                                 </th><th>Description  </th></tr>\n",
       "</thead>\n",
       "<tbody>\n",
-       "<tr><td>Email Extraction</td><td>ExtractionTask</td><td><a href=\"https://smith.langchain.com/public/36bdfe7d-3cd1-4b36-b957-d12d95810a2b/d\" target=\"_blank\" rel=\"noopener\">36bdfe7d-3cd1-4b36-b957-d12d95810a2b</a></td><td>A dataset of 42 real emails deduped from a spam folder, with semantic HTML tags removed, as well as a script for initial extraction and formatting of other emails from an arbitrary .mbox file like the one exported by Gmail.\n",
+       "<tr><td>Email Extraction</td><td>ExtractionTask</td><td><a href=\"https://smith.langchain.com/public/a1742786-bde5-4f51-a1d8-e148e5251ddb/d\" target=\"_blank\" rel=\"noopener\">a1742786-bde5-4f51-a1d8-e148e5251ddb</a></td><td>A dataset of 42 real emails deduped from a spam folder, with semantic HTML tags removed, as well as a script for initial extraction and formatting of other emails from an arbitrary .mbox file like the one exported by Gmail.\n",
       "\n",
       "Some additional cleanup of the data was done by hand after the initial pass.\n",
       "\n",
       "See https://github.com/jacoblee93/oss-model-extraction-evals.              </td></tr>\n",
+       "<tr><td>Chat Extraction </td><td>ExtractionTask</td><td><a href=\"https://smith.langchain.com/public/00f4444c-9460-4a82-b87a-f50096f1cfef/d\" target=\"_blank\" rel=\"noopener\">00f4444c-9460-4a82-b87a-f50096f1cfef</a></td><td>A dataset meant to test the ability of an LLM to extract and infer\n",
+       "structured information from a dialogue. The dialogue is between a user and a support\n",
+       "engineer. Outputs should be structured as a JSON object and test both the ability\n",
+       "of the LLM to correctly structure the information and its ability to perform simple \n",
+       "classification tasks.              </td></tr>\n",
       "</tbody>\n",
       "</table>"
      ],
      "text/plain": [
-       "Registry(tasks=[ExtractionTask(name='Email Extraction', dataset_id='https://smith.langchain.com/public/36bdfe7d-3cd1-4b36-b957-d12d95810a2b/d', description='A dataset of 42 real emails deduped from a spam folder, with semantic HTML tags removed, as well as a script for initial extraction and formatting of other emails from an arbitrary .mbox file like the one exported by Gmail.\\n\\nSome additional cleanup of the data was done by hand after the initial pass.\\n\\nSee https://github.com/jacoblee93/oss-model-extraction-evals.\\n    ', schema=<class 'langchain_benchmarks.extraction.tasks.email_task.Email'>, instructions=ChatPromptTemplate(input_variables=['email'], messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='You are an expert researcher.')), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['email'], template='What can you tell me about the following email? Make sure to extract the question in the correct format. Here is the email:\\n ```\\n{email}\\n```'))]))])"
+       "Registry(tasks=[ExtractionTask(name='Email Extraction', dataset_id='https://smith.langchain.com/public/a1742786-bde5-4f51-a1d8-e148e5251ddb/d', description='A dataset of 42 real emails deduped from a spam folder, with semantic HTML tags removed, as well as a script for initial extraction and formatting of other emails from an arbitrary .mbox file like the one exported by Gmail.\\n\\nSome additional cleanup of the data was done by hand after the initial pass.\\n\\nSee https://github.com/jacoblee93/oss-model-extraction-evals.\\n    ', schema=<class 'langchain_benchmarks.extraction.tasks.email_task.Email'>, instructions=ChatPromptTemplate(input_variables=['input'], messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='You are an expert researcher.')), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], template='What can you tell me about the following email? Make sure to extract the question in the correct format. Here is the email:\\n ```\\n{input}\\n```'))])), ExtractionTask(name='Chat Extraction', dataset_id='https://smith.langchain.com/public/00f4444c-9460-4a82-b87a-f50096f1cfef/d', description='A dataset meant to test the ability of an LLM to extract and infer\\nstructured information from a dialogue. The dialogue is between a user and a support\\nengineer. Outputs should be structured as a JSON object and test both the ability\\nof the LLM to correctly structure the information and its ability to perform simple \\nclassification tasks.', schema=<class 'langchain_benchmarks.extraction.tasks.chat_extraction.schema.GenerateTicket'>, instructions=ChatPromptTemplate(input_variables=['dialogue'], messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='You are a helpdesk assistant responsible with extracting information and generating tickets. 
Dialogues are between a user and a support engineer.')), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['dialogue'], template='Generate a ticket for the following question-response pair:\\n<Dialogue>\\n{dialogue}\\n</Dialogue>'))]))])"
      ]
     },
-     "execution_count": 4,
+     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -85,9 +90,11 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 2,
   "id": "9c7865bd-8251-4579-85a3-f9085d96f497",
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
   "outputs": [],
   "source": [
    "from langchain.chat_models import ChatOpenAI\n",
@@ -115,7 +122,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.9.6"
+   "version": "3.11.2"
  }
 },
 "nbformat": 4,
@@ -286,7 +286,7 @@
    ")\n",
    "\n",
    "vectorstore = Chroma(\n",
-    "    collection_name=f\"lcbm-b-huggingface-gte-base\",\n",
+    "    collection_name=\"lcbm-b-huggingface-gte-base\",\n",
    "    embedding_function=embeddings,\n",
    "    persist_directory=\"./chromadb\",\n",
    ")\n",
@@ -0,0 +1,610 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "9fa3470d-9448-4792-9f65-6978fc58cf84",
+   "metadata": {},
+   "source": [
+    "# Multi-modal eval: Baseline\n",
+    "\n",
+    "`Multi-modal slide decks` is a public dataset of question-answer pairs derived from slide decks with visual content.\n",
+    "\n",
+    "The question-answer pairs are derived from the visual content in the decks, testing the ability of RAG to perform visual reasoning.\n",
+    "\n",
+    "As a baseline, we evaluate this dataset using a text-based RAG pipeline, below.\n",
+    "\n",
+    "This will not reason about visual content and will simply load the text from the slides. \n",
+    "\n",
+    "## Pre-requisites"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "47220461-d4e9-4f1d-9c57-672ca947ca0d",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# %pip install -U langchain langsmith langchain_benchmarks\n",
+    "# %pip install --quiet chromadb openai pypdf pandas"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "196de967-6de6-40da-aa75-e836923ab5e3",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "import getpass\n",
+    "import os\n",
+    "\n",
+    "os.environ[\"LANGCHAIN_ENDPOINT\"] = \"https://api.smith.langchain.com\"\n",
+    "env_vars = [\"LANGCHAIN_API_KEY\", \"OPENAI_API_KEY\"]\n",
+    "for var in env_vars:\n",
+    "    if var not in os.environ:\n",
+    "        os.environ[var] = getpass.getpass(prompt=f\"Enter your {var}: \")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "10da8e11-6288-4131-bd60-d5aa86928acc",
+   "metadata": {},
+   "source": [
+    "## Dataset\n",
+    "\n",
+    "We can browse the available LangChain benchmark datasets for retrieval."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "2ff97905-14a6-413c-99be-58b7a9c8d4c1",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "<thead>\n",
+       "<tr><th>Name                   </th><th>Type         </th><th>Dataset ID                                                                                                                                                 </th><th>Description  </th></tr>\n",
+       "</thead>\n",
+       "<tbody>\n",
+       "<tr><td>LangChain Docs Q&A     </td><td>RetrievalTask</td><td><a href=\"https://smith.langchain.com/public/452ccafc-18e1-4314-885b-edd735f17b9d/d\" target=\"_blank\" rel=\"noopener\">452ccafc-18e1-4314-885b-edd735f17b9d</a></td><td>Questions and answers based on a snapshot of the LangChain python docs.\n",
+       "\n",
+       "The environment provides the documents and the retriever information.\n",
+       "\n",
+       "Each example is composed of a question and reference answer.\n",
+       "\n",
+       "Success is measured based on the accuracy of the answer relative to the reference answer.\n",
+       "We also measure the faithfulness of the model's response relative to the retrieved documents (if any).              </td></tr>\n",
+       "<tr><td>Semi-structured Reports</td><td>RetrievalTask</td><td><a href=\"https://smith.langchain.com/public/c47d9617-ab99-4d6e-a6e6-92b8daf85a7d/d\" target=\"_blank\" rel=\"noopener\">c47d9617-ab99-4d6e-a6e6-92b8daf85a7d</a></td><td>Questions and answers based on PDFs containing tables and charts.\n",
+       "\n",
+       "The task provides the raw documents as well as factory methods to easily index them\n",
+       "and create a retriever.\n",
+       "\n",
+       "Each example is composed of a question and reference answer.\n",
+       "\n",
+       "Success is measured based on the accuracy of the answer relative to the reference answer.\n",
+       "We also measure the faithfulness of the model's response relative to the retrieved documents (if any).              </td></tr>\n",
+       "<tr><td>Multi-modal slide decks</td><td>RetrievalTask</td><td><a href=\"https://smith.langchain.com/public/40afc8e7-9d7e-44ed-8971-2cae1eb59731/d\" target=\"_blank\" rel=\"noopener\">40afc8e7-9d7e-44ed-8971-2cae1eb59731</a></td><td>This public dataset is a work-in-progress and will be extended over time.\n",
+       "        \n",
+       "Questions and answers based on slide decks containing visual tables and charts.\n",
+       "\n",
+       "Each example is composed of a question and reference answer.\n",
+       "\n",
+       "Success is measured based on the accuracy of the answer relative to the reference answer.              </td></tr>\n",
+       "</tbody>\n",
+       "</table>"
+      ],
+      "text/plain": [
+       "Registry(tasks=[RetrievalTask(name='LangChain Docs Q&A', dataset_id='https://smith.langchain.com/public/452ccafc-18e1-4314-885b-edd735f17b9d/d', description=\"Questions and answers based on a snapshot of the LangChain python docs.\\n\\nThe environment provides the documents and the retriever information.\\n\\nEach example is composed of a question and reference answer.\\n\\nSuccess is measured based on the accuracy of the answer relative to the reference answer.\\nWe also measure the faithfulness of the model's response relative to the retrieved documents (if any).\\n\", get_docs=<function load_cached_docs at 0x104485800>, retriever_factories={'basic': <function _chroma_retriever_factory at 0x1360289a0>, 'parent-doc': <function _chroma_parent_document_retriever_factory at 0x136028a40>, 'hyde': <function _chroma_hyde_retriever_factory at 0x136028ae0>}, architecture_factories={'conversational-retrieval-qa': <function default_response_chain at 0x126ba2660>}), RetrievalTask(name='Semi-structured Reports', dataset_id='https://smith.langchain.com/public/c47d9617-ab99-4d6e-a6e6-92b8daf85a7d/d', description=\"Questions and answers based on PDFs containing tables and charts.\\n\\nThe task provides the raw documents as well as factory methods to easily index them\\nand create a retriever.\\n\\nEach example is composed of a question and reference answer.\\n\\nSuccess is measured based on the accuracy of the answer relative to the reference answer.\\nWe also measure the faithfulness of the model's response relative to the retrieved documents (if any).\\n\", get_docs=<function load_docs at 0x136029620>, retriever_factories={'basic': <function _chroma_retriever_factory at 0x1360296c0>, 'parent-doc': <function _chroma_parent_document_retriever_factory at 0x136029760>, 'hyde': <function _chroma_hyde_retriever_factory at 0x136029800>}, architecture_factories={}), RetrievalTask(name='Multi-modal slide decks', 
dataset_id='https://smith.langchain.com/public/40afc8e7-9d7e-44ed-8971-2cae1eb59731/d', description='This public dataset is a work-in-progress and will be extended over time.\\n        \\nQuestions and answers based on slide decks containing visual tables and charts.\\n\\nEach example is composed of a question and reference answer.\\n\\nSuccess is measured based on the accuracy of the answer relative to the reference answer.\\n', get_docs={}, retriever_factories={}, architecture_factories={})])"
+      ]
+     },
+     "execution_count": 1,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from langchain_benchmarks import clone_public_dataset, registry\n",
+    "\n",
+    "registry = registry.filter(Type=\"RetrievalTask\")\n",
+    "registry"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2fb7dc3d-28f1-4c28-b0d0-3784d04b81ce",
+   "metadata": {},
+   "source": [
+    "`Multi-modal slide decks` is the relevant dataset for our task."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "219a4141-4a5f-48e4-ae05-5a824e2193fd",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "<tbody>\n",
+       "<tr><td>Name                  </td><td>Multi-modal slide decks                                                                                                                                    </td></tr>\n",
+       "<tr><td>Type                  </td><td>RetrievalTask                                                                                                                                              </td></tr>\n",
+       "<tr><td>Dataset ID            </td><td><a href=\"https://smith.langchain.com/public/40afc8e7-9d7e-44ed-8971-2cae1eb59731/d\" target=\"_blank\" rel=\"noopener\">40afc8e7-9d7e-44ed-8971-2cae1eb59731</a></td></tr>\n",
+       "<tr><td>Description           </td><td>This public dataset is a work-in-progress and will be extended over time.\n",
+       "        \n",
+       "Questions and answers based on slide decks containing visual tables and charts.\n",
+       "\n",
+       "Each example is composed of a question and reference answer.\n",
+       "\n",
+       "Success is measured based on the accuracy of the answer relative to the reference answer.                                                                                                                                                            </td></tr>\n",
+       "<tr><td>Retriever Factories   </td><td>                                                                                                                                                           </td></tr>\n",
+       "<tr><td>Architecture Factories</td><td>                                                                                                                                                           </td></tr>\n",
+       "<tr><td>get_docs              </td><td>{}                                                                                                                                                         </td></tr>\n",
+       "</tbody>\n",
+       "</table>"
+      ],
+      "text/plain": [
+       "RetrievalTask(name='Multi-modal slide decks', dataset_id='https://smith.langchain.com/public/40afc8e7-9d7e-44ed-8971-2cae1eb59731/d', description='This public dataset is a work-in-progress and will be extended over time.\\n        \\nQuestions and answers based on slide decks containing visual tables and charts.\\n\\nEach example is composed of a question and reference answer.\\n\\nSuccess is measured based on the accuracy of the answer relative to the reference answer.\\n', get_docs={}, retriever_factories={}, architecture_factories={})"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "task = registry[\"Multi-modal slide decks\"]\n",
+    "task"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2d6569b5-e79a-41b7-9745-c2f8a1dd704e",
+   "metadata": {},
+   "source": [
+    "Clone the dataset so that it's available in our LangSmith datasets."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "d2caa086-9549-4c74-bba9-ba80d5a7b218",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Dataset Multi-modal slide decks already exists. Skipping.\n",
+      "You can access the dataset at https://smith.langchain.com/o/ebbaf2eb-769b-4505-aca2-d11de10372a4/datasets/08a29acb-5ad6-42ce-a482-574c9e2e5306.\n"
+     ]
+    }
+   ],
+   "source": [
+    "clone_public_dataset(task.dataset_id, dataset_name=task.name)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "bf350917-a1e5-46f4-81cd-c1678ab9220f",
+   "metadata": {},
+   "source": [
+    "Fetch the associated PDFs from remote cache for the dataset so that we can perform ingestion."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "99ce6afb-2317-4bc1-9faf-4f828095ad91",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "from langchain_benchmarks.rag.tasks.multi_modal_slide_decks import get_file_names\n",
+    "\n",
+    "file_names = list(get_file_names())  # PosixPath"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "848a4cdb-6c08-4c01-81ce-16ab83a7fdff",
+   "metadata": {},
+   "source": [
+    "## Load\n",
+    "\n",
+    "Load and split the files for indexing."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "6ce85810-98a7-406e-b44e-ce860ac35986",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "There are 98 text elements in DDOG_Q3_earnings_deck.pdf\n"
+     ]
+    }
+   ],
+   "source": [
+    "from langchain.document_loaders import PyPDFLoader\n",
+    "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
+    "\n",
+    "\n",
+    "def load_and_split(file):\n",
+    "    \"\"\"\n",
+    "    Load and split PDF files\n",
+    "    :param file: PosixPath path for pdf\n",
+    "    :return: A list of text chunks\n",
+    "    \"\"\"\n",
+    "\n",
+    "    loader = PyPDFLoader(str(file))\n",
+    "    pdf_pages = loader.load()\n",
+    "\n",
+    "    text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(\n",
+    "        chunk_size=100, chunk_overlap=50\n",
+    "    )\n",
+    "\n",
+    "    # Get chunks\n",
+    "    docs = text_splitter.split_documents(pdf_pages)\n",
+    "    texts = [d.page_content for d in docs]\n",
+    "    print(f\"There are {len(texts)} text elements in {file.name}\")\n",
+    "    return texts\n",
+    "\n",
+    "\n",
+    "texts = []\n",
+    "for fi in file_names:\n",
+    "    texts.extend(load_and_split(fi))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "eb01925d-b7d1-47a1-bd90-805178d3c4a9",
+   "metadata": {},
+   "source": [
+    "## Index\n",
+    "\n",
+    "Embed (OpenAIEmbeddings) and store splits in a vectorstore (Chroma)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "ceb31f71-45fb-4b12-bc1c-31981de334bb",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "from langchain.embeddings import OpenAIEmbeddings\n",
+    "from langchain.vectorstores import Chroma\n",
+    "\n",
+    "vectorstore_baseline = Chroma.from_texts(\n",
+    "    texts=texts, collection_name=\"baseline-multi-modal\", embedding=OpenAIEmbeddings()\n",
+    ")\n",
+    "\n",
+    "retriever_baseline = vectorstore_baseline.as_retriever()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e6dcbb01-f480-456d-b972-c732eb26c393",
+   "metadata": {},
+   "source": [
+    "## RAG\n",
+    "\n",
+    "Create a pipeline for retrieval of relevant chunks based on semantic similarity to the input question.\n",
+    "\n",
+    "Pass the retrieved text chunks to GPT-4 for answer synthesis."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "ea233664-e527-42f1-a820-0c2271e16c20",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "from langchain.chat_models import ChatOpenAI\n",
+    "from langchain.prompts import ChatPromptTemplate\n",
+    "from langchain.schema.output_parser import StrOutputParser\n",
+    "from langchain.schema.runnable import RunnablePassthrough\n",
+    "\n",
+    "\n",
+    "def rag_chain(retriever):\n",
+    "    \"\"\"\n",
+    "    RAG pipeline for the indexed presentations\n",
+    "    :param retriever: Retriever that returns the documents relevant to the input question\n",
+    "    \"\"\"\n",
+    "\n",
+    "    # Prompt template\n",
+    "    template = \"\"\"Answer the question based only on the following context, which can include text and tables:\n",
+    "    {context}\n",
+    "    Question: {question}\n",
+    "    \"\"\"\n",
+    "    prompt = ChatPromptTemplate.from_template(template)\n",
+    "\n",
+    "    # LLM\n",
+    "    model = ChatOpenAI(temperature=0, model=\"gpt-4\")\n",
+    "\n",
+    "    # RAG pipeline\n",
+    "    chain = (\n",
+    "        {\n",
+    "            \"context\": retriever | (lambda x: \"\\n\\n\".join([i.page_content for i in x])),\n",
+    "            \"question\": RunnablePassthrough(),\n",
+    "        }\n",
+    "        | prompt\n",
+    "        | model\n",
+    "        | StrOutputParser()\n",
+    "    )\n",
+    "    return chain\n",
+    "\n",
+    "\n",
+    "# Create RAG chain\n",
+    "chain = rag_chain(retriever_baseline)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "95df1446-143d-4f4c-a15b-2a379266d8bf",
+   "metadata": {},
+   "source": [
+    "## Eval\n",
+    "\n",
+    "Run evaluation on our dataset:\n",
+    "\n",
+    "* `task.name` is the dataset of QA pairs that we cloned\n",
+    "* `eval_config` specifies the [LangSmith evaluator](https://docs.smith.langchain.com/evaluation/evaluator-implementations#correctness-qa-evaluation) for our dataset, which will use GPT-4 as a grader\n",
+    "* The grader will evaluate the chain-generated answer to each question relative to ground truth"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "479ce09d-642e-4b3b-9e4e-e9c2b7f0e9ca",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "View the evaluation results for project '866f-baseline' at:\n",
+      "https://smith.langchain.com/o/ebbaf2eb-769b-4505-aca2-d11de10372a4/datasets/08a29acb-5ad6-42ce-a482-574c9e2e5306/compare?selectedSessions=30199d47-50d7-4c5c-a55a-e74157e05951\n",
+      "\n",
+      "View all tests for Dataset Multi-modal slide decks at:\n",
+      "https://smith.langchain.com/o/ebbaf2eb-769b-4505-aca2-d11de10372a4/datasets/08a29acb-5ad6-42ce-a482-574c9e2e5306\n",
+      "[------------------------------------------------->] 10/10"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<h3>Experiment Results:</h3>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>output</th>\n",
+       "      <th>feedback.COT Contextual Accuracy</th>\n",
+       "      <th>error</th>\n",
+       "      <th>execution_time</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>count</th>\n",
+       "      <td>10</td>\n",
+       "      <td>10.000000</td>\n",
+       "      <td>0</td>\n",
+       "      <td>10.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>unique</th>\n",
+       "      <td>10</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>top</th>\n",
+       "      <td>Datadog has 20 total customers.</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>freq</th>\n",
+       "      <td>1</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>mean</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.200000</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>4.674478</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>std</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.421637</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.864273</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>min</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>3.307960</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>25%</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>4.113816</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>50%</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>4.700962</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>75%</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>5.018359</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>max</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>6.188082</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                                 output  feedback.COT Contextual Accuracy  \\\n",
+       "count                                10                         10.000000   \n",
+       "unique                               10                               NaN   \n",
+       "top     Datadog has 20 total customers.                               NaN   \n",
+       "freq                                  1                               NaN   \n",
+       "mean                                NaN                          0.200000   \n",
+       "std                                 NaN                          0.421637   \n",
+       "min                                 NaN                          0.000000   \n",
+       "25%                                 NaN                          0.000000   \n",
+       "50%                                 NaN                          0.000000   \n",
+       "75%                                 NaN                          0.000000   \n",
+       "max                                 NaN                          1.000000   \n",
+       "\n",
+       "       error  execution_time  \n",
+       "count      0       10.000000  \n",
+       "unique     0             NaN  \n",
+       "top      NaN             NaN  \n",
+       "freq     NaN             NaN  \n",
+       "mean     NaN        4.674478  \n",
+       "std      NaN        0.864273  \n",
+       "min      NaN        3.307960  \n",
+       "25%      NaN        4.113816  \n",
+       "50%      NaN        4.700962  \n",
+       "75%      NaN        5.018359  \n",
+       "max      NaN        6.188082  "
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "import uuid\n",
+    "\n",
+    "from langchain.smith import RunEvalConfig\n",
+    "from langsmith.client import Client\n",
+    "\n",
+    "# Evaluator configuration\n",
+    "client = Client()\n",
+    "eval_config = RunEvalConfig(\n",
+    "    evaluators=[\"cot_qa\"],\n",
+    ")\n",
+    "\n",
+    "# Experiments\n",
+    "chain_map = {\n",
+    "    \"baseline\": chain,\n",
+    "}\n",
+    "\n",
+    "# Run evaluation\n",
+    "run_id = uuid.uuid4().hex[:4]\n",
+    "test_runs = {}\n",
+    "for project_name, chain in chain_map.items():\n",
+    "    test_runs[project_name] = client.run_on_dataset(\n",
+    "        dataset_name=task.name,\n",
+    "        llm_or_chain_factory=lambda: (lambda x: x[\"Question\"]) | chain,\n",
+    "        evaluation=eval_config,\n",
+    "        verbose=True,\n",
+    "        project_name=f\"{run_id}-{project_name}\",\n",
+    "        project_metadata={\"chain\": project_name},\n",
+    "    )"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
@@ -412,8 +412,6 @@
    }
   ],
   "source": [
-    "from functools import partial\n",
-    "\n",
    "from langsmith.client import Client\n",
    "\n",
    "from langchain_benchmarks.rag import get_eval_config\n",
@@ -0,0 +1,317 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "b6856d11-40d5-48e5-9eb3-423f479933a1",
+   "metadata": {},
+   "source": [
+    "# Semi-structured eval: Chunk size tuning\n",
+    "\n",
+    "`Semi-structured Reports` is a public dataset that contains question-answer pairs from documents with text and tables.\n",
+    "\n",
+    "The question-answer pairs are derived from the tables as well as some of the paragraphs in the docs.\n",
+    "\n",
+    "We evaluate the performance of various chunk sizes with RAG. \n",
+    "\n",
+    "## Pre-requisites"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c387b660-967d-4d2f-8c38-af125f7b7a8b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# %pip install -U langchain langsmith langchain_benchmarks\n",
+    "# %pip install --quiet chromadb openai pypdf tiktoken fireworks-ai"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e9e332b1-7da4-47fc-8d9a-4d65fbfc6953",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import getpass\n",
+    "import os\n",
+    "\n",
+    "os.environ[\"LANGCHAIN_ENDPOINT\"] = \"https://api.smith.langchain.com\"\n",
+    "env_vars = [\"LANGCHAIN_API_KEY\", \"OPENAI_API_KEY\", \"FIREWORKS_API_KEY\"]\n",
+    "for var in env_vars:\n",
+    "    if var not in os.environ:\n",
+    "        os.environ[var] = getpass.getpass(prompt=f\"Enter your {var}: \")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b1a19f23-468c-4aeb-a0e9-0765a85f3f0b",
+   "metadata": {},
+   "source": [
+    "## Dataset\n",
+    "\n",
+    "Fetch the associated PDFs from remote cache for the dataset so that we can perform ingestion."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "a94d9aa5-acd8-4032-ad8f-f995dec4d13c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "\n",
+    "from langchain_benchmarks import clone_public_dataset, registry\n",
+    "from langchain_benchmarks.rag.tasks.semi_structured_reports import get_file_names\n",
+    "\n",
+    "# Task\n",
+    "task = registry[\"Semi-structured Reports\"]\n",
+    "\n",
+    "# Files used\n",
+    "paths = list(get_file_names())\n",
+    "files = [str(p) for p in paths]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "12b52285-358c-4752-ad6b-25ffb629e309",
+   "metadata": {},
+   "source": [
+    "Clone the dataset so that it's available in our LangSmith datasets."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "1ecca7af-c3e7-42d1-97dd-c7d9777207cb",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Dataset Semi-structured Reports already exists. Skipping.\n",
+      "You can access the dataset at https://smith.langchain.com/o/1fa8b1f4-fcb9-4072-9aa9-983e35ad61b8/datasets/6549a3a5-1cb9-463f-951d-0166cb9cf45c.\n"
+     ]
+    }
+   ],
+   "source": [
+    "clone_public_dataset(task.dataset_id, dataset_name=task.name)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "64f37705-0190-4b7a-9d88-63bfd904fbd9",
+   "metadata": {},
+   "source": [
+    "## Load and index\n",
+    "\n",
+    "We load each file, split it, embed with `OpenAIEmbeddings`, and create an index with `Chroma` vectorstore."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7eb9e333-77e6-48f9-b221-9bded023b978",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain.chat_models import ChatFireworks, ChatOpenAI\n",
+    "from langchain.document_loaders import PyPDFLoader\n",
+    "from langchain.embeddings import OpenAIEmbeddings\n",
+    "from langchain.prompts import ChatPromptTemplate\n",
+    "from langchain.schema.output_parser import StrOutputParser\n",
+    "from langchain.schema.runnable import RunnablePassthrough\n",
+    "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
+    "from langchain.vectorstores import Chroma\n",
+    "\n",
+    "\n",
+    "def load_and_split(file, token_count, split_document=True):\n",
+    "    \"\"\"\n",
+    "    Load and optionally split PDF files.\n",
+    "\n",
+    "    Args:\n",
+    "        file (str): File path.\n",
+    "        token_count (int): Token count for splitting.\n",
+    "        split_document (bool): Flag for splitting or returning pages.\n",
+    "    \"\"\"\n",
+    "\n",
+    "    loader = PyPDFLoader(file)\n",
+    "    pdf_pages = loader.load()\n",
+    "\n",
+    "    if split_document:\n",
+    "        text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(\n",
+    "            chunk_size=token_count, chunk_overlap=50\n",
+    "        )\n",
+    "\n",
+    "        docs = text_splitter.split_documents(pdf_pages)\n",
+    "        texts = [d.page_content for d in docs]\n",
+    "    else:\n",
+    "        texts = [d.page_content for d in pdf_pages]\n",
+    "\n",
+    "    print(f\"There are {len(texts)} text elements\")\n",
+    "    return texts\n",
+    "\n",
+    "\n",
+    "def load_files(files, token_count, split_document):\n",
+    "    \"\"\"\n",
+    "    Load files.\n",
+    "\n",
+    "    Args:\n",
+    "        files (list): List of file names.\n",
+    "        dir (str): Directory path.\n",
+    "        token_count (int): Token count for splitting.\n",
+    "        split_document (bool): Flag for splitting documents.\n",
+    "    \"\"\"\n",
+    "\n",
+    "    texts = []\n",
+    "    for fi in files:\n",
+    "        texts.extend(load_and_split(fi, token_count, split_document))\n",
+    "    return texts\n",
+    "\n",
+    "\n",
+    "def make_retriever(texts, expt):\n",
+    "    \"\"\"\n",
+    "    Make vector store.\n",
+    "\n",
+    "    Args:\n",
+    "        texts (list): List of texts.\n",
+    "        expt (str): Experiment name.\n",
+    "    \"\"\"\n",
+    "    vectorstore = Chroma.from_texts(\n",
+    "        texts=texts, collection_name=expt, embedding=OpenAIEmbeddings()\n",
+    "    )\n",
+    "    retriever = vectorstore.as_retriever()\n",
+    "    return retriever\n",
+    "\n",
+    "\n",
+    "def rag_chain(retriever, llm):\n",
+    "    \"\"\"\n",
+    "    RAG chain.\n",
+    "\n",
+    "    Args:\n",
+    "        retriever: The retriever to use.\n",
+    "        llm: The llm to use.\n",
+    "    \"\"\"\n",
+    "\n",
+    "    # Prompt template\n",
+    "    template = \"\"\"Answer the question based only on the following context, which can include text and tables:\n",
+    "    {context}\n",
+    "    Question: {question}\n",
+    "    \"\"\"\n",
+    "    prompt = ChatPromptTemplate.from_template(template)\n",
+    "\n",
+    "    # LLM\n",
+    "    if llm == \"mixtral\":\n",
+    "        model = ChatFireworks(\n",
+    "            model=\"accounts/fireworks/models/mixtral-8x7b-instruct\", temperature=0\n",
+    "        )\n",
+    "    else:\n",
+    "        model = ChatOpenAI(temperature=0, model=\"gpt-4\")\n",
+    "\n",
+    "    # RAG pipeline\n",
+    "    chain = (\n",
+    "        {\n",
+    "            \"context\": retriever | (lambda x: \"\\n\\n\".join([i.page_content for i in x])),\n",
+    "            \"question\": RunnablePassthrough(),\n",
+    "        }\n",
+    "        | prompt\n",
+    "        | model\n",
+    "        | StrOutputParser()\n",
+    "    )\n",
+    "    return chain\n",
+    "\n",
+    "\n",
+    "# Experiment configurations\n",
+    "experiments = [\n",
+    "    (None, False, \"page_split-oai\", \"oai\"),\n",
+    "    (50, True, \"50_tok_split-oai\", \"oai\"),\n",
+    "    (100, True, \"100_tok_split-oai\", \"oai\"),\n",
+    "    (250, True, \"250_tok_split-oai\", \"oai\"),\n",
+    "    (250, True, \"250_tok_split-mixtral\", \"mixtral\"),\n",
+    "]\n",
+    "\n",
+    "# Run\n",
+    "stor_chain = {}\n",
+    "for token_count, split_document, expt, llm in experiments:\n",
+    "    texts = load_files(files, token_count, split_document)\n",
+    "    retriever = make_retriever(texts, expt)\n",
+    "    stor_chain[expt] = rag_chain(retriever, llm)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "29515a91-3cb1-41bd-a2d4-6cf6ce7806c2",
+   "metadata": {},
+   "source": [
+    "## Eval\n",
+    "\n",
+    "Run eval on our dataset, `Semi-structured Reports`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "edd2e7f9-b3f6-4885-bf05-96f1c1758b20",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import uuid\n",
+    "\n",
+    "from langchain.smith import RunEvalConfig\n",
+    "from langsmith.client import Client\n",
+    "\n",
+    "# Config\n",
+    "client = Client()\n",
+    "eval_config = RunEvalConfig(\n",
+    "    evaluators=[\"cot_qa\"],\n",
+    ")\n",
+    "\n",
+    "# Experiments\n",
+    "chain_map = {\n",
+    "    \"page_split\": stor_chain[\"page_split-oai\"],\n",
+    "    \"baseline-50-tok\": stor_chain[\"50_tok_split-oai\"],\n",
+    "    \"baseline-100-tok\": stor_chain[\"100_tok_split-oai\"],\n",
+    "    \"baseline-250-tok\": stor_chain[\"250_tok_split-oai\"],\n",
+    "    \"baseline-250-tok-mixtral\": stor_chain[\"250_tok_split-mixtral\"],\n",
+    "}\n",
+    "\n",
+    "# Run evaluation\n",
+    "run_id = uuid.uuid4().hex[:4]\n",
+    "test_runs = {}\n",
+    "for project_name, chain in chain_map.items():\n",
+    "    test_runs[project_name] = client.run_on_dataset(\n",
+    "        dataset_name=task.name,\n",
+    "        llm_or_chain_factory=lambda: (lambda x: x[\"question\"]) | chain,\n",
+    "        evaluation=eval_config,\n",
+    "        verbose=True,\n",
+    "        project_name=f\"{run_id}-{project_name}\",\n",
+    "        project_metadata={\"chain\": project_name},\n",
+    "    )"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.16"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Leonid Ganeline	a0ea197b28	updated `Makefile` (#153 ) Cleaned up `makefile`	2023-12-20 09:24:06 -05:00
Eugene Yurtsev	74b11de9ae	Update evaluators (#157 ) Update to remove user warning	2023-12-19 17:30:24 -05:00
William FH	c2b70436e5	Add runnable agent factory (#156 ) Not sure if it's "easier" but it involves less thinking about benchmarking abstractions	2023-12-19 13:39:08 -08:00
Eugene Yurtsev	af9a9800e5	Register the new dataset (#155 ) Register the new dataset	2023-12-19 15:01:38 -05:00
Eugene Yurtsev	e7bac2cbb8	Change multiverse math to multiverse math (tiny) and add another multiverse math set (#154 ) * This PR adds a multiverse math consisting of 20 questions. * Question about rounding has been removed to simplify evaluation.	2023-12-19 14:57:37 -05:00
Eugene Yurtsev	d595394243	Update Math Evaluator (#152 ) Try another evaluator that ignores the question	2023-12-19 13:52:13 -05:00
William FH	27efb7b53c	Add Gemini (#151 )	2023-12-18 20:27:59 -08:00
William FH	0c1fe17417	Add to toc (#149 )	2023-12-18 18:10:41 -08:00
William FH	3f308e7ae4	Update Benchmark (#148 ) - Ran all benchmarks again - Add options to overwrite or archive existing test runs - Updated some of the aggregation logic at the end	2023-12-18 17:29:03 -08:00
William FH	c85a17bac2	Include assistant factory in benchmark all (#147 )	2023-12-18 13:42:36 -08:00
Eugene Yurtsev	a91672f619	Update notebooks (#146 ) Update notebooks for tool usage - Use task.get_eval_config() - Add benchmark all to introduction	2023-12-18 12:03:20 -05:00
Eugene Yurtsev	81daa09d05	Update example in multiverse math (#145 ) Update example	2023-12-18 11:19:40 -05:00
William FH	07be2e4555	OAI Assistant (#144 ) Similar to our functions agent	2023-12-17 10:35:10 -08:00
Eugene Yurtsev	4a642d576a	Update openai function factory, update benchmark all (#143 ) * Update open ai agent factory to be consistent with other factories. * Update benchmark all to add anthropic sdk.	2023-12-15 09:44:05 -05:00
William FH	8ee7108302	Run w/o langsmith (#137 )	2023-12-14 21:09:49 -08:00
William FH	a9461af96f	Parser Fix (#142 ) Needs to be the tool parser not the functions parser	2023-12-14 21:09:38 -08:00
William FH	4d42a32342	🐶 (#136 )	2023-12-14 21:07:42 -08:00
Eugene Yurtsev	21add2715b	Add anthropic agent based on tool user repo (#139 ) For benchmarking, this is comparing against anthropic implementation.	2023-12-14 22:21:55 -05:00
William FH	3ded353c5a	Fix openai output parser used (#138 )	2023-12-14 18:19:41 -08:00
William FH	b619226480	Add Anyscale Model (#135 )	2023-12-14 15:32:16 -08:00
Eugene Yurtsev	612f9346c5	Update benchmark all notebook (#134 ) Benchmark all	2023-12-14 16:33:07 -05:00
Eugene Yurtsev	90bec45008	Version 0.0.10 (#133 ) Bump requirement on minimal langsmith client	2023-12-14 13:31:13 -05:00
Eugene Yurtsev	5157e30fe7	Update min langsmith client (#132 ) Update min langsmith client	2023-12-14 13:29:57 -05:00
Eugene Yurtsev	eb2d9e2b63	Update notebooks, model registry and make release (#131 ) see release notes	2023-12-14 13:25:13 -05:00
Eugene Yurtsev	09d214522f	Add version (#130 ) Add __version__ and lint	2023-12-14 12:20:27 -05:00
Eugene Yurtsev	8798735ea4	Adds custom agents to the langchain benchmarking repo (#120 ) * This PR adds code for running custom agents to the langchain benchmarking repo. * The agent code is good enough for experimentation / prototyping, but I don't think it's good enough for the langchain repo: -- The abstractions aren't fully implemented and aren't ready for production use -- but OK for research -- For production use, one may want to remove all the intermediate abstractions to keep the agent as simple as possible I was thinking initially of including this in a different repo, but I think it's over-complicating things, probably OK to include some reference implementations inside of langchain benchmarks.	2023-12-14 12:05:59 -05:00
William FH	7ed859c068	Add gemini mm examples (#125 )	2023-12-13 17:07:22 -08:00
Eugene Yurtsev	417e6faccf	Update fireworks models (#128 ) Update fireworks models	2023-12-13 15:31:15 -05:00
Eugene Yurtsev	aeae13ba63	remove with_rate_limit from public api (#127 ) Because it's not a runnable binding it breaks things like .bind etc, let's use it internally but not expose to users	2023-12-13 15:30:53 -05:00
Eugene Yurtsev	825d8ec9bb	Add __contains__ to model registry (#126 ) Code is already using `in` checks but they will fail since __contains__ isn't defined correctly	2023-12-13 15:24:34 -05:00
Eugene Yurtsev	44a5c3530a	Bump ruff fix up first party identity for import sorting (#124 ) Minor change to bump ruff and fix up config	2023-12-13 13:51:17 -05:00
Eugene Yurtsev	14de11a420	Add rate limiter (#121 ) This PR adds a simple rate limiter based on a token bucket. I would love to extend RunnableBinding with this, we just need to make sure there's no funny async vs. threading business. This should be sufficient for benchmarking for now.	2023-12-13 13:12:21 -05:00
Lance Martin	b15620ee9c	Minor clean, add Mixtral (#123 )	2023-12-13 07:59:12 -08:00
Lance Martin	13e7f2df0a	Add semi-structured eval (#83 ) 1/ Create datasets for semi-structured eval 2/ Benchmark several methods	2023-12-12 14:04:31 -08:00
Eugene Yurtsev	888fce5060	Release 0.0.8 (#122 ) See release notes	2023-12-12 11:39:05 -05:00
Eugene Yurtsev	148a3e4f89	Benchmark all tool usage notebook (#118 ) * This PR adds a notebook that benchmarks all tool usage tasks. To make it easier to work with task registry: * Add `type` property to tasks. * Add __iter__ and __len__ and support for slices in __item__ for task registry.	2023-12-11 22:49:46 -05:00
Eugene Yurtsev	0e10f3227f	Add gpt-4 models (#117 ) Register gpt-4 models	2023-12-11 22:11:40 -05:00
William FH	b0667043ea	Move mixtral models (#115 )	2023-12-11 16:04:13 -08:00
William FH	bd5eac5abd	Add mixtral tool use examples (#114 )	2023-12-11 14:40:02 -08:00
William FH	dbb85200ac	Update evaluator (#113 )	2023-12-11 09:52:13 -08:00
Eugene Yurtsev	c1023a14b8	Tool Tasks: Add eval params to task definition (#112 ) This will make it easier to treat all tasks uniformly.	2023-12-11 09:52:02 -08:00
Eugene Yurtsev	8899acc989	Update model registry (#111 ) * Add more models * Fix path to language models * Notebook instantiates some of the models to test that it works	2023-12-08 22:52:02 -05:00
Eugene Yurtsev	c0e7f51626	Add Model Registry (#110 ) Add a model registry to make it easier to iterate through models for bench-marking purposes	2023-12-08 12:09:10 -05:00
Eugene Yurtsev	9f827eaca5	Update README.md (#108 )	2023-12-07 13:38:25 -05:00
Eugene Yurtsev	d9fc08b05c	Update README.md (#107 )	2023-12-07 13:34:55 -05:00
Lance Martin	8a5ba6d575	Minor cleanup to multi-modal embeddings docs (#105 )	2023-12-05 13:40:42 -08:00
William FH	8204930f2b	0.0.7 (#104 ) Adds the multimodal benchmark.	2023-12-05 13:14:44 -08:00
Lance Martin	013fe6a153	Multi modal RAG benchmark (#101 ) * Example notebooks for eval of multi-modal RAG w/ mm-embd and mv-retriever vs baseline top-k RAG --------- Co-authored-by: William Fu-Hinthorn <13333726+hinthornw@users.noreply.github.com>	2023-12-05 12:04:01 -08:00
William FH	01ffffd04c	Update Chat Extraction Notebook (#102 )	2023-12-03 18:28:51 -08:00
William FH	4ddbbc0ff8	Add Archived (#53 ) Moves the other tasks to the archive	2023-12-01 10:56:44 -08:00
William FH	5ffdbb5c4c	Add chat categorization dataset (#98 ) This task is meant to test a couple things: 1. Classification -> both on common things where it is expected to perform well (e.g., sentiment, toxicity -> which currently is always 0) 2. Structured json output -> the schema is nested, which confused some of the smaller 7b models i tested out but works fine for llama 32b code instruct (and OAI/anthropic) Includes a couple common things like enums.	2023-12-01 10:17:50 -08:00