mirror of
https://github.com/langchain-ai/langchain-benchmarks.git
synced 2026-07-01 01:37:54 -04:00
Compare commits
13 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 34cd281494 | |||
| 99cf03a50a | |||
| b36a339a65 | |||
| 442cb47fc9 | |||
| b7795c7df1 | |||
| ac161de968 | |||
| d91944bb07 | |||
| 8798bd3105 | |||
| 621eea5d93 | |||
| b6590a8745 | |||
| 458ffa70ea | |||
| ebe5c117c2 | |||
| adff80af11 |
@@ -1,94 +0,0 @@
|
||||
# Reusable workflow (triggered via `workflow_call`) that installs the project,
# swaps pydantic for the opposite major version, and re-runs the test suite.
name: pydantic v1/v2 compatibility

on:
  workflow_call:
    inputs:
      working-directory:
        required: true
        type: string
        description: "From which folder this pipeline executes"

env:
  POETRY_VERSION: "1.6.1"

jobs:
  build:
    timeout-minutes: 5
    defaults:
      run:
        # Every `run` step executes from the caller-supplied directory.
        working-directory: ${{ inputs.working-directory }}
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Quoted so that "3.10" is not parsed as the float 3.1.
        python-version:
          - "3.8"
          - "3.9"
          - "3.10"
          - "3.11"
    name: Pydantic v1/v2 compatibility - Python ${{ matrix.python-version }}
    steps:
      - uses: actions/checkout@v3

      - name: Set up Python ${{ matrix.python-version }} + Poetry ${{ env.POETRY_VERSION }}
        uses: "./.github/actions/poetry_setup"
        with:
          python-version: ${{ matrix.python-version }}
          poetry-version: ${{ env.POETRY_VERSION }}
          working-directory: ${{ inputs.working-directory }}
          cache-key: pydantic-cross-compat

      - name: Install dependencies
        shell: bash
        run: poetry install --with test

      - name: Install the opposite major version of pydantic
        # If normal tests use pydantic v1, here we'll use v2, and vice versa.
        shell: bash
        run: |
          # Determine the major part of pydantic version
          REGULAR_VERSION=$(poetry run python -c "import pydantic; print(pydantic.__version__)" | cut -d. -f1)

          if [[ "$REGULAR_VERSION" == "1" ]]; then
            PYDANTIC_DEP=">=2.1,<3"
            TEST_WITH_VERSION="2"
          elif [[ "$REGULAR_VERSION" == "2" ]]; then
            PYDANTIC_DEP="<2"
            TEST_WITH_VERSION="1"
          else
            echo "Unexpected pydantic major version '$REGULAR_VERSION', cannot determine which version to use for cross-compatibility test."
            exit 1
          fi

          # Install via `pip` instead of `poetry add` to avoid changing lockfile,
          # which would prevent caching from working: the cache would get saved
          # to a different key than where it gets loaded from.
          poetry run pip install "pydantic${PYDANTIC_DEP}"

          # Ensure that the correct pydantic is installed now.
          echo "Checking pydantic version... Expecting ${TEST_WITH_VERSION}"

          # Determine the major part of pydantic version
          CURRENT_VERSION=$(poetry run python -c "import pydantic; print(pydantic.__version__)" | cut -d. -f1)

          # Check that the major part of pydantic version is as expected, if not
          # raise an error. TEST_WITH_VERSION is the expected major version and
          # CURRENT_VERSION is what is actually installed.
          if [[ "$CURRENT_VERSION" != "$TEST_WITH_VERSION" ]]; then
            echo "Error: expected pydantic version ${TEST_WITH_VERSION} to have been installed, but found: ${CURRENT_VERSION}"
            exit 1
          fi
          echo "Found pydantic version ${CURRENT_VERSION}, as expected"

      - name: Run pydantic compatibility tests
        shell: bash
        run: make test

      - name: Ensure the tests did not create any additional files
        shell: bash
        run: |
          set -eu

          STATUS="$(git status)"
          echo "$STATUS"

          # grep will exit non-zero if the target message isn't found,
          # and `set -e` above will cause the step to fail.
          echo "$STATUS" | grep 'nothing to commit, working tree clean'
||||
@@ -31,12 +31,6 @@ jobs:
|
||||
working-directory: .
|
||||
secrets: inherit
|
||||
|
||||
pydantic-compatibility:
|
||||
uses:
|
||||
./.github/workflows/_pydantic_compatibility.yml
|
||||
with:
|
||||
working-directory: .
|
||||
secrets: inherit
|
||||
test:
|
||||
timeout-minutes: 5
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
@@ -5,6 +5,12 @@ on:
|
||||
schedule:
|
||||
- cron: '0 0 * * 0' # Runs at midnight (00:00) every Sunday (UTC time)
|
||||
|
||||
env:
|
||||
POETRY_VERSION: "1.6.1"
|
||||
LANGCHAIN_API_KEY: ${{ secrets.LANGCHAIN_API_KEY }}
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
||||
|
||||
jobs:
|
||||
run_tool_benchmarks:
|
||||
runs-on: ubuntu-latest
|
||||
@@ -27,7 +33,12 @@ jobs:
|
||||
poetry install --with test,lint,typing,docs
|
||||
|
||||
- name: Multiverse math benchmark
|
||||
run: python scripts/multiverse_math_benchmark.py
|
||||
|
||||
run: |
|
||||
cd scripts
|
||||
poetry run python multiverse_math_benchmark.py
|
||||
|
||||
- name: Query analysis benchmark
|
||||
run: python scripts/query_analysis_benchmark.py
|
||||
run: |
|
||||
cd scripts
|
||||
poetry run python query_analysis_benchmark.py
|
||||
@@ -43,7 +43,7 @@ Explore Agent Traces on LangSmith:
|
||||
* [Relational Data](https://smith.langchain.com/public/22721064-dcf6-4e42-be65-e7c46e6835e7/d)
|
||||
* [Tool Usage (1-tool)](https://smith.langchain.com/public/ac23cb40-e392-471f-b129-a893a77b6f62/d)
|
||||
* [Tool Usage (26-tools)](https://smith.langchain.com/public/366bddca-62b3-4b6e-849b-a478abab73db/d)
|
||||
* [Mutiverse Math](https://smith.langchain.com/public/983faff2-54b9-4875-9bf2-c16913e7d489/d)
|
||||
* [Multiverse Math](https://smith.langchain.com/public/983faff2-54b9-4875-9bf2-c16913e7d489/d)
|
||||
|
||||
## Installation
|
||||
|
||||
|
||||
+1
-1
@@ -1,8 +1,8 @@
|
||||
from langchain.chat_models import ChatAnthropic
|
||||
from langchain.prompts import ChatPromptTemplate
|
||||
from langchain.pydantic_v1 import BaseModel
|
||||
from langchain.schema.output_parser import StrOutputParser
|
||||
from langchain.schema.runnable import RunnableLambda
|
||||
from pydantic import BaseModel
|
||||
|
||||
from .prompts import answer_prompt
|
||||
from .retriever_agent import executor
|
||||
|
||||
+1
-1
@@ -4,12 +4,12 @@ from langchain.agents import AgentExecutor
|
||||
from langchain.agents.format_scratchpad import format_to_openai_functions
|
||||
from langchain.agents.output_parsers import OpenAIFunctionsAgentOutputParser
|
||||
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
|
||||
from langchain.pydantic_v1 import BaseModel, Field
|
||||
from langchain.schema.messages import AIMessage, HumanMessage
|
||||
from langchain.tools import tool
|
||||
from langchain.tools.render import format_tool_to_openai_function
|
||||
from langchain_docs_retriever.retriever import get_retriever
|
||||
from langchain_openai import ChatOpenAI
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
# This is used to tell the model how to best use the retriever.
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
from enum import Enum
|
||||
from typing import List, Optional
|
||||
|
||||
from langchain.pydantic_v1 import BaseModel, Field
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class QuestionCategory(str, Enum):
|
||||
|
||||
@@ -2,7 +2,7 @@ from enum import Enum
|
||||
from typing import List, Optional
|
||||
|
||||
from langchain.prompts import ChatPromptTemplate
|
||||
from langchain.pydantic_v1 import BaseModel, Field
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from langchain_benchmarks.schema import ExtractionTask
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
from langchain.smith import RunEvalConfig
|
||||
from langchain_core.pydantic_v1 import BaseModel, Field
|
||||
from langsmith.evaluation import EvaluationResult, run_evaluator
|
||||
from langsmith.schemas import Example, Run
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from langchain_benchmarks.schema import ExtractionTask
|
||||
|
||||
|
||||
@@ -6,7 +6,6 @@ from typing import Callable, Iterable, List, Optional
|
||||
from langchain.indexes import SQLRecordManager, index
|
||||
from langchain.output_parsers.openai_functions import JsonKeyOutputFunctionsParser
|
||||
from langchain.prompts import ChatPromptTemplate
|
||||
from langchain.pydantic_v1 import BaseModel
|
||||
from langchain.retrievers.multi_vector import MultiVectorRetriever
|
||||
from langchain.retrievers.parent_document_retriever import ParentDocumentRetriever
|
||||
from langchain.schema.document import Document
|
||||
@@ -18,6 +17,7 @@ from langchain.schema.vectorstore import VectorStore
|
||||
from langchain.storage import InMemoryStore
|
||||
from langchain.text_splitter import RecursiveCharacterTextSplitter, TextSplitter
|
||||
from langchain_openai import ChatOpenAI
|
||||
from pydantic import BaseModel
|
||||
from tqdm.auto import tqdm
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -175,7 +175,7 @@ class AgentTrajectoryEvaluator(RunEvaluator):
|
||||
eval_llm = eval_llm or ChatOpenAI(
|
||||
model="gpt-4",
|
||||
temperature=0,
|
||||
model_kwargs={"seed": 42},
|
||||
seed=42,
|
||||
max_retries=1,
|
||||
request_timeout=60,
|
||||
)
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
from datetime import datetime
|
||||
from typing import List, Literal, Union, cast
|
||||
|
||||
from langchain.pydantic_v1 import BaseModel, Field
|
||||
from langchain.tools import BaseTool, tool
|
||||
from langchain_core.messages import HumanMessage
|
||||
from langsmith.client import Client
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from langchain_benchmarks.schema import ToolUsageEnvironment, ToolUsageTask
|
||||
|
||||
|
||||
Generated
+865
-784
File diff suppressed because it is too large
Load Diff
+12
-9
@@ -1,20 +1,21 @@
|
||||
[tool.poetry]
|
||||
name = "langchain-benchmarks"
|
||||
version = "0.0.14"
|
||||
version = "0.0.15"
|
||||
description = "🦜💪 Flex those feathers!"
|
||||
authors = ["LangChain AI"]
|
||||
license = "MIT"
|
||||
readme = "README.md"
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = "^3.8.1"
|
||||
langchain = "^0.2.7"
|
||||
langchain-community = "^0.2"
|
||||
python = "^3.9"
|
||||
langchain = "^0.3"
|
||||
langchain-community = "^0.3"
|
||||
langchain-core= "^0.3.12"
|
||||
langsmith = ">=0.0.70"
|
||||
tqdm = "^4"
|
||||
ipywidgets = "^8"
|
||||
tabulate = ">=0.8.0"
|
||||
langchain-openai = "^0.1.14"
|
||||
langchain-openai = "^0.2"
|
||||
|
||||
[tool.poetry.group.dev]
|
||||
optional = true
|
||||
@@ -57,10 +58,12 @@ pytest-socket = "^0.6.0"
|
||||
pytest-watch = "^4.2.0"
|
||||
pytest-timeout = "^2.2.0"
|
||||
freezegun = "^1.3.1"
|
||||
langchain-anthropic = "^0.1.19"
|
||||
langchain-fireworks = "^0.1.4"
|
||||
langchain-mistralai = "^0.1.9"
|
||||
langchain-groq = "^0.1.6"
|
||||
langchain-anthropic = "^0.2"
|
||||
langchain-fireworks = "^0.2"
|
||||
langchain-mistralai = "^0.2"
|
||||
langchain-groq = "^0.2"
|
||||
langchain-core = "^0.3.12"
|
||||
faiss-cpu = ">=1.8.0"
|
||||
|
||||
[tool.ruff]
|
||||
select = [
|
||||
|
||||
Reference in New Issue
Block a user