mirror of
https://github.com/langchain-ai/rag-research-agent-template-js.git
synced 2026-07-01 20:04:02 -04:00
Initial commit
This commit is contained in:
@@ -0,0 +1,2 @@
|
||||
node_modules
|
||||
dist
|
||||
@@ -0,0 +1,31 @@
|
||||
# To separate your traces from those of other applications
|
||||
LANGCHAIN_PROJECT=retrieval-agent
|
||||
|
||||
# The following depend on your selected configuration
|
||||
|
||||
# LLM choice:
|
||||
ANTHROPIC_API_KEY=....
|
||||
FIREWORKS_API_KEY=...
|
||||
OPENAI_API_KEY=...
|
||||
|
||||
# Embeddings choice
|
||||
OPENAI_API_KEY=...
|
||||
COHERE_API_KEY=...
|
||||
|
||||
# Retrieval provider
|
||||
|
||||
## Elastic cloud:
|
||||
ELASTICSEARCH_URL=...
|
||||
ELASTICSEARCH_API_KEY=...
|
||||
|
||||
## Elastic local:
|
||||
ELASTICSEARCH_URL=http://host.docker.internal:9200
|
||||
ELASTICSEARCH_USER=elastic
|
||||
ELASTICSEARCH_PASSWORD=changeme
|
||||
|
||||
## Pinecone
|
||||
PINECONE_API_KEY=...
|
||||
PINECONE_INDEX_NAME=...
|
||||
|
||||
## MongoDB Atlas
|
||||
MONGODB_URI=... # Full connection string
|
||||
@@ -0,0 +1,62 @@
|
||||
module.exports = {
|
||||
extends: [
|
||||
"eslint:recommended",
|
||||
"prettier",
|
||||
"plugin:@typescript-eslint/recommended",
|
||||
],
|
||||
parserOptions: {
|
||||
ecmaVersion: 12,
|
||||
parser: "@typescript-eslint/parser",
|
||||
project: "./tsconfig.json",
|
||||
sourceType: "module",
|
||||
},
|
||||
plugins: ["import", "@typescript-eslint", "no-instanceof"],
|
||||
ignorePatterns: [
|
||||
".eslintrc.cjs",
|
||||
"scripts",
|
||||
"node_modules",
|
||||
"dist",
|
||||
"dist-cjs",
|
||||
"*.js",
|
||||
"*.cjs",
|
||||
"*.d.ts",
|
||||
],
|
||||
rules: {
|
||||
"no-process-env": 0,
|
||||
"no-instanceof/no-instanceof": 2,
|
||||
"@typescript-eslint/explicit-module-boundary-types": 0,
|
||||
"@typescript-eslint/no-empty-function": 0,
|
||||
"@typescript-eslint/no-non-null-assertion": 0,
|
||||
"@typescript-eslint/no-shadow": 0,
|
||||
"@typescript-eslint/no-empty-interface": 0,
|
||||
"@typescript-eslint/no-use-before-define": ["error", "nofunc"],
|
||||
"@typescript-eslint/no-unused-vars": ["warn", { args: "none" }],
|
||||
"@typescript-eslint/no-floating-promises": "error",
|
||||
"@typescript-eslint/no-misused-promises": "error",
|
||||
camelcase: 0,
|
||||
"class-methods-use-this": 0,
|
||||
"import/extensions": [2, "ignorePackages"],
|
||||
"import/no-extraneous-dependencies": [
|
||||
"error",
|
||||
{ devDependencies: ["**/*.test.ts"] },
|
||||
],
|
||||
"import/no-unresolved": 0,
|
||||
"import/prefer-default-export": 0,
|
||||
"keyword-spacing": "error",
|
||||
"max-classes-per-file": 0,
|
||||
"max-len": 0,
|
||||
"no-await-in-loop": 0,
|
||||
"no-bitwise": 0,
|
||||
"no-console": 0,
|
||||
"no-restricted-syntax": 0,
|
||||
"no-shadow": 0,
|
||||
"no-continue": 0,
|
||||
"no-underscore-dangle": 0,
|
||||
"no-use-before-define": 0,
|
||||
"no-useless-constructor": 0,
|
||||
"no-return-await": 0,
|
||||
"consistent-return": 0,
|
||||
"no-else-return": 0,
|
||||
"new-cap": ["error", { properties: false, capIsNew: false }],
|
||||
},
|
||||
};
|
||||
+19
@@ -0,0 +1,19 @@
|
||||
index.cjs
|
||||
index.js
|
||||
index.d.ts
|
||||
node_modules
|
||||
dist
|
||||
.yarn/*
|
||||
!.yarn/patches
|
||||
!.yarn/plugins
|
||||
!.yarn/releases
|
||||
!.yarn/sdks
|
||||
!.yarn/versions
|
||||
|
||||
.turbo
|
||||
**/.turbo
|
||||
**/.eslintcache
|
||||
|
||||
.env
|
||||
.ipynb_checkpoints
|
||||
|
||||
@@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2024 LangChain
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
@@ -0,0 +1,706 @@
|
||||
# LangGraph Retrieval Agent Template
|
||||
|
||||
<!-- TODO: Add CI shields -->
|
||||
<!-- [](https://github.com/langchain-ai/retrieval-agent-template-js/actions/workflows/unit-tests.yml)
|
||||
[](https://github.com/langchain-ai/retrieval-agent-template-js/actions/workflows/integration-tests.yml) -->
|
||||
|
||||
[](https://langgraph-studio.vercel.app/templates/open?githubUrl=https://github.com/langchain-ai/retrieval-agent-template-js)
|
||||
|
||||
This is a starter project to help you get started with developing a retrieval agent using [LangGraph.js](https://github.com/langchain-ai/langgraphjs) in [LangGraph Studio](https://github.com/langchain-ai/langgraph-studio).
|
||||
|
||||

|
||||
|
||||
It contains example graphs exported from `src/retrieval_graph/graph.ts` that implement a retrieval-based question answering system.
|
||||
|
||||
## What it does
|
||||
|
||||
This project has two graphs: an "index" graph, and a "retrieval" graph.
|
||||
|
||||
The index graph takes in document objects and strings, and it indexes them for the configured `userId`.
|
||||
|
||||
```json
|
||||
[{ "page_content": "I have 1 cat." }]
|
||||
```
|
||||
|
||||
The retrieval chat bot manages a chat history and responds based on fetched context. It:
|
||||
|
||||
1. Takes a user **query** as input
|
||||
2. Searches for documents, filtered by `userId`, based on the conversation history
|
||||
3. Responds using the retrieved information and conversation context
|
||||
|
||||
By default, it's set up to answer questions based on the user's indexed documents, which are filtered by the user's ID for personalized responses.
|
||||
|
||||
## Getting Started
|
||||
|
||||
Assuming you have already [installed LangGraph Studio](https://github.com/langchain-ai/langgraph-studio?tab=readme-ov-file#download), to set up:
|
||||
|
||||
1. Create a `.env` file.
|
||||
|
||||
```bash
|
||||
cp .env.example .env
|
||||
```
|
||||
|
||||
2. Select your retriever & index, and save the access instructions to your `.env` file.
|
||||
|
||||
<!--
|
||||
Setup instruction auto-generated by `langgraph template lock`. DO NOT EDIT MANUALLY.
|
||||
-->
|
||||
|
||||
### Setup Retriever
|
||||
|
||||
The defaults values for `retrieverProvider` are shown below:
|
||||
|
||||
```yaml
|
||||
retrieverProvider: elastic
|
||||
```
|
||||
|
||||
Follow the instructions below to get set up, or pick one of the additional options.
|
||||
|
||||
#### Setup Elasticsearch
|
||||
|
||||
**Elastic Cloud**
|
||||
|
||||
1. Signup for a free trial with [Elastic Cloud](https://cloud.elastic.co/registration?onboarding_token=search&cta=cloud-registration&tech=trial&plcmt=article%20content&pg=langchain).
|
||||
2. Get the Elasticsearch URL, found under Applications of your deployment.
|
||||
3. Create an API key. See the [official elastic documentation](https://www.elastic.co/search-labs/tutorials/install-elasticsearch/elastic-cloud#creating-an-api-key) for more information.
|
||||
4. Copy the URL and API key to your `.env` file created above:
|
||||
|
||||
```
|
||||
ELASTICSEARCH_URL=<ES_URL>
|
||||
ELASTICSEARCH_API_KEY=<API_KEY>
|
||||
```
|
||||
|
||||
**Local Elasticsearch (Docker)**
|
||||
|
||||
```
|
||||
docker run -p 127.0.0.1:9200:9200 -d --name elasticsearch --network elastic-net -e ELASTIC_PASSWORD=changeme -e "discovery.type=single-node" -e "xpack.security.http.ssl.enabled=false" -e "xpack.license.self_generated.type=trial" docker.elastic.co/elasticsearch/elasticsearch:8.15.1
|
||||
```
|
||||
|
||||
See the [official Elastic documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/run-elasticsearch-locally.html) for more information on running it locally.
|
||||
|
||||
Then populate the following in your `.env` file:
|
||||
|
||||
```
|
||||
# As both Elasticsearch and LangGraph Studio runs in Docker, we need to use host.docker.internal to access.
|
||||
|
||||
ELASTICSEARCH_URL=http://host.docker.internal:9200
|
||||
ELASTICSEARCH_USER=elastic
|
||||
ELASTICSEARCH_PASSWORD=changeme
|
||||
```
|
||||
|
||||
#### MongoDB Atlas
|
||||
|
||||
MongoDB Atlas is a fully-managed cloud database that includes vector search capabilities for AI-powered applications.
|
||||
|
||||
1. Create a free Atlas cluster:
|
||||
|
||||
- Go to the [MongoDB Atlas website](https://www.mongodb.com/cloud/atlas/register) and sign up for a free account.
|
||||
- After logging in, create a free cluster by following the on-screen instructions.
|
||||
|
||||
2. Create a vector search index
|
||||
|
||||
- Follow the instructions at [the Mongo docs](https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-type/)
|
||||
- By default, we use the collection `langgraph_retrieval_agent.default` - create the index there
|
||||
- Add an indexed filter for path `user_id`
|
||||
- **IMPORTANT**: select Atlas Vector Search NOT Atlas Search when creating the index
|
||||
Your final JSON editor configuration should look something like the following:
|
||||
|
||||
```json
|
||||
{
|
||||
"fields": [
|
||||
{
|
||||
"numDimensions": 1536,
|
||||
"path": "embedding",
|
||||
"similarity": "cosine",
|
||||
"type": "vector"
|
||||
},
|
||||
{
|
||||
"path": "user_id",
|
||||
"type": "filter"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
The exact numDimensions may differ if you select a different embedding model.
|
||||
|
||||
2. Set up your environment:
|
||||
|
||||
- In the Atlas dashboard, click on "Connect" for your cluster.
|
||||
- Choose "Connect your application" and copy the provided connection string.
|
||||
- Create a `.env` file in your project root if you haven't already.
|
||||
- Add your MongoDB Atlas connection string to the `.env` file:
|
||||
|
||||
```
|
||||
MONGODB_URI="mongodb+srv://username:password@your-cluster-url.mongodb.net/?retryWrites=true&w=majority&appName=your-cluster-name"
|
||||
```
|
||||
|
||||
Replace `username`, `password`, `your-cluster-url`, and `your-cluster-name` with your actual credentials and cluster information.
|
||||
|
||||
#### Pinecone Serverless
|
||||
|
||||
Pinecone is a managed, cloud-native vector database that provides long-term memory for high-performance AI applications.
|
||||
|
||||
1. Sign up for a Pinecone account at [https://login.pinecone.io/login](https://login.pinecone.io/login) if you haven't already.
|
||||
|
||||
2. After logging in, generate an API key from the Pinecone console.
|
||||
|
||||
3. Create a serverless index:
|
||||
|
||||
- Choose a name for your index (e.g., "example-index")
|
||||
- Set the dimension based on your embedding model (e.g., 1536 for OpenAI embeddings)
|
||||
- Select "cosine" as the metric
|
||||
- Choose "Serverless" as the index type
|
||||
- Select your preferred cloud provider and region (e.g., AWS us-east-1)
|
||||
|
||||
4. Once you have created your index and obtained your API key, add them to your `.env` file:
|
||||
|
||||
```
|
||||
PINECONE_API_KEY=your-api-key
|
||||
PINECONE_INDEX_NAME=your-index-name
|
||||
```
|
||||
|
||||
### Setup Model
|
||||
|
||||
The defaults values for `responseModel`, `queryModel` are shown below:
|
||||
|
||||
```yaml
|
||||
responseModel: anthropic/claude-3-5-sonnet-20240620
|
||||
queryModel: anthropic/claude-3-haiku-20240307
|
||||
```
|
||||
|
||||
Follow the instructions below to get set up, or pick one of the additional options.
|
||||
|
||||
#### Anthropic
|
||||
|
||||
To use Anthropic's chat models:
|
||||
|
||||
1. Sign up for an [Anthropic API key](https://console.anthropic.com/) if you haven't already.
|
||||
2. Once you have your API key, add it to your `.env` file:
|
||||
|
||||
```
|
||||
ANTHROPIC_API_KEY=your-api-key
|
||||
```
|
||||
|
||||
#### OpenAI
|
||||
|
||||
To use OpenAI's chat models:
|
||||
|
||||
1. Sign up for an [OpenAI API key](https://platform.openai.com/signup).
|
||||
2. Once you have your API key, add it to your `.env` file:
|
||||
|
||||
```
|
||||
OPENAI_API_KEY=your-api-key
|
||||
```
|
||||
|
||||
### Setup Embedding Model
|
||||
|
||||
The defaults values for `embeddingModel` are shown below:
|
||||
|
||||
```yaml
|
||||
embeddingModel: openai/text-embedding-3-small
|
||||
```
|
||||
|
||||
Follow the instructions below to get set up, or pick one of the additional options.
|
||||
|
||||
#### OpenAI
|
||||
|
||||
To use OpenAI's embeddings:
|
||||
|
||||
1. Sign up for an [OpenAI API key](https://platform.openai.com/signup).
|
||||
2. Once you have your API key, add it to your `.env` file:
|
||||
|
||||
```
|
||||
OPENAI_API_KEY=your-api-key
|
||||
```
|
||||
|
||||
#### Cohere
|
||||
|
||||
To use Cohere's embeddings:
|
||||
|
||||
1. Sign up for a [Cohere API key](https://dashboard.cohere.com/welcome/register).
|
||||
2. Once you have your API key, add it to your `.env` file:
|
||||
|
||||
```bash
|
||||
COHERE_API_KEY=your-api-key
|
||||
```
|
||||
|
||||
<!--
|
||||
End setup instructions
|
||||
-->
|
||||
|
||||
## Using
|
||||
|
||||
Once you've set up your retriever and saved your model secrets, it's time to try it out! First, let's add some information to the index. Open studio, select the "indexer" graph from the dropdown in the top-left, provide an example user ID in the configuration at the bottom, and then add some content to chat over.
|
||||
|
||||
```json
|
||||
[{ "page_content": "My cat knows python." }]
|
||||
```
|
||||
|
||||
When you upload content, it will be indexed under the configured user ID. You know it's complete when the indexer deletes the content from its graph memory (since it's been persisted in your configured storage provider).
|
||||
|
||||
Next, open the "retrieval_graph" using the dropdown in the top-left. Ask it about your cat to confirm it can fetch the required information! If you change the `userId` at any time, notice how it no longer has access to your information. The graph is doing simple filtering of content so you only access the information under the provided ID.
|
||||
|
||||
## How to customize
|
||||
|
||||
You can customize this retrieval agent template in several ways:
|
||||
|
||||
1. **Change the retriever**: You can switch between different vector stores (Elasticsearch, MongoDB, Pinecone) by modifying the `retrieverProvider` in the configuration. Each provider has its own setup instructions in the "Getting Started" section above.
|
||||
|
||||
2. **Modify the embedding model**: You can change the embedding model used for document indexing and query embedding by updating the `embeddingModel` in the configuration. Options include various OpenAI and Cohere models.
|
||||
|
||||
3. **Adjust search parameters**: Fine-tune the retrieval process by modifying the `searchKwargs` in the configuration. This allows you to control aspects like the number of documents retrieved or similarity thresholds.
|
||||
|
||||
4. **Customize the response generation**: You can modify the `responseSystemPrompt` to change how the agent formulates its responses. This allows you to adjust the agent's personality or add specific instructions for answer generation.
|
||||
|
||||
5. **Change the language model**: Update the `responseModel` in the configuration to use different language models for response generation. Options include various Claude models from Anthropic, as well as models from other providers like Fireworks AI.
|
||||
|
||||
6. **Extend the graph**: You can add new nodes or modify existing ones in the `src/retrieval_graph/graph.ts` file to introduce additional processing steps or decision points in the agent's workflow.
|
||||
|
||||
7. **Add new tools**: Implement new tools or API integrations in `src/retrieval_agent/tools.ts` to expand the agent's capabilities beyond simple retrieval and response generation.
|
||||
|
||||
8. **Modify prompts**: Update the prompts used for query generation and response formulation in `src/retrieval_graph/prompts.ts` to better suit your specific use case or to improve the agent's performance.
|
||||
|
||||
Remember to test your changes thoroughly to ensure they improve the agent's performance for your specific use case.
|
||||
|
||||
## Development
|
||||
|
||||
While iterating on your graph, you can edit past state and rerun your app from past states to debug specific nodes. Local changes will be automatically applied via hot reload. Try adding an interrupt before the agent calls tools, updating the default system message in `src/retrieval_agent/utils.ts` to take on a persona, or adding additional nodes and edges!
|
||||
|
||||
Follow up requests will be appended to the same thread. You can create an entirely new thread, clearing previous history, using the `+` button in the top right.
|
||||
|
||||
You can find the latest (under construction) docs on [LangGraph](https://github.com/langchain-ai/langgraphjs) here, including examples and other references. Using those guides can help you pick the right patterns to adapt here for your use case.
|
||||
|
||||
LangGraph Studio also integrates with [LangSmith](https://smith.langchain.com/) for more in-depth tracing and collaboration with teammates.
|
||||
|
||||
<!--
|
||||
Configuration auto-generated by `langgraph template lock`. DO NOT EDIT MANUALLY.
|
||||
{
|
||||
"config_schemas": {
|
||||
"indexer": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"embeddingModel": {
|
||||
"type": "string",
|
||||
"default": "openai/text-embedding-3-small",
|
||||
"description": "Name of the embedding model to use. Must be a valid embedding model name.",
|
||||
"environment": [
|
||||
{
|
||||
"value": "cohere/embed-english-light-v2.0",
|
||||
"variables": "COHERE_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "cohere/embed-english-light-v3.0",
|
||||
"variables": "COHERE_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "cohere/embed-english-v2.0",
|
||||
"variables": "COHERE_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "cohere/embed-english-v3.0",
|
||||
"variables": "COHERE_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "cohere/embed-multilingual-light-v3.0",
|
||||
"variables": "COHERE_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "cohere/embed-multilingual-v2.0",
|
||||
"variables": "COHERE_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "cohere/embed-multilingual-v3.0",
|
||||
"variables": "COHERE_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "openai/text-embedding-3-large",
|
||||
"variables": "OPENAI_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "openai/text-embedding-3-small",
|
||||
"variables": "OPENAI_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "openai/text-embedding-ada-002",
|
||||
"variables": "OPENAI_API_KEY"
|
||||
}
|
||||
]
|
||||
},
|
||||
"retrieverProvider": {
|
||||
"enum": [
|
||||
"elastic",
|
||||
"elastic-local",
|
||||
"mongodb",
|
||||
"pinecone"
|
||||
],
|
||||
"default": "elastic",
|
||||
"description": "The vector store provider to use for retrieval. Options are 'elastic', 'pinecone', or 'mongodb'.",
|
||||
"environment": [
|
||||
{
|
||||
"value": "elastic",
|
||||
"variables": [
|
||||
"ELASTICSEARCH_URL",
|
||||
"ELASTICSEARCH_API_KEY"
|
||||
]
|
||||
},
|
||||
{
|
||||
"value": "elastic-local",
|
||||
"variables": [
|
||||
"ELASTICSEARCH_URL",
|
||||
"ELASTICSEARCH_USER",
|
||||
"ELASTICSEARCH_PASSWORD"
|
||||
]
|
||||
},
|
||||
{
|
||||
"value": "mongodb",
|
||||
"variables": [
|
||||
"MONGODB_URI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"value": "pinecone",
|
||||
"variables": [
|
||||
"PINECONE_API_KEY",
|
||||
"PINECONE_INDEX_NAME"
|
||||
]
|
||||
}
|
||||
],
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"retrieval_graph": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"embeddingModel": {
|
||||
"type": "string",
|
||||
"default": "openai/text-embedding-3-small",
|
||||
"description": "Name of the embedding model to use. Must be a valid embedding model name.",
|
||||
"environment": [
|
||||
{
|
||||
"value": "cohere/embed-english-light-v2.0",
|
||||
"variables": "COHERE_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "cohere/embed-english-light-v3.0",
|
||||
"variables": "COHERE_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "cohere/embed-english-v2.0",
|
||||
"variables": "COHERE_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "cohere/embed-english-v3.0",
|
||||
"variables": "COHERE_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "cohere/embed-multilingual-light-v3.0",
|
||||
"variables": "COHERE_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "cohere/embed-multilingual-v2.0",
|
||||
"variables": "COHERE_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "cohere/embed-multilingual-v3.0",
|
||||
"variables": "COHERE_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "openai/text-embedding-3-large",
|
||||
"variables": "OPENAI_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "openai/text-embedding-3-small",
|
||||
"variables": "OPENAI_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "openai/text-embedding-ada-002",
|
||||
"variables": "OPENAI_API_KEY"
|
||||
}
|
||||
]
|
||||
},
|
||||
"retrieverProvider": {
|
||||
"enum": [
|
||||
"elastic",
|
||||
"elastic-local",
|
||||
"mongodb",
|
||||
"pinecone"
|
||||
],
|
||||
"default": "elastic",
|
||||
"description": "The vector store provider to use for retrieval. Options are 'elastic', 'pinecone', or 'mongodb'.",
|
||||
"environment": [
|
||||
{
|
||||
"value": "elastic",
|
||||
"variables": [
|
||||
"ELASTICSEARCH_URL",
|
||||
"ELASTICSEARCH_API_KEY"
|
||||
]
|
||||
},
|
||||
{
|
||||
"value": "elastic-local",
|
||||
"variables": [
|
||||
"ELASTICSEARCH_URL",
|
||||
"ELASTICSEARCH_USER",
|
||||
"ELASTICSEARCH_PASSWORD"
|
||||
]
|
||||
},
|
||||
{
|
||||
"value": "mongodb",
|
||||
"variables": [
|
||||
"MONGODB_URI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"value": "pinecone",
|
||||
"variables": [
|
||||
"PINECONE_API_KEY",
|
||||
"PINECONE_INDEX_NAME"
|
||||
]
|
||||
}
|
||||
],
|
||||
"type": "string"
|
||||
},
|
||||
"responseModel": {
|
||||
"type": "string",
|
||||
"default": "anthropic/claude-3-5-sonnet-20240620",
|
||||
"description": "The language model used for generating responses. Should be in the form: provider/model-name.",
|
||||
"environment": [
|
||||
{
|
||||
"value": "anthropic/claude-1.2",
|
||||
"variables": "ANTHROPIC_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "anthropic/claude-2.0",
|
||||
"variables": "ANTHROPIC_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "anthropic/claude-2.1",
|
||||
"variables": "ANTHROPIC_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "anthropic/claude-3-5-sonnet-20240620",
|
||||
"variables": "ANTHROPIC_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "anthropic/claude-3-haiku-20240307",
|
||||
"variables": "ANTHROPIC_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "anthropic/claude-3-opus-20240229",
|
||||
"variables": "ANTHROPIC_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "anthropic/claude-3-sonnet-20240229",
|
||||
"variables": "ANTHROPIC_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "anthropic/claude-instant-1.2",
|
||||
"variables": "ANTHROPIC_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "openai/gpt-3.5-turbo",
|
||||
"variables": "OPENAI_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "openai/gpt-3.5-turbo-0125",
|
||||
"variables": "OPENAI_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "openai/gpt-3.5-turbo-0301",
|
||||
"variables": "OPENAI_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "openai/gpt-3.5-turbo-0613",
|
||||
"variables": "OPENAI_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "openai/gpt-3.5-turbo-1106",
|
||||
"variables": "OPENAI_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "openai/gpt-3.5-turbo-16k",
|
||||
"variables": "OPENAI_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "openai/gpt-3.5-turbo-16k-0613",
|
||||
"variables": "OPENAI_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "openai/gpt-4",
|
||||
"variables": "OPENAI_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "openai/gpt-4-0125-preview",
|
||||
"variables": "OPENAI_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "openai/gpt-4-0314",
|
||||
"variables": "OPENAI_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "openai/gpt-4-0613",
|
||||
"variables": "OPENAI_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "openai/gpt-4-1106-preview",
|
||||
"variables": "OPENAI_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "openai/gpt-4-32k",
|
||||
"variables": "OPENAI_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "openai/gpt-4-32k-0314",
|
||||
"variables": "OPENAI_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "openai/gpt-4-32k-0613",
|
||||
"variables": "OPENAI_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "openai/gpt-4-turbo",
|
||||
"variables": "OPENAI_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "openai/gpt-4-turbo-preview",
|
||||
"variables": "OPENAI_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "openai/gpt-4-vision-preview",
|
||||
"variables": "OPENAI_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "openai/gpt-4o",
|
||||
"variables": "OPENAI_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "openai/gpt-4o-mini",
|
||||
"variables": "OPENAI_API_KEY"
|
||||
}
|
||||
]
|
||||
},
|
||||
"queryModel": {
|
||||
"type": "string",
|
||||
"default": "anthropic/claude-3-haiku-20240307",
|
||||
"description": "The language model used for processing and refining queries. Should be in the form: provider/model-name.",
|
||||
"environment": [
|
||||
{
|
||||
"value": "anthropic/claude-1.2",
|
||||
"variables": "ANTHROPIC_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "anthropic/claude-2.0",
|
||||
"variables": "ANTHROPIC_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "anthropic/claude-2.1",
|
||||
"variables": "ANTHROPIC_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "anthropic/claude-3-5-sonnet-20240620",
|
||||
"variables": "ANTHROPIC_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "anthropic/claude-3-haiku-20240307",
|
||||
"variables": "ANTHROPIC_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "anthropic/claude-3-opus-20240229",
|
||||
"variables": "ANTHROPIC_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "anthropic/claude-3-sonnet-20240229",
|
||||
"variables": "ANTHROPIC_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "anthropic/claude-instant-1.2",
|
||||
"variables": "ANTHROPIC_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "openai/gpt-3.5-turbo",
|
||||
"variables": "OPENAI_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "openai/gpt-3.5-turbo-0125",
|
||||
"variables": "OPENAI_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "openai/gpt-3.5-turbo-0301",
|
||||
"variables": "OPENAI_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "openai/gpt-3.5-turbo-0613",
|
||||
"variables": "OPENAI_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "openai/gpt-3.5-turbo-1106",
|
||||
"variables": "OPENAI_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "openai/gpt-3.5-turbo-16k",
|
||||
"variables": "OPENAI_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "openai/gpt-3.5-turbo-16k-0613",
|
||||
"variables": "OPENAI_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "openai/gpt-4",
|
||||
"variables": "OPENAI_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "openai/gpt-4-0125-preview",
|
||||
"variables": "OPENAI_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "openai/gpt-4-0314",
|
||||
"variables": "OPENAI_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "openai/gpt-4-0613",
|
||||
"variables": "OPENAI_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "openai/gpt-4-1106-preview",
|
||||
"variables": "OPENAI_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "openai/gpt-4-32k",
|
||||
"variables": "OPENAI_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "openai/gpt-4-32k-0314",
|
||||
"variables": "OPENAI_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "openai/gpt-4-32k-0613",
|
||||
"variables": "OPENAI_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "openai/gpt-4-turbo",
|
||||
"variables": "OPENAI_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "openai/gpt-4-turbo-preview",
|
||||
"variables": "OPENAI_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "openai/gpt-4-vision-preview",
|
||||
"variables": "OPENAI_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "openai/gpt-4o",
|
||||
"variables": "OPENAI_API_KEY"
|
||||
},
|
||||
{
|
||||
"value": "openai/gpt-4o-mini",
|
||||
"variables": "OPENAI_API_KEY"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
-->
|
||||
@@ -0,0 +1,18 @@
|
||||
/**
 * Jest configuration: runs TypeScript tests through ts-jest in ESM mode.
 */
export default {
  preset: "ts-jest/presets/default-esm",
  moduleNameMapper: {
    // Strip the `.js` suffix from relative imports (e.g. "./prompts.js").
    "^(\\.{1,2}/.*)\\.js$": "$1",
  },
  transform: {
    // Compile .ts / .tsx files with ts-jest, emitting ES modules.
    "^.+\\.tsx?$": [
      "ts-jest",
      {
        useESM: true,
      },
    ],
  },
  extensionsToTreatAsEsm: [".ts"],
  // Load environment variables via dotenv before the tests run.
  setupFiles: ["dotenv/config"],
  // An empty test selection is not a failure.
  passWithNoTests: true,
  // Per-test timeout in milliseconds (20 seconds).
  testTimeout: 20_000,
};
|
||||
@@ -0,0 +1,8 @@
|
||||
{
|
||||
"node_version": "20",
|
||||
"graphs": {
|
||||
"retrieval_graph": "./src/retrieval_graph/graph.ts:graph",
|
||||
"indexer": "./src/retrieval_graph/index_graph.ts:graph"
|
||||
},
|
||||
"env": ".env"
|
||||
}
|
||||
@@ -0,0 +1,55 @@
|
||||
{
|
||||
"name": "retrieval-graph",
|
||||
"version": "0.0.1",
|
||||
"description": "A starter template containing an example retrieval agent.",
|
||||
"main": "src/retrieval_graph/graph.ts",
|
||||
"author": "Your Name",
|
||||
"license": "MIT",
|
||||
"private": true,
|
||||
"type": "module",
|
||||
"packageManager": "yarn@1.22.22",
|
||||
"scripts": {
|
||||
"build": "tsc",
|
||||
"clean": "rm -rf dist",
|
||||
"test": "node --experimental-vm-modules node_modules/jest/bin/jest.js --testPathPattern=\\.test\\.ts$ --testPathIgnorePatterns=\\.int\\.test\\.ts$",
|
||||
"test:int": "node --experimental-vm-modules node_modules/jest/bin/jest.js --testPathPattern=\\.int\\.test\\.ts$",
|
||||
"lint": "eslint src",
|
||||
"format": "prettier --write ."
|
||||
},
|
||||
"dependencies": {
|
||||
"@elastic/elasticsearch": "^8.15.0",
|
||||
"@langchain/anthropic": "^0.3.1",
|
||||
"@langchain/cohere": "^0.3.0",
|
||||
"@langchain/community": "^0.3.1",
|
||||
"@langchain/core": "^0.3.3",
|
||||
"@langchain/langgraph": "^0.2.8",
|
||||
"@langchain/mongodb": "^0.1.0",
|
||||
"@langchain/openai": "^0.3.0",
|
||||
"@langchain/pinecone": "^0.1.0",
|
||||
"@pinecone-database/pinecone": "^3.0.3",
|
||||
"langchain": "^0.3.2",
|
||||
"mongodb": "^6.9.0",
|
||||
"ts-node": "^10.9.2",
|
||||
"uuid": "^10.0.0",
|
||||
"zod": "^3.23.8"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@eslint/eslintrc": "^3.1.0",
|
||||
"@eslint/js": "^9.9.1",
|
||||
"@jest/globals": "^29.7.0",
|
||||
"@tsconfig/recommended": "^1.0.7",
|
||||
"@types/jest": "^29.5.0",
|
||||
"@typescript-eslint/eslint-plugin": "^5.59.8",
|
||||
"@typescript-eslint/parser": "^5.59.8",
|
||||
"dotenv": "^16.4.5",
|
||||
"eslint": "^8.41.0",
|
||||
"eslint-config-prettier": "^8.8.0",
|
||||
"eslint-plugin-import": "^2.27.5",
|
||||
"eslint-plugin-no-instanceof": "^1.0.1",
|
||||
"eslint-plugin-prettier": "^4.2.1",
|
||||
"jest": "^29.7.0",
|
||||
"prettier": "^3.3.3",
|
||||
"ts-jest": "^29.1.0",
|
||||
"typescript": "^5.3.3"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,116 @@
|
||||
/**
|
||||
* Define the configurable parameters for the agent.
|
||||
*/
|
||||
import { RunnableConfig } from "@langchain/core/runnables";
|
||||
import {
|
||||
RESPONSE_SYSTEM_PROMPT_TEMPLATE,
|
||||
QUERY_SYSTEM_PROMPT_TEMPLATE,
|
||||
} from "./prompts.js";
|
||||
import { Annotation } from "@langchain/langgraph";
|
||||
|
||||
/**
 * Configuration annotation for indexing and retrieval operations.
 *
 * This annotation defines the parameters needed for configuring the indexing and
 * retrieval processes, including user identification, embedding model selection,
 * retriever provider choice, and search parameters.
 */
export const IndexConfigurationAnnotation = Annotation.Root({
  /**
   * Unique identifier for the user.
   */
  userId: Annotation<string>,

  /**
   * Name of the embedding model to use. Must be a valid embedding model name.
   */
  embeddingModel: Annotation<string>,

  /**
   * The vector store provider to use for retrieval.
   * Options are 'elastic', 'elastic-local', 'pinecone', or 'mongodb'.
   */
  retrieverProvider: Annotation<
    "elastic" | "elastic-local" | "pinecone" | "mongodb"
  >,

  /**
   * Additional keyword arguments to pass to the search function of the retriever.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  searchKwargs: Annotation<Record<string, any>>,
});
|
||||
|
||||
/**
 * Build a fully populated index configuration from a RunnableConfig.
 *
 * Values are read from `config.configurable`; every field that is missing or
 * falsy is replaced by its default.
 *
 * @param config - The runnable configuration to read from (may be omitted).
 * @returns The index configuration with defaults applied.
 */
export function ensureIndexConfiguration(
  config: RunnableConfig | undefined = undefined,
): typeof IndexConfigurationAnnotation.State {
  const overrides: Partial<typeof IndexConfigurationAnnotation.State> =
    config?.configurable || {};
  const { userId, embeddingModel, retrieverProvider, searchKwargs } = overrides;

  return {
    // Fall back to a shared "default" user so documents can be shared.
    userId: userId || "default",
    embeddingModel: embeddingModel || "openai/text-embedding-3-small",
    retrieverProvider: retrieverProvider || "elastic",
    searchKwargs: searchKwargs || {},
  };
}
|
||||
|
||||
/**
 * The complete agent configuration: every index/retrieval setting plus the
 * prompts and models used for query generation and response generation.
 */
export const ConfigurationAnnotation = Annotation.Root({
  // Reuse all index-level settings (userId, embeddingModel, ...).
  ...IndexConfigurationAnnotation.spec,

  /** The system prompt used for generating responses. */
  responseSystemPromptTemplate: Annotation<string>,

  /** The language model used for generating responses. Should be in the form: provider/model-name. */
  responseModel: Annotation<string>,

  /** The system prompt used for processing and refining queries. */
  querySystemPromptTemplate: Annotation<string>,

  /** The language model used for processing and refining queries. Should be in the form: provider/model-name. */
  queryModel: Annotation<string>,
});
|
||||
|
||||
/**
 * Build the complete agent configuration from a RunnableConfig.
 *
 * Index-level fields come from `ensureIndexConfiguration`; the prompt and
 * model fields are read from `config.configurable` and fall back to their
 * defaults when missing or falsy.
 *
 * @param config - The runnable configuration to read from (may be omitted).
 * @returns The agent configuration with defaults applied.
 */
export function ensureConfiguration(
  config: RunnableConfig | undefined = undefined,
): typeof ConfigurationAnnotation.State {
  const overrides: Partial<typeof ConfigurationAnnotation.State> =
    config?.configurable || {};
  const {
    responseSystemPromptTemplate,
    responseModel,
    querySystemPromptTemplate,
    queryModel,
  } = overrides;

  return {
    ...ensureIndexConfiguration(config),
    responseSystemPromptTemplate:
      responseSystemPromptTemplate || RESPONSE_SYSTEM_PROMPT_TEMPLATE,
    responseModel: responseModel || "anthropic/claude-3-5-sonnet-20240620",
    querySystemPromptTemplate:
      querySystemPromptTemplate || QUERY_SYSTEM_PROMPT_TEMPLATE,
    queryModel: queryModel || "anthropic/claude-3-haiku-20240307",
  };
}
|
||||
@@ -0,0 +1,106 @@
|
||||
import { RunnableConfig } from "@langchain/core/runnables";
|
||||
import { StateGraph } from "@langchain/langgraph";
|
||||
import {
|
||||
ConfigurationAnnotation,
|
||||
ensureConfiguration,
|
||||
} from "./configuration.js";
|
||||
import { StateAnnotation, InputStateAnnotation } from "./state.js";
|
||||
import { formatDocs, getMessageText, loadChatModel } from "./utils.js";
|
||||
import { z } from "zod";
|
||||
import { makeRetriever } from "./retrieval.js";
|
||||
// Schema of the structured output requested from the query-generation model.
const SearchQuery = z.object({
  query: z.string().describe("Search the indexed documents for a query."),
});

/**
 * Produce the search query for the current turn.
 *
 * When the conversation holds exactly one message, its text is used verbatim
 * as the query. Otherwise the configured query model is asked to generate a
 * query, given the conversation and the queries made so far.
 *
 * @param state - Current graph state (messages and previous queries).
 * @param config - Runnable configuration carrying the agent settings.
 * @returns A state update appending the new query to `queries`.
 */
async function generateQuery(
  state: typeof StateAnnotation.State,
  config?: RunnableConfig,
): Promise<typeof StateAnnotation.Update> {
  const messages = state.messages;
  if (messages.length === 1) {
    // It's the first user question. We will use the input directly to search.
    const humanInput = getMessageText(messages[messages.length - 1]);
    return { queries: [humanInput] };
  }

  const configuration = ensureConfiguration(config);
  // Feel free to customize the prompt, model, and other logic!
  const previousQueries = (state.queries || []).join("\n- ");
  // Use function replacers: with a string replacement, `$&`, `$$`, `$'` etc.
  // inside the queries would be expanded as special replacement patterns.
  const systemMessage = configuration.querySystemPromptTemplate
    .replace("{queries}", () => previousQueries)
    .replace("{systemTime}", () => new Date().toISOString());

  const messageValue = [
    { role: "system", content: systemMessage },
    ...state.messages,
  ];
  // Query refinement uses the dedicated `queryModel`, not the response model.
  const model = (
    await loadChatModel(configuration.queryModel)
  ).withStructuredOutput(SearchQuery);

  const generated = await model.invoke(messageValue);
  return {
    queries: [generated.query],
  };
}
|
||||
|
||||
/**
 * Fetch documents for the most recently generated query.
 *
 * @param state - Current graph state; the last entry of `queries` is searched.
 * @param config - Runnable configuration used to build the retriever.
 * @returns A state update setting `retrievedDocs` to the retriever's results.
 */
async function retrieve(
  state: typeof StateAnnotation.State,
  config: RunnableConfig,
): Promise<typeof StateAnnotation.Update> {
  const { queries } = state;
  const latestQuery = queries[queries.length - 1];
  const retriever = await makeRetriever(config);
  const retrievedDocs = await retriever.invoke(latestQuery);
  return { retrievedDocs };
}
|
||||
|
||||
/**
 * Call the LLM powering our "agent" to answer from the retrieved documents.
 *
 * @param state - Current graph state (messages and retrieved documents).
 * @param config - Runnable configuration carrying the agent settings.
 * @returns A state update containing the model's response message.
 */
async function respond(
  state: typeof StateAnnotation.State,
  config: RunnableConfig,
): Promise<typeof StateAnnotation.Update> {
  const configuration = ensureConfiguration(config);

  const model = await loadChatModel(configuration.responseModel);

  const retrievedDocs = formatDocs(state.retrievedDocs);
  // Feel free to customize the prompt, model, and other logic!
  // Use function replacers so document text is inserted literally: with a
  // string replacement, sequences such as `$$` or `$&` in the documents
  // (e.g. LaTeX or shell snippets) would be treated as replacement patterns.
  const systemMessage = configuration.responseSystemPromptTemplate
    .replace("{retrievedDocs}", () => retrievedDocs)
    .replace("{systemTime}", () => new Date().toISOString());
  const messageValue = [
    { role: "system", content: systemMessage },
    ...state.messages,
  ];
  const response = await model.invoke(messageValue);
  // We return a list, because this will get added to the existing list
  return { messages: [response] };
}
|
||||
|
||||
// Lay out the nodes and edges (generateQuery -> retrieve -> respond) and
// compile them into a graph you can invoke and deploy.
export const graph = new StateGraph(
  {
    stateSchema: StateAnnotation,
    // The only input field is the user
    input: InputStateAnnotation,
  },
  ConfigurationAnnotation,
)
  .addNode("generateQuery", generateQuery)
  .addNode("retrieve", retrieve)
  .addNode("respond", respond)
  .addEdge("__start__", "generateQuery")
  .addEdge("generateQuery", "retrieve")
  .addEdge("retrieve", "respond")
  .compile({
    interruptBefore: [], // if you want to update the state before calling the tools
    interruptAfter: [],
  });

graph.name = "Retrieval Graph"; // Customizes the name displayed in LangSmith
|
||||
@@ -0,0 +1,58 @@
|
||||
/**
|
||||
* This "graph" simply exposes an endpoint for a user to upload docs to be indexed.
|
||||
*/
|
||||
|
||||
import { Document } from "@langchain/core/documents";
|
||||
import { RunnableConfig } from "@langchain/core/runnables";
|
||||
import { StateGraph } from "@langchain/langgraph";
|
||||
|
||||
import { IndexStateAnnotation } from "./state.js";
|
||||
import { makeRetriever } from "./retrieval.js";
|
||||
import {
|
||||
ensureIndexConfiguration,
|
||||
IndexConfigurationAnnotation,
|
||||
} from "./configuration.js";
|
||||
|
||||
/**
 * Return copies of the given documents with the configured user id stamped
 * into their metadata under `user_id`.
 *
 * @param docs - Documents to stamp.
 * @param config - Runnable configuration from which the user id is resolved.
 * @returns New Document instances carrying the original content and metadata
 *   plus `user_id`.
 */
function ensureDocsHaveUserId(
  docs: Document[],
  config: RunnableConfig,
): Document[] {
  const { userId } = ensureIndexConfiguration(config);
  return docs.map(
    ({ pageContent, metadata }) =>
      new Document({
        pageContent,
        metadata: { ...metadata, user_id: userId },
      }),
  );
}
|
||||
|
||||
/**
 * Index the documents held in state for the configured user.
 *
 * @param state - Index graph state holding the documents to add.
 * @param config - Runnable configuration; required.
 * @returns A state update that clears `docs` once they have been indexed.
 * @throws Error when no configuration is supplied.
 */
async function indexDocs(
  state: typeof IndexStateAnnotation.State,
  config?: RunnableConfig,
): Promise<typeof IndexStateAnnotation.Update> {
  if (!config) {
    throw new Error("ConfigurationAnnotation required to run index_docs.");
  }

  const retriever = await makeRetriever(config);
  await retriever.addDocuments(ensureDocsHaveUserId(state.docs, config));

  // "delete" tells the docs reducer to empty the channel.
  return { docs: "delete" };
}
|
||||
|
||||
// Define the single-node indexing graph and compile it into a graph you can
// invoke and deploy.
export const graph = new StateGraph(
  IndexStateAnnotation,
  IndexConfigurationAnnotation,
)
  .addNode("indexDocs", indexDocs)
  .addEdge("__start__", "indexDocs")
  .compile();

graph.name = "Index Graph"; // Customizes the name displayed in LangSmith
|
||||
@@ -0,0 +1,17 @@
|
||||
/**
|
||||
* Default prompts.
|
||||
*/
|
||||
|
||||
// System prompt for answering; `{retrievedDocs}` and `{systemTime}` are
// placeholders substituted at call time.
export const RESPONSE_SYSTEM_PROMPT_TEMPLATE = [
  "You are a helpful AI assistant. Answer the user's questions based on the retrieved documents.",
  "",
  "{retrievedDocs}",
  "",
  "System time: {systemTime}",
].join("\n");
|
||||
|
||||
// System prompt for query generation; `{queries}` and `{systemTime}` are
// placeholders substituted at call time. The previous queries are wrapped in
// a properly opened and closed <previous_queries> element.
export const QUERY_SYSTEM_PROMPT_TEMPLATE = `Generate search queries to retrieve documents that may help answer the user's question. Previously, you made the following queries:

<previous_queries>
{queries}
</previous_queries>

System time: {systemTime}`;
|
||||
@@ -0,0 +1,156 @@
|
||||
import { Client } from "@elastic/elasticsearch";
|
||||
import { ElasticVectorSearch } from "@langchain/community/vectorstores/elasticsearch";
|
||||
import { RunnableConfig } from "@langchain/core/runnables";
|
||||
import { VectorStoreRetriever } from "@langchain/core/vectorstores";
|
||||
import { MongoDBAtlasVectorSearch } from "@langchain/mongodb";
|
||||
import { PineconeStore } from "@langchain/pinecone";
|
||||
import { MongoClient } from "mongodb";
|
||||
import { ensureConfiguration } from "./configuration.js";
|
||||
import { Pinecone as PineconeClient } from "@pinecone-database/pinecone";
|
||||
import { Embeddings } from "@langchain/core/embeddings";
|
||||
import { CohereEmbeddings } from "@langchain/cohere";
|
||||
import { OpenAIEmbeddings } from "@langchain/openai";
|
||||
|
||||
/**
 * Build a retriever backed by Elasticsearch (cloud or local).
 *
 * Reads ELASTICSEARCH_URL and, depending on the provider, either
 * ELASTICSEARCH_USER / ELASTICSEARCH_PASSWORD ("elastic-local") or
 * ELASTICSEARCH_API_KEY (otherwise) from the environment.
 *
 * @param configuration - Resolved agent configuration.
 * @param embeddingModel - Embeddings used by the vector store.
 * @returns A retriever filtered to the configured user's documents.
 * @throws Error when a required environment variable is not defined.
 */
async function makeElasticRetriever(
  configuration: ReturnType<typeof ensureConfiguration>,
  embeddingModel: Embeddings,
): Promise<VectorStoreRetriever> {
  const node = process.env.ELASTICSEARCH_URL;
  if (!node) {
    throw new Error("ELASTICSEARCH_URL environment variable is not defined");
  }

  // Pick credentials according to the selected provider flavour.
  const resolveAuth = ():
    | { username: string; password: string }
    | { apiKey: string } => {
    if (configuration.retrieverProvider === "elastic-local") {
      const {
        ELASTICSEARCH_USER: username,
        ELASTICSEARCH_PASSWORD: password,
      } = process.env;
      if (!username || !password) {
        throw new Error(
          "ELASTICSEARCH_USER or ELASTICSEARCH_PASSWORD environment variable is not defined",
        );
      }
      return { username, password };
    }

    const apiKey = process.env.ELASTICSEARCH_API_KEY;
    if (!apiKey) {
      throw new Error(
        "ELASTICSEARCH_API_KEY environment variable is not defined",
      );
    }
    return { apiKey };
  };

  const client = new Client({ node, auth: resolveAuth() });
  const vectorStore = new ElasticVectorSearch(embeddingModel, {
    client,
    indexName: "langchain_index",
  });

  // user_id is applied last so searchKwargs cannot override it.
  const filter = {
    ...(configuration.searchKwargs || {}),
    user_id: configuration.userId,
  };
  return vectorStore.asRetriever({ filter });
}
|
||||
|
||||
/**
 * Build a retriever backed by an existing Pinecone index.
 *
 * @param configuration - Resolved agent configuration.
 * @param embeddingModel - Embeddings used by the vector store.
 * @returns A retriever filtered to the configured user's documents.
 * @throws Error when PINECONE_INDEX_NAME is not defined.
 */
async function makePineconeRetriever(
  configuration: ReturnType<typeof ensureConfiguration>,
  embeddingModel: Embeddings,
): Promise<VectorStoreRetriever> {
  const { PINECONE_INDEX_NAME: indexName } = process.env;
  if (!indexName) {
    throw new Error("PINECONE_INDEX_NAME environment variable is not defined");
  }

  const pineconeIndex = new PineconeClient().Index(indexName);
  const vectorStore = await PineconeStore.fromExistingIndex(embeddingModel, {
    pineconeIndex,
  });

  // user_id is applied last so searchKwargs cannot override it.
  const filter = {
    ...(configuration.searchKwargs || {}),
    user_id: configuration.userId,
  };
  return vectorStore.asRetriever({ filter });
}
|
||||
|
||||
/**
 * Build a retriever backed by MongoDB Atlas Vector Search.
 *
 * Documents live in the `langgraph_retrieval_agent` database, in a
 * collection named after the user id, and searches are pre-filtered on
 * `user_id`.
 *
 * @param configuration - Resolved agent configuration.
 * @param embeddingModel - Embeddings used by the vector store.
 * @returns A retriever scoped to the configured user's documents.
 * @throws Error when MONGODB_URI is not defined.
 */
async function makeMongoDBRetriever(
  configuration: ReturnType<typeof ensureConfiguration>,
  embeddingModel: Embeddings,
): Promise<VectorStoreRetriever> {
  const uri = process.env.MONGODB_URI;
  if (!uri) {
    throw new Error("MONGODB_URI environment variable is not defined");
  }
  // NOTE: the client is not closed in this function.
  const client = new MongoClient(uri);

  // Use the user id as the collection name directly. Building a
  // "db.collection" string and splitting it on "." truncated user ids that
  // contain dots (e.g. "jane.doe" became "jane"), letting different users
  // share a collection.
  const dbName = "langgraph_retrieval_agent";
  const collectionName = configuration.userId;
  const collection = client.db(dbName).collection(collectionName);

  const vectorStore = new MongoDBAtlasVectorSearch(embeddingModel, {
    collection: collection,
    textKey: "text",
    embeddingKey: "embedding",
    indexName: "vector_index",
  });

  // Shallow-copy so the caller's searchKwargs object is left untouched;
  // user_id is applied last so it cannot be overridden.
  const searchKwargs = { ...configuration.searchKwargs };
  searchKwargs.preFilter = {
    ...searchKwargs.preFilter,
    user_id: { $eq: configuration.userId },
  };
  return vectorStore.asRetriever({ filter: searchKwargs });
}
|
||||
|
||||
/**
 * Connect to the configured text encoder.
 *
 * @param modelName - Either "provider/model" or a bare model name; a bare
 *   name is treated as an OpenAI model.
 * @returns The embeddings client for the requested provider.
 * @throws Error when the provider is neither "openai" nor "cohere".
 */
function makeTextEmbeddings(modelName: string): Embeddings {
  const slash = modelName.indexOf("/");
  // Split on the first "/" only; assume openai if no provider included.
  const [provider, model] =
    slash === -1
      ? ["openai", modelName]
      : [modelName.slice(0, slash), modelName.slice(slash + 1)];

  if (provider === "openai") {
    return new OpenAIEmbeddings({ model });
  }
  if (provider === "cohere") {
    return new CohereEmbeddings({ model });
  }
  throw new Error(`Unsupported embedding provider: ${provider}`);
}
|
||||
|
||||
/**
 * Create the retriever selected by the configuration.
 *
 * @param config - Runnable configuration carrying the agent settings.
 * @returns A retriever for the configured vector store provider.
 * @throws Error when the user id is empty or the provider is not recognized.
 */
export async function makeRetriever(
  config: RunnableConfig,
): Promise<VectorStoreRetriever> {
  const configuration = ensureConfiguration(config);
  const embeddingModel = makeTextEmbeddings(configuration.embeddingModel);

  if (!configuration.userId) {
    throw new Error("Please provide a valid user_id in the configuration.");
  }

  const { retrieverProvider } = configuration;
  if (retrieverProvider === "elastic" || retrieverProvider === "elastic-local") {
    return makeElasticRetriever(configuration, embeddingModel);
  }
  if (retrieverProvider === "pinecone") {
    return makePineconeRetriever(configuration, embeddingModel);
  }
  if (retrieverProvider === "mongodb") {
    return makeMongoDBRetriever(configuration, embeddingModel);
  }
  throw new Error(
    `Unrecognized retrieverProvider in configuration: ${configuration.retrieverProvider}`,
  );
}
|
||||
@@ -0,0 +1,154 @@
|
||||
import { Document } from "@langchain/core/documents";
|
||||
import { BaseMessage } from "@langchain/core/messages";
|
||||
import { Annotation, MessagesAnnotation } from "@langchain/langgraph";
|
||||
import { v4 as uuidv4 } from "uuid";
|
||||
|
||||
/**
 * Reduces the document array based on the provided new documents or actions.
 *
 * - `"delete"` clears the documents.
 * - Any other single string becomes one document with a generated id.
 * - An array replaces the documents: strings become documents with generated
 *   ids, objects are shallow-copied and given an `id` / `metadata.id` when
 *   missing. Elements that are neither strings nor non-null objects are
 *   skipped.
 * - Anything else leaves the existing documents unchanged.
 *
 * Input objects are never mutated; the returned entries are plain-object
 * copies.
 *
 * @param existing - The existing array of documents.
 * @param newDocs - The new documents or actions to apply.
 * @returns The updated array of documents.
 */
export function reduceDocs(
  existing?: Document[],
  newDocs?:
    | Document[]
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    | { [key: string]: any }[]
    | string[]
    | string
    | "delete",
) {
  // Supports deletion by returning an empty array when "delete" is specified
  if (newDocs === "delete") {
    return [];
  }
  // Supports adding a single string document
  if (typeof newDocs === "string") {
    const docId = uuidv4();
    return [{ pageContent: newDocs, metadata: { id: docId }, id: docId }];
  }
  // User can provide "docs" content in a few different ways
  if (Array.isArray(newDocs)) {
    const coerced: Document[] = [];
    for (const item of newDocs) {
      if (typeof item === "string") {
        // Same shape as the single-string case: id on both doc and metadata.
        const docId = uuidv4();
        coerced.push({ pageContent: item, metadata: { id: docId }, id: docId });
      } else if (typeof item === "object" && item !== null) {
        // `typeof null === "object"`, hence the explicit null guard above.
        const docId = item.id || uuidv4();
        // Copy rather than mutate the caller's object; keep an existing
        // metadata.id, otherwise reuse the document id.
        coerced.push({
          ...item,
          id: docId,
          metadata: { ...item.metadata, id: item.metadata?.id || docId },
        } as Document);
      }
    }
    return coerced;
  }
  // Returns existing documents if no valid update is provided
  return existing || [];
}
|
||||
|
||||
/** Every update shape accepted by the `docs` channel (see reduceDocs). */
type DocsUpdate =
  | Document[]
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  | { [key: string]: any }[]
  | string[]
  | string
  | "delete";

/**
 * State of the index graph: the documents waiting to be indexed.
 */
export const IndexStateAnnotation = Annotation.Root({
  /**
   * Documents to index.
   *
   * Updates are processed by `reduceDocs`, which can replace the documents
   * or delete them all. Defaults to an empty array.
   */
  docs: Annotation<Document[], DocsUpdate>({
    reducer: reduceDocs,
    default: () => [],
  }),
});
|
||||
|
||||
/**
 * Input schema of the retrieval graph: callers supply only `messages`,
 * which narrows the interface with the user.
 */
export const InputStateAnnotation = Annotation.Root({
  /** The conversation messages provided by the caller. */
  messages: Annotation<BaseMessage[]>,
});
|
||||
|
||||
/**
 * The State defines three things:
 * 1. The structure of the graph's state (which "channels" are available to read/write)
 * 2. The default values for the state's channels
 * 3. The reducers for the state's channels, i.e. the functions that decide
 *    how an update is applied to the state.
 * See [Reducers](https://langchain-ai.github.io/langgraphjs/concepts/low_level/#reducers) for more information.
 */
export const StateAnnotation = Annotation.Root({
  /**
   * The conversation messages, taken from `MessagesAnnotation`.
   *
   * To delete messages, use RemoveMessage.
   * @see https://langchain-ai.github.io/langgraphjs/how-tos/delete-messages/
   * @see https://langchain-ai.github.io/langgraphjs/reference/variables/langgraph.MessagesAnnotation.html
   */
  ...MessagesAnnotation.spec,

  /**
   * The search queries made so far.
   *
   * Append-only: an update may be a single query or an array of queries and
   * is added after the existing ones. Defaults to an empty array.
   *
   * To support deletes or replacements, the reducer could instead accept an
   * action object (e.g. `{ type: "ADD" | "DELETE" | "REPLACE", payload }`)
   * and switch on its type.
   */
  queries: Annotation<string[], string | string[]>({
    // `concat` appends a lone string and flattens an array of strings.
    reducer: (existing: string[], newQueries: string[] | string) =>
      existing.concat(newQueries),
    default: () => [],
  }),

  /** The documents returned by the most recent retrieval. */
  retrievedDocs: Annotation<Document[]>,
});
|
||||
@@ -0,0 +1,4 @@
|
||||
import { describe, it } from "@jest/globals";
|
||||
describe("Researcher", () => {
  // Placeholder test: the body is empty; the third argument is the timeout
  // in milliseconds.
  it(
    "Simple runthrough",
    async () => {},
    100_000,
  );
});
|
||||
@@ -0,0 +1,4 @@
|
||||
import { describe, it } from "@jest/globals";
|
||||
describe("Routers", () => {
  // Placeholder test: the body is empty; the third argument is the timeout
  // in milliseconds.
  it(
    "Test route",
    async () => {},
    100_000,
  );
});
|
||||
@@ -0,0 +1,57 @@
|
||||
import { BaseMessage } from "@langchain/core/messages";
|
||||
import { Document } from "langchain/document";
|
||||
|
||||
import { BaseChatModel } from "@langchain/core/language_models/chat_models";
|
||||
import { initChatModel } from "langchain/chat_models/universal";
|
||||
|
||||
/**
 * Get the text content of a message.
 *
 * String content is returned unchanged. Otherwise the content is treated as
 * a list of parts: string parts and the `text` of object parts are
 * concatenated (parts without text contribute nothing) and the result is
 * trimmed.
 *
 * @param msg - The message to read.
 * @returns The message text.
 */
export function getMessageText(msg: BaseMessage): string {
  const { content } = msg;
  if (typeof content === "string") {
    return content;
  }

  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const parts = content as any[];
  let text = "";
  for (const part of parts) {
    text += typeof part === "string" ? part : part.text || "";
  }
  return text.trim();
}
|
||||
|
||||
/**
 * Render one metadata value for use inside a `<document ...>` tag.
 * Objects and arrays are serialized as JSON instead of the useless
 * "[object Object]"; everything else uses its string form.
 */
function renderMetadataValue(value: unknown): string {
  if (typeof value !== "object" || value === null) {
    return String(value);
  }
  try {
    // JSON.stringify can return undefined or throw (e.g. circular data).
    return JSON.stringify(value) ?? String(value);
  } catch {
    return String(value);
  }
}

/**
 * Format a single document as an XML-like `<document>` element whose
 * attributes are the document's metadata entries (`key=value`, separated by
 * single spaces).
 *
 * @param doc - The document to format.
 * @returns The formatted element with the page content as its body.
 */
export function formatDoc(doc: Document): string {
  const attributes = Object.entries(doc.metadata || {})
    .map(([key, value]) => ` ${key}=${renderMetadataValue(value)}`)
    .join("");

  return `<document${attributes}>\n${doc.pageContent}\n</document>`;
}
|
||||
|
||||
/**
 * Format a list of documents as XML.
 *
 * @param docs - Documents to format; may be omitted or empty.
 * @returns A `<documents>` element wrapping one formatted entry per document,
 *   or an empty `<documents></documents>` element when there are none.
 */
export function formatDocs(docs?: Document[]): string {
  const hasDocs = Boolean(docs && docs.length > 0);
  if (!hasDocs) {
    return "<documents></documents>";
  }

  const body = (docs as Document[]).map(formatDoc).join("\n");
  return `<documents>\n${body}\n</documents>`;
}
|
||||
|
||||
/**
 * Load a chat model from a fully specified name.
 *
 * The name is split at the first "/": the part before it is passed as the
 * model provider and the remainder (which may itself contain "/") as the
 * model name. A name without "/" is passed through as the model name alone.
 *
 * @param fullySpecifiedName - String in the format 'provider/model' or 'provider/account/provider/model'.
 * @returns A Promise that resolves to a BaseChatModel instance.
 */
export async function loadChatModel(
  fullySpecifiedName: string,
): Promise<BaseChatModel> {
  const slash = fullySpecifiedName.indexOf("/");
  if (slash === -1) {
    // If there's no "/", assume it's just the model
    return await initChatModel(fullySpecifiedName);
  }

  const modelProvider = fullySpecifiedName.slice(0, slash);
  const model = fullySpecifiedName.slice(slash + 1);
  return await initChatModel(model, { modelProvider });
}
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 624 KiB |
@@ -0,0 +1,24 @@
|
||||
{
|
||||
"extends": "@tsconfig/recommended",
|
||||
"compilerOptions": {
|
||||
"target": "ES2021",
|
||||
"lib": ["ES2021", "ES2022.Object", "DOM"],
|
||||
"module": "NodeNext",
|
||||
"moduleResolution": "nodenext",
|
||||
"esModuleInterop": true,
|
||||
"noImplicitReturns": true,
|
||||
"noFallthroughCasesInSwitch": true,
|
||||
"noUnusedLocals": true,
|
||||
"noUnusedParameters": true,
|
||||
"useDefineForClassFields": true,
|
||||
"strictPropertyInitialization": false,
|
||||
"allowJs": true,
|
||||
"strict": true,
|
||||
"strictFunctionTypes": false,
|
||||
"outDir": "dist",
|
||||
"types": ["jest", "node"],
|
||||
"resolveJsonModule": true
|
||||
},
|
||||
"include": ["**/*.ts", "**/*.js"],
|
||||
"exclude": ["node_modules", "dist"]
|
||||
}
|
||||
Reference in New Issue
Block a user