mirror of
https://github.com/langchain-ai/learning-langchain.git
synced 2026-07-01 16:06:32 -04:00
feat: pg-vector-examples
This commit is contained in:
@@ -0,0 +1,67 @@
|
||||
# Learning LangChain Code Examples
|
||||
|
||||
This repository contains code examples (in python and javascript) from each chapter of the book "Learning LangChain".
|
||||
|
||||
To run the examples, you can clone the repository and run the examples in your preferred language.
|
||||
|
||||
**For python examples:**
|
||||
|
||||
If you haven't installed python on your system, install it first as per the instructions [here](https://www.python.org/downloads/).
|
||||
|
||||
1. Create a virtual environment:
|
||||
|
||||
This command creates a directory named `.venv` containing the virtual environment.
|
||||
|
||||
```bash
|
||||
python -m venv .venv
|
||||
```
|
||||
|
||||
2. Activate the virtual environment:
|
||||
|
||||
- macOS/Linux:
|
||||
|
||||
```bash
|
||||
source .venv/bin/activate
|
||||
```
|
||||
|
||||
- Windows:
|
||||
|
||||
```bash
|
||||
.venv\Scripts\activate
|
||||
```
|
||||
|
||||
After activation, your terminal prompt should be prefixed with `(.venv)`, indicating that the virtual environment is active.
|
||||
|
||||
3. Install the dependencies in the `pyproject.toml` file:
|
||||
|
||||
```bash
|
||||
pip install -e .
|
||||
```
|
||||
|
||||
4. Verify the installation:
|
||||
|
||||
```bash
|
||||
pip list
|
||||
```
|
||||
|
||||
5. Run an example to see the output:
|
||||
|
||||
```bash
|
||||
python ch2/py/a-text-loader.py
|
||||
```
|
||||
|
||||
**For javascript examples:**
|
||||
|
||||
If you haven't installed node on your system, install it first as per the instructions [here](https://nodejs.org/en/download/).
|
||||
|
||||
1. Install the dependencies in the `package.json` file:
|
||||
|
||||
```bash
|
||||
npm install
|
||||
```
|
||||
|
||||
2. Run the example to see the output:
|
||||
|
||||
```bash
|
||||
node ch2/js/a-text-loader.js
|
||||
```
|
||||
|
||||
@@ -0,0 +1,68 @@
|
||||
/**
|
||||
1. Ensure docker is installed and running (https://docs.docker.com/get-docker/)
|
||||
2. Run the following command to start the postgres container:
|
||||
|
||||
docker run \
|
||||
--name pgvector-container \
|
||||
-e POSTGRES_USER=langchain \
|
||||
-e POSTGRES_PASSWORD=langchain \
|
||||
-e POSTGRES_DB=langchain \
|
||||
-p 6024:5432 \
|
||||
-d pgvector/pgvector:pg16
|
||||
3. Use the connection string below for the postgres container
|
||||
*/
|
||||
|
||||
import { TextLoader } from 'langchain/document_loaders/fs/text';
|
||||
import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';
|
||||
import { OpenAIEmbeddings } from '@langchain/openai';
|
||||
import { PGVectorStore } from '@langchain/community/vectorstores/pgvector';
|
||||
import { v4 as uuidv4 } from 'uuid';
|
||||
|
||||
const connectionString =
|
||||
'postgresql://langchain:langchain@localhost:6024/langchain';
|
||||
// Load the document, split it into chunks
|
||||
const loader = new TextLoader('./test.txt');
|
||||
const raw_docs = await loader.load();
|
||||
const splitter = new RecursiveCharacterTextSplitter({
|
||||
chunkSize: 1000,
|
||||
chunkOverlap: 200,
|
||||
});
|
||||
const docs = await splitter.splitDocuments(raw_docs);
|
||||
|
||||
// embed each chunk and insert it into the vector store
|
||||
const model = new OpenAIEmbeddings();
|
||||
const db = await PGVectorStore.fromDocuments(docs, model, {
|
||||
postgresConnectionOptions: {
|
||||
connectionString,
|
||||
},
|
||||
});
|
||||
|
||||
console.log('Vector store created successfully');
|
||||
|
||||
const results = await db.similaritySearch('query', 4);
|
||||
|
||||
console.log(`Similarity search results: ${JSON.stringify(results)}`);
|
||||
|
||||
console.log('Adding documents to the vector store');
|
||||
|
||||
const ids = [uuidv4(), uuidv4()];
|
||||
|
||||
await db.addDocuments(
|
||||
[
|
||||
{
|
||||
pageContent: 'there are cats in the pond',
|
||||
metadata: { location: 'pond', topic: 'animals' },
|
||||
},
|
||||
{
|
||||
pageContent: 'ducks are also found in the pond',
|
||||
metadata: { location: 'pond', topic: 'animals' },
|
||||
},
|
||||
],
|
||||
{ ids }
|
||||
);
|
||||
|
||||
console.log('Documents added successfully');
|
||||
|
||||
// Remove only the second of the two documents that were just added.
await db.delete({ ids: [ids[1]] });

console.log('second document deleted successfully');

// Close the store's Postgres connection so the Node process can exit
// instead of staying alive on an open pool.
await db.end();
|
||||
@@ -1,3 +1,11 @@
|
||||
"""
|
||||
Install the beautifulsoup4 package:
|
||||
|
||||
```bash
|
||||
pip install beautifulsoup4
|
||||
```
|
||||
"""
|
||||
|
||||
from langchain_community.document_loaders import WebBaseLoader
|
||||
|
||||
loader = WebBaseLoader('https://www.langchain.com/')
|
||||
|
||||
@@ -7,3 +7,5 @@ docs = loader.load()
|
||||
|
||||
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
|
||||
splitted_docs = splitter.split_documents(docs)
|
||||
|
||||
print(splitted_docs)
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
from langchain_openai import OpenAIEmbeddings
|
||||
|
||||
model = OpenAIEmbeddings()
|
||||
model = OpenAIEmbeddings(model="text-embedding-3-small")
|
||||
embeddings = model.embed_documents([
|
||||
"Hi there!",
|
||||
"Oh, hello!",
|
||||
|
||||
@@ -11,9 +11,9 @@ splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
|
||||
chunks = splitter.split_documents(doc)
|
||||
|
||||
# Generate embeddings
|
||||
embeddings_model = OpenAIEmbeddings()
|
||||
embeddings_model = OpenAIEmbeddings(model="text-embedding-3-small")
|
||||
embeddings = embeddings_model.embed_documents(
|
||||
chunk.page_content for chunk in chunks
|
||||
[chunk.page_content for chunk in chunks]
|
||||
)
|
||||
|
||||
print(embeddings)
|
||||
|
||||
@@ -0,0 +1,65 @@
|
||||
"""
|
||||
1. Ensure docker is installed and running (https://docs.docker.com/get-docker/)
|
||||
2. pip install -qU langchain_postgres
|
||||
3. Run the following command to start the postgres container:
|
||||
|
||||
docker run \
|
||||
--name pgvector-container \
|
||||
-e POSTGRES_USER=langchain \
|
||||
-e POSTGRES_PASSWORD=langchain \
|
||||
-e POSTGRES_DB=langchain \
|
||||
-p 6024:5432 \
|
||||
-d pgvector/pgvector:pg16
|
||||
4. Use the connection string below for the postgres container
|
||||
|
||||
"""
|
||||
|
||||
from langchain_community.document_loaders import TextLoader
|
||||
from langchain_openai import OpenAIEmbeddings
|
||||
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
||||
from langchain_postgres.vectorstores import PGVector
|
||||
from langchain_core.documents import Document
|
||||
import uuid
|
||||
|
||||
|
||||
# See docker command above to launch a postgres instance with pgvector enabled.
|
||||
connection = "postgresql+psycopg://langchain:langchain@localhost:6024/langchain"
|
||||
|
||||
# Load the document, split it into chunks
|
||||
raw_documents = TextLoader('./test.txt').load()
|
||||
text_splitter = RecursiveCharacterTextSplitter(
|
||||
chunk_size=1000, chunk_overlap=200)
|
||||
documents = text_splitter.split_documents(raw_documents)
|
||||
|
||||
# Create embeddings for the documents
|
||||
embeddings_model = OpenAIEmbeddings()
|
||||
|
||||
db = PGVector.from_documents(
|
||||
documents, embeddings_model, connection=connection)
|
||||
|
||||
results = db.similarity_search("query", k=4)
|
||||
|
||||
print(results)
|
||||
|
||||
print("Adding documents to the vector store")
|
||||
ids = [str(uuid.uuid4()), str(uuid.uuid4())]
|
||||
db.add_documents(
|
||||
[
|
||||
Document(
|
||||
page_content="there are cats in the pond",
|
||||
metadata={"location": "pond", "topic": "animals"},
|
||||
),
|
||||
Document(
|
||||
page_content="ducks are also found in the pond",
|
||||
metadata={"location": "pond", "topic": "animals"},
|
||||
),
|
||||
],
|
||||
ids=ids,
|
||||
)
|
||||
|
||||
print("Documents added successfully")
|
||||
|
||||
print("Deleting document with id", ids[1])
# PGVector.delete expects the list of ids via the `ids` argument; passing a
# dict positionally would be treated as the id collection itself.
db.delete(ids=[ids[1]])
|
||||
|
||||
print("Document deleted successfully")
|
||||
@@ -12,6 +12,9 @@ dependencies = [
|
||||
"langchain>=0.2.14",
|
||||
"python-dotenv>=1.0.1",
|
||||
"langchain-community>=0.3.15",
|
||||
"langchain-postgres>=0.0.12",
|
||||
"beautifulsoup4>=4.12.2",
|
||||
"pypdf>=5.1.0",
|
||||
]
|
||||
|
||||
[build-system]
|
||||
|
||||
Reference in New Issue
Block a user