feat: pg-vector-examples

This commit is contained in:
mayo
2025-01-25 06:21:06 +00:00
parent be39d8225d
commit 956cc046a7
9 changed files with 216 additions and 3 deletions
+67
View File
@@ -0,0 +1,67 @@
# Learning LangChain Code Examples
This repository contains code examples (in Python and JavaScript) from each chapter of the book "Learning LangChain".
To run the examples, clone the repository and follow the setup steps below for your preferred language.
**For Python examples:**
If you haven't installed Python on your system, install it first as per the instructions [here](https://www.python.org/downloads/).
1. Create a virtual environment:
This command creates a directory named `.venv` containing the virtual environment.
```bash
python -m venv .venv
```
2. Activate the virtual environment:
- macOS/Linux:
```bash
source .venv/bin/activate
```
- Windows:
```bash
.venv\Scripts\activate
```
After activation, your terminal prompt should be prefixed with `(.venv)`, indicating that the virtual environment is active.
3. Install the dependencies in the `pyproject.toml` file:
```bash
pip install -e .
```
4. Verify the installation:
```bash
pip list
```
5. Run an example to see the output:
```bash
python ch2/py/a-text-loader.py
```
**For JavaScript examples:**
If you haven't installed Node.js on your system, install it first as per the instructions [here](https://nodejs.org/en/download/).
1. Install the dependencies in the `package.json` file:
```bash
npm install
```
2. Run the example to see the output:
```bash
node ch2/js/a-text-loader.js
```
+68
View File
@@ -0,0 +1,68 @@
/**
1. Ensure docker is installed and running (https://docs.docker.com/get-docker/)
2. Run the following command to start the postgres container:
docker run \
--name pgvector-container \
-e POSTGRES_USER=langchain \
-e POSTGRES_PASSWORD=langchain \
-e POSTGRES_DB=langchain \
-p 6024:5432 \
-d pgvector/pgvector:pg16
3. Use the connection string below for the postgres container
*/
import { TextLoader } from 'langchain/document_loaders/fs/text';
import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';
import { OpenAIEmbeddings } from '@langchain/openai';
import { PGVectorStore } from '@langchain/community/vectorstores/pgvector';
import { v4 as uuidv4 } from 'uuid';
// Connection string for the pgvector container started with the docker
// command above (host port 6024 is mapped to the container's 5432).
const connectionString =
  'postgresql://langchain:langchain@localhost:6024/langchain';

// Load the document, split it into chunks
const loader = new TextLoader('./test.txt');
const rawDocs = await loader.load();
const splitter = new RecursiveCharacterTextSplitter({
  chunkSize: 1000,
  chunkOverlap: 200,
});
const docs = await splitter.splitDocuments(rawDocs);

// Embed each chunk and insert it into the vector store
const model = new OpenAIEmbeddings();
const db = await PGVectorStore.fromDocuments(docs, model, {
  postgresConnectionOptions: {
    connectionString,
  },
});
console.log('Vector store created successfully');

try {
  // Fetch the 4 chunks most similar to the query text
  const results = await db.similaritySearch('query', 4);
  console.log(`Similarity search results: ${JSON.stringify(results)}`);

  // Insert two extra documents under ids we generate ourselves, so that a
  // specific document can be targeted for deletion afterwards
  console.log('Adding documents to the vector store');
  const ids = [uuidv4(), uuidv4()];
  await db.addDocuments(
    [
      {
        pageContent: 'there are cats in the pond',
        metadata: { location: 'pond', topic: 'animals' },
      },
      {
        pageContent: 'ducks are also found in the pond',
        metadata: { location: 'pond', topic: 'animals' },
      },
    ],
    { ids }
  );
  console.log('Documents added successfully');

  // Remove only the second of the two documents added above
  await db.delete({ ids: [ids[1]] });
  console.log('second document deleted successfully');
} finally {
  // Release the store's Postgres connection pool, even if a step above
  // failed, so the connection is not leaked and the script can exit cleanly
  await db.end();
}
+8
View File
@@ -1,3 +1,11 @@
"""
Install the beautifulsoup4 package:
```bash
pip install beautifulsoup4
```
"""
from langchain_community.document_loaders import WebBaseLoader
loader = WebBaseLoader('https://www.langchain.com/')
+2
View File
@@ -7,3 +7,5 @@ docs = loader.load()
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splitted_docs = splitter.split_documents(docs)
print(splitted_docs)
+1 -1
View File
@@ -1,6 +1,6 @@
from langchain_openai import OpenAIEmbeddings
model = OpenAIEmbeddings()
model = OpenAIEmbeddings(model="text-embedding-3-small")
embeddings = model.embed_documents([
"Hi there!",
"Oh, hello!",
+2 -2
View File
@@ -11,9 +11,9 @@ splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = splitter.split_documents(doc)
# Generate embeddings
embeddings_model = OpenAIEmbeddings()
embeddings_model = OpenAIEmbeddings(model="text-embedding-3-small")
embeddings = embeddings_model.embed_documents(
chunk.page_content for chunk in chunks
[chunk.page_content for chunk in chunks]
)
print(embeddings)
+65
View File
@@ -0,0 +1,65 @@
"""
1. Ensure docker is installed and running (https://docs.docker.com/get-docker/)
2. pip install -qU langchain_postgres
3. Run the following command to start the postgres container:
docker run \
--name pgvector-container \
-e POSTGRES_USER=langchain \
-e POSTGRES_PASSWORD=langchain \
-e POSTGRES_DB=langchain \
-p 6024:5432 \
-d pgvector/pgvector:pg16
4. Use the connection string below for the postgres container
"""
from langchain_community.document_loaders import TextLoader
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_postgres.vectorstores import PGVector
from langchain_core.documents import Document
import uuid
# See docker command above to launch a postgres instance with pgvector enabled.
connection = "postgresql+psycopg://langchain:langchain@localhost:6024/langchain"

# Load the document, split it into chunks
raw_documents = TextLoader('./test.txt').load()
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=200)
documents = text_splitter.split_documents(raw_documents)

# Create embeddings for the documents and store them in the vector store
embeddings_model = OpenAIEmbeddings()
db = PGVector.from_documents(
    documents, embeddings_model, connection=connection)

# Fetch the 4 chunks most similar to the query text
results = db.similarity_search("query", k=4)
print(results)

# Insert two extra documents under ids we generate ourselves, so that a
# specific document can be targeted for deletion afterwards
print("Adding documents to the vector store")
ids = [str(uuid.uuid4()), str(uuid.uuid4())]
db.add_documents(
    [
        Document(
            page_content="there are cats in the pond",
            metadata={"location": "pond", "topic": "animals"},
        ),
        Document(
            page_content="ducks are also found in the pond",
            metadata={"location": "pond", "topic": "animals"},
        ),
    ],
    ids=ids,
)
print("Documents added successfully")

print("Deleting document with id", ids[1])
# delete() expects the list of ids itself (keyword `ids`); wrapping the list
# in a dict would be taken as the ids argument and match no stored row
db.delete(ids=[ids[1]])
print("Document deleted successfully")
+3
View File
@@ -12,6 +12,9 @@ dependencies = [
"langchain>=0.2.14",
"python-dotenv>=1.0.1",
"langchain-community>=0.3.15",
"langchain-postgres>=0.0.12",
"beautifulsoup4>=4.12.2",
"pypdf>=5.1.0",
]
[build-system]
BIN
View File
Binary file not shown.