feat: pg-vector-examples

2026-07-01 16:06:32 -04:00 · 2025-01-25 06:21:06 +00:00
parent be39d8225d
commit 956cc046a7
9 changed files with 216 additions and 3 deletions
@@ -0,0 +1,67 @@
+# Learning LangChain Code Examples
+
+This repository contains code examples (in Python and JavaScript) from each chapter of the book "Learning LangChain".
+
+To run the examples, you can clone the repository and run the examples in your preferred language.
+
+**For python examples:**
+
+If you haven't installed Python on your system, install it first by following the instructions [here](https://www.python.org/downloads/).
+
+1. Create a virtual environment:
+
+This command creates a directory named `.venv` containing the virtual environment.
+
+```bash
+python -m venv .venv
+```
+
+2. Activate the virtual environment:
+
+- macOS/Linux:
+
+```bash
+source .venv/bin/activate
+```
+
+- Windows:
+
+```bash
+.venv\Scripts\activate
+```
+
+After activation, your terminal prompt should be prefixed with `(.venv)`, indicating that the virtual environment is active.
+
+3. Install the dependencies in the `pyproject.toml` file:
+
+```bash
+pip install -e .
+```
+
+4. Verify the installation:
+
+```bash
+pip list
+```
+
+5. Run an example to see the output:
+
+```bash
+python ch2/py/a-text-loader.py
+```
+
+**For javascript examples:**
+
+If you haven't installed Node.js on your system, install it first by following the instructions [here](https://nodejs.org/en/download/).
+
+1. Install the dependencies in the `package.json` file:
+
+```bash
+npm install
+```
+
+2. Run the example to see the output:
+
+```bash
+node ch2/js/a-text-loader.js
+```
@@ -0,0 +1,68 @@
+/** 
+1. Ensure docker is installed and running (https://docs.docker.com/get-docker/)
+2. Run the following command to start the postgres container:
+   
+docker run \
+    --name pgvector-container \
+    -e POSTGRES_USER=langchain \
+    -e POSTGRES_PASSWORD=langchain \
+    -e POSTGRES_DB=langchain \
+    -p 6024:5432 \
+    -d pgvector/pgvector:pg16
+3. Use the connection string below for the postgres container
+*/
+
+import { TextLoader } from 'langchain/document_loaders/fs/text';
+import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';
+import { OpenAIEmbeddings } from '@langchain/openai';
+import { PGVectorStore } from '@langchain/community/vectorstores/pgvector';
+import { v4 as uuidv4 } from 'uuid';
+
+const connectionString =
+  'postgresql://langchain:langchain@localhost:6024/langchain';
+// Load the document, split it into chunks
+const loader = new TextLoader('./test.txt');
+const raw_docs = await loader.load();
+const splitter = new RecursiveCharacterTextSplitter({
+  chunkSize: 1000,
+  chunkOverlap: 200,
+});
+const docs = await splitter.splitDocuments(raw_docs);
+
+// embed each chunk and insert it into the vector store
+const model = new OpenAIEmbeddings();
+const db = await PGVectorStore.fromDocuments(docs, model, {
+  postgresConnectionOptions: {
+    connectionString,
+  },
+});
+
+console.log('Vector store created successfully');
+
+const results = await db.similaritySearch('query', 4);
+
+console.log(`Similarity search results: ${JSON.stringify(results)}`);
+
+console.log('Adding documents to the vector store');
+
+const ids = [uuidv4(), uuidv4()];
+
+await db.addDocuments(
+  [
+    {
+      pageContent: 'there are cats in the pond',
+      metadata: { location: 'pond', topic: 'animals' },
+    },
+    {
+      pageContent: 'ducks are also found in the pond',
+      metadata: { location: 'pond', topic: 'animals' },
+    },
+  ],
+  { ids }
+);
+
+console.log('Documents added successfully');
+
+await db.delete({ ids: [ids[1]] });
+
+console.log('second document deleted successfully');
@@ -1,3 +1,11 @@
+"""
+Install the beautifulsoup4 package:
+
+```bash
+pip install beautifulsoup4
+```
+"""
+
 from langchain_community.document_loaders import WebBaseLoader

 loader = WebBaseLoader('https://www.langchain.com/')
@@ -7,3 +7,5 @@ docs = loader.load()

 splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
 splitted_docs = splitter.split_documents(docs)
+
+print(splitted_docs)
@@ -1,6 +1,6 @@
 from langchain_openai import OpenAIEmbeddings

-model = OpenAIEmbeddings()
+model = OpenAIEmbeddings(model="text-embedding-3-small")
 embeddings = model.embed_documents([
    "Hi there!",
    "Oh, hello!",
@@ -11,9 +11,9 @@ splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
 chunks = splitter.split_documents(doc)

 # Generate embeddings
-embeddings_model = OpenAIEmbeddings()
+embeddings_model = OpenAIEmbeddings(model="text-embedding-3-small")
 embeddings = embeddings_model.embed_documents(
-    chunk.page_content for chunk in chunks
+    [chunk.page_content for chunk in chunks]
 )

 print(embeddings)
@@ -0,0 +1,65 @@
+"""
+1. Ensure docker is installed and running (https://docs.docker.com/get-docker/)
+2. pip install -qU langchain_postgres
+3. Run the following command to start the postgres container:
+   
+docker run \
+    --name pgvector-container \
+    -e POSTGRES_USER=langchain \
+    -e POSTGRES_PASSWORD=langchain \
+    -e POSTGRES_DB=langchain \
+    -p 6024:5432 \
+    -d pgvector/pgvector:pg16
+4. Use the connection string below for the postgres container
+
+"""
+
+from langchain_community.document_loaders import TextLoader
+from langchain_openai import OpenAIEmbeddings
+from langchain_text_splitters import RecursiveCharacterTextSplitter
+from langchain_postgres.vectorstores import PGVector
+from langchain_core.documents import Document
+import uuid
+
+
+# See docker command above to launch a postgres instance with pgvector enabled.
+connection = "postgresql+psycopg://langchain:langchain@localhost:6024/langchain"
+
+# Load the document, split it into chunks
+raw_documents = TextLoader('./test.txt').load()
+text_splitter = RecursiveCharacterTextSplitter(
+    chunk_size=1000, chunk_overlap=200)
+documents = text_splitter.split_documents(raw_documents)
+
+# Create embeddings for the documents
+embeddings_model = OpenAIEmbeddings()
+
+db = PGVector.from_documents(
+    documents, embeddings_model, connection=connection)
+
+results = db.similarity_search("query", k=4)
+
+print(results)
+
+print("Adding documents to the vector store")
+ids = [str(uuid.uuid4()), str(uuid.uuid4())]
+db.add_documents(
+    [
+        Document(
+            page_content="there are cats in the pond",
+            metadata={"location": "pond", "topic": "animals"},
+        ),
+        Document(
+            page_content="ducks are also found in the pond",
+            metadata={"location": "pond", "topic": "animals"},
+        ),
+    ],
+    ids=ids,
+)
+
+print("Documents added successfully")
+
+print("Deleting document with id", ids[1])
+db.delete({"ids": [ids[1]]})
+
+print("Document deleted successfully")
@@ -12,6 +12,9 @@ dependencies = [
    "langchain>=0.2.14",
    "python-dotenv>=1.0.1",
    "langchain-community>=0.3.15",
+    "langchain-postgres>=0.0.12",
+    "beautifulsoup4>=4.12.2",
+    "pypdf>=5.1.0",
 ]

 [build-system]