diff --git a/README.md b/README.md index e69de29..ee89a86 100644 --- a/README.md +++ b/README.md @@ -0,0 +1,67 @@ +# Learning LangChain Code Examples + +This repository contains code examples (in Python and JavaScript) from each chapter of the book "Learning LangChain". + +To run the examples, clone the repository and run them in your preferred language. + +**For Python examples:** + +If you haven't installed Python on your system, install it first as per the instructions [here](https://www.python.org/downloads/). + +1. Create a virtual environment: + +The command below creates a directory named `.venv` containing the virtual environment. + +```bash +python -m venv .venv +``` + +2. Activate the virtual environment: + +- macOS/Linux: + +```bash +source .venv/bin/activate +``` + +- Windows: + +```bash +.venv\Scripts\activate +``` + +After activation, your terminal prompt should be prefixed with `(.venv)`, indicating that the virtual environment is active. + +3. Install the dependencies in the `pyproject.toml` file: + +```bash +pip install -e . +``` + +4. Verify the installation: + +```bash +pip list +``` + +5. Run an example to see the output: + +```bash +python ch2/py/a-text-loader.py +``` + +**For JavaScript examples:** + +If you haven't installed Node.js on your system, install it first as per the instructions [here](https://nodejs.org/en/download/). + +1. Install the dependencies in the `package.json` file: + +```bash +npm install +``` + +2. Run the example to see the output: + +```bash +node ch2/js/a-text-loader.js +``` diff --git a/ch2/js/i-pg-vector.js b/ch2/js/i-pg-vector.js new file mode 100644 index 0000000..2bd94d8 --- /dev/null +++ b/ch2/js/i-pg-vector.js @@ -0,0 +1,68 @@ +/** +1. Ensure docker is installed and running (https://docs.docker.com/get-docker/) +2. 
Run the following command to start the postgres container: + +docker run \ + --name pgvector-container \ + -e POSTGRES_USER=langchain \ + -e POSTGRES_PASSWORD=langchain \ + -e POSTGRES_DB=langchain \ + -p 6024:5432 \ + -d pgvector/pgvector:pg16 +3. Use the connection string below for the postgres container +*/ + +import { TextLoader } from 'langchain/document_loaders/fs/text'; +import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters'; +import { OpenAIEmbeddings } from '@langchain/openai'; +import { PGVectorStore } from '@langchain/community/vectorstores/pgvector'; +import { v4 as uuidv4 } from 'uuid'; + +const connectionString = + 'postgresql://langchain:langchain@localhost:6024/langchain'; +// Load the document, split it into chunks +const loader = new TextLoader('./test.txt'); +const raw_docs = await loader.load(); +const splitter = new RecursiveCharacterTextSplitter({ + chunkSize: 1000, + chunkOverlap: 200, +}); +const docs = await splitter.splitDocuments(raw_docs); + +// embed each chunk and insert it into the vector store +const model = new OpenAIEmbeddings(); +const db = await PGVectorStore.fromDocuments(docs, model, { + postgresConnectionOptions: { + connectionString, + }, +}); + +console.log('Vector store created successfully'); + +const results = await db.similaritySearch('query', 4); + +console.log(`Similarity search results: ${JSON.stringify(results)}`); + +console.log('Adding documents to the vector store'); + +const ids = [uuidv4(), uuidv4()]; + +await db.addDocuments( + [ + { + pageContent: 'there are cats in the pond', + metadata: { location: 'pond', topic: 'animals' }, + }, + { + pageContent: 'ducks are also found in the pond', + metadata: { location: 'pond', topic: 'animals' }, + }, + ], + { ids } +); + +console.log('Documents added successfully'); + +await db.delete({ ids: [ids[1]] }); + +console.log('second document deleted successfully'); diff --git a/ch2/py/b-web-loader.py b/ch2/py/b-web-loader.py index a983ab8..f5e8fa3 100644 
--- a/ch2/py/b-web-loader.py +++ b/ch2/py/b-web-loader.py @@ -1,3 +1,11 @@ +""" +Install the beautifulsoup4 package: + +```bash +pip install beautifulsoup4 +``` +""" + from langchain_community.document_loaders import WebBaseLoader loader = WebBaseLoader('https://www.langchain.com/') diff --git a/ch2/py/d-rec-text-splitter.py b/ch2/py/d-rec-text-splitter.py index 6f8034c..fff69e4 100644 --- a/ch2/py/d-rec-text-splitter.py +++ b/ch2/py/d-rec-text-splitter.py @@ -7,3 +7,5 @@ docs = loader.load() splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200) splitted_docs = splitter.split_documents(docs) + +print(splitted_docs) diff --git a/ch2/py/g-embeddings.py b/ch2/py/g-embeddings.py index eba81b8..c7660d4 100644 --- a/ch2/py/g-embeddings.py +++ b/ch2/py/g-embeddings.py @@ -1,6 +1,6 @@ from langchain_openai import OpenAIEmbeddings -model = OpenAIEmbeddings() +model = OpenAIEmbeddings(model="text-embedding-3-small") embeddings = model.embed_documents([ "Hi there!", "Oh, hello!", diff --git a/ch2/py/h-load-split-embed.py b/ch2/py/h-load-split-embed.py index 761fc9e..9193132 100644 --- a/ch2/py/h-load-split-embed.py +++ b/ch2/py/h-load-split-embed.py @@ -11,9 +11,9 @@ splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200) chunks = splitter.split_documents(doc) # Generate embeddings -embeddings_model = OpenAIEmbeddings() +embeddings_model = OpenAIEmbeddings(model="text-embedding-3-small") embeddings = embeddings_model.embed_documents( - chunk.page_content for chunk in chunks + [chunk.page_content for chunk in chunks] ) print(embeddings) diff --git a/ch2/py/i-pg-vector.py b/ch2/py/i-pg-vector.py new file mode 100644 index 0000000..749e03e --- /dev/null +++ b/ch2/py/i-pg-vector.py @@ -0,0 +1,65 @@ +""" +1. Ensure docker is installed and running (https://docs.docker.com/get-docker/) +2. pip install -qU langchain_postgres +3. 
Run the following command to start the postgres container: + +docker run \ + --name pgvector-container \ + -e POSTGRES_USER=langchain \ + -e POSTGRES_PASSWORD=langchain \ + -e POSTGRES_DB=langchain \ + -p 6024:5432 \ + -d pgvector/pgvector:pg16 +4. Use the connection string below for the postgres container + +""" + +from langchain_community.document_loaders import TextLoader +from langchain_openai import OpenAIEmbeddings +from langchain_text_splitters import RecursiveCharacterTextSplitter +from langchain_postgres.vectorstores import PGVector +from langchain_core.documents import Document +import uuid + + +# See docker command above to launch a postgres instance with pgvector enabled. +connection = "postgresql+psycopg://langchain:langchain@localhost:6024/langchain" + +# Load the document, split it into chunks +raw_documents = TextLoader('./test.txt').load() +text_splitter = RecursiveCharacterTextSplitter( + chunk_size=1000, chunk_overlap=200) +documents = text_splitter.split_documents(raw_documents) + +# Create embeddings for the documents +embeddings_model = OpenAIEmbeddings() + +db = PGVector.from_documents( + documents, embeddings_model, connection=connection) + +results = db.similarity_search("query", k=4) + +print(results) + +print("Adding documents to the vector store") +ids = [str(uuid.uuid4()), str(uuid.uuid4())] +db.add_documents( + [ + Document( + page_content="there are cats in the pond", + metadata={"location": "pond", "topic": "animals"}, + ), + Document( + page_content="ducks are also found in the pond", + metadata={"location": "pond", "topic": "animals"}, + ), + ], + ids=ids, +) + +print("Documents added successfully") + +print("Deleting document with id", ids[1]) +db.delete({"ids": [ids[1]]}) + +print("Document deleted successfully") diff --git a/pyproject.toml b/pyproject.toml index 8824331..7745ebd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,6 +12,9 @@ dependencies = [ "langchain>=0.2.14", "python-dotenv>=1.0.1", 
"langchain-community>=0.3.15", + "langchain-postgres>=0.0.12", + "beautifulsoup4>=4.12.2", + "pypdf>=5.1.0", ] [build-system] diff --git a/test.pdf b/test.pdf new file mode 100644 index 0000000..d465872 Binary files /dev/null and b/test.pdf differ