Files
learning-langchain/ch2/py/h-load-split-embed.py
T
2025-01-25 06:21:06 +00:00

20 lines
592 B
Python

from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
# Step 1: read ./test.txt through TextLoader; the loaded result is handed
# to the splitter below.
loader = TextLoader("./test.txt")
doc = loader.load()

# Step 2: cut the loaded content into overlapping chunks
# (chunk_size=1000, chunk_overlap=200).
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
chunks = splitter.split_documents(doc)

# Step 3: collect the text of every chunk, embed all of it in one
# embed_documents call, and print the result.
embeddings_model = OpenAIEmbeddings(model="text-embedding-3-small")
chunk_texts = [piece.page_content for piece in chunks]
embeddings = embeddings_model.embed_documents(chunk_texts)
print(embeddings)