mirror of
https://github.com/Mintplex-Labs/langchain-python.git
synced 2026-07-01 19:54:43 -04:00
867ca6d0be
Fixes #14342 @eyurtsev @baskaryan --------- Co-authored-by: Erick Friis <erick@langchain.dev>
41 lines
1.3 KiB
Python
41 lines
1.3 KiB
Python
from typing import Any, List, Sequence
|
|
|
|
from langchain_core.documents import Document
|
|
|
|
from langchain.retrievers import ParentDocumentRetriever
|
|
from langchain.storage import InMemoryStore
|
|
from langchain.text_splitter import CharacterTextSplitter
|
|
from tests.unit_tests.indexes.test_indexing import InMemoryVectorStore
|
|
|
|
|
|
class InMemoryVectorstoreWithSearch(InMemoryVectorStore):
    """Test double that adds an id-lookup ``similarity_search`` to the store.

    No embedding or similarity computation happens here: the query string is
    used directly as a key into ``self.store``. Documents are added with
    sequential string ids ("0", "1", ...), so a test can fetch the first
    added document by searching for ``"0"``.
    """

    def similarity_search(
        self, query: str, k: int = 4, **kwargs: Any
    ) -> List[Document]:
        """Return the entry stored under the key ``query``.

        Args:
            query: Key to look up in ``self.store``.
            k: Accepted for interface compatibility; not used.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            A one-element list holding the stored entry, or an empty list
            when ``self.store.get(query)`` yields ``None``.
        """
        res = self.store.get(query)
        if res is None:
            return []
        return [res]

    def add_documents(
        self, documents: Sequence[Document], **kwargs: Any
    ) -> List[str]:
        """Add ``documents`` through the parent class with positional ids.

        Args:
            documents: Documents to add.
            **kwargs: Accepted for interface compatibility; intentionally not
                forwarded, so a caller-supplied ``ids`` cannot clash with the
                ids generated here.

        Returns:
            Whatever the parent ``add_documents`` returns.
        """
        # Ids are the positions within this call, so they restart at "0" on
        # every invocation.
        ids = [str(i) for i in range(len(documents))]
        return super().add_documents(documents, ids=ids)
|
|
|
|
|
|
def test_parent_document_retriever_initialization() -> None:
    """Add one short document to a retriever and read it back via invoke."""
    retriever = ParentDocumentRetriever(
        vectorstore=InMemoryVectorstoreWithSearch(),
        docstore=InMemoryStore(),
        child_splitter=CharacterTextSplitter(chunk_size=400),
    )
    retriever.add_documents([Document(page_content="test document")])

    # The test vector store assigns ids by position, so the query "0"
    # targets the first entry it was given.
    retrieved = retriever.invoke("0")

    assert len(retrieved) > 0
    assert retrieved[0].page_content == "test document"
|