mirror of
https://github.com/langchain-ai/langchain-milvus.git
synced 2026-07-01 19:54:58 -04:00
.
This commit is contained in:
@@ -21,26 +21,35 @@ class MilvusCollectionHybridSearchRetriever(BaseRetriever):
|
||||
|
||||
collection: Collection
|
||||
"""Milvus Collection object."""
|
||||
|
||||
rerank: BaseRanker
|
||||
"""Milvus ranker object. Such as WeightedRanker or RRFRanker."""
|
||||
|
||||
anns_fields: List[str]
|
||||
"""The names of vector fields that are used for ANNS search."""
|
||||
|
||||
field_embeddings: List[Union[Embeddings, BaseSparseEmbedding]]
|
||||
"""The embedding functions of each vector fields,
|
||||
which can be either Embeddings or BaseSparseEmbedding."""
|
||||
|
||||
field_search_params: Optional[List[Dict]] = None
|
||||
"""The search parameters of each vector fields.
|
||||
If not specified, the default search parameters will be used."""
|
||||
|
||||
field_limits: Optional[List[int]] = None
|
||||
"""Limit number of results for each ANNS field.
|
||||
If not specified, the default top_k will be used."""
|
||||
|
||||
field_exprs: Optional[List[Optional[str]]] = None
|
||||
"""The boolean expression for filtering the search results."""
|
||||
|
||||
top_k: int = 4
|
||||
"""Final top-K number of documents to retrieve."""
|
||||
|
||||
text_field: str = "text"
|
||||
"""The text field name,
|
||||
which will be used as the `page_content` of a `Document` object."""
|
||||
|
||||
output_fields: Optional[List[str]] = None
|
||||
"""Final output fields of the documents.
|
||||
If not specified, all fields except the vector fields will be used as output fields,
|
||||
@@ -82,9 +91,9 @@ class MilvusCollectionHybridSearchRetriever(BaseRetriever):
|
||||
self.collection.load()
|
||||
|
||||
def _validate_fields_num(self) -> None:
|
||||
assert (
|
||||
len(self.anns_fields) >= 2
|
||||
), "At least two fields are required for hybrid search."
|
||||
assert len(self.anns_fields) >= 2, (
|
||||
"At least two fields are required for hybrid search."
|
||||
)
|
||||
lengths = [len(self.anns_fields)]
|
||||
if self.field_limits is not None:
|
||||
lengths.append(len(self.field_limits))
|
||||
@@ -102,17 +111,17 @@ class MilvusCollectionHybridSearchRetriever(BaseRetriever):
|
||||
def _validate_fields_name(self) -> None:
|
||||
collection_fields = [x.name for x in self.collection.schema.fields]
|
||||
for field in self.anns_fields:
|
||||
assert (
|
||||
field in collection_fields
|
||||
), f"{field} is not a valid field in the collection."
|
||||
assert (
|
||||
self.text_field in collection_fields
|
||||
), f"{self.text_field} is not a valid field in the collection."
|
||||
assert field in collection_fields, (
|
||||
f"{field} is not a valid field in the collection."
|
||||
)
|
||||
assert self.text_field in collection_fields, (
|
||||
f"{self.text_field} is not a valid field in the collection."
|
||||
)
|
||||
for field in self.output_fields: # type: ignore[union-attr]
|
||||
if not self.collection.schema.enable_dynamic_field:
|
||||
assert (
|
||||
field in collection_fields
|
||||
), f"{field} is not a valid field in the collection."
|
||||
assert field in collection_fields, (
|
||||
f"{field} is not a valid field in the collection."
|
||||
)
|
||||
|
||||
def _get_output_fields(self) -> List[str]:
|
||||
if self.output_fields:
|
||||
|
||||
@@ -9,7 +9,7 @@ from langchain_core.retrievers import BaseRetriever
|
||||
class ZillizCloudPipelineRetriever(BaseRetriever):
|
||||
"""`Zilliz Cloud Pipeline` retriever.
|
||||
|
||||
Parameters:
|
||||
Attributes:
|
||||
pipeline_ids: A dictionary of pipeline ids.
|
||||
Valid keys: "ingestion", "search", "deletion".
|
||||
token: Zilliz Cloud's token. Defaults to "".
|
||||
@@ -18,7 +18,9 @@ class ZillizCloudPipelineRetriever(BaseRetriever):
|
||||
"""
|
||||
|
||||
pipeline_ids: Dict
|
||||
|
||||
token: str = ""
|
||||
|
||||
cloud_region: str = "gcp-us-west1"
|
||||
|
||||
def _get_relevant_documents(
|
||||
|
||||
@@ -326,9 +326,9 @@ class Milvus(VectorStore):
|
||||
"Either `embedding_function` or `builtin_function` should be provided."
|
||||
)
|
||||
|
||||
self.embedding_func: Optional[
|
||||
Union[EmbeddingType, List[EmbeddingType]]
|
||||
] = self._from_list(embedding_function)
|
||||
self.embedding_func: Optional[Union[EmbeddingType, List[EmbeddingType]]] = (
|
||||
self._from_list(embedding_function)
|
||||
)
|
||||
self.builtin_func: Optional[
|
||||
Union[BaseMilvusBuiltInFunction, List[BaseMilvusBuiltInFunction]]
|
||||
] = self._from_list(builtin_function)
|
||||
@@ -1233,8 +1233,8 @@ class Milvus(VectorStore):
|
||||
metadatas: Optional metadata for each text
|
||||
ids: Optional IDs for each text
|
||||
force_ids: If force_ids, when auto_id is True and ids is not None,
|
||||
it will return a list containing the customized ids, otherwise,
|
||||
it will not contain the customized ids.
|
||||
it will return a list containing the customized ids, otherwise,
|
||||
it will not contain the customized ids.
|
||||
|
||||
Returns:
|
||||
List of dictionaries ready for insertion
|
||||
@@ -1242,14 +1242,14 @@ class Milvus(VectorStore):
|
||||
insert_list: list[dict] = []
|
||||
|
||||
for vector_field_embeddings in embeddings:
|
||||
assert len(texts) == len(
|
||||
vector_field_embeddings
|
||||
), "Mismatched lengths of texts and embeddings."
|
||||
assert len(texts) == len(vector_field_embeddings), (
|
||||
"Mismatched lengths of texts and embeddings."
|
||||
)
|
||||
|
||||
if metadatas is not None:
|
||||
assert len(texts) == len(
|
||||
metadatas
|
||||
), "Mismatched lengths of texts and metadatas."
|
||||
assert len(texts) == len(metadatas), (
|
||||
"Mismatched lengths of texts and metadatas."
|
||||
)
|
||||
|
||||
for i, text in zip(range(len(texts)), texts):
|
||||
entity_dict = {}
|
||||
@@ -1302,12 +1302,14 @@ class Milvus(VectorStore):
|
||||
that they all fit in memory.
|
||||
metadatas (Optional[List[dict]]): Metadata dicts attached to each of
|
||||
the texts. Defaults to None.
|
||||
should be less than 65535 bytes. Required and work when auto_id is False.
|
||||
timeout (Optional[float]): Timeout for each batch insert. Defaults
|
||||
to None.
|
||||
batch_size (int, optional): Batch size to use for insertion.
|
||||
Defaults to 1000.
|
||||
ids (Optional[List[str]]): List of text ids. The length of each item
|
||||
should be less than 65535 bytes.
|
||||
|
||||
Required and work when `auto_id` is `False`.
|
||||
|
||||
Raises:
|
||||
MilvusException: Failure to add texts
|
||||
@@ -1324,13 +1326,13 @@ class Milvus(VectorStore):
|
||||
)
|
||||
self.auto_id = True
|
||||
elif not self.auto_id and ids: # Check ids
|
||||
assert len(set(ids)) == len(
|
||||
texts
|
||||
), "Different lengths of texts and unique ids are provided."
|
||||
assert len(set(ids)) == len(texts), (
|
||||
"Different lengths of texts and unique ids are provided."
|
||||
)
|
||||
assert all(isinstance(x, str) for x in ids), "All ids should be strings."
|
||||
assert all(
|
||||
len(x.encode()) <= 65_535 for x in ids
|
||||
), "Each id should be a string less than 65535 bytes."
|
||||
assert all(len(x.encode()) <= 65_535 for x in ids), (
|
||||
"Each id should be a string less than 65535 bytes."
|
||||
)
|
||||
|
||||
elif self.auto_id and ids:
|
||||
logger.warning(
|
||||
@@ -1417,12 +1419,12 @@ class Milvus(VectorStore):
|
||||
or list of vectors for each text (in case of multi-vector)
|
||||
metadatas (Optional[List[dict]]): Metadata dicts attached to each of
|
||||
the texts. Defaults to None.
|
||||
should be less than 65535 bytes. Required and work when auto_id is False.
|
||||
timeout (Optional[float]): Timeout for each batch insert. Defaults
|
||||
to None.
|
||||
batch_size (int, optional): Batch size to use for insertion.
|
||||
Defaults to 1000.
|
||||
ids (Optional[List[str]]): List of text ids. The length of each item
|
||||
should be less than 65535 bytes. Required and work when auto_id is False.
|
||||
|
||||
Raises:
|
||||
MilvusException: Failure to add texts and embeddings
|
||||
@@ -1523,7 +1525,7 @@ class Milvus(VectorStore):
|
||||
else:
|
||||
log_entity[k] = v
|
||||
logger.error(
|
||||
"Failed to %s batch starting at entity: %s/%s. " "First entity data: %s",
|
||||
"Failed to %s batch starting at entity: %s/%s. First entity data: %s",
|
||||
operation_name,
|
||||
batch_index + 1,
|
||||
total_count,
|
||||
@@ -1770,7 +1772,7 @@ class Milvus(VectorStore):
|
||||
query (str): The text being searched.
|
||||
k (int, optional): The amount of results to return. Defaults to 4.
|
||||
param (dict | list[dict], optional): The search params for the specified
|
||||
index. Defaults to None.
|
||||
index. Defaults to None.
|
||||
expr (str, optional): Filtering expression. Defaults to None.
|
||||
timeout (float, optional): How long to wait before timeout error.
|
||||
Defaults to None.
|
||||
@@ -2068,8 +2070,7 @@ class Milvus(VectorStore):
|
||||
return _map_ip_to_similarity
|
||||
else:
|
||||
raise ValueError(
|
||||
"No supported normalization function"
|
||||
f" for metric type: {metric_type}."
|
||||
f"No supported normalization function for metric type: {metric_type}."
|
||||
)
|
||||
|
||||
def delete(
|
||||
@@ -2091,13 +2092,13 @@ class Milvus(VectorStore):
|
||||
if isinstance(ids, list) and len(ids) > 0:
|
||||
if expr is not None:
|
||||
logger.warning(
|
||||
"Both ids and expr are provided. " "Ignore expr and delete by ids."
|
||||
"Both ids and expr are provided. Ignore expr and delete by ids."
|
||||
)
|
||||
expr = f"{self._primary_field} in {ids}"
|
||||
else:
|
||||
assert isinstance(
|
||||
expr, str
|
||||
), "Either ids list or expr string must be provided."
|
||||
assert isinstance(expr, str), (
|
||||
"Either ids list or expr string must be provided."
|
||||
)
|
||||
try:
|
||||
self.client.delete(self.collection_name, filter=expr, **kwargs)
|
||||
return True
|
||||
@@ -2128,8 +2129,7 @@ class Milvus(VectorStore):
|
||||
conn.schema_cache.pop(self.collection_name, None)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
f"Failed to clear sync schema cache for "
|
||||
f"{self.collection_name}: {e}"
|
||||
f"Failed to clear sync schema cache for {self.collection_name}: {e}"
|
||||
)
|
||||
|
||||
# Clear schema cache from async client
|
||||
@@ -2201,7 +2201,7 @@ class Milvus(VectorStore):
|
||||
if isinstance(ids, list) and len(ids) > 0:
|
||||
if auto_id:
|
||||
logger.warning(
|
||||
"Both ids and auto_id are provided. " "Ignore auto_id and use ids."
|
||||
"Both ids and auto_id are provided. Ignore auto_id and use ids."
|
||||
)
|
||||
auto_id = False
|
||||
else:
|
||||
@@ -2307,9 +2307,9 @@ class Milvus(VectorStore):
|
||||
if not ids:
|
||||
self.add_documents(documents=documents, **kwargs)
|
||||
else:
|
||||
assert len(set(ids)) == len(
|
||||
documents
|
||||
), "Different lengths of documents and unique ids are provided."
|
||||
assert len(set(ids)) == len(documents), (
|
||||
"Different lengths of documents and unique ids are provided."
|
||||
)
|
||||
|
||||
embeddings_functions: List[EmbeddingType] = self._as_list(self.embedding_func)
|
||||
embeddings: List = []
|
||||
@@ -2474,12 +2474,12 @@ class Milvus(VectorStore):
|
||||
that they all fit in memory.
|
||||
metadatas (Optional[List[dict]]): Metadata dicts attached to each of
|
||||
the texts. Defaults to None.
|
||||
should be less than 65535 bytes. Required and work when auto_id is False.
|
||||
timeout (Optional[float]): Timeout for each batch insert. Defaults
|
||||
to None.
|
||||
batch_size (int, optional): Batch size to use for insertion.
|
||||
Defaults to 1000.
|
||||
ids (Optional[List[str]]): List of text ids. The length of each item
|
||||
should be less than 65535 bytes. Required and work when auto_id is False.
|
||||
|
||||
Raises:
|
||||
MilvusException: Failure to add texts
|
||||
@@ -2496,9 +2496,9 @@ class Milvus(VectorStore):
|
||||
)
|
||||
self.auto_id = True
|
||||
elif not self.auto_id and ids: # Check ids
|
||||
assert len(set(ids)) == len(
|
||||
texts
|
||||
), "Different lengths of texts and unique ids are provided."
|
||||
assert len(set(ids)) == len(texts), (
|
||||
"Different lengths of texts and unique ids are provided."
|
||||
)
|
||||
|
||||
elif self.auto_id and ids:
|
||||
logger.warning(
|
||||
@@ -2565,12 +2565,12 @@ class Milvus(VectorStore):
|
||||
or list of vectors for each text (in case of multi-vector)
|
||||
metadatas (Optional[List[dict]]): Metadata dicts attached to each of
|
||||
the texts. Defaults to None.
|
||||
should be less than 65535 bytes. Required and work when auto_id is False.
|
||||
timeout (Optional[float]): Timeout for each batch insert. Defaults
|
||||
to None.
|
||||
batch_size (int, optional): Batch size to use for insertion.
|
||||
Defaults to 1000.
|
||||
ids (Optional[List[str]]): List of text ids. The length of each item
|
||||
should be less than 65535 bytes. Required and work when auto_id is False.
|
||||
|
||||
Raises:
|
||||
MilvusException: Failure to add texts and embeddings
|
||||
@@ -2872,7 +2872,7 @@ class Milvus(VectorStore):
|
||||
query (str): The text being searched.
|
||||
k (int, optional): The amount of results to return. Defaults to 4.
|
||||
param (dict | list[dict], optional): The search params for the specified
|
||||
index. Defaults to None.
|
||||
index. Defaults to None.
|
||||
expr (str, optional): Filtering expression. Defaults to None.
|
||||
timeout (float, optional): How long to wait before timeout error.
|
||||
Defaults to None.
|
||||
@@ -3126,13 +3126,13 @@ class Milvus(VectorStore):
|
||||
if isinstance(ids, list) and len(ids) > 0:
|
||||
if expr is not None:
|
||||
logger.warning(
|
||||
"Both ids and expr are provided. " "Ignore expr and delete by ids."
|
||||
"Both ids and expr are provided. Ignore expr and delete by ids."
|
||||
)
|
||||
expr = f"{self._primary_field} in {ids}"
|
||||
else:
|
||||
assert isinstance(
|
||||
expr, str
|
||||
), "Either ids list or expr string must be provided."
|
||||
assert isinstance(expr, str), (
|
||||
"Either ids list or expr string must be provided."
|
||||
)
|
||||
try:
|
||||
await self.aclient.delete(self.collection_name, filter=expr, **kwargs)
|
||||
return True
|
||||
@@ -3197,7 +3197,7 @@ class Milvus(VectorStore):
|
||||
if isinstance(ids, list) and len(ids) > 0:
|
||||
if auto_id:
|
||||
logger.warning(
|
||||
"Both ids and auto_id are provided. " "Ignore auto_id and use ids."
|
||||
"Both ids and auto_id are provided. Ignore auto_id and use ids."
|
||||
)
|
||||
auto_id = False
|
||||
else:
|
||||
@@ -3288,9 +3288,9 @@ class Milvus(VectorStore):
|
||||
await self.aadd_documents(documents=documents, **kwargs)
|
||||
return
|
||||
|
||||
assert len(set(ids)) == len(
|
||||
documents
|
||||
), "Different lengths of documents and unique ids are provided."
|
||||
assert len(set(ids)) == len(documents), (
|
||||
"Different lengths of documents and unique ids are provided."
|
||||
)
|
||||
|
||||
embeddings_functions: List[EmbeddingType] = self._as_list(self.embedding_func)
|
||||
embeddings: List = []
|
||||
|
||||
Reference in New Issue
Block a user