mirror of
https://github.com/run-llama/mongodb-demo.git
synced 2026-07-01 20:44:05 -04:00
First commit
This commit is contained in:
@@ -0,0 +1,4 @@
|
||||
flask_app/.venv
|
||||
.env
|
||||
tweets.json
|
||||
fewertweets.json
|
||||
+27
@@ -0,0 +1,27 @@
|
||||
## This script imports the tinytweets.json file into your mongo database
|
||||
## It will work for any json file containing a single array of objects
|
||||
## There's nothing specific to llamaindex going on here
|
||||
## You can get your data into mongo any way you like.
|
||||
|
||||
json_file = 'tinytweets.json'
|
||||
|
||||
# Load environment variables from local .env file
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv()
|
||||
|
||||
import os
|
||||
import json
|
||||
from pymongo.mongo_client import MongoClient
|
||||
from pymongo.server_api import ServerApi
|
||||
|
||||
# Load the tweets from a local file
|
||||
# JSON text is UTF-8 by specification; decode it explicitly so non-ASCII
# tweet text does not depend on the platform's default encoding.
with open(json_file, 'r', encoding='utf-8') as f:
    tweets = json.load(f)
|
||||
|
||||
# Create a new client and connect to the server.
# Settings are read with os.environ[...] rather than os.getenv(...), so an
# unset variable stops the script with a KeyError naming it instead of
# handing None to pymongo.
client = MongoClient(os.environ['MONGODB_URI'], server_api=ServerApi('1'))
db = client[os.environ['MONGODB_DATABASE']]
collection = db[os.environ['MONGODB_COLLECTION']]

# Insert the tweets into mongo
collection.insert_many(tweets)
|
||||
@@ -0,0 +1,53 @@
|
||||
## This script loads data from a mongo database into an index
|
||||
## This will convert all the documents in the database into vectors
|
||||
## which requires a call to OpenAI for each one, so it can take some time.
|
||||
## Once the data is indexed, it will be stored as a new collection in mongodb
|
||||
## and you can query it without having to re-index every time.
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv()
|
||||
|
||||
# This will turn on really noisy logging so you can be sure something is happening
|
||||
import logging
|
||||
import sys
|
||||
# Send DEBUG-and-above records to stdout. basicConfig() already attaches a
# stdout handler to the root logger, so no second StreamHandler is added:
# a second one would print every record twice.
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
|
||||
|
||||
import os
|
||||
from llama_index.readers.mongo import SimpleMongoReader
|
||||
from pymongo.mongo_client import MongoClient
|
||||
from pymongo.server_api import ServerApi
|
||||
from llama_index.vector_stores.mongodb import MongoDBAtlasVectorSearch
|
||||
from llama_index.indices.vector_store.base import VectorStoreIndex
|
||||
from llama_index.storage.storage_context import StorageContext
|
||||
|
||||
# Load objects from mongo and convert them into LlamaIndex Document objects.
# llamaindex has a special class (SimpleMongoReader) that does this for you;
# it pulls every object in a given collection.
# Settings are read with os.environ[...] so an unset variable raises a
# KeyError naming it instead of reaching the reader as None.
query_dict = {}  # mongo query dict; use it to filter your data if you don't want to index everything
reader = SimpleMongoReader(uri=os.environ["MONGODB_URI"])
documents = reader.load_data(
    os.environ["MONGODB_DATABASE"],
    # this is the collection where the objects you loaded in 1_import got stored
    os.environ["MONGODB_COLLECTION"],
    # this is a list of the top-level fields in your objects that will be indexed;
    # make sure your objects have a field called "full_text" or change this value
    field_names=["full_text"],
    query_dict=query_dict,
)
|
||||
|
||||
# Create a new client and connect to the server.
# Settings are read with os.environ[...] so an unset variable raises a
# KeyError naming it instead of reaching the client or the store as None.
client = MongoClient(os.environ["MONGODB_URI"], server_api=ServerApi('1'))

# create Atlas as a vector store
store = MongoDBAtlasVectorSearch(
    client,
    db_name=os.environ['MONGODB_DATABASE'],
    collection_name=os.environ['MONGODB_VECTORS'],  # this is where your embeddings will be stored
    index_name=os.environ['MONGODB_VECTOR_INDEX'],  # this is the name of the index you will need to create
)
|
||||
|
||||
# Build the index from all the loaded Documents; the storage context wraps
# the Atlas vector store, so that is where the result gets stored.
storage_context = StorageContext.from_defaults(vector_store=store)
index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)

# The index cannot be queried yet: you now need to create a vector search
# index in mongodb's UI.
|
||||
+36
@@ -0,0 +1,36 @@
|
||||
## This shows how to load your pre-indexed data from mongo and query it
|
||||
## Note that you MUST manually create a vector search index before this will work
|
||||
## and you must pass in the name of that index when connecting to Mongodb below
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv()
|
||||
|
||||
# Turns on really noisy logging
|
||||
import logging
|
||||
import sys
|
||||
# Send DEBUG-and-above records to stdout. basicConfig() already attaches a
# stdout handler to the root logger, so no second StreamHandler is added:
# a second one would print every record twice.
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
|
||||
|
||||
import os
|
||||
from pymongo.mongo_client import MongoClient
|
||||
from pymongo.server_api import ServerApi
|
||||
from llama_index.vector_stores.mongodb import MongoDBAtlasVectorSearch
|
||||
from llama_index.indices.vector_store.base import VectorStoreIndex
|
||||
|
||||
# Create a new client and connect to the server.
# Settings are read with os.environ[...] so an unset variable raises a
# KeyError naming it instead of reaching the client or the store as None.
client = MongoClient(os.environ["MONGODB_URI"], server_api=ServerApi('1'))

# connect to Atlas as a vector store
store = MongoDBAtlasVectorSearch(
    client,
    db_name=os.environ['MONGODB_DATABASE'],  # this is the database where you stored your embeddings
    collection_name=os.environ['MONGODB_VECTORS'],  # this is where your embeddings were stored in 2_load_and_index.py
    index_name=os.environ['MONGODB_VECTOR_INDEX'],  # this is the name of the index you created after loading your data
)
index = VectorStoreIndex.from_vector_store(store)
|
||||
|
||||
# Query your data!
# The number of documents returned per query is customized to 20 here,
# because tweets are really short.
question = "What does the author think of web frameworks?"
query_engine = index.as_query_engine(similarity_top_k=20)
response = query_engine.query(question)
print(response)
|
||||
|
||||
Binary file not shown.
@@ -0,0 +1,6 @@
|
||||
from flask import Flask
|
||||
app = Flask(__name__)
|
||||
|
||||
@app.route('/')
def hello_world():
    """Respond to requests for the site root with a fixed greeting."""
    return 'Hello, World!'
|
||||
@@ -0,0 +1,12 @@
|
||||
blinker==1.6.3
|
||||
click==8.1.7
|
||||
Flask==3.0.0
|
||||
gunicorn==21.2.0
|
||||
itsdangerous==2.1.2
|
||||
Jinja2==3.1.2
|
||||
jsonschema==3.2.0
|
||||
MarkupSafe==2.1.3
|
||||
packaging==23.2
|
||||
python-dateutil==2.8.1
|
||||
pyzmq==25.1.1
|
||||
Werkzeug==3.0.1
|
||||
+57482
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user