diff --git a/README.md b/README.md index 66b93d2..db6875f 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,8 @@ # RAG From Scratch -Retrieval augmented generation (RAG) comes is a general methodology for connecting LLMs with external data sources. These notebooks accompany a video series will build up an understanding of RAG from scratch, starting with the basics of indexing, retrieval, and generation. It will build up to more advanced techniques to address edge cases or challenges in RAG: +LLMs are trained on a large but fixed corpus of data, limiting their ability to reason about private or recent information. Fine-tuning is one way to mitigate this, but is often [not well-suited for factual recall](https://www.anyscale.com/blog/fine-tuning-is-for-form-not-facts) and [can be costly](https://www.glean.com/blog/how-to-build-an-ai-assistant-for-the-enterprise). +Retrieval augmented generation (RAG) has emerged as a popular and powerful mechanism to expand an LLM's knowledge base, using documents retrieved from an external data source to ground the LLM generation via in-context learning. +These notebooks accompany a [video playlist](https://youtube.com/playlist?list=PLfaIDFEXuae2LXbO1_PKyVJiQ23ZztA0x&feature=shared) that builds up an understanding of RAG from scratch, starting with the basics of indexing, retrieval, and generation. ![rag_detail_v2](https://github.com/langchain-ai/rag-from-scratch/assets/122662504/54a2d76c-b07e-49e7-b4ce-fc45667360a1) -Video playlist: -https://www.youtube.com/playlist?list=PLfaIDFEXuae2LXbO1_PKyVJiQ23ZztA0x +[Video playlist](https://www.youtube.com/playlist?list=PLfaIDFEXuae2LXbO1_PKyVJiQ23ZztA0x) \ No newline at end of file diff --git a/rag_from_scratch_10_and_11.ipynb b/rag_from_scratch_10_and_11.ipynb index 2de0b9a..a148fdb 100644 --- a/rag_from_scratch_10_and_11.ipynb +++ b/rag_from_scratch_10_and_11.ipynb @@ -14,7 +14,7 @@ "\n", "![image.png](attachment:c02ab9b5-38f9-451a-b202-62b54ab9c87a.png)\n", "\n", - "## Enviornment\n", + "## Environment\n", "\n", "`(1) Packages`" ] @@ -470,7 +470,7 @@ "1. Allows us to perform unstructured search over the `contents` and `title` of each document\n", "2. And to use range filtering on `view count`, `publication date`, and `length`.\n", "\n", - "We want to convert natural langugae into structured search queries.\n", + "We want to convert natural language into structured search queries.\n", "\n", "We can define a schema for structured search queries." ] diff --git a/rag_from_scratch_12_to_14.ipynb b/rag_from_scratch_12_to_14.ipynb index 26c65f7..a338a66 100644 --- a/rag_from_scratch_12_to_14.ipynb +++ b/rag_from_scratch_12_to_14.ipynb @@ -16,13 +16,13 @@ "\n", "## Preface: Chunking\n", "\n", - "We don't explicity cover document chunking / splitting.\n", + "We don't explicitly cover document chunking / splitting.\n", "\n", "For an excellent review of document chunking, see this video from Greg Kamradt:\n", "\n", "https://www.youtube.com/watch?v=8OJC21T2SL4\n", "\n", - "## Enviornment\n", + "## Environment\n", "\n", "`(1) Packages`" ] diff --git a/rag_from_scratch_15_to_18.ipynb b/rag_from_scratch_15_to_18.ipynb index bf0bdec..f5225f1 100644 --- a/rag_from_scratch_15_to_18.ipynb +++ b/rag_from_scratch_15_to_18.ipynb @@ -20,7 +20,7 @@ "id": "a6656c51-25c7-490b-b76c-a506fab8892b", "metadata": {}, "source": [ - "## Enviornment\n", + "## Environment\n", "\n", "`(1) Packages`" ] diff --git a/rag_from_scratch_1_to_4.ipynb b/rag_from_scratch_1_to_4.ipynb index 85f91e2..cd1925f 100644 --- a/rag_from_scratch_1_to_4.ipynb +++ b/rag_from_scratch_1_to_4.ipynb @@ -18,7 +18,7 @@ "\n", "![Screenshot 2024-03-25 at 8.30.33 PM.png](attachment:c566957c-a8ef-41a9-9b78-e089d35cf0b7.png)\n", "\n", - "## Enviornment\n", + "## Environment\n", "\n", "`(1) Packages`" ] @@ -227,7 +227,7 @@ "id": "f5e0e35f-6861-4c5e-9301-04fd5408f8f8", "metadata": {}, "source": [ - "[Cosine similarity](https://platform.openai.com/docs/guides/embeddings/frequently-asked-questions) is reccomended (1 indicates identical) for OpenAI embeddings." + "[Cosine similarity](https://platform.openai.com/docs/guides/embeddings/frequently-asked-questions) is recommended (1 indicates identical) for OpenAI embeddings." ] }, { diff --git a/rag_from_scratch_5_to_9.ipynb b/rag_from_scratch_5_to_9.ipynb index 9220584..3bf3dfe 100644 --- a/rag_from_scratch_5_to_9.ipynb +++ b/rag_from_scratch_5_to_9.ipynb @@ -16,7 +16,7 @@ "\n", "![Screenshot 2024-03-25 at 8.08.30 PM.png](attachment:d9d5305c-e5bb-4934-b91d-5988c87fd767.png)\n", "\n", - "## Enviornment\n", + "## Environment\n", "\n", "`(1) Packages`" ] @@ -818,7 +818,7 @@ "source": [ "from langchain.prompts import ChatPromptTemplate\n", "\n", - "# HyDE document genration\n", + "# HyDE document generation\n", "template = \"\"\"Please write a scientific paper passage to answer the question\n", "Question: {question}\n", "Passage:\"\"\"\n", @@ -845,8 +845,8 @@ "source": [ "# Retrieve\n", "retrieval_chain = generate_docs_for_retrieval | retriever \n", - "retireved_docs = retrieval_chain.invoke({\"question\":question})\n", - "retireved_docs" + "retrieved_docs = retrieval_chain.invoke({\"question\":question})\n", + "retrieved_docs" ] }, { @@ -872,7 +872,7 @@ " | StrOutputParser()\n", ")\n", "\n", - "final_rag_chain.invoke({\"context\":retireved_docs,\"question\":question})" + "final_rag_chain.invoke({\"context\":retrieved_docs,\"question\":question})" ] }, {