Simple RAG

 
""" Retrieval-Augmented Generation (RAG)
A model gets relevant information from a local knowledge base.
1) Document: A small text knowledge base.
2) Chunking: Break into small pieces (per line).
3) Embedding: Turn text into numerical vectors.
4) Retrieval: Find the most similar chunks to the question using cosine similarity.
5) Generation: Give those top chunks to the LLM in a prompt, 
it will answer from facts in your data.
"""

import os
from dotenv import load_dotenv
from openai import OpenAI
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

from icecream import ic
ic.disable() # disable debug output

load_dotenv()
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# ---- 1) Data ----
document = """
LangChain is a Python framework for developing applications powered by language models.
It offers tools for prompt management, chaining, and connecting to external data.
RAG stands for Retrieval-Augmented Generation, where a model uses a retriever 
to get relevant info from a knowledge base before generating an answer.
This helps reduce hallucinations and keeps answers factual.
"""

# ---- 2) Split into chunks ----
chunks = document.split("\n")
chunks = [c.strip() for c in chunks if c and c.strip()]
ic(chunks)
"""
chunks: ['LangChain is a Python framework for developing applications powered by '
        'language models.',
        'It offers tools for prompt management, chaining, and connecting to external '
        'data.',
        'RAG stands for Retrieval-Augmented Generation, where a model uses a '
        'retriever to get relevant info',
        'from a knowledge base before generating an answer.',
        'This helps reduce hallucinations and keeps answers factual.']
"""

# ---- 3) Embed chunks ----
def embed_texts(texts):
    res = client.embeddings.create(
        model='text-embedding-3-small',
        input=texts  # a list of strings is embedded in one batched request
    )
    return np.array([d.embedding for d in res.data])  # one row per input, same order

chunk_embeddings = embed_texts(chunks)
ic(chunk_embeddings)
"""
chunk_embeddings: array([[-0.02162271,  0.00888773,  0.03650993, ..., -0.05459763,
                        -0.00785622, -0.00189854],
                        [-0.02058555,  0.0135925 ,  0.06969436, ..., -0.05116868,
                        -0.02132028,  0.01723991],
                        [-0.00323994,  0.02070102, -0.00502882, ..., -0.00503547,
                        0.02446847, -0.00111243],
                        [-0.02227938, -0.00813061,  0.05838839, ...,  0.00838136,
                        0.03080512,  0.01227949],
                        [-0.01651258, -0.01769928, -0.01214926, ..., -0.00260496,
                        -0.02000033,  0.01199007]])
"""

# ---- 4) Retrieve top chunks ----
def retrieve(query, k=2):
    query_emb = embed_texts([query])[0].reshape(1, -1)  # shape (1, dims)
    sims = cosine_similarity(query_emb, chunk_embeddings)[0]  # one score per chunk
    top_idxs = np.argsort(sims)[::-1][:k]  # indices of the k highest scores
    return [chunks[i] for i in top_idxs]

question = "What is RAG?"
context = "\n".join(retrieve(question))
ic(context)
"""
context: ('RAG stands for Retrieval-Augmented Generation, where a model uses a '
        'retriever to get relevant info'
        'It offers tools for prompt management, chaining, and connecting to external '
        'data.')
"""

# ---- 5) Generate answer ----
def rag_answer(question):
    context = "\n".join(retrieve(question))
    prompt = f"Answer based ONLY on this context:\n{context}\n\nQuestion: {question}"
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt}
        ]
    )
    response_text = response.choices[0].message.content.strip()
    return response_text

if __name__ == "__main__":
    question = "What RAG means?"
    print(f"Question: {question}")

    answer = rag_answer(question)
    print(f"Answer: {answer}")
    """
    Question: What does RAG mean?
    Answer: RAG stands for Retrieval-Augmented Generation, which is a model that uses 
    a retriever to obtain relevant information.
    """

Sample document RAG

 
""" Ask a question about a local file and get an answer from a LLM using RAG
"""

import os
from dotenv import load_dotenv
from openai import OpenAI
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

load_dotenv()
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# 1) Load data
DIR = os.path.dirname(os.path.realpath(__file__))
with open(DIR + "/sample.txt", "r", encoding="utf-8") as f:
    text = f.read()

# 2) Chunk data
chunk_size = 500
overlap = 50
chunks = [text[i:i+chunk_size] for i in range(0, len(text), chunk_size-overlap)]
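
# With chunk_size=500 and overlap=50 the window advances 450 characters at a
# time (starts 0, 450, 900, ...), so consecutive chunks share 50 characters
# of context. Toy illustration (sketch only): chunking "abcdefghij" with
# chunk_size=4, overlap=2 gives ["abcd", "cdef", "efgh", "ghij", "ij"].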

# 3) Embed chunks
def embed_texts(texts):
    res = client.embeddings.create(model='text-embedding-3-small', input=texts)
    return np.array([d.embedding for d in res.data])

chunk_embeddings = embed_texts(chunks)
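
# The embeddings endpoint accepts a list of inputs in one request; for very
# large files you would embed the chunks in batches (the API limits how many
# inputs fit in one request), but this sample fits in a single call.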

# 4) Retrieve top chunks
def retrieve(query, k=2):
    query_emb = embed_texts([query])[0].reshape(1, -1)
    sims = cosine_similarity(query_emb, chunk_embeddings)[0]
    top_idxs = np.argsort(sims)[::-1][:k]
    return [chunks[i] for i in top_idxs]

# 5) Generate answer
def rag_answer(question):
    context = "\n".join(retrieve(question))
    prompt = f"Answer based ONLY on this context:\n{context}\n\nQuestion: {question}"
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt}
        ]
    )
    response_text = response.choices[0].message.content.strip()
    return response_text

if __name__ == "__main__":
    while True:
        request = input('\nQuestion: ')
        if request.lower().strip() == 'quit':
            break
        print(f"Answer: {rag_answer(request)}")

    """
    Question: What is the main topic in this document?
    Answer: The main topic in this document is the discussion among a group of friends 
    about their health and the need for rest, leading to a decision to take a break, 
    potentially a trip on the River, due to feeling overworked.

    Question: Give a paragraph where the author talks about cheese, please.
    Answer: Cheese, like oil, makes too much of itself. It wants the whole boat to itself. 
    It goes through the hamper, and gives a cheesy flavour to everything else there. 
    You can’t tell whether you are eating apple-pie or German sausage, 
    or strawberries and cream. It all seems cheese. There is too much odour about cheese. 
    I remember a friend of mine, buying a couple of cheeses at Liverpool. 
    Splendid cheeses they were, ripe and mellow, and with a two hundred horse-power 
    scent about them that might have been warranted to carry three miles, and knock a man
    over at two hundred yards.

    Question: Who is Montmorency?
    Answer: Montmorency is a character who does not enjoy the river or the scenery and 
    finds the activities of the others, particularly their boating, to be foolishness. 
    He does not engage in smoking and prefers to create mischief and annoy the group 
    by getting in the way and being a nuisance. His ambition in life is to be a 
    perfect bother to those around him.

    Question: Is Montmorency a person?
    Answer: No, Montmorency is not a person; Montmorency is a dog.

    Question: What is the Pythagorean formula?
    Answer: The context provided does not contain any information about 
    the Pythagorean formula. Therefore, I am unable to answer your question 
    based solely on that context.
    """

LangChain RAG

 
""" With LangChain we have less boilerplate, more composability.
Buit-in best practice (chunking, retrival, prompt templating).
"""

import os
from dotenv import load_dotenv

load_dotenv() # expects OPENAI_API_KEY in .env

# ---- LangChain imports ----
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableParallel, RunnablePassthrough

# 0) Models
llm = ChatOpenAI(model='gpt-4o-mini', temperature=0, api_key=os.getenv("OPENAI_API_KEY"))
embeddings = OpenAIEmbeddings(model='text-embedding-3-small')

# 1) Load data
DIR = os.path.dirname(os.path.realpath(__file__))
docs = TextLoader(DIR + "/sample.txt", encoding="utf-8").load()

# 2) Chunk data
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
splits = splitter.split_documents(docs)

# 3) Build vector store (embeddings under the hood)
vs = FAISS.from_documents(splits, embeddings)
retriever = vs.as_retriever(search_kwargs={"k": 2}) # retrieve the top 2 chunks
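
# FAISS holds the index in memory; the vectorstore also exposes
# vs.save_local(...) / FAISS.load_local(...) if you want to persist the
# index between runs (optional here).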

# 4) Prompt template
prompt = ChatPromptTemplate.from_messages([
    ("system", "Answer based ONLY on this context:\n{context}"),
    ("human", "{question}")
])

def format_docs(docs):
    return "\n".join(d.page_content for d in docs)

# 5) Compose the RAG chain (retrieve -> prompt -> LLM -> text)
rag_chain = (
    RunnableParallel(
        # The input question fans out to both keys: the retriever fetches and
        # formats the top chunks, while RunnablePassthrough forwards the
        # question unchanged into the prompt.
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
    )
    | prompt
    | llm
    | StrOutputParser()
)

def rag_answer(question: str) -> str:
    return rag_chain.invoke(question).strip()

if __name__ == "__main__":
    while True:
        request = input("\nQuestion: ")
        if request.lower().strip() == "quit":
            break
        print(f"Answer: {rag_answer(request)}")

    """
    Question: Who are the characters in the text?
    Answer: The characters in the text include:

    1. Montmorency - carrying a stick.
    2. George - carrying coats and rugs, and smoking a short pipe.
    3. Harris - trying to walk with easy grace while carrying ...
    4. Greengrocer's boy - with a basket.
    5. Baker's boy - with a basket.
    6. Boots from the hotel - carrying a hamper.
    7. Confectioner's boy - with a basket.
    8. Grocer's boy - with a basket.
    9. Long-haired dog - mentioned twice.
    10. Cheesemonger's boy - with a basket, mentioned twice.
    11. Odd man - carrying a bag.
    12. Bosom companion of odd man - with hands in pockets, smoking a short clay.
    13. Fruiterer's boy - with a basket.
    14. Narrator (myself) - carrying three hats and a pair of boots.
    15. Six small boys.
    16. Four stray dogs.
    """

Conversational RAG

 
""" LangChain's components makes it easier to extend later, you can easily 
swap in other loaders (PDF/HTML), add memory, or an API layer 
with only a few lines changed.
"""

import os
from dotenv import load_dotenv
load_dotenv()

# ---- LangChain imports ----
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS

# ---- New conversation memory ----
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain

# 0) Model + embeddings
llm = ChatOpenAI(model='gpt-4o-mini', temperature=0, api_key=os.getenv("OPENAI_API_KEY"))
embeddings = OpenAIEmbeddings(model='text-embedding-3-small')

# 1) Load data
DIR = os.path.dirname(os.path.realpath(__file__))
docs = TextLoader(DIR + "/sample.txt", encoding="utf-8").load()
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
splits = splitter.split_documents(docs)

# 2) Vector store
vs = FAISS.from_documents(splits, embeddings)
retriever = vs.as_retriever(search_kwargs={"k": 2}) # retrieve the top 2 chunks

# ---- New memory pattern ----
memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True
)
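
# ConversationBufferMemory keeps the full message history and supplies it
# to the chain under the "chat_history" key on every call.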

conv_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory
)
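
# Under the hood, ConversationalRetrievalChain uses the chat history to
# rewrite a follow-up (e.g. "Is it a dog?") into a standalone question
# before retrieval, which is how the second answer below resolves "it".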

def rag_answer(question: str) -> str:
    result = conv_chain.invoke({"question": question})
    return result["answer"]

if __name__ == "__main__":
    while True:
        request = input("\nQuestion: ")
        if request.lower().strip() == "quit":
            break
        print(f"Answer: {rag_answer(request)}")


    """
    Question: Who is Montmorency?
    Answer: Montmorency is a character described as a small fox-terrier with a personality 
    that suggests a desire to improve the world. He is portrayed as lively and prefers 
    noisy environments, indicating that he does not enjoy solitude. 
    The description implies that he has a charming and somewhat mischievous demeanor that 
    can evoke strong emotions in people.

    Question: Is it a dog?
    Answer: Yes, Montmorency is a dog, specifically a fox-terrier.
    """


