RAG(Retrieval-Augmented Generation)结合了检索和生成,让模型能基于外部知识回答问题。
import ollama
import numpy as np
from typing import List, Dict
class SimpleRAG:
    """Minimal retrieval-augmented generation (RAG) pipeline backed by Ollama.

    Documents are embedded with ``embed_model`` and kept in memory; a query
    retrieves the ``top_k`` most similar documents by cosine similarity and
    feeds them as context to ``model`` for answer generation.
    """

    def __init__(self, model='llama3.2', embed_model='nomic-embed-text'):
        self.model = model              # chat/generation model name
        self.embed_model = embed_model  # embedding model name
        # Each entry: {'text': str, 'embedding': list[float], 'metadata': dict}
        self.documents: List[Dict] = []

    def cosine_similarity(self, a, b):
        """Return the cosine similarity of vectors *a* and *b*.

        Returns 0.0 when either vector has zero norm (the unguarded 0/0
        division would otherwise yield NaN and poison the ranking).
        """
        denom = np.linalg.norm(a) * np.linalg.norm(b)
        if denom == 0:
            return 0.0
        return np.dot(a, b) / denom

    def add_document(self, text: str, metadata: dict = None):
        """Embed *text* via Ollama and store it with optional *metadata*."""
        response = ollama.embeddings(
            model=self.embed_model,
            prompt=text
        )
        self.documents.append({
            'text': text,
            'embedding': response['embedding'],
            'metadata': metadata or {}
        })

    def add_documents(self, texts: List[str]):
        """Embed and store every text in *texts* (no metadata)."""
        for text in texts:
            self.add_document(text)

    def search(self, query: str, top_k: int = 3) -> List[Dict]:
        """Return the *top_k* stored documents most similar to *query*.

        Each result dict carries 'text', 'score' (cosine similarity) and
        'metadata'. Returns an empty list when no documents are stored.
        """
        response = ollama.embeddings(
            model=self.embed_model,
            prompt=query
        )
        query_embedding = response['embedding']
        similarities = [
            (i, self.cosine_similarity(query_embedding, doc['embedding']))
            for i, doc in enumerate(self.documents)
        ]
        similarities.sort(key=lambda x: x[1], reverse=True)
        return [
            {
                'text': self.documents[i]['text'],
                'score': sim,
                'metadata': self.documents[i]['metadata']
            }
            for i, sim in similarities[:top_k]
        ]

    def query(self, question: str, top_k: int = 3) -> str:
        """Answer *question* using the *top_k* retrieved documents as context."""
        results = self.search(question, top_k)
        context = "\n\n".join([r['text'] for r in results])
        response = ollama.chat(
            model=self.model,
            messages=[
                {
                    'role': 'system',
                    'content': '根据提供的上下文回答问题。如果上下文中没有相关信息,请说明。'
                },
                {
                    'role': 'user',
                    'content': f'上下文:\n{context}\n\n问题:{question}'
                }
            ]
        )
        return response['message']['content']
# Usage example: index a few facts about Ollama, then ask a question.
rag = SimpleRAG()
docs = [
    "Ollama 是一个本地运行大语言模型的工具,支持多种开源模型。",
    "Ollama 支持 Llama、Mistral、Gemma 等多种模型系列。",
    "Ollama 提供 REST API,可以通过 HTTP 请求调用。",
    "Ollama 支持流式输出,可以实时显示生成内容。",
]
rag.add_documents(docs)
answer = rag.query("Ollama 支持哪些模型?")
print(answer)
from typing import List
def split_text(text: str, chunk_size: int = 500, overlap: int = 50) -> List[str]:
    """Split *text* into chunks of at most *chunk_size* characters.

    Chunks prefer to end at the last '。' or newline inside the window, and
    consecutive chunks overlap by up to *overlap* characters so sentences cut
    at a boundary still appear whole in one chunk.

    Bug fix: the original computed ``start = end - overlap`` unconditionally,
    so when the chosen split point was within *overlap* characters of *start*
    (e.g. a '。' right at the window start, or overlap >= chunk_size), *start*
    moved backwards and the loop never terminated. We now clamp the next
    start to strictly advance.

    Returns an empty list for empty input.
    """
    chunks = []
    start = 0
    text_len = len(text)
    while start < text_len:
        end = start + chunk_size
        if end < text_len:
            # Prefer a natural boundary (sentence end or newline) in-window.
            last_period = text.rfind('。', start, end)
            last_newline = text.rfind('\n', start, end)
            split_point = max(last_period, last_newline)
            if split_point > start:
                end = split_point + 1
        chunk = text[start:end].strip()
        if chunk:
            chunks.append(chunk)
        if end >= text_len:
            break
        # Guarantee forward progress even when overlap >= the step taken.
        start = max(end - overlap, start + 1)
    return chunks
class DocumentRAG(SimpleRAG):
    """SimpleRAG that can ingest whole text files by chunking them first."""

    def add_document_file(self, filepath: str, chunk_size: int = 500):
        """Read *filepath* as UTF-8, split it into chunks, and index each one.

        Every chunk is stored with metadata recording its source file and
        its position in the chunk sequence.
        """
        with open(filepath, 'r', encoding='utf-8') as f:
            content = f.read()
        for index, piece in enumerate(split_text(content, chunk_size)):
            self.add_document(piece, {
                'source': filepath,
                'chunk': index
            })
# Usage example: build a file-backed index, then ask about its contents.
rag = DocumentRAG()
rag.add_document_file('document.txt')
answer = rag.query("文档的主要内容是什么?")
print(answer)
import ollama
import numpy as np
class RAGWithSources:
    """RAG pipeline that tracks a source label per document and cites it.

    Context passages are numbered [1], [2], ... in the prompt so the model
    can reference sources in its answer.
    """

    def __init__(self, model='llama3.2', embed_model='nomic-embed-text'):
        self.model = model              # chat/generation model name
        self.embed_model = embed_model  # embedding model name
        # Each entry: {'text': str, 'embedding': list[float], 'source': str|None}
        self.documents = []

    def cosine_similarity(self, a, b):
        """Return the cosine similarity of *a* and *b*.

        Returns 0.0 when either vector has zero norm (the unguarded 0/0
        division would otherwise yield NaN and poison the ranking).
        """
        denom = np.linalg.norm(a) * np.linalg.norm(b)
        if denom == 0:
            return 0.0
        return np.dot(a, b) / denom

    def add_document(self, text, source=None):
        """Embed *text* and store it with an optional *source* label."""
        response = ollama.embeddings(
            model=self.embed_model,
            prompt=text
        )
        self.documents.append({
            'text': text,
            'embedding': response['embedding'],
            'source': source
        })

    def search(self, query, top_k=3):
        """Return the *top_k* documents most similar to *query*.

        Each result dict carries 'text', 'score' and 'source'. Returns an
        empty list when no documents are stored.
        """
        response = ollama.embeddings(
            model=self.embed_model,
            prompt=query
        )
        query_embedding = response['embedding']
        similarities = [
            (i, self.cosine_similarity(query_embedding, doc['embedding']))
            for i, doc in enumerate(self.documents)
        ]
        similarities.sort(key=lambda x: x[1], reverse=True)
        return [
            {
                'text': self.documents[i]['text'],
                'score': sim,
                'source': self.documents[i]['source']
            }
            for i, sim in similarities[:top_k]
        ]

    def query_with_sources(self, question, top_k=3):
        """Answer *question*; return {'answer', 'sources', 'relevant_docs'}.

        'sources' lists only the retrieved documents that carry a source
        label, numbered to match the [n] markers in the prompt context.
        """
        results = self.search(question, top_k)
        context_parts = []
        sources = []
        for i, r in enumerate(results):
            context_parts.append(f"[{i+1}] {r['text']}")
            if r['source']:
                sources.append(f"[{i+1}] {r['source']}")
        context = "\n\n".join(context_parts)
        response = ollama.chat(
            model=self.model,
            messages=[
                {
                    'role': 'system',
                    'content': '根据提供的上下文回答问题,并在回答中引用来源编号(如[1]、[2])。'
                },
                {
                    'role': 'user',
                    'content': f'上下文:\n{context}\n\n问题:{question}'
                }
            ]
        )
        return {
            'answer': response['message']['content'],
            'sources': sources,
            'relevant_docs': results
        }
# Usage example: index two labelled snippets and ask with citations.
rag = RAGWithSources()
for text, src in [
    ("Ollama 是一个本地运行大语言模型的工具。", "ollama-intro.txt"),
    ("Ollama 支持 Llama、Mistral 等多种模型。", "ollama-models.txt"),
]:
    rag.add_document(text, source=src)
result = rag.query_with_sources("什么是 Ollama?")
print("回答: " + result['answer'])
print("\n来源: " + ', '.join(result['sources']))
class ConversationalRAG(RAGWithSources):
    """RAG with conversation memory: prior turns are replayed on every call."""

    def __init__(self, model='llama3.2', embed_model='nomic-embed-text'):
        super().__init__(model, embed_model)
        # Alternating user/assistant messages from earlier chat() calls.
        self.conversation_history = []

    def chat(self, question, top_k=3):
        """Answer *question* using retrieved context plus conversation history.

        Returns {'answer': str, 'sources': list of non-empty source labels}.
        The exchange is appended to the history for follow-up questions.
        """
        hits = self.search(question, top_k)
        context = "\n\n".join(hit['text'] for hit in hits)
        system_msg = {
            'role': 'system',
            'content': '你是一个助手,根据提供的上下文回答问题。保持回答简洁准确。'
        }
        user_msg = {
            'role': 'user',
            'content': f'参考信息:\n{context}\n\n问题:{question}'
        }
        messages = [system_msg, *self.conversation_history, user_msg]
        response = ollama.chat(
            model=self.model,
            messages=messages
        )
        reply = response['message']['content']
        # Record the turn so the model keeps context on follow-ups.
        self.conversation_history += [
            {'role': 'user', 'content': question},
            {'role': 'assistant', 'content': reply},
        ]
        return {
            'answer': reply,
            'sources': [hit['source'] for hit in hits if hit['source']]
        }

    def clear_history(self):
        """Forget every previous turn."""
        self.conversation_history = []
# Usage example: a two-turn conversation — the second question ("它...")
# only resolves correctly because the first turn is kept in history.
rag = ConversationalRAG()
rag.add_document("Python 是一种高级编程语言,由 Guido van Rossum 创建。", source="python-intro.txt")
rag.add_document("Python 广泛用于 Web 开发、数据科学、人工智能等领域。", source="python-usage.txt")
result1 = rag.chat("Python 是什么?")
print("回答: " + result1['answer'])
result2 = rag.chat("它有哪些应用领域?")
print("回答: " + result2['answer'])