Integrating with LlamaIndex

LlamaIndex is a framework built specifically for RAG applications, and it integrates with Ollama through two small adapter packages.

Installing dependencies

pip install llama-index llama-index-llms-ollama llama-index-embeddings-ollama
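
These packages talk to a running Ollama server, so the examples below assume the server is up and the two models used throughout this section have already been pulled:

ollama pull llama3.2
ollama pull nomic-embed-text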

Basic usage

from llama_index.llms.ollama import Ollama

# request_timeout is in seconds; the first generation can be slow while the model loads
llm = Ollama(model="llama3.2", request_timeout=120.0)

response = llm.complete("Hello, please introduce yourself")
print(response.text)
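
If the server is not at the default address, or you want more deterministic output, the Ollama constructor accepts additional options; a minimal sketch (the values here are illustrative):

llm = Ollama(
    model="llama3.2",
    base_url="http://localhost:11434",  # point at a remote Ollama server if needed
    temperature=0.0,                    # lower temperature for more deterministic answers
    request_timeout=120.0,
)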

Streaming output

from llama_index.llms.ollama import Ollama

llm = Ollama(model="llama3.2")

# stream_complete yields chunks as they are generated; .delta holds the new text
for chunk in llm.stream_complete("Write a poem"):
    print(chunk.delta, end="", flush=True)

Chat mode

from llama_index.llms.ollama import Ollama
from llama_index.core.llms import ChatMessage

llm = Ollama(model="llama3.2")

messages = [
    ChatMessage(role="system", content="You are a friendly assistant"),
    ChatMessage(role="user", content="Hello")
]

response = llm.chat(messages)
print(response.message.content)
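
Streaming works in chat mode as well; a short sketch reusing the llm and messages from above:

# stream_chat yields ChatResponse chunks; .delta holds the newly generated text
for chunk in llm.stream_chat(messages):
    print(chunk.delta, end="", flush=True)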

Embedding models

from llama_index.embeddings.ollama import OllamaEmbedding

embed_model = OllamaEmbedding(
    model_name="nomic-embed-text",
    base_url="http://localhost:11434"
)

embeddings = embed_model.get_text_embedding("Hello World")
print(f"嵌入维度: {len(embeddings)}")

Simple RAG

from llama_index.llms.ollama import Ollama
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings

Settings.llm = Ollama(model="llama3.2")
Settings.embed_model = OllamaEmbedding(model_name="nomic-embed-text")

# load every file under ./documents into Document objects
documents = SimpleDirectoryReader("./documents").load_data()

# chunk, embed, and index the documents in memory
index = VectorStoreIndex.from_documents(documents)

query_engine = index.as_query_engine()

response = query_engine.query("What is the main content of the documents?")
print(response)
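
Rebuilding the index re-embeds every document, so for anything beyond a quick demo you will likely want to persist it; a sketch using LlamaIndex's default on-disk storage (the ./storage path is arbitrary):

from llama_index.core import StorageContext, load_index_from_storage

# write the vector store, docstore, and index metadata to disk
index.storage_context.persist(persist_dir="./storage")

# later: reload without re-embedding
storage_context = StorageContext.from_defaults(persist_dir="./storage")
index = load_index_from_storage(storage_context)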

In-memory vector store

from llama_index.llms.ollama import Ollama
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.core import VectorStoreIndex, Document, Settings

Settings.llm = Ollama(model="llama3.2")
Settings.embed_model = OllamaEmbedding(model_name="nomic-embed-text")

documents = [
    Document(text="Ollama is a tool for running large language models locally."),
    Document(text="Ollama supports many models, including Llama and Mistral."),
    Document(text="Ollama provides a REST API.")
]

index = VectorStoreIndex.from_documents(documents)

query_engine = index.as_query_engine()

response = query_engine.query("Which models does Ollama support?")
print(response)
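
To see which chunks the answer was grounded in, inspect the response's source nodes; a small sketch:

# each retrieved node carries its text and a similarity score
for node in response.source_nodes:
    print(node.score, node.node.get_content()[:60])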

Conversational RAG

from llama_index.llms.ollama import Ollama
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.core import VectorStoreIndex, Document, Settings
from llama_index.core.memory import ChatMemoryBuffer

Settings.llm = Ollama(model="llama3.2")
Settings.embed_model = OllamaEmbedding(model_name="nomic-embed-text")

documents = [
    Document(text="Python is a high-level programming language."),
    Document(text="Python is widely used in web development and data science.")
]

index = VectorStoreIndex.from_documents(documents)

# cap the conversation history at roughly 4096 tokens
memory = ChatMemoryBuffer.from_defaults(token_limit=4096)

# "context" mode retrieves relevant chunks and injects them into every turn
chat_engine = index.as_chat_engine(
    chat_mode="context",
    memory=memory,
    verbose=True
)

response1 = chat_engine.chat("What is Python?")
print(response1)

# the follow-up relies on the memory buffer to resolve "it"
response2 = chat_engine.chat("What is it used for?")
print(response2)
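
Chat engines can stream as well; a sketch assuming the same context chat engine (stream_chat returns a streaming response whose response_gen yields tokens):

streaming_response = chat_engine.stream_chat("Summarize our conversation")
for token in streaming_response.response_gen:
    print(token, end="", flush=True)

Call chat_engine.reset() to clear the memory before starting a new conversation.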

Custom prompts

from llama_index.llms.ollama import Ollama
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.core import VectorStoreIndex, Document, Settings, PromptTemplate

Settings.llm = Ollama(model="llama3.2")
Settings.embed_model = OllamaEmbedding(model_name="nomic-embed-text")

documents = [Document(text="Ollama is a tool for running LLMs locally.")]
index = VectorStoreIndex.from_documents(documents)

# {context_str} and {query_str} are the placeholders LlamaIndex fills in at query time
qa_prompt = PromptTemplate(
    "Answer the question based on the following context. "
    "If the context contains no relevant information, say you don't know.\n"
    "Context: {context_str}\n"
    "Question: {query_str}\n"
    "Answer: "
)

query_engine = index.as_query_engine(text_qa_template=qa_prompt)

response = query_engine.query("What is Ollama?")
print(response)
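
To check which templates a query engine is actually using, or to swap one in after construction, LlamaIndex exposes prompt accessors; a sketch (the "response_synthesizer:text_qa_template" key is, as far as I know, the usual one for the default query engine):

# list the prompts the engine currently uses, keyed by module
for key in query_engine.get_prompts():
    print(key)

# replace the QA template on an already-built engine
query_engine.update_prompts(
    {"response_synthesizer:text_qa_template": qa_prompt}
)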