LlamaIndex is a framework built specifically for RAG (retrieval-augmented generation) applications, and it integrates with Ollama simply and efficiently through dedicated LLM and embedding packages.
Install the core library together with the Ollama LLM and embedding integrations:

```bash
pip install llama-index llama-index-llms-ollama llama-index-embeddings-ollama
```
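The examples below assume an Ollama server is running locally on the default port (11434). If you have not already pulled the two models used throughout this section, do so first:

```bash
ollama pull llama3.2
ollama pull nomic-embed-text
```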
Basic completion: create an `Ollama` LLM and call `complete()`:

```python
from llama_index.llms.ollama import Ollama

# A generous request_timeout helps when the model is still loading into memory
llm = Ollama(model="llama3.2", request_timeout=120.0)

response = llm.complete("Hello, please introduce yourself")
print(response.text)
```
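The same call is available asynchronously as `acomplete()`, which is useful inside async web frameworks; a minimal sketch:

```python
import asyncio

from llama_index.llms.ollama import Ollama

async def main() -> None:
    llm = Ollama(model="llama3.2", request_timeout=120.0)
    # acomplete() is the async counterpart of complete()
    response = await llm.acomplete("Hello, please introduce yourself")
    print(response.text)

asyncio.run(main())
```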
Streaming output: `stream_complete()` yields chunks whose `delta` field contains only the newly generated text:

```python
from llama_index.llms.ollama import Ollama

llm = Ollama(model="llama3.2")

# Print each token as it arrives
for chunk in llm.stream_complete("Write a poem"):
    print(chunk.delta, end="", flush=True)
```
Multi-turn conversation uses the chat interface with a list of `ChatMessage` objects:

```python
from llama_index.core.llms import ChatMessage
from llama_index.llms.ollama import Ollama

llm = Ollama(model="llama3.2")

messages = [
    ChatMessage(role="system", content="You are a friendly assistant"),
    ChatMessage(role="user", content="Hello"),
]

response = llm.chat(messages)
print(response.message.content)
```
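Chat has a streaming variant as well; a minimal sketch reusing the `messages` list above:

```python
# stream_chat() yields ChatResponse chunks; .delta holds the newly generated text
for chunk in llm.stream_chat(messages):
    print(chunk.delta, end="", flush=True)
```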
Ollama can also serve embedding models. Here `nomic-embed-text` produces the vector for a piece of text:

```python
from llama_index.embeddings.ollama import OllamaEmbedding

embed_model = OllamaEmbedding(
    model_name="nomic-embed-text",
    base_url="http://localhost:11434",
)

embeddings = embed_model.get_text_embedding("Hello World")
print(f"Embedding dimension: {len(embeddings)}")
```
A complete RAG pipeline over local files: register the Ollama LLM and embedding model globally via `Settings`, load documents from a directory, build a vector index, and query it:

```python
from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.llms.ollama import Ollama

# Register the models globally so the index and query engine pick them up
Settings.llm = Ollama(model="llama3.2")
Settings.embed_model = OllamaEmbedding(model_name="nomic-embed-text")

# Load every file in ./documents and index the resulting chunks
documents = SimpleDirectoryReader("./documents").load_data()
index = VectorStoreIndex.from_documents(documents)

query_engine = index.as_query_engine()
response = query_engine.query("What is the main content of the documents?")
print(response)
```
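Rebuilding the index recomputes every embedding, so for anything non-trivial it is worth persisting it to disk. A sketch using the core storage API (the `./storage` directory is just an example, and the global `Settings` from the snippet above must still be configured when reloading):

```python
from llama_index.core import StorageContext, load_index_from_storage

# Save the index (vectors, docstore, metadata) to disk
index.storage_context.persist(persist_dir="./storage")

# Later: reload it without re-embedding the documents
storage_context = StorageContext.from_defaults(persist_dir="./storage")
index = load_index_from_storage(storage_context)
```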
The same pipeline works with in-memory `Document` objects, which is handy for small demos and tests:

```python
from llama_index.core import Document, Settings, VectorStoreIndex
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.llms.ollama import Ollama

Settings.llm = Ollama(model="llama3.2")
Settings.embed_model = OllamaEmbedding(model_name="nomic-embed-text")

documents = [
    Document(text="Ollama is a tool for running large language models locally."),
    Document(text="Ollama supports many models, including Llama and Mistral."),
    Document(text="Ollama provides a REST API."),
]

index = VectorStoreIndex.from_documents(documents)
query_engine = index.as_query_engine()

response = query_engine.query("Which models does Ollama support?")
print(response)
```
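To see which chunks a query actually pulls in, you can call the retriever directly instead of the full query engine; a sketch (the `similarity_top_k` value here is an arbitrary choice):

```python
# Retrieve the two most similar chunks along with their similarity scores
retriever = index.as_retriever(similarity_top_k=2)
for node_with_score in retriever.retrieve("Which models does Ollama support?"):
    print(f"{node_with_score.score:.3f}  {node_with_score.node.get_content()}")
```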
For conversational RAG, wrap the index in a chat engine. In `context` mode every user message triggers retrieval, and `ChatMemoryBuffer` keeps the conversation history so follow-up questions that say "it" resolve correctly:

```python
from llama_index.core import Document, Settings, VectorStoreIndex
from llama_index.core.memory import ChatMemoryBuffer
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.llms.ollama import Ollama

Settings.llm = Ollama(model="llama3.2")
Settings.embed_model = OllamaEmbedding(model_name="nomic-embed-text")

documents = [
    Document(text="Python is a high-level programming language."),
    Document(text="Python is widely used in web development and data science."),
]
index = VectorStoreIndex.from_documents(documents)

# Keep up to ~4096 tokens of conversation history
memory = ChatMemoryBuffer.from_defaults(token_limit=4096)

chat_engine = index.as_chat_engine(
    chat_mode="context",  # retrieve relevant chunks for every message
    memory=memory,
    verbose=True,
)

response1 = chat_engine.chat("What is Python?")
print(response1)

# The memory lets the engine resolve "it" to Python
response2 = chat_engine.chat("What are its applications?")
print(response2)
```
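To start a fresh conversation over the same index, clear the memory rather than rebuilding the engine; a minimal sketch:

```python
# Drop the accumulated history; the next chat() starts from a clean slate
chat_engine.reset()
response = chat_engine.chat("What is Python?")
print(response)
```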
Finally, the question-answering prompt can be customized. The template must keep the `{context_str}` and `{query_str}` placeholders, which the query engine fills with the retrieved context and the user's question:

```python
from llama_index.core import Document, PromptTemplate, Settings, VectorStoreIndex
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.llms.ollama import Ollama

Settings.llm = Ollama(model="llama3.2")
Settings.embed_model = OllamaEmbedding(model_name="nomic-embed-text")

documents = [Document(text="Ollama is a tool for running LLMs locally.")]
index = VectorStoreIndex.from_documents(documents)

qa_prompt = PromptTemplate(
    "Answer the question based on the context below. "
    "If the context contains no relevant information, say you don't know.\n"
    "Context: {context_str}\n"
    "Question: {query_str}\n"
    "Answer:"
)

query_engine = index.as_query_engine(text_qa_template=qa_prompt)
response = query_engine.query("What is Ollama?")
print(response)
```
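To check which templates a query engine is actually using (the exact key names, e.g. `response_synthesizer:text_qa_template`, can vary between versions), you can list them; a short sketch:

```python
# Print every prompt template registered on the query engine
for name, template in query_engine.get_prompts().items():
    print(name)
    print(template.get_template())
```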