Integrate Ollama into web applications to build practical AI services. The examples below start with a basic FastAPI endpoint, add streaming, grow into a fuller chat service, show a Flask equivalent, and finish with a small browser frontend.
A minimal FastAPI service that wraps `ollama.chat` behind a single `/chat` endpoint:

```python
from fastapi import FastAPI
from pydantic import BaseModel
import ollama

app = FastAPI(title="Ollama API")

class ChatRequest(BaseModel):
    message: str
    model: str = "llama3.2"

class ChatResponse(BaseModel):
    response: str

@app.post("/chat", response_model=ChatResponse)
def chat(request: ChatRequest):
    # ollama.chat is a blocking call; a sync (non-async) handler lets
    # FastAPI run it in its thread pool instead of stalling the event loop.
    response = ollama.chat(
        model=request.model,
        messages=[{"role": "user", "content": request.message}]
    )
    return ChatResponse(response=response["message"]["content"])

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
```
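A quick way to exercise the endpoint is a small client script. A minimal sketch using the `requests` package, assuming the service is running on localhost:8000:

```python
import requests

# Hypothetical smoke test for the /chat endpoint above.
resp = requests.post(
    "http://localhost:8000/chat",
    json={"message": "Why is the sky blue?", "model": "llama3.2"},
    timeout=120,  # local generation can be slow, especially on first model load
)
resp.raise_for_status()
print(resp.json()["response"])
```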
For token-by-token output, stream the response to the client as server-sent events (SSE):

```python
from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
import ollama
import json

app = FastAPI()

class ChatRequest(BaseModel):
    message: str
    model: str = "llama3.2"

def generate_stream(model: str, message: str):
    # A sync generator over the blocking Ollama stream;
    # StreamingResponse iterates it in a thread pool.
    stream = ollama.chat(
        model=model,
        messages=[{"role": "user", "content": message}],
        stream=True
    )
    for chunk in stream:
        if chunk["message"]["content"]:
            yield f"data: {json.dumps({'content': chunk['message']['content']})}\n\n"
    yield "data: [DONE]\n\n"

@app.post("/chat/stream")
def chat_stream(request: ChatRequest):
    return StreamingResponse(
        generate_stream(request.model, request.message),
        media_type="text/event-stream"
    )
```
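A matching client reads the SSE lines incrementally. A sketch with `requests`, assuming the same host and port as above:

```python
import json
import requests

# Sketch of a client that consumes the SSE stream line by line.
resp = requests.post(
    "http://localhost:8000/chat/stream",
    json={"message": "Tell me a short story.", "model": "llama3.2"},
    stream=True,  # read the body incrementally instead of buffering it all
)
for line in resp.iter_lines(decode_unicode=True):
    if not line or not line.startswith("data: "):
        continue  # skip the blank separator lines between SSE events
    payload = line[len("data: "):]
    if payload == "[DONE]":
        break
    print(json.loads(payload)["content"], end="", flush=True)
print()
```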
A more complete service accepts a full message history, exposes generation options, and adds model-listing and health endpoints:

```python
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import List, Optional
import ollama

app = FastAPI(title="Ollama Chat Service")

class Message(BaseModel):
    role: str
    content: str

class ChatRequest(BaseModel):
    messages: List[Message]
    model: str = "llama3.2"
    temperature: Optional[float] = 0.7
    max_tokens: Optional[int] = None

class ChatResponse(BaseModel):
    message: Message
    model: str

@app.post("/chat", response_model=ChatResponse)
def chat(request: ChatRequest):
    try:
        messages = [{"role": m.role, "content": m.content} for m in request.messages]
        options = {}
        if request.temperature is not None:
            options["temperature"] = request.temperature
        if request.max_tokens is not None:
            # Ollama calls the generation-length cap num_predict.
            options["num_predict"] = request.max_tokens
        response = ollama.chat(
            model=request.model,
            messages=messages,
            options=options if options else None
        )
        return ChatResponse(
            message=Message(
                role=response["message"]["role"],
                content=response["message"]["content"]
            ),
            model=request.model
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

@app.get("/models")
def list_models():
    models = ollama.list()
    # Note: newer ollama-python releases expose the name under the
    # "model" key instead of "name"; adjust for your client version.
    return {"models": [m["name"] for m in models["models"]]}

@app.get("/health")
def health():
    return {"status": "healthy"}
```
The same two endpoints, implemented with Flask:

```python
from flask import Flask, request, jsonify, Response
import ollama
import json

app = Flask(__name__)

@app.route("/chat", methods=["POST"])
def chat():
    data = request.json
    message = data.get("message", "")
    model = data.get("model", "llama3.2")
    response = ollama.chat(
        model=model,
        messages=[{"role": "user", "content": message}]
    )
    return jsonify({
        "response": response["message"]["content"]
    })

@app.route("/chat/stream", methods=["POST"])
def chat_stream():
    data = request.json
    message = data.get("message", "")
    model = data.get("model", "llama3.2")

    def generate():
        stream = ollama.chat(
            model=model,
            messages=[{"role": "user", "content": message}],
            stream=True
        )
        for chunk in stream:
            if chunk["message"]["content"]:
                yield f"data: {json.dumps({'content': chunk['message']['content']})}\n\n"
        yield "data: [DONE]\n\n"

    return Response(generate(), mimetype="text/event-stream")

if __name__ == "__main__":
    app.run(host="0.0.0.0", port=5000)
```
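The chat page shown next posts to a relative `/chat/stream` URL, so it must be served from the same origin (or the API must enable CORS). With Flask, one way is to serve it as a static file; a minimal sketch, assuming the page is saved as `static/index.html`:

```python
from flask import send_from_directory

@app.route("/")
def index():
    # Assumes the chat page below is saved next to app.py as static/index.html.
    return send_from_directory("static", "index.html")
```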
<!DOCTYPE html>
<html>
<head>
<title>Ollama Chat</title>
<style>
#chat { height: 400px; overflow-y: auto; border: 1px solid #ccc; padding: 10px; }
.user { color: blue; }
.assistant { color: green; }
</style>
</head>
<body>
<div id="chat"></div>
<input type="text" id="message" placeholder="输入消息...">
<button onclick="send()">发送</button>
<script>
const chatDiv = document.getElementById('chat');
const messageInput = document.getElementById('message');
function addMessage(role, content) {
const div = document.createElement('div');
div.className = role;
div.textContent = `${role}: ${content}`;
chatDiv.appendChild(div);
chatDiv.scrollTop = chatDiv.scrollHeight;
}
async function send() {
const message = messageInput.value;
if (!message) return;
addMessage('user', message);
messageInput.value = '';
const response = await fetch('/chat/stream', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ message, model: 'llama3.2' })
});
const reader = response.body.getReader();
let assistantMessage = '';
while (true) {
const { done, value } = await reader.read();
if (done) break;
const text = new TextDecoder().decode(value);
const lines = text.split('\n');
for (const line of lines) {
if (line.startsWith('data: ') && !line.includes('[DONE]')) {
const data = JSON.parse(line.slice(6));
assistantMessage += data.content;
}
}
}
addMessage('assistant', assistantMessage);
}
messageInput.addEventListener('keypress', (e) => {
if (e.key === 'Enter') send();
});
</script>
</body>
</html>