流式处理让模型响应实时显示,提升用户体验。JavaScript SDK 完整支持流式操作。
import ollama from 'ollama'
// Basic streaming chat: request a streamed reply and print each token
// to stdout the moment it arrives.
const stream = await ollama.chat({
  model: 'llama3.2',
  messages: [{ role: 'user', content: '写一首诗' }],
  stream: true
})

for await (const part of stream) {
  const text = part.message.content
  if (text) process.stdout.write(text)
}
// Streaming completion via the generate endpoint; chunks expose the
// generated text on `response` rather than `message.content`.
const stream = await ollama.generate({
  model: 'llama3.2',
  prompt: '用 JavaScript 写一个快速排序',
  stream: true
})

for await (const part of stream) {
  if (part.response) process.stdout.write(part.response)
}
/**
 * Stream a chat completion, invoking `callback` once per content chunk.
 *
 * @param {string} model - Name of the model to run.
 * @param {Array<{role: string, content: string}>} messages - Chat history.
 * @param {(text: string) => void} [callback] - Invoked with each new token.
 * @returns {Promise<string>} The complete concatenated response text.
 */
async function streamChat(model, messages, callback) {
  const stream = await ollama.chat({ model, messages, stream: true })

  const pieces = []
  for await (const chunk of stream) {
    const text = chunk.message.content
    if (!text) continue
    pieces.push(text)
    callback?.(text)
  }
  return pieces.join('')
}
// Usage: print every streamed token to stdout as it arrives.
const printToken = (text) => process.stdout.write(text)
await streamChat('llama3.2', [{ role: 'user', content: '写一首诗' }], printToken)
/**
 * Multi-turn streaming chat session that retains the full conversation
 * history across calls to send().
 */
class StreamingChat {
  /**
   * @param {string} [model='llama3.2'] - Model used for every turn.
   * @param {?string} [system=null] - Optional system prompt for the session.
   */
  constructor(model = 'llama3.2', system = null) {
    this.model = model
    this.messages = system ? [{ role: 'system', content: system }] : []
  }

  /**
   * Send a user message, stream the reply to stdout, and record both
   * the user turn and the assistant turn in the history.
   *
   * @param {string} content - The user's message text.
   * @returns {Promise<string>} The assistant's complete reply.
   */
  async send(content) {
    this.messages.push({ role: 'user', content })

    const stream = await ollama.chat({
      model: this.model,
      messages: this.messages,
      stream: true
    })

    process.stdout.write('助手: ')
    const pieces = []
    for await (const chunk of stream) {
      const text = chunk.message.content
      if (text) {
        process.stdout.write(text)
        pieces.push(text)
      }
    }
    console.log()

    const reply = pieces.join('')
    this.messages.push({ role: 'assistant', content: reply })
    return reply
  }
}
// Usage: start a session with a system prompt, then chat over two turns.
// Each send() streams the reply to stdout and appends it to the history.
const chat = new StreamingChat('llama3.2', '你是一个友好的助手')
await chat.send('你好')
await chat.send('写一首关于春天的诗')
// Stream a reply, then report run statistics taken from the final chunk.
const stream = await ollama.chat({
  model: 'llama3.2',
  messages: [{ role: 'user', content: '你好' }],
  stream: true
})

for await (const part of stream) {
  const text = part.message.content
  if (text) process.stdout.write(text)

  // The terminal chunk carries done=true plus timing/token counters.
  if (part.done) {
    console.log('\n---')
    console.log(`总耗时: ${(part.total_duration / 1e9).toFixed(2)}秒`)
    console.log(`生成token数: ${part.eval_count}`)
  }
}
// Browser: append streamed tokens to a DOM element. Assigning through
// textContent (not innerHTML) keeps model output from injecting markup.
const stream = await ollama.chat({
  model: 'llama3.2',
  messages: [{ role: 'user', content: '写一首诗' }],
  stream: true
})

const output = document.getElementById('output')
for await (const part of stream) {
  const text = part.message.content
  if (text) {
    output.textContent += text
  }
}
import express from 'express'
import ollama from 'ollama'
// Express SSE endpoint that proxies a streaming Ollama chat.
const app = express()
app.use(express.json())

app.post('/chat/stream', async (req, res) => {
  const { message } = req.body ?? {}
  // Validate before committing to an event-stream response.
  if (typeof message !== 'string' || message.length === 0) {
    res.status(400).json({ error: 'message is required' })
    return
  }

  res.setHeader('Content-Type', 'text/event-stream')
  res.setHeader('Cache-Control', 'no-cache')
  res.setHeader('Connection', 'keep-alive')

  try {
    const stream = await ollama.chat({
      model: 'llama3.2',
      messages: [{ role: 'user', content: message }],
      stream: true
    })
    for await (const chunk of stream) {
      // Stop reading from the model if the client disconnected mid-stream.
      if (res.writableEnded || res.destroyed) return
      const content = chunk.message.content
      if (content) {
        res.write(`data: ${JSON.stringify({ content })}\n\n`)
      }
    }
    res.write('data: [DONE]\n\n')
  } catch (err) {
    // Headers are already sent, so surface the failure as an SSE event
    // instead of leaving the connection hanging.
    res.write(`data: ${JSON.stringify({ error: String(err) })}\n\n`)
  } finally {
    res.end()
  }
})

app.listen(3000)
import ollama from 'ollama'
/**
 * Fetch-API route handler (e.g. Next.js App Router): streams an Ollama
 * chat reply to the client as server-sent events.
 *
 * @param {Request} request - Incoming request with JSON body `{ message }`.
 * @returns {Promise<Response>} A `text/event-stream` response.
 */
export async function POST(request) {
  const { message } = await request.json()

  const stream = await ollama.chat({
    model: 'llama3.2',
    messages: [{ role: 'user', content: message }],
    stream: true
  })

  const encoder = new TextEncoder()
  const readable = new ReadableStream({
    async start(controller) {
      try {
        for await (const chunk of stream) {
          const content = chunk.message.content
          if (content) {
            controller.enqueue(encoder.encode(`data: ${JSON.stringify({ content })}\n\n`))
          }
        }
        controller.enqueue(encoder.encode('data: [DONE]\n\n'))
        controller.close()
      } catch (err) {
        // Without this, a mid-stream failure is an unhandled rejection and
        // the ReadableStream stalls forever; error it so the consumer sees
        // the failure.
        controller.error(err)
      }
    }
  })

  return new Response(readable, {
    headers: {
      'Content-Type': 'text/event-stream',
      'Cache-Control': 'no-cache',
      'Connection': 'keep-alive'
    }
  })
}
// Cancelling a long-running stream. Note: ollama.chat() accepts a single
// request object — the SDK has no second options argument, so passing an
// AbortSignal there is silently ignored and the stream would never cancel.
// The documented mechanism is ollama.abort(), which aborts in-flight
// streamed requests and makes the iterator throw an AbortError.
async function streamWithAbort() {
  try {
    const stream = await ollama.chat({
      model: 'llama3.2',
      messages: [{ role: 'user', content: '写一篇长文章' }],
      stream: true
    })
    for await (const chunk of stream) {
      if (chunk.message.content) {
        process.stdout.write(chunk.message.content)
      }
    }
  } catch (error) {
    if (error.name === 'AbortError') {
      console.log('\n[已取消]')
    } else {
      throw error
    }
  }
}

// Abort the in-flight stream after 3 seconds.
setTimeout(() => ollama.abort(), 3000)
await streamWithAbort()