# 合理的超时和重试机制可以提高应用的稳定性和可靠性。
import ollama
def chat_with_timeout(model, messages, timeout=120):
    """Send one chat request and return the reply text, or ``None`` on timeout.

    Args:
        model: Model name forwarded to ``ollama.chat``.
        messages: Chat messages forwarded to ``ollama.chat``.
        timeout: Value forwarded as the ``timeout`` keyword; the timeout
            message printed below labels it as seconds.

    Returns:
        ``response['message']['content']`` on success, or ``None`` after
        ``ollama.TimeoutError`` was caught and reported on stdout.
    """
    # NOTE(review): confirm that the installed ollama package accepts a
    # ``timeout`` keyword on ``ollama.chat`` and exposes
    # ``ollama.TimeoutError``; timeouts may instead have to be configured on
    # an ``ollama.Client`` instance. TODO verify against the package docs.
    try:
        reply = ollama.chat(model=model, messages=messages, timeout=timeout)
    except ollama.TimeoutError:
        print(f"请求超时({timeout}秒)")
        return None
    return reply['message']['content']
# Usage: ask one question, overriding the default timeout with 60.
result = chat_with_timeout(
    'llama3.2',
    [{'role': 'user', 'content': '你好'}],
    timeout=60,
)
import ollama
from typing import List
class AdaptiveTimeoutClient:
    """Chat client whose timeout doubles after a timeout and resets on success.

    Attributes are plain public fields:
        model: Model name sent with every request.
        base_timeout: Timeout restored after each successful request.
        max_timeout: Upper bound applied when the timeout is doubled.
        timeout: Timeout that the next request will use.
    """

    # NOTE(review): confirm that the installed ollama package accepts a
    # ``timeout`` keyword on ``ollama.chat`` and exposes
    # ``ollama.TimeoutError``; timeouts may instead have to be configured on
    # an ``ollama.Client`` instance. TODO verify against the package docs.

    def __init__(
        self,
        model: str = 'llama3.2',
        base_timeout: int = 60,
        max_timeout: int = 300
    ):
        self.model = model
        self.base_timeout = base_timeout
        self.max_timeout = max_timeout
        # The first request starts from the base value.
        self.timeout = base_timeout

    def chat(self, messages: List[dict]) -> str:
        """Send ``messages`` and return the reply text.

        On ``ollama.TimeoutError`` the stored timeout is doubled (capped at
        ``max_timeout``), the new value is printed, and the error is
        re-raised so the caller decides whether to try again.
        """
        try:
            reply = ollama.chat(
                model=self.model,
                messages=messages,
                timeout=self.timeout,
            )
        except ollama.TimeoutError:
            doubled = self.timeout * 2
            self.timeout = min(doubled, self.max_timeout)
            print(f"超时,增加超时时间到 {self.timeout} 秒")
            raise
        # Success: fall back to the base timeout for the next request.
        self.timeout = self.base_timeout
        return reply['message']['content']
# Usage: the client re-raises timeouts, so the caller handles the failure.
client = AdaptiveTimeoutClient(base_timeout=30, max_timeout=120)
try:
    result = client.chat(
        [{'role': 'user', 'content': '你好'}]
    )
except ollama.TimeoutError:
    print("请求失败")
import ollama
import time
def chat_with_retry(
    model: str,
    messages: list,
    max_retries: int = 3,
    delay: float = 1.0
) -> str:
    """Send a chat request, retrying on any exception with a growing delay.

    Args:
        model: Model name forwarded to ``ollama.chat``.
        messages: Chat messages forwarded to ``ollama.chat``.
        max_retries: Total number of attempts. Values below 1 are treated as
            1 so the request is always tried at least once.
        delay: Base pause passed to ``time.sleep``; the pause before retry
            ``k`` (1-based) is ``delay * k``.

    Returns:
        ``response['message']['content']`` of the first successful attempt.

    Raises:
        Exception: Whatever the final attempt raised, re-raised unchanged.
    """
    # Previously ``max_retries <= 0`` skipped the loop entirely and the
    # function fell through, returning None without ever sending a request.
    attempts = max(1, max_retries)
    for attempt in range(attempts):
        try:
            response = ollama.chat(model=model, messages=messages)
            return response['message']['content']
        except Exception as e:
            if attempt >= attempts - 1:
                # Out of attempts: surface the last error to the caller.
                raise
            print(f"重试 {attempt + 1}/{max_retries}: {e}")
            time.sleep(delay * (attempt + 1))
# Usage: rely on the default retry count and delay.
result = chat_with_retry(
    'llama3.2',
    [{'role': 'user', 'content': '你好'}],
)
import ollama
import time
import random
from typing import Callable, Type, Tuple
class ExponentialBackoff:
    """Retry helper with capped exponential back-off and optional jitter.

    Attributes:
        max_retries: Total number of attempts made by ``retry`` (values
            below 1 are treated as 1).
        base_delay: Delay before the first retry, passed to ``time.sleep``.
        max_delay: Upper bound for any single delay.
        jitter: When true, each delay is scaled by a random factor drawn
            from ``random.uniform(0.5, 1.5)``.
    """

    def __init__(
        self,
        max_retries: int = 5,
        base_delay: float = 1.0,
        max_delay: float = 60.0,
        jitter: bool = True
    ):
        self.max_retries = max_retries
        self.base_delay = base_delay
        self.max_delay = max_delay
        self.jitter = jitter

    def get_delay(self, attempt: int) -> float:
        """Return the pause to use after failed attempt ``attempt`` (0-based).

        The result is ``base_delay * 2 ** attempt``, optionally jittered, and
        never larger than ``max_delay``.
        """
        delay = min(self.base_delay * (2 ** attempt), self.max_delay)
        if self.jitter:
            # Cap again after jittering: the random factor can be up to 1.5,
            # which would otherwise push the delay past ``max_delay``.
            delay = min(delay * random.uniform(0.5, 1.5), self.max_delay)
        return delay

    def retry(self, func: Callable, *exceptions: Type[Exception]):
        """Call ``func()`` until it succeeds or the attempts are used up.

        Args:
            func: Zero-argument callable to invoke.
            *exceptions: Exception types that trigger a retry. When none are
                given, every ``Exception`` is retried (an empty tuple in an
                ``except`` clause would match nothing and disable retrying).

        Returns:
            Whatever ``func()`` returns on its first successful call.

        Raises:
            Exception: The error from the final attempt, or immediately any
                error that is not one of the retryable types.
        """
        retry_on = exceptions or (Exception,)
        # Always call ``func`` at least once, even for max_retries <= 0.
        attempts = max(1, self.max_retries)
        for attempt in range(attempts):
            try:
                return func()
            except retry_on:
                if attempt >= attempts - 1:
                    raise
                delay = self.get_delay(attempt)
                print(f"重试 {attempt + 1}/{self.max_retries},等待 {delay:.2f} 秒")
                time.sleep(delay)
# Usage: retry the demo request on the listed ollama error types.
backoff = ExponentialBackoff(max_retries=4, base_delay=1.0)


def make_request():
    """Send the demo chat request and return the reply text."""
    # NOTE(review): ``ollama.TimeoutError`` is referenced in the retry call
    # below; confirm the installed ollama package exposes it.
    reply = ollama.chat(
        model='llama3.2',
        messages=[{'role': 'user', 'content': '你好'}],
    )
    return reply['message']['content']


result = backoff.retry(make_request, ollama.ResponseError, ollama.TimeoutError)
import ollama
import time
from typing import Optional
class RetryPolicy:
    """Settings and decisions for retrying a failed request.

    Attributes:
        max_retries: Attempt index at or above which ``should_retry`` always
            answers ``False``.
        timeout: Timeout value for callers to pass along with each request.
        backoff_factor: Base of the exponential delay from ``get_delay``.
    """

    def __init__(
        self,
        max_retries: int = 3,
        timeout: int = 120,
        backoff_factor: float = 2.0
    ):
        self.max_retries = max_retries
        self.timeout = timeout
        self.backoff_factor = backoff_factor

    def should_retry(self, error: Exception, attempt: int) -> bool:
        """Return whether ``error`` on attempt ``attempt`` warrants a retry.

        Only ``ollama.TimeoutError`` and ``ollama.ResponseError`` instances
        are retryable, and only while ``attempt < max_retries``.
        """
        # NOTE(review): confirm the installed ollama package exposes
        # ``TimeoutError``; if it does not, this lookup itself raises.
        if attempt >= self.max_retries:
            return False
        return (
            isinstance(error, ollama.TimeoutError)
            or isinstance(error, ollama.ResponseError)
        )

    def get_delay(self, attempt: int) -> float:
        """Return ``backoff_factor ** attempt`` as the delay for ``attempt``."""
        return pow(self.backoff_factor, attempt)
class ResilientClient:
    """Chat client that retries failed requests according to a retry policy.

    Attributes:
        model: Model name sent with every request.
        retry_policy: Object providing ``max_retries``, ``timeout``,
            ``should_retry(error, attempt)`` and ``get_delay(attempt)``.
    """

    def __init__(
        self,
        model: str = 'llama3.2',
        retry_policy: Optional['RetryPolicy'] = None
    ):
        self.model = model
        # Build the default policy only when the caller supplied none.
        self.retry_policy = RetryPolicy() if retry_policy is None else retry_policy

    def chat(self, messages: list) -> str:
        """Send ``messages`` and return the reply text, retrying on failure.

        At most ``retry_policy.max_retries`` attempts are made (at least
        one). Between attempts the client sleeps for
        ``retry_policy.get_delay(attempt)``.

        Raises:
            Exception: The error of the final attempt, or immediately any
                error the policy declines to retry.
        """
        # NOTE(review): confirm that ``ollama.chat`` accepts a ``timeout``
        # keyword in the installed ollama package. TODO verify.
        policy = self.retry_policy
        attempts = max(1, policy.max_retries)
        for attempt in range(attempts):
            try:
                response = ollama.chat(
                    model=self.model,
                    messages=messages,
                    timeout=policy.timeout
                )
                return response['message']['content']
            except Exception as e:
                # The policy only sees attempt indexes below max_retries, so
                # it would approve a "retry" after the last attempt as well;
                # the loop would then end and None would be returned with
                # the error swallowed. Re-raise on the last attempt instead.
                is_last = attempt >= attempts - 1
                if is_last or not policy.should_retry(e, attempt):
                    raise
                delay = policy.get_delay(attempt)
                print(f"重试 {attempt + 1},等待 {delay:.2f} 秒")
                time.sleep(delay)
# Usage: pair the client with an explicit retry policy.
policy = RetryPolicy(max_retries=4, timeout=60)
client = ResilientClient('llama3.2', policy)
result = client.chat(
    [{'role': 'user', 'content': '你好'}]
)
import time
from enum import Enum
from typing import Callable
class CircuitState(Enum):
    """States a ``CircuitBreaker`` can be in."""

    CLOSED = 'closed'        # calls are passed through
    OPEN = 'open'            # calls are rejected without being run
    HALF_OPEN = 'half_open'  # recovery timeout elapsed; next call is a trial


class CircuitOpenError(Exception):
    """Raised by ``CircuitBreaker.call`` when the circuit is open.

    Subclasses ``Exception`` so existing ``except Exception`` handlers keep
    working while new callers can tell a rejection from a real failure.
    """


class CircuitBreaker:
    """Stop calling a failing function until a recovery timeout has passed.

    Attributes:
        failure_threshold: Number of consecutive failures that opens the
            circuit.
        recovery_timeout: Seconds (compared against ``time.time()``
            differences) to wait after the last failure before a trial call
            is allowed.
        state: Current ``CircuitState``.
        failure_count: Failures since the last successful call.
        last_failure_time: ``time.time()`` of the most recent failure, or 0.
    """

    def __init__(
        self,
        failure_threshold: int = 5,
        recovery_timeout: int = 60
    ):
        self.failure_threshold = failure_threshold
        self.recovery_timeout = recovery_timeout
        self.state = CircuitState.CLOSED
        self.failure_count = 0
        self.last_failure_time = 0

    def call(self, func: Callable):
        """Run ``func()`` through the breaker and return its result.

        Raises:
            CircuitOpenError: The circuit is open and the recovery timeout
                has not elapsed; ``func`` is not called.
            Exception: Anything ``func`` raises, re-raised after the failure
                has been recorded.
        """
        if self.state == CircuitState.OPEN:
            if time.time() - self.last_failure_time > self.recovery_timeout:
                # Enough time has passed: let one trial call through.
                self.state = CircuitState.HALF_OPEN
            else:
                raise CircuitOpenError("断路器开启,拒绝请求")
        try:
            result = func()
        except Exception:
            self.failure_count += 1
            self.last_failure_time = time.time()
            # A failed trial call re-opens the circuit right away; otherwise
            # open once the consecutive-failure threshold is reached.
            if (
                self.state == CircuitState.HALF_OPEN
                or self.failure_count >= self.failure_threshold
            ):
                self.state = CircuitState.OPEN
            raise
        # Any success clears the failure streak. Without this reset,
        # unrelated intermittent failures accumulate while the circuit is
        # closed and eventually trip a healthy breaker.
        self.state = CircuitState.CLOSED
        self.failure_count = 0
        return result
# Usage: route the request through the breaker and report any failure.
breaker = CircuitBreaker(failure_threshold=3, recovery_timeout=30)


def protected_call():
    """Send the demo chat request and return the raw response."""
    demo_messages = [{'role': 'user', 'content': '你好'}]
    return ollama.chat(model='llama3.2', messages=demo_messages)


try:
    result = breaker.call(protected_call)
except Exception as exc:
    print(f"请求失败: {exc}")
import ollama
import time
import random
from typing import List, Optional
class FaultTolerantClient:
    """Chat client combining adaptive timeouts, retries and basic statistics.

    Attributes:
        model: Model name sent with every request.
        max_retries: Total number of attempts per ``chat`` call.
        base_timeout: Timeout used for the first attempt of each call.
        max_timeout: Upper bound applied when the timeout is doubled.
        stats: Counters with the keys ``total_requests``, ``successful``,
            ``failed`` and ``retries``.
    """

    def __init__(
        self,
        model: str = 'llama3.2',
        max_retries: int = 3,
        base_timeout: int = 60,
        max_timeout: int = 300
    ):
        self.model = model
        self.max_retries = max_retries
        self.base_timeout = base_timeout
        self.max_timeout = max_timeout
        self.stats = {
            'total_requests': 0,
            'successful': 0,
            'failed': 0,
            'retries': 0
        }

    @staticmethod
    def _backoff_delay(attempt: int) -> float:
        """Return the pause before the next attempt: ``2 ** attempt`` plus up to 1."""
        return (2 ** attempt) + random.random()

    def chat(self, messages: List[dict]) -> Optional[str]:
        """Send ``messages`` and return the reply text.

        Each timeout doubles the timeout for the next attempt, capped at
        ``max_timeout``. Between attempts the client sleeps for an
        exponentially growing, jittered delay.

        Returns:
            The reply text, or ``None`` when the last attempt timed out (or
            when ``max_retries`` allows no attempt at all).

        Raises:
            Exception: A non-timeout error raised by the final attempt.
        """
        # NOTE(review): confirm that the installed ollama package accepts a
        # ``timeout`` keyword on ``ollama.chat`` and exposes
        # ``ollama.TimeoutError``. TODO verify against the package docs.
        self.stats['total_requests'] += 1
        timeout = self.base_timeout
        for attempt in range(self.max_retries):
            is_last = attempt >= self.max_retries - 1
            try:
                response = ollama.chat(
                    model=self.model,
                    messages=messages,
                    timeout=timeout
                )
                self.stats['successful'] += 1
                return response['message']['content']
            except ollama.TimeoutError:
                timeout = min(timeout * 2, self.max_timeout)
            except Exception:
                if is_last:
                    self.stats['failed'] += 1
                    raise
            if not is_last:
                # Count a retry only when another attempt really follows,
                # and for every kind of error. Before, each timeout was
                # counted (even the final one) and other errors never were.
                self.stats['retries'] += 1
                time.sleep(self._backoff_delay(attempt))
        self.stats['failed'] += 1
        return None

    def get_stats(self) -> dict:
        """Return a shallow copy of the counters, safe for callers to modify."""
        return self.stats.copy()
# Usage: send one request, then print the reply and the success rate.
client = FaultTolerantClient(max_retries=4, base_timeout=30)
result = client.chat(
    [{'role': 'user', 'content': '你好'}]
)
print(result)
stats = client.get_stats()
print(f"成功率: {stats['successful'] / stats['total_requests'] * 100:.1f}%")