Files
exchange_go/services/binanceservice/error_handler_optimized.go
2025-08-27 14:54:03 +08:00

676 lines
15 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package binanceservice
import (
"context"
"errors"
"fmt"
"net"
"strings"
"time"
"github.com/go-admin-team/go-admin-core/logger"
"gorm.io/gorm"
)
// ErrorType 错误类型
type ErrorType string
const (
ErrorTypeNetwork ErrorType = "network"
ErrorTypeDatabase ErrorType = "database"
ErrorTypeBusiness ErrorType = "business"
ErrorTypeSystem ErrorType = "system"
ErrorTypeValidation ErrorType = "validation"
ErrorTypeTimeout ErrorType = "timeout"
ErrorTypeRateLimit ErrorType = "rate_limit"
ErrorTypeAuth ErrorType = "auth"
)
// ErrorSeverity 错误严重程度
type ErrorSeverity string
const (
SeverityLow ErrorSeverity = "low"
SeverityMedium ErrorSeverity = "medium"
SeverityHigh ErrorSeverity = "high"
SeverityCritical ErrorSeverity = "critical"
)
// ErrorAction 错误处理动作
type ErrorAction string
const (
ActionRetry ErrorAction = "retry"
ActionFallback ErrorAction = "fallback"
ActionAlert ErrorAction = "alert"
ActionIgnore ErrorAction = "ignore"
ActionCircuit ErrorAction = "circuit"
ActionDegrade ErrorAction = "degrade"
)
// ErrorInfo 错误信息
type ErrorInfo struct {
Type ErrorType `json:"type"`
Severity ErrorSeverity `json:"severity"`
Message string `json:"message"`
OriginalErr error `json:"-"`
Context string `json:"context"`
Timestamp time.Time `json:"timestamp"`
Retryable bool `json:"retryable"`
Action ErrorAction `json:"action"`
}
// RetryConfig 和 CircuitBreakerConfig 已在 config_optimized.go 中定义
// ErrorHandler 错误处理器
type ErrorHandler struct {
config *OptimizedConfig
metricsCollector *MetricsCollector
circuitBreakers map[string]*CircuitBreaker
}
// CircuitBreaker 熔断器
type CircuitBreaker struct {
name string
config CircuitBreakerConfig
state CircuitState
failureCount int
successCount int
lastFailTime time.Time
nextRetry time.Time
halfOpenCalls int
}
// CircuitState 熔断器状态
type CircuitState string
const (
StateClosed CircuitState = "closed"
StateOpen CircuitState = "open"
StateHalfOpen CircuitState = "half_open"
)
// NewErrorHandler 创建错误处理器
func NewErrorHandler(config *OptimizedConfig, metricsCollector *MetricsCollector) *ErrorHandler {
return &ErrorHandler{
config: config,
metricsCollector: metricsCollector,
circuitBreakers: make(map[string]*CircuitBreaker),
}
}
// ClassifyError 分类错误
func (eh *ErrorHandler) ClassifyError(err error, context string) *ErrorInfo {
if err == nil {
return nil
}
errorInfo := &ErrorInfo{
OriginalErr: err,
Message: err.Error(),
Context: context,
Timestamp: time.Now(),
}
// 根据错误类型分类
switch {
case isNetworkError(err):
errorInfo.Type = ErrorTypeNetwork
errorInfo.Severity = SeverityMedium
errorInfo.Retryable = true
errorInfo.Action = ActionRetry
case isDatabaseError(err):
errorInfo.Type = ErrorTypeDatabase
errorInfo.Severity = SeverityHigh
errorInfo.Retryable = isRetryableDatabaseError(err)
errorInfo.Action = ActionRetry
case isTimeoutError(err):
errorInfo.Type = ErrorTypeTimeout
errorInfo.Severity = SeverityMedium
errorInfo.Retryable = true
errorInfo.Action = ActionRetry
case isRateLimitError(err):
errorInfo.Type = ErrorTypeRateLimit
errorInfo.Severity = SeverityMedium
errorInfo.Retryable = true
errorInfo.Action = ActionRetry
case isAuthError(err):
errorInfo.Type = ErrorTypeAuth
errorInfo.Severity = SeverityHigh
errorInfo.Retryable = false
errorInfo.Action = ActionAlert
case isValidationError(err):
errorInfo.Type = ErrorTypeValidation
errorInfo.Severity = SeverityLow
errorInfo.Retryable = false
errorInfo.Action = ActionIgnore
case isBusinessError(err):
errorInfo.Type = ErrorTypeBusiness
errorInfo.Severity = SeverityMedium
errorInfo.Retryable = false
errorInfo.Action = ActionFallback
default:
errorInfo.Type = ErrorTypeSystem
errorInfo.Severity = SeverityHigh
errorInfo.Retryable = true
errorInfo.Action = ActionRetry
}
return errorInfo
}
// HandleError 处理错误
func (eh *ErrorHandler) HandleError(err error, context string) *ErrorInfo {
errorInfo := eh.ClassifyError(err, context)
if errorInfo == nil {
return nil
}
// 记录错误指标
if eh.metricsCollector != nil {
eh.metricsCollector.RecordError(string(errorInfo.Type), errorInfo.Message, context)
}
// 根据错误动作处理
switch errorInfo.Action {
case ActionAlert:
eh.sendAlert(errorInfo)
case ActionCircuit:
eh.triggerCircuitBreaker(context)
case ActionDegrade:
eh.enableDegradedMode(context)
}
// 记录日志
eh.logError(errorInfo)
return errorInfo
}
// RetryWithBackoff 带退避的重试
func (eh *ErrorHandler) RetryWithBackoff(ctx context.Context, operation func() error, config RetryConfig, context string) error {
var lastErr error
delay := config.RetryDelay
for attempt := 1; attempt <= config.MaxRetries; attempt++ {
// 检查上下文是否已取消
select {
case <-ctx.Done():
return ctx.Err()
default:
}
// 检查熔断器状态
if !eh.canExecute(context) {
return fmt.Errorf("circuit breaker is open for %s", context)
}
// 执行操作
// 注意config中没有TimeoutPerTry字段这里暂时注释掉
// if config.TimeoutPerTry > 0 {
// var cancel context.CancelFunc
// operationCtx, cancel = context.WithTimeout(ctx, config.TimeoutPerTry)
// defer cancel()
// }
err := operation()
if err == nil {
// 成功,记录成功指标
if eh.metricsCollector != nil {
eh.metricsCollector.RecordRetry(true)
}
eh.recordSuccess(context)
return nil
}
lastErr = err
errorInfo := eh.ClassifyError(err, context)
// 记录失败
eh.recordFailure(context)
// 如果不可重试,直接返回
if errorInfo != nil && !errorInfo.Retryable {
logger.Warnf("不可重试的错误 [%s]: %v", context, err)
return err
}
// 最后一次尝试,不再等待
if attempt == config.MaxRetries {
break
}
// 计算下次重试延迟
nextDelay := eh.calculateDelay(delay, config)
logger.Infof("重试 %d/%d [%s] 在 %v 后,错误: %v", attempt, config.MaxRetries, context, nextDelay, err)
// 等待重试
select {
case <-ctx.Done():
return ctx.Err()
case <-time.After(nextDelay):
delay = nextDelay
}
}
// 记录重试失败指标
if eh.metricsCollector != nil {
eh.metricsCollector.RecordRetry(false)
}
return fmt.Errorf("重试 %d 次后仍然失败 [%s]: %w", config.MaxRetries, context, lastErr)
}
// calculateDelay 计算重试延迟
func (eh *ErrorHandler) calculateDelay(currentDelay time.Duration, config RetryConfig) time.Duration {
nextDelay := time.Duration(float64(currentDelay) * config.BackoffFactor)
// 限制最大延迟
if nextDelay > config.MaxRetryDelay {
nextDelay = config.MaxRetryDelay
}
// 添加10%抖动简化版本不依赖JitterEnabled字段
jitter := time.Duration(float64(nextDelay) * 0.1)
if jitter > 0 {
nextDelay += time.Duration(time.Now().UnixNano() % int64(jitter))
}
return nextDelay
}
// canExecute 检查是否可以执行操作(熔断器检查)
func (eh *ErrorHandler) canExecute(context string) bool {
cb := eh.getCircuitBreaker(context)
if cb == nil || !cb.config.Enabled {
return true
}
now := time.Now()
switch cb.state {
case StateClosed:
return true
case StateOpen:
if now.After(cb.nextRetry) {
cb.state = StateHalfOpen
cb.halfOpenCalls = 0
return true
}
return false
case StateHalfOpen:
return cb.halfOpenCalls < cb.config.HalfOpenMaxCalls
default:
return true
}
}
// recordSuccess 记录成功
func (eh *ErrorHandler) recordSuccess(context string) {
cb := eh.getCircuitBreaker(context)
if cb == nil || !cb.config.Enabled {
return
}
switch cb.state {
case StateHalfOpen:
cb.successCount++
if cb.successCount >= cb.config.SuccessThreshold {
cb.state = StateClosed
cb.failureCount = 0
cb.successCount = 0
}
case StateClosed:
cb.failureCount = 0
}
}
// recordFailure 记录失败
func (eh *ErrorHandler) recordFailure(context string) {
cb := eh.getCircuitBreaker(context)
if cb == nil || !cb.config.Enabled {
return
}
cb.failureCount++
cb.lastFailTime = time.Now()
switch cb.state {
case StateClosed:
if cb.failureCount >= cb.config.FailureThreshold {
cb.state = StateOpen
cb.nextRetry = time.Now().Add(cb.config.Timeout)
logger.Warnf("熔断器开启 [%s]: 失败次数 %d", context, cb.failureCount)
}
case StateHalfOpen:
cb.state = StateOpen
cb.nextRetry = time.Now().Add(cb.config.Timeout)
cb.successCount = 0
logger.Warnf("熔断器重新开启 [%s]", context)
}
}
// getCircuitBreaker 获取熔断器
func (eh *ErrorHandler) getCircuitBreaker(context string) *CircuitBreaker {
cb, exists := eh.circuitBreakers[context]
if !exists {
cb = &CircuitBreaker{
name: context,
config: eh.config.CircuitBreakerConfig,
state: StateClosed,
}
eh.circuitBreakers[context] = cb
}
return cb
}
// triggerCircuitBreaker 触发熔断器
func (eh *ErrorHandler) triggerCircuitBreaker(context string) {
cb := eh.getCircuitBreaker(context)
if cb != nil && cb.config.Enabled {
cb.state = StateOpen
cb.nextRetry = time.Now().Add(cb.config.Timeout)
logger.Warnf("手动触发熔断器 [%s]", context)
}
}
// sendAlert 发送告警
func (eh *ErrorHandler) sendAlert(errorInfo *ErrorInfo) {
// 这里可以集成告警系统,如钉钉、邮件等
logger.Errorf("告警: [%s] %s - %s", errorInfo.Type, errorInfo.Context, errorInfo.Message)
}
// enableDegradedMode 启用降级模式
func (eh *ErrorHandler) enableDegradedMode(context string) {
logger.Warnf("启用降级模式 [%s]", context)
// 这里可以实现具体的降级逻辑
}
// logError 记录错误日志
func (eh *ErrorHandler) logError(errorInfo *ErrorInfo) {
switch errorInfo.Severity {
case SeverityLow:
logger.Infof("[%s] %s: %s", errorInfo.Type, errorInfo.Context, errorInfo.Message)
case SeverityMedium:
logger.Warnf("[%s] %s: %s", errorInfo.Type, errorInfo.Context, errorInfo.Message)
case SeverityHigh, SeverityCritical:
logger.Errorf("[%s] %s: %s", errorInfo.Type, errorInfo.Context, errorInfo.Message)
}
}
// GetCircuitBreakerStatus 获取熔断器状态
func (eh *ErrorHandler) GetCircuitBreakerStatus() map[string]interface{} {
status := make(map[string]interface{})
for name, cb := range eh.circuitBreakers {
status[name] = map[string]interface{}{
"state": cb.state,
"failure_count": cb.failureCount,
"success_count": cb.successCount,
"last_fail_time": cb.lastFailTime,
"next_retry": cb.nextRetry,
}
}
return status
}
// 错误分类辅助函数
// isNetworkError 判断是否为网络错误
func isNetworkError(err error) bool {
if err == nil {
return false
}
// 检查网络相关错误
var netErr net.Error
if errors.As(err, &netErr) {
return true
}
// 检查常见网络错误消息
errorMsg := strings.ToLower(err.Error())
networkKeywords := []string{
"connection refused",
"connection reset",
"connection timeout",
"network unreachable",
"no route to host",
"dns",
"socket",
}
for _, keyword := range networkKeywords {
if strings.Contains(errorMsg, keyword) {
return true
}
}
return false
}
// isDatabaseError 判断是否为数据库错误
func isDatabaseError(err error) bool {
if err == nil {
return false
}
// 检查GORM错误
if errors.Is(err, gorm.ErrRecordNotFound) ||
errors.Is(err, gorm.ErrInvalidTransaction) ||
errors.Is(err, gorm.ErrNotImplemented) {
return true
}
// 检查数据库相关错误消息
errorMsg := strings.ToLower(err.Error())
dbKeywords := []string{
"database",
"sql",
"mysql",
"postgres",
"connection pool",
"deadlock",
"constraint",
"duplicate key",
}
for _, keyword := range dbKeywords {
if strings.Contains(errorMsg, keyword) {
return true
}
}
return false
}
// isRetryableDatabaseError 判断是否为可重试的数据库错误
func isRetryableDatabaseError(err error) bool {
if err == nil {
return false
}
// 不可重试的错误
if errors.Is(err, gorm.ErrRecordNotFound) {
return false
}
errorMsg := strings.ToLower(err.Error())
nonRetryableKeywords := []string{
"constraint",
"duplicate key",
"foreign key",
"syntax error",
}
for _, keyword := range nonRetryableKeywords {
if strings.Contains(errorMsg, keyword) {
return false
}
}
return true
}
// isTimeoutError 判断是否为超时错误
func isTimeoutError(err error) bool {
if err == nil {
return false
}
// 检查超时错误
var netErr net.Error
if errors.As(err, &netErr) && netErr.Timeout() {
return true
}
if errors.Is(err, context.DeadlineExceeded) {
return true
}
errorMsg := strings.ToLower(err.Error())
return strings.Contains(errorMsg, "timeout") ||
strings.Contains(errorMsg, "deadline exceeded")
}
// isRateLimitError 判断是否为限流错误
func isRateLimitError(err error) bool {
if err == nil {
return false
}
errorMsg := strings.ToLower(err.Error())
rateLimitKeywords := []string{
"rate limit",
"too many requests",
"429",
"quota exceeded",
"throttle",
}
for _, keyword := range rateLimitKeywords {
if strings.Contains(errorMsg, keyword) {
return true
}
}
return false
}
// isAuthError 判断是否为认证错误
func isAuthError(err error) bool {
if err == nil {
return false
}
errorMsg := strings.ToLower(err.Error())
authKeywords := []string{
"unauthorized",
"authentication",
"invalid signature",
"api key",
"401",
"403",
"forbidden",
}
for _, keyword := range authKeywords {
if strings.Contains(errorMsg, keyword) {
return true
}
}
return false
}
// isValidationError 判断是否为验证错误
func isValidationError(err error) bool {
if err == nil {
return false
}
errorMsg := strings.ToLower(err.Error())
validationKeywords := []string{
"validation",
"invalid parameter",
"bad request",
"400",
"missing required",
"invalid format",
}
for _, keyword := range validationKeywords {
if strings.Contains(errorMsg, keyword) {
return true
}
}
return false
}
// isBusinessError 判断是否为业务错误
func isBusinessError(err error) bool {
if err == nil {
return false
}
errorMsg := strings.ToLower(err.Error())
businessKeywords := []string{
"insufficient balance",
"order not found",
"position not found",
"invalid order status",
"market closed",
"symbol not found",
}
for _, keyword := range businessKeywords {
if strings.Contains(errorMsg, keyword) {
return true
}
}
return false
}
// 全局错误处理器实例
var GlobalErrorHandler *ErrorHandler
// InitErrorHandler 初始化错误处理器
func InitErrorHandler(config *OptimizedConfig, metricsCollector *MetricsCollector) {
GlobalErrorHandler = NewErrorHandler(config, metricsCollector)
}
// GetErrorHandler 获取全局错误处理器
func GetErrorHandler() *ErrorHandler {
return GlobalErrorHandler
}
// HandleErrorWithRetry 处理错误并重试的便捷函数
func HandleErrorWithRetry(ctx context.Context, operation func() error, context string) error {
if GlobalErrorHandler == nil {
return operation()
}
config := RetryConfig{
MaxRetries: 3,
RetryDelay: 100 * time.Millisecond,
MaxRetryDelay: 5 * time.Second,
BackoffFactor: 2.0,
ApiRetryCount: 3,
DbRetryCount: 3,
}
return GlobalErrorHandler.RetryWithBackoff(ctx, operation, config, context)
}