src/server/internal/inference/client.go
2,537 bytes · 88 lines · capsule://quake0day/[email protected]
raw on github
package inference
import (
"context"
"fmt"
"time"
pb "github.com/cyberverse/server/internal/pb"
"google.golang.org/grpc"
"google.golang.org/grpc/credentials/insecure"
healthpb "google.golang.org/grpc/health/grpc_health_v1"
"google.golang.org/grpc/keepalive"
)
// Client owns the gRPC connection to the Python inference server and holds
// one typed service stub per inference service.
type Client struct {
	// conn is the underlying gRPC channel; Close releases it.
	conn *grpc.ClientConn

	// Typed service stubs, each created from conn in NewClient.
	avatar   pb.AvatarServiceClient
	llm      pb.LLMServiceClient
	rag      pb.RAGServiceClient
	tts      pb.TTSServiceClient
	asr      pb.ASRServiceClient
	voiceLLM pb.VoiceLLMServiceClient
}
// Compile-time assertion that *Client satisfies InferenceService: the build
// fails here if a method required by the interface is missing from *Client.
var _ InferenceService = (*Client)(nil)
// NewClient builds a Client whose gRPC channel targets the inference server
// at addr.
//
// The channel uses plaintext (insecure) transport credentials, client-side
// keepalive pings, and raised per-call message size limits. grpc.NewClient
// performs no I/O, so a nil error only means the target and options were
// accepted; it does not mean the server is reachable. Use HealthCheck to
// probe the server. Call Close on the returned Client to release the
// connection.
//
// It returns a wrapped error if grpc.NewClient rejects the target or options.
func NewClient(addr string) (*Client, error) {
	const (
		// maxRecvMsgSize is 50MB, sized for video frames.
		maxRecvMsgSize = 50 * 1024 * 1024
		// maxSendMsgSize is 10MB.
		maxSendMsgSize = 10 * 1024 * 1024
	)

	conn, err := grpc.NewClient(addr,
		grpc.WithTransportCredentials(insecure.NewCredentials()),
		grpc.WithKeepaliveParams(keepalive.ClientParameters{
			Time:                60 * time.Second, // ping after this much inactivity
			Timeout:             20 * time.Second, // wait this long for the ping ack
			PermitWithoutStream: true,             // keep pinging with no active RPCs
		}),
		grpc.WithDefaultCallOptions(
			grpc.MaxCallRecvMsgSize(maxRecvMsgSize),
			grpc.MaxCallSendMsgSize(maxSendMsgSize),
		),
	)
	if err != nil {
		// No connection has been attempted yet, so this is a construction
		// failure, not a connect failure.
		return nil, fmt.Errorf("creating inference client for %q: %w", addr, err)
	}

	return &Client{
		conn:     conn,
		avatar:   pb.NewAvatarServiceClient(conn),
		llm:      pb.NewLLMServiceClient(conn),
		rag:      pb.NewRAGServiceClient(conn),
		tts:      pb.NewTTSServiceClient(conn),
		asr:      pb.NewASRServiceClient(conn),
		voiceLLM: pb.NewVoiceLLMServiceClient(conn),
	}, nil
}
// Close releases the underlying gRPC connection and returns the error, if
// any, from closing it. When the Client holds no connection it does nothing
// and returns nil.
func (c *Client) Close() error {
	if c.conn == nil {
		return nil
	}
	return c.conn.Close()
}
// HealthCheck queries the standard gRPC health service on the inference
// server, sending a HealthCheckRequest with no service name set. The call is
// bounded by a 5-second timeout layered on ctx.
//
// It returns the RPC error unchanged if the call fails, an error naming the
// reported status if that status is anything other than SERVING, and nil
// otherwise.
func (c *Client) HealthCheck(ctx context.Context) error {
	checkCtx, stop := context.WithTimeout(ctx, 5*time.Second)
	defer stop()

	health := healthpb.NewHealthClient(c.conn)
	reply, err := health.Check(checkCtx, &healthpb.HealthCheckRequest{})
	if err != nil {
		return err
	}

	if status := reply.GetStatus(); status != healthpb.HealthCheckResponse_SERVING {
		return fmt.Errorf("inference health status is %s", status.String())
	}
	return nil
}
// AvatarInfo calls GetInfo on the avatar service with an empty request and
// returns its response and error as-is. The call is bounded by a 5-second
// timeout layered on ctx.
func (c *Client) AvatarInfo(ctx context.Context) (*pb.AvatarInfo, error) {
	callCtx, stop := context.WithTimeout(ctx, 5*time.Second)
	defer stop()

	info, err := c.avatar.GetInfo(callCtx, &pb.GetInfoRequest{})
	return info, err
}