// capsule AI-native Unix-like composition layer
//
// src/proto/voice_llm.proto
//
// 1,502 bytes · 71 lines · capsule://quake0day/[email protected] raw on github

// Schema for VoiceLLMService and the request/response messages it uses.
syntax = "proto3";

package cyberverse;

// AudioChunk and ImageFrame are used by messages in this file but are not
// declared here; presumably they are defined in this import — confirm.
import "common.proto";

// Import path of the Go package that receives the generated code.
option go_package = "github.com/cyberverse/server/internal/pb";

// RPC surface for VoiceLLM sessions: one bidirectional streaming call plus
// two unary calls.
service VoiceLLMService {
  // Bidirectional stream: the client sends a sequence of VoiceLLMInput
  // messages and the server sends a sequence of VoiceLLMOutput messages on
  // the same call.
  rpc Converse(stream VoiceLLMInput) returns (stream VoiceLLMOutput);

  // Unary call: takes a VoiceLLMConfig (wrapped in CheckVoiceRequest) and
  // answers with an ok flag and a provider_error string.
  rpc CheckVoice(CheckVoiceRequest) returns (CheckVoiceResponse);

  // Unary call keyed by session_id; answers with a success flag.
  // NOTE(review): presumably cancels the in-flight reply of that session —
  // confirm against the server implementation.
  rpc Interrupt(InterruptRequest) returns (InterruptResponse);
}

// Client-to-server message of the Converse stream.
// All payloads live in the `input` oneof, so each message carries at most
// one of them; setting one member clears the others.
message VoiceLLMInput {
  oneof input {
    // Audio payload. AudioChunk is not declared in this file; presumably it
    // comes from common.proto — confirm.
    AudioChunk audio = 1;
    // Text payload.
    string text = 2;
    // Configuration payload.
    // NOTE(review): presumably sent before any audio/text on a stream; the
    // schema itself does not enforce any ordering — confirm with the server.
    VoiceLLMConfig config = 3;
    // Image payload. ImageFrame is not declared in this file; presumably it
    // comes from common.proto — confirm.
    ImageFrame image = 4;
  }
}

// Server-to-client message of the Converse stream.
// The fields are independent (not a oneof), so one message may carry any
// combination of them. Under proto3 an unset string reads as "" and an unset
// bool as false.
message VoiceLLMOutput {
  // Audio payload. AudioChunk is not declared in this file; presumably it
  // comes from common.proto — confirm.
  AudioChunk audio = 1;
  // NOTE(review): presumably the text of the generated reply — confirm.
  string transcript = 2;
  // NOTE(review): presumably the recognized text of the user's input —
  // confirm.
  string user_transcript = 3;
  // NOTE(review): presumably marks a finalized (non-partial) result; which
  // of the other fields it qualifies is not stated in this schema — confirm.
  bool is_final = 4;
  // Field number 5 is retired and must not be reused.
  // Removed: video comes from AvatarService.GenerateStream only.
  reserved 5;
  // NOTE(review): presumably identifies the user question this output
  // belongs to — confirm.
  string question_id = 6;
  // NOTE(review): presumably identifies the reply this output belongs to —
  // confirm.
  string reply_id = 7;
  // NOTE(review): presumably signals that the user interrupted the reply —
  // confirm.
  bool barge_in = 8;
  // String payload; the name suggests a JSON-encoded task event, but its
  // schema is not defined in this file — confirm with the producer.
  string task_event_json = 9;
}

// Configuration message. It is used in two places in this file: as the
// `config` member of VoiceLLMInput and as the payload of CheckVoiceRequest.
// All scalar fields use proto3 implicit presence, so an empty string or a
// zero value cannot be distinguished from "not set".
message VoiceLLMConfig {
  // NOTE(review): presumably the same identifier that InterruptRequest
  // carries in its session_id field — confirm.
  string session_id = 1;
  // NOTE(review): presumably the system prompt passed to the model — confirm.
  string system_prompt = 2;
  // NOTE(review): presumably a provider-specific voice name or id; accepted
  // values are not defined in this schema — confirm.
  string voice = 3;
  // NOTE(review): presumably the sampling temperature; valid range is not
  // stated here. Because presence is implicit, 0 and "unset" look the same on
  // the wire — confirm how the server treats 0.
  float temperature = 4;
  // NOTE(review): presumably the display/spoken name of the bot — confirm.
  string bot_name = 5;
  // NOTE(review): free-form string; expected content is not defined in this
  // schema — confirm.
  string speaking_style = 6;
  // NOTE(review): presumably spoken or shown when a session starts — confirm.
  string welcome_message = 7;
  // Dialog context entries; repeated fields preserve element order.
  repeated VoiceLLMDialogContextItem dialog_context = 8;
  // NOTE(review): presumably selects the backing voice/LLM provider; accepted
  // values are not defined in this schema (plain string, not an enum) —
  // confirm.
  string provider = 9;
  // NOTE(review): presumably identifies a character/persona — confirm.
  string character_id = 10;
  // NOTE(review): looks like a directory path for the character's assets;
  // whether it is resolved client- or server-side is not stated — confirm.
  string character_dir = 11;
}

// One element of VoiceLLMConfig.dialog_context.
message VoiceLLMDialogContextItem {
  // Speaker role as a plain string; the allowed values are not defined in
  // this schema. NOTE(review): confirm the vocabulary with producers.
  string role = 1;
  // Text of this entry.
  string text = 2;
  // Integer timestamp; neither the unit nor the epoch is stated in this
  // schema. NOTE(review): confirm seconds vs. milliseconds before relying on
  // it.
  int64 timestamp = 3;
}

// Request of VoiceLLMService.CheckVoice.
message CheckVoiceRequest {
  // Configuration to check; same message type that VoiceLLMInput carries in
  // its `config` member.
  VoiceLLMConfig config = 1;
}

// Response of VoiceLLMService.CheckVoice.
message CheckVoiceResponse {
  // Result flag of the check.
  bool ok = 1;
  // Error text. NOTE(review): presumably reported by the provider and empty
  // when ok is true — confirm.
  string provider_error = 2;
}

// Request of VoiceLLMService.Interrupt.
message InterruptRequest {
  // Session to address. NOTE(review): presumably matches
  // VoiceLLMConfig.session_id of an active Converse stream — confirm.
  string session_id = 1;
}

// Response of VoiceLLMService.Interrupt.
message InterruptResponse {
  // Result flag of the call. NOTE(review): what false means (unknown
  // session vs. nothing to interrupt) is not stated in this schema — confirm.
  bool success = 1;
}