src/inference/services/llm_service.py
2,158 bytes · 60 lines · capsule://quake0day/[email protected]
raw on github
import grpc
from inference.core.registry import PluginRegistry
from inference.generated import llm_pb2, llm_pb2_grpc
from inference.plugins.llm.base import LLMPlugin
class LLMGRPCService(llm_pb2_grpc.LLMServiceServicer):
    """gRPC servicer that streams LLM output from a plugin held in a registry."""

    def __init__(self, registry: PluginRegistry) -> None:
        """Store the plugin registry used to resolve LLM plugins per request."""
        self.registry = registry

    def _get_plugin(self, provider: str = "") -> LLMPlugin:
        """Resolve the LLM plugin to use for a request.

        Args:
            provider: Optional provider name. Surrounding whitespace is
                stripped. When non-empty, the plugin registered under
                ``llm.<provider>`` is looked up; otherwise the registry's
                plugin for the ``"llm"`` category is used.

        Returns:
            The resolved plugin.

        Raises:
            RuntimeError: If no provider was given and the registry has no
                plugin for the ``"llm"`` category.
            KeyError: Presumably raised by ``registry.get`` for an unknown
                name (``GenerateStream`` handles it) -- confirm against the
                registry implementation.
        """
        provider = provider.strip()
        if provider:
            return self.registry.get(f"llm.{provider}")
        plugin = self.registry.get_by_category("llm")
        if plugin is None:
            raise RuntimeError("No LLM plugin initialized")
        return plugin

    @staticmethod
    def _build_messages(request_messages) -> list:
        """Convert request messages into the plain dicts passed to the plugin.

        Each resulting dict has ``role`` and ``content`` keys; an ``images``
        key (a list of per-image dicts) is present only when the message
        carries at least one image.
        """
        messages = []
        for msg in request_messages:
            images = [
                {
                    "data": image.data,
                    "mime_type": image.mime_type,
                    "width": image.width,
                    "height": image.height,
                    "source": image.source,
                    "timestamp_ms": image.timestamp_ms,
                    "frame_seq": image.frame_seq,
                }
                for image in msg.images
            ]
            item = {"role": msg.role, "content": msg.content}
            if images:
                item["images"] = images
            messages.append(item)
        return messages

    async def GenerateStream(self, request, context):
        """Stream ``LLMChunk`` responses generated by the selected plugin.

        The plugin is chosen from ``request.config.provider`` (empty means
        the registry's plugin for the ``"llm"`` category). The RPC is aborted
        with ``INVALID_ARGUMENT`` when the plugin cannot be resolved, or when
        the request contains images and the plugin does not declare
        ``supports_images``.
        """
        provider = request.config.provider if request.config else ""
        try:
            plugin = self._get_plugin(provider)
        except (KeyError, RuntimeError) as exc:
            await context.abort(grpc.StatusCode.INVALID_ARGUMENT, str(exc))
            # abort() is expected to raise; the return guards against a
            # context whose abort does not, so `plugin` is never used unbound.
            return

        messages = self._build_messages(request.messages)
        # "images" is only set on messages that actually carry images.
        has_images = any("images" in item for item in messages)
        if has_images and not getattr(plugin, "supports_images", False):
            # Report through the gRPC status rather than a raw exception so
            # the client receives a meaningful code and message instead of
            # an opaque UNKNOWN error.
            await context.abort(
                grpc.StatusCode.INVALID_ARGUMENT,
                "Configured LLM plugin does not support image input",
            )
            return

        async for chunk in plugin.generate_stream(messages):
            yield llm_pb2.LLMChunk(
                token=chunk.token,
                accumulated_text=chunk.accumulated_text,
                is_sentence_end=chunk.is_sentence_end,
                is_final=chunk.is_final,
            )