Every call to run_local returns a RunLocalResult:
result = client.run_local(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Hello!"}],
)

RunLocalResult

Field            Type                  Description
text             str                   The generated text response
span_id          str                   Unique span identifier (always generated, even if send_trace=False)
trace_id         str                   Trace identifier for grouping spans (auto-generated if not provided)
latency_ms       int                   Request latency in milliseconds
usage            TokenUsage            Token usage breakdown
cost             float | None          Estimated cost in USD (calculated server-side)
provider         LLMProvider           Provider that handled the request ("openai", "anthropic", "google")
model            str                   Model used for generation
tool_calls       list[ToolCall]        Tool calls made by the model
finish_reason    FinishReason          Why the model stopped generating
message          LocalPromptMessage    Full assistant message for multi-turn conversations
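
The message field carries the full assistant turn, so you can feed it straight back into the next call. A minimal sketch, assuming the messages parameter accepts the returned LocalPromptMessage alongside plain dicts:
messages = [{"role": "user", "content": "Hello!"}]
result = client.run_local(model="gpt-4o", messages=messages)

# Append the assistant turn, then ask a follow-up
messages.append(result.message)
messages.append({"role": "user", "content": "Can you elaborate?"})
followup = client.run_local(model="gpt-4o", messages=messages)
print(followup.text)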

TokenUsage

from pydantic import BaseModel


class TokenUsage(BaseModel):
    input_tokens: int       # alias: "inputTokens"
    output_tokens: int      # alias: "outputTokens"
    total_tokens: int       # alias: "totalTokens"
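
The camelCase aliases let the model parse the wire format directly. A quick illustration, assuming the SDK is built on Pydantic v2:
usage = TokenUsage.model_validate(
    {"inputTokens": 12, "outputTokens": 48, "totalTokens": 60}
)
print(usage.input_tokens)  # 12, populated via the "inputTokens" alias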

ToolCall

from typing import Any

from pydantic import BaseModel


class ToolCall(BaseModel):
    id: str
    name: str
    arguments: dict[str, Any]

FinishReason

from typing import Literal

FinishReason = Literal["stop", "max_tokens", "tool_calls"]
  • stop - Model finished naturally
  • max_tokens - Hit the token limit
  • tool_calls - Model wants to call a tool
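
In practice you branch on finish_reason to decide what to do next. A minimal sketch using only the fields documented above:
result = client.run_local(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Summarize the report."}],
)

if result.finish_reason == "stop":
    print(result.text)
elif result.finish_reason == "max_tokens":
    # Output was truncated; consider retrying with a larger max_output_tokens
    print(result.text)
elif result.finish_reason == "tool_calls":
    for call in result.tool_calls:
        print(f"Model requested tool {call.name} with {call.arguments}")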

Accessing Response Fields

result = client.run_local(
    model="gpt-4o",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "What is TypeScript?"},
    ],
    tags=["documentation"],
)

# Generated text
print(result.text)
# "TypeScript is a strongly typed programming language..."

# Span ID for debugging
print(f"Span ID: {result.span_id}")
# "sp_a1b2c3d4e5f67890"

# Performance metrics
print(f"Latency: {result.latency_ms}ms")
# "Latency: 1234ms"

# Token usage
print(f"Input tokens: {result.usage.input_tokens}")
print(f"Output tokens: {result.usage.output_tokens}")
print(f"Total tokens: {result.usage.total_tokens}")

# Provider info
print(f"Provider: {result.provider}")
print(f"Model: {result.model}")

Cost Tracking

The cost field contains the estimated cost in USD, calculated server-side when the span is processed:
result = client.run_local(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Write a haiku."}],
)

if result.cost is not None:
    print(f"Estimated cost: ${result.cost:.6f}")

The cost field in the immediate response may be None. Accurate cost calculations are available in the Tracia dashboard after span processing.
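
To keep a running client-side total, one pattern is to accumulate whatever cost figures arrive and treat None as not yet known. A sketch:
total_cost = 0.0

for question in ["What is Rust?", "What is Go?"]:
    result = client.run_local(
        model="gpt-4o",
        messages=[{"role": "user", "content": question}],
    )
    if result.cost is not None:
        total_cost += result.cost

print(f"Approximate session cost: ${total_cost:.6f}")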

Usage Patterns

Logging Metrics

import json
from datetime import datetime


def run_with_logging(messages):
    result = client.run_local(
        model="gpt-4o",
        messages=messages,
        tags=["monitored"],
    )

    print(json.dumps({
        "span_id": result.span_id,
        "model": result.model,
        "provider": result.provider,
        "latency_ms": result.latency_ms,
        "input_tokens": result.usage.input_tokens,
        "output_tokens": result.usage.output_tokens,
        "total_tokens": result.usage.total_tokens,
        "timestamp": datetime.now().isoformat(),
    }))

    return result.text

Token Budget Management

MAX_TOKENS = 10000
total_tokens_used = 0


def run_with_budget(messages):
    global total_tokens_used

    if total_tokens_used >= MAX_TOKENS:
        raise RuntimeError("Token budget exceeded")

    result = client.run_local(
        model="gpt-4o",
        messages=messages,
        max_output_tokens=min(500, MAX_TOKENS - total_tokens_used),
    )

    total_tokens_used += result.usage.total_tokens
    print(f"Tokens remaining: {MAX_TOKENS - total_tokens_used}")

    return result

Type Imports

Import the types for type checking:
from tracia import Tracia, RunLocalResult, TokenUsage


def process_result(client: Tracia) -> RunLocalResult:
    return client.run_local(
        model="gpt-4o",
        messages=[{"role": "user", "content": "Hello!"}],
    )


def log_usage(usage: TokenUsage) -> None:
    print(f"Tokens: {usage.total_tokens}")