Skip to content

Use Cases & Cookbook

These examples are self-contained scenarios you can drop into a Jupyter notebook or Python script. Each one demonstrates a different research pattern and highlights SDK features you can combine in your own work.

Track how a single model’s stability degrades as capability outpaces alignment over time. This is the most common pattern in longitudinal safety research — monitoring the widening gap between what a model can do and what it is aligned to do.

"""Track how a single model's stability degrades as capability outpaces alignment."""
from kairos import KairosClient
from kairos.domains.ai_safety import AISafetyScenario, AISafetyEventType

client = KairosClient()

# One model, three escalating capability jumps on a fixed timeline.
scenario = (
    AISafetyScenario("Capability-Alignment Gap", seed=100)
    .add_model(
        name="subject_model",
        capability_index=300,
        alignment_score=80,
        guardrail_coverage=70,
    )
    # Simulate three successive capability jumps (e.g. training runs)
    .add_event(200, AISafetyEventType.CAPABILITY_JUMP, target="subject_model", magnitude=0.2)
    .add_event(500, AISafetyEventType.CAPABILITY_JUMP, target="subject_model", magnitude=0.3)
    .add_event(800, AISafetyEventType.CAPABILITY_JUMP, target="subject_model", magnitude=0.5)
)

trace = client.run(scenario, ticks=1000)

# Sample stability at key checkpoints: start, just before/after each jump, end.
checkpoints = [0, 199, 201, 499, 501, 799, 801, 999]
for t in checkpoints:
    print(f" tick {t:>4}: stability = {trace.stability_at(t):.4f}")

# Find the critical point where the gap became unrecoverable.
losses = trace.basin_losses()
if losses:
    print(f"\nCritical failure at tick {losses[0].tick} — "
          f"alignment could no longer contain capability pressure.")
else:
    print("\nModel remained within its stability basin for all 1000 ticks.")

What this shows: Each CAPABILITY_JUMP increases the engine’s destabilizing force (Γ). By reading stability_at() immediately before and after each jump, you can pinpoint the exact moment the alignment buffer is exhausted.


Real deployments rarely involve a single model in isolation. This example places three models with different risk profiles under a shared oversight board, then identifies which model fails first.

"""Three models with different risk profiles under one oversight board."""
from kairos import KairosClient
from kairos.domains.ai_safety import AISafetyScenario, AISafetyEventType

client = KairosClient()

scenario = (
    AISafetyScenario("Multi-Model Governance", seed=42)
    # A frontier reasoning model — high capability, moderate alignment
    .add_model(
        name="reasoning_model",
        capability_index=900,
        alignment_score=70,
        guardrail_coverage=80,
    )
    # A code-generation model — moderate capability, lower alignment
    .add_model(
        name="codegen_model",
        capability_index=600,
        alignment_score=55,
        guardrail_coverage=65,
    )
    # A general assistant — lower capability, well-aligned
    .add_model(
        name="assistant_model",
        capability_index=300,
        alignment_score=90,
        guardrail_coverage=85,
    )
    # Shared oversight board monitors all three
    .add_oversight_body(
        name="governance_board",
        guardrail_strength=75,
        response_latency=25,
    )
    # External shock: a capability jump hits the reasoning model
    .add_event(300, AISafetyEventType.CAPABILITY_JUMP, target="reasoning_model", magnitude=0.4)
)

trace = client.run(scenario, ticks=1000)

# Compare per-model outcomes using agent trace demultiplexing.
for model_name in trace.agent_ids():
    agent = trace.agent_trace(model_name)
    losses = agent.basin_losses()
    # Report only the FIRST basin loss per model — that is the failure point.
    status = f"failed at tick {losses[0].tick}" if losses else "stable"
    print(f" {model_name:>20}: {status}")

print(f"\nGlobal final phase: {trace.final_phase()}")
print(f"System-wide basin losses: {len(trace.basin_losses())}")

What this shows: add_oversight_body() adds a structural stabilizing force across all models. agent_trace() lets you demux the global trace to compare each model independently — essential for identifying which component is the weakest link.


Same frontier model, three different guardrail configurations. This pattern is useful for regulatory scenario planning: “What happens if we loosen/tighten guardrails?”

"""Same model and event timeline under three guardrail policy configurations."""
from kairos import KairosClient
from kairos.domains.ai_safety import AISafetyScenario, AISafetyEventType

client = KairosClient()

# Only the guardrail parameters vary between runs; model, seed, and events
# are held constant so differences in outcome are attributable to policy.
configs = {
    "minimal": {"guardrail_coverage": 30, "guardrail_strength": 30},
    "moderate": {"guardrail_coverage": 60, "guardrail_strength": 60},
    "aggressive": {"guardrail_coverage": 95, "guardrail_strength": 95},
}

results = {}
for label, cfg in configs.items():
    scenario = (
        AISafetyScenario(f"Policy: {label}", seed=7)
        .add_model(
            name="frontier_model",
            capability_index=700,
            alignment_score=65,
            guardrail_coverage=cfg["guardrail_coverage"],
        )
        .add_oversight_body(
            name="safety_board",
            guardrail_strength=cfg["guardrail_strength"],
            response_latency=20,
        )
        .add_event(200, AISafetyEventType.CAPABILITY_JUMP, target="frontier_model", magnitude=0.5)
    )
    trace = client.run(scenario, ticks=500)
    results[label] = {
        "final_phase": trace.final_phase(),
        "mean_stability": trace.mean_stability(),
        "basin_losses": len(trace.basin_losses()),
    }

# Print comparison table
print(f"{'Policy':<12} {'Final Phase':<12} {'Mean Stability':>16} {'Basin Losses':>14}")
print("-" * 56)
for label, r in results.items():
    print(f"{label:<12} {r['final_phase']:<12} {r['mean_stability']:>16.4f} {r['basin_losses']:>14}")

What this shows: By holding the model and event timeline constant and only varying the guardrail parameters, you isolate the causal effect of policy strength on system outcomes. This is the foundation of evidence-based AI governance.


Use the async API to sweep over 51 capability index values concurrently, collecting results into a DataFrame to find the critical threshold where basin loss probability exceeds 50%.

"""Concurrent parameter sweep over capability_index using the async API."""
import asyncio

import pandas as pd

from kairos import KairosClient
from kairos.domains.ai_safety import AISafetyScenario, AISafetyEventType


async def sweep():
    """Run one simulation per capability value concurrently; return a DataFrame."""
    client = KairosClient()
    # 51 values: 100, 118, ..., 1000 (step 18 spans the range inclusively).
    capability_values = range(100, 1001, 18)

    async def run_one(cap):
        # seed=cap keeps each run reproducible while varying across the sweep.
        scenario = (
            AISafetyScenario(f"sweep_cap_{cap}", seed=cap)
            .add_model(
                name="model",
                capability_index=cap,
                alignment_score=70,
                guardrail_coverage=60,
            )
            .add_event(100, AISafetyEventType.CAPABILITY_JUMP, target="model", magnitude=0.3)
        )
        trace = await client.arun(scenario, ticks=500)
        return {
            "capability_index": cap,
            "mean_stability": trace.mean_stability(),
            "min_stability": trace.min_stability(),
            "basin_losses": len(trace.basin_losses()),
            "final_phase": trace.final_phase(),
        }

    # Fan out all runs at once; gather preserves input order.
    tasks = [run_one(cap) for cap in capability_values]
    rows = await asyncio.gather(*tasks)
    return pd.DataFrame(rows)


df = asyncio.run(sweep())

# Find the critical threshold: lowest capability where any basin loss occurred.
failure_rate = df.groupby("capability_index")["basin_losses"].apply(lambda x: (x > 0).mean())
critical = failure_rate[failure_rate >= 0.5]
if not critical.empty:
    print(f"Critical capability threshold: {critical.index[0]}")
    print(f"Basin loss probability crosses 50% at capability_index = {critical.index[0]}")

print("\nSweep summary:")
print(df[["capability_index", "mean_stability", "basin_losses", "final_phase"]].to_string(index=False))

What this shows: client.arun() lets you run many simulations concurrently without blocking. Combined with Pandas, this makes the SDK suitable for systematic parameter exploration — the kind of analysis that belongs in a safety research paper.


Chain multiple events — a jailbreak, a capability jump, and an oversight reduction — to simulate a worst-case attack sequence. Then visualize the stability cascade.

"""Chain jailbreak, capability jump, and oversight reduction into one attack sequence."""
import matplotlib.pyplot as plt

from kairos import KairosClient
from kairos.domains.ai_safety import AISafetyScenario, AISafetyEventType

client = KairosClient()

scenario = (
    AISafetyScenario("Adversarial Cascade", seed=999)
    .add_model(
        name="target_model",
        capability_index=500,
        alignment_score=80,
        guardrail_coverage=75,
    )
    .add_oversight_body(
        name="safety_board",
        guardrail_strength=70,
        response_latency=20,
    )
    # Phase 1: Jailbreak weakens guardrails
    .add_event(100, AISafetyEventType.GUARDRAIL_REMOVED, target="target_model", magnitude=0.3)
    # Phase 2: Capability jump while guardrails are weakened
    .add_event(200, AISafetyEventType.CAPABILITY_JUMP, target="target_model", magnitude=0.5)
    # Phase 3: Oversight is reduced (e.g. weekend staffing)
    .add_event(300, AISafetyEventType.OVERSIGHT_REDUCTION, target="safety_board", magnitude=0.4)
)

trace = client.run(scenario, ticks=500)

# Identify the cascade point via phase transitions and basin losses.
transitions = trace.phase_transitions()
losses = trace.basin_losses()

print("Phase transitions:")
for pt in transitions:
    print(f" tick {pt.tick}: {pt.from_phase} → {pt.to_phase}")

if losses:
    print(f"\nCascade failure at tick {losses[0].tick}")
    print(f" The system survived the jailbreak and capability jump individually,")
    print(f" but the oversight reduction triggered an irreversible collapse.")

# Visualize the full stability timeline.
trace.plot_stability()
plt.title("Adversarial Stress Test — Stability Over Time")
plt.show()

What this shows: Real-world attacks are rarely single events. By chaining GUARDRAIL_REMOVED, CAPABILITY_JUMP, and OVERSIGHT_REDUCTION, you can model cascading failures and find the combination that breaks the system — before an attacker does.