Skip to content

Use Cases & Cookbook

These examples are self-contained scenarios you can drop into a Jupyter notebook or Python script. Each one demonstrates a different research pattern and highlights SDK features you can combine in your own work.

Track how a single model’s stability degrades as capability outpaces alignment over time. This is the most common pattern in longitudinal safety research — monitoring the widening gap between what a model can do and what it is aligned to do.

"""Track how a single model's stability degrades as capability outpaces alignment."""
from kairos import KairosClient
from kairos.domains.ai_safety import AISafetyScenario, AISafetyEventType

client = KairosClient()

# One model, three escalating capability jumps on a fixed timeline.
scenario = (
    AISafetyScenario("Capability-Alignment Gap", seed=100)
    .add_model(
        name="subject_model",
        capability_index=300,
        alignment_score=80,
        guardrail_coverage=70,
    )
    # Simulate three successive capability jumps (e.g. training runs)
    .add_event(200, AISafetyEventType.CAPABILITY_JUMP, target="subject_model", magnitude=0.2)
    .add_event(500, AISafetyEventType.CAPABILITY_JUMP, target="subject_model", magnitude=0.3)
    .add_event(800, AISafetyEventType.CAPABILITY_JUMP, target="subject_model", magnitude=0.5)
)

trace = client.run(scenario, ticks=1000)

# Sample stability at key checkpoints: start, just before/after each jump, end.
checkpoints = [0, 199, 201, 499, 501, 799, 801, 999]
for t in checkpoints:
    print(f" tick {t:>4}: stability = {trace.stability_at(t):.4f}")

# Find the critical point where the gap became unrecoverable.
losses = trace.basin_losses()
if losses:
    print(f"\nCritical failure at tick {losses[0].tick} — "
          f"alignment could no longer contain capability pressure.")
else:
    print("\nModel remained within its stability basin for all 1000 ticks.")

What this shows: Each CAPABILITY_JUMP increases the engine’s destabilizing force (Γ). By reading stability_at() immediately before and after each jump, you can pinpoint the exact moment the alignment buffer is exhausted.


Real deployments rarely involve a single model in isolation. This example places three models with different risk profiles under a shared oversight board, then identifies which model fails first.

"""Three models with different risk profiles under one oversight board."""
from kairos import KairosClient
from kairos.domains.ai_safety import AISafetyScenario, AISafetyEventType

client = KairosClient()

scenario = (
    AISafetyScenario("Multi-Model Governance", seed=42)
    # A frontier reasoning model — high capability, moderate alignment
    .add_model(
        name="reasoning_model",
        capability_index=900,
        alignment_score=70,
        guardrail_coverage=80,
    )
    # A code-generation model — moderate capability, lower alignment
    .add_model(
        name="codegen_model",
        capability_index=600,
        alignment_score=55,
        guardrail_coverage=65,
    )
    # A general assistant — lower capability, well-aligned
    .add_model(
        name="assistant_model",
        capability_index=300,
        alignment_score=90,
        guardrail_coverage=85,
    )
    # Shared oversight board monitors all three
    .add_oversight_body(
        name="governance_board",
        guardrail_strength=75,
        response_latency=25,
    )
    # External shock: a capability jump hits the reasoning model
    .add_event(300, AISafetyEventType.CAPABILITY_JUMP, target="reasoning_model", magnitude=0.4)
)

trace = client.run(scenario, ticks=1000)

# Compare per-model outcomes using agent trace demultiplexing.
for model_name in trace.agent_ids():
    agent = trace.agent_trace(model_name)
    losses = agent.basin_losses()
    # Report only the FIRST basin loss per model — that is the failure point.
    status = f"failed at tick {losses[0].tick}" if losses else "stable"
    print(f" {model_name:>20}: {status}")

print(f"\nGlobal final phase: {trace.final_phase()}")
print(f"System-wide basin losses: {len(trace.basin_losses())}")

What this shows: add_oversight_body() adds a structural stabilizing force across all models. agent_trace() lets you demux the global trace to compare each model independently — essential for identifying which component is the weakest link.


Same frontier model, three different guardrail configurations. This pattern is useful for regulatory scenario planning: “What happens if we loosen/tighten guardrails?”

"""Same model and event timeline under three guardrail policy configurations."""
from kairos import KairosClient
from kairos.domains.ai_safety import AISafetyScenario, AISafetyEventType

client = KairosClient()

# Only the guardrail parameters vary between runs; model, seed, and events
# are held constant so differences in outcome are attributable to policy.
configs = {
    "minimal": {"guardrail_coverage": 30, "guardrail_strength": 30},
    "moderate": {"guardrail_coverage": 60, "guardrail_strength": 60},
    "aggressive": {"guardrail_coverage": 95, "guardrail_strength": 95},
}

results = {}
for label, cfg in configs.items():
    scenario = (
        AISafetyScenario(f"Policy: {label}", seed=7)
        .add_model(
            name="frontier_model",
            capability_index=700,
            alignment_score=65,
            guardrail_coverage=cfg["guardrail_coverage"],
        )
        .add_oversight_body(
            name="safety_board",
            guardrail_strength=cfg["guardrail_strength"],
            response_latency=20,
        )
        .add_event(200, AISafetyEventType.CAPABILITY_JUMP, target="frontier_model", magnitude=0.5)
    )
    trace = client.run(scenario, ticks=500)
    results[label] = {
        "final_phase": trace.final_phase(),
        "mean_stability": trace.mean_stability(),
        "basin_losses": len(trace.basin_losses()),
    }

# Print comparison table
print(f"{'Policy':<12} {'Final Phase':<12} {'Mean Stability':>16} {'Basin Losses':>14}")
print("-" * 56)
for label, r in results.items():
    print(f"{label:<12} {r['final_phase']:<12} {r['mean_stability']:>16.4f} {r['basin_losses']:>14}")

What this shows: By holding the model and event timeline constant and only varying the guardrail parameters, you isolate the causal effect of policy strength on system outcomes. This is the foundation of evidence-based AI governance.


Use the async API to sweep over 51 capability index values concurrently, collecting results into a DataFrame to find the critical threshold where basin loss probability exceeds 50%.

"""Concurrent parameter sweep over capability_index using the async API."""
import asyncio

import pandas as pd

from kairos import KairosClient
from kairos.domains.ai_safety import AISafetyScenario, AISafetyEventType


async def sweep():
    """Run one simulation per capability value concurrently; return a DataFrame."""
    client = KairosClient()
    # 51 values: 100, 118, ..., 1000 (step 18 spans the range inclusively).
    capability_values = range(100, 1001, 18)

    async def run_one(cap):
        # seed=cap keeps each run reproducible while varying across the sweep.
        scenario = (
            AISafetyScenario(f"sweep_cap_{cap}", seed=cap)
            .add_model(
                name="model",
                capability_index=cap,
                alignment_score=70,
                guardrail_coverage=60,
            )
            .add_event(100, AISafetyEventType.CAPABILITY_JUMP, target="model", magnitude=0.3)
        )
        trace = await client.arun(scenario, ticks=500)
        return {
            "capability_index": cap,
            "mean_stability": trace.mean_stability(),
            "min_stability": trace.min_stability(),
            "basin_losses": len(trace.basin_losses()),
            "final_phase": trace.final_phase(),
        }

    # Fan out all runs at once; gather preserves input order.
    tasks = [run_one(cap) for cap in capability_values]
    rows = await asyncio.gather(*tasks)
    return pd.DataFrame(rows)


df = asyncio.run(sweep())

# Find the critical threshold: lowest capability where any basin loss occurred.
failure_rate = df.groupby("capability_index")["basin_losses"].apply(lambda x: (x > 0).mean())
critical = failure_rate[failure_rate >= 0.5]
if not critical.empty:
    print(f"Critical capability threshold: {critical.index[0]}")
    print(f"Basin loss probability crosses 50% at capability_index = {critical.index[0]}")

print("\nSweep summary:")
print(df[["capability_index", "mean_stability", "basin_losses", "final_phase"]].to_string(index=False))

What this shows: client.arun() lets you run many simulations concurrently without blocking. Combined with Pandas, this makes the SDK suitable for systematic parameter exploration — the kind of analysis that belongs in a safety research paper.


Chain multiple events — a jailbreak, a capability jump, and an oversight reduction — to simulate a worst-case attack sequence. Then visualize the stability cascade.

"""Chain jailbreak, capability jump, and oversight reduction into one attack sequence."""
import matplotlib.pyplot as plt

from kairos import KairosClient
from kairos.domains.ai_safety import AISafetyScenario, AISafetyEventType

client = KairosClient()

scenario = (
    AISafetyScenario("Adversarial Cascade", seed=999)
    .add_model(
        name="target_model",
        capability_index=500,
        alignment_score=80,
        guardrail_coverage=75,
    )
    .add_oversight_body(
        name="safety_board",
        guardrail_strength=70,
        response_latency=20,
    )
    # Phase 1: Jailbreak weakens guardrails
    .add_event(100, AISafetyEventType.GUARDRAIL_REMOVED, target="target_model", magnitude=0.3)
    # Phase 2: Capability jump while guardrails are weakened
    .add_event(200, AISafetyEventType.CAPABILITY_JUMP, target="target_model", magnitude=0.5)
    # Phase 3: Oversight is reduced (e.g. weekend staffing)
    .add_event(300, AISafetyEventType.OVERSIGHT_REDUCTION, target="safety_board", magnitude=0.4)
)

trace = client.run(scenario, ticks=500)

# Identify the cascade point via phase transitions and basin losses.
transitions = trace.phase_transitions()
losses = trace.basin_losses()

print("Phase transitions:")
for pt in transitions:
    print(f" tick {pt.tick}: {pt.from_phase} → {pt.to_phase}")

if losses:
    print(f"\nCascade failure at tick {losses[0].tick}")
    print(f" The system survived the jailbreak and capability jump individually,")
    print(f" but the oversight reduction triggered an irreversible collapse.")

# Visualize the full stability timeline.
trace.plot_stability()
plt.title("Adversarial Stress Test — Stability Over Time")
plt.show()

What this shows: Real-world attacks are rarely single events. By chaining GUARDRAIL_REMOVED, CAPABILITY_JUMP, and OVERSIGHT_REDUCTION, you can model cascading failures and find the combination that breaks the system — before an attacker does.