Analytics
Track prompt template performance with comprehensive metrics. Monitor success rates, latency, token usage, and errors to optimize your LLM applications.
Quick Start
from parsec.prompts import TemplateAnalytics, TemplateManager
from parsec.models.adapters import OpenAIAdapter
from parsec.validators import PydanticValidator
from parsec import EnforcementEngine
from pydantic import BaseModel
# Create analytics tracker
analytics = TemplateAnalytics()
# Create manager with analytics
manager = TemplateManager(analytics=analytics)
# Setup engine
adapter = OpenAIAdapter(api_key="your-key", model="gpt-4o-mini")
validator = PydanticValidator()
engine = EnforcementEngine(adapter, validator)
# Define schema
class UserInfo(BaseModel):
    name: str
    email: str

# Register template
manager.register_template(
    name="user_extractor",
    template="Extract user info: {text}",
    version="1.0.0",
    schema=UserInfo
)
# Use template - metrics tracked automatically
template = manager.get_template("user_extractor", "1.0.0")
result = await template.render(engine=engine, text="John at john@example.com")
# View metrics
metrics = analytics.get_metrics("user_extractor", "1.0.0")
print(f"Success rate: {metrics.success_rate:.2%}")
print(f"Average latency: {metrics.average_latency_ms:.0f}ms")
print(f"P95 latency: {metrics.p95_latency_ms:.0f}ms")Template Metrics
Core Metrics
metrics = analytics.get_metrics("email_generator", "2.1.0")
# Call statistics
print(f"Total calls: {metrics.total_calls}")
print(f"Successful: {metrics.successful_calls}")
print(f"Failed: {metrics.failed_calls}")
print(f"Success rate: {metrics.success_rate:.2%}")
# Token usage
print(f"Total tokens: {metrics.total_tokens}")
print(f"Average tokens/call: {metrics.average_tokens:.0f}")
# Latency
print(f"Average latency: {metrics.average_latency_ms:.0f}ms")
print(f"P95 latency: {metrics.p95_latency_ms:.0f}ms")
print(f"P99 latency: {metrics.p99_latency_ms:.0f}ms")
# Retries
print(f"Average retries: {metrics.average_retries:.2f}")Latency Percentiles
metrics = analytics.get_metrics("content_classifier")
# 95% of calls complete within this time
p95 = metrics.p95_latency_ms
print(f"P95 latency: {p95:.0f}ms")
# 99% of calls complete within this time
p99 = metrics.p99_latency_ms
print(f"P99 latency: {p99:.0f}ms")
# SLA monitoring
if p95 > 2000:  # 2 second SLA
    print("⚠️ P95 latency exceeds SLA")
Error Analysis
metrics = analytics.get_metrics("json_extractor", "1.0.0")
# Get error breakdown
error_breakdown = metrics.get_error_breakdown()
for error_type, count in error_breakdown.items():
print(f"{error_type}: {count} occurrences")
# Example output:
# ValidationError: Field 'email' is required: 42 occurrences
# ValidationError: Invalid date format: 15 occurrences
# ValidationError: Value must be positive: 8 occurrences
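If you track many templates, the same breakdown can be aggregated across all of them to surface systemic schema problems. A minimal sketch using only get_all_metrics() and get_error_breakdown(); the top_validation_errors helper is illustrative, not part of parsec:
from collections import Counter

def top_validation_errors(analytics: TemplateAnalytics, limit: int = 5):
    """Count validation errors across every tracked template and version."""
    counter = Counter()
    for template_key, metrics in analytics.get_all_metrics().items():
        for error_type, count in metrics.get_error_breakdown().items():
            counter[error_type] += count
    return counter.most_common(limit)

for error_type, count in top_validation_errors(analytics):
    print(f"{error_type}: {count} occurrences")
Template Analytics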
Recording Results Manually
Metrics are tracked automatically when templates are rendered through TemplateManager, but you can also record results manually:
from parsec.prompts import TemplateAnalytics
analytics = TemplateAnalytics()
# Record successful call
analytics.record_result(
    template_name="summarizer",
    version="1.0.0",
    success=True,
    tokens_used=1250,
    latency_ms=850.5,
    retry_count=0
)
# Record failed call
analytics.record_result(
    template_name="summarizer",
    version="1.0.0",
    success=False,
    tokens_used=300,
    latency_ms=450.2,
    retry_count=2,
    validation_errors=["ValidationError: Field 'summary' is required"]
)
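When you record results yourself, you also have to measure latency and catch failures yourself. A minimal sketch, assuming a hypothetical call_model() coroutine that returns the number of tokens used (it is not part of parsec):
import time

async def record_manual_call():
    start = time.perf_counter()
    try:
        tokens = await call_model()  # hypothetical LLM call returning tokens used
        success, errors = True, None
    except Exception as exc:  # any failure counts as a failed call
        tokens, success, errors = 0, False, [str(exc)]
    latency_ms = (time.perf_counter() - start) * 1000
    analytics.record_result(
        template_name="summarizer",
        version="1.0.0",
        success=success,
        tokens_used=tokens,
        latency_ms=latency_ms,
        retry_count=0,
        validation_errors=errors,
    )
Retrieving Metrics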
# Get metrics for specific version
metrics = analytics.get_metrics("email_writer", "2.0.0")
if metrics:
    print(f"Success rate: {metrics.success_rate:.2%}")
else:
    print("No data for this version")
# Get combined metrics across all versions
all_versions = analytics.get_metrics("email_writer")
print(f"Total calls across all versions: {all_versions.total_calls}")
print(f"Overall success rate: {all_versions.success_rate:.2%}")Comparing Versions
# Get metrics for all versions
versions = analytics.compare_versions("content_generator")
for version, metrics in versions.items():
    print(f"\nVersion {version}:")
    print(f"  Calls: {metrics.total_calls}")
    print(f"  Success rate: {metrics.success_rate:.2%}")
    print(f"  Avg latency: {metrics.average_latency_ms:.0f}ms")
    print(f"  Avg tokens: {metrics.average_tokens:.0f}")
# Example output:
# Version 1.0.0:
# Calls: 1523
# Success rate: 94.30%
# Avg latency: 1250ms
# Avg tokens: 850
#
# Version 2.0.0:
# Calls: 892
# Success rate: 97.80%
# Avg latency: 980ms
# Avg tokens: 720
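Because token totals are tracked per version, the same comparison can be expressed in cost. A rough sketch; the $0.60-per-million-token rate is an arbitrary example, not a real price:
PRICE_PER_MILLION_TOKENS = 0.60  # illustrative rate only - substitute your model's pricing

for version, metrics in analytics.compare_versions("content_generator").items():
    cost_per_call = metrics.average_tokens * PRICE_PER_MILLION_TOKENS / 1_000_000
    total_cost = metrics.total_tokens * PRICE_PER_MILLION_TOKENS / 1_000_000
    print(f"Version {version}: ~${cost_per_call:.5f}/call, ~${total_cost:.2f} total")
Finding Best Version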
# Find version with highest success rate (default)
best = analytics.get_best_performing_version("summarizer")
print(f"Best version: {best.version}")
print(f"Success rate: {best.success_rate:.2%}")
# Find version with lowest latency
fastest = analytics.get_best_performing_version("summarizer", "average_latency_ms")
print(f"Fastest version: {fastest.version}")
print(f"Avg latency: {fastest.average_latency_ms:.0f}ms")
# Find version with lowest token usage
cheapest = analytics.get_best_performing_version("summarizer", "average_tokens")
print(f"Most efficient version: {cheapest.version}")
print(f"Avg tokens: {cheapest.average_tokens:.0f}")All Metrics
all_metrics = analytics.get_all_metrics()
for template_key, metrics in all_metrics.items():
print(f"{template_key}:")
print(f" Success rate: {metrics.success_rate:.2%}")
print(f" Total calls: {metrics.total_calls}")Performance Optimization
Find Slow Templates
def find_slow_templates(analytics: TemplateAnalytics, threshold_ms: float = 2000):
    """Find templates exceeding latency threshold."""
    slow_templates = []
    for template_key, metrics in analytics.get_all_metrics().items():
        if metrics.p95_latency_ms > threshold_ms:
            slow_templates.append({
                "template": template_key,
                "p95_latency": metrics.p95_latency_ms,
                "avg_latency": metrics.average_latency_ms
            })
    slow_templates.sort(key=lambda x: x["p95_latency"], reverse=True)
    return slow_templates

# Find and optimize slow templates
slow = find_slow_templates(analytics, threshold_ms=1500)
for template in slow:
    print(f"{template['template']}: P95={template['p95_latency']:.0f}ms")
Find High-Cost Templates
def find_expensive_templates(analytics: TemplateAnalytics, threshold_tokens: int = 1000):
    """Find templates with high token usage."""
    expensive = []
    for template_key, metrics in analytics.get_all_metrics().items():
        if metrics.average_tokens > threshold_tokens:
            expensive.append({
                "template": template_key,
                "avg_tokens": metrics.average_tokens,
                "total_tokens": metrics.total_tokens
            })
    expensive.sort(key=lambda x: x["total_tokens"], reverse=True)
    return expensive

# Find templates to optimize for cost
expensive = find_expensive_templates(analytics, threshold_tokens=800)
for template in expensive:
    print(f"{template['template']}: avg={template['avg_tokens']:.0f}")
Production Monitoring
Performance Alerts
from typing import List

class PerformanceAlerts:
    def __init__(self, analytics: TemplateAnalytics):
        self.analytics = analytics
        self.thresholds = {
            "success_rate": 0.95,
            "p95_latency_ms": 2000,
            "average_tokens": 1000
        }

    def check_alerts(self) -> List[dict]:
        alerts = []
        for template_key, metrics in self.analytics.get_all_metrics().items():
            # Check success rate
            if metrics.success_rate < self.thresholds["success_rate"]:
                alerts.append({
                    "template": template_key,
                    "alert": "low_success_rate",
                    "value": metrics.success_rate
                })
            # Check latency
            if metrics.p95_latency_ms > self.thresholds["p95_latency_ms"]:
                alerts.append({
                    "template": template_key,
                    "alert": "high_latency",
                    "value": metrics.p95_latency_ms
                })
            # Check tokens
            if metrics.average_tokens > self.thresholds["average_tokens"]:
                alerts.append({
                    "template": template_key,
                    "alert": "high_token_usage",
                    "value": metrics.average_tokens
                })
        return alerts

# Check for issues
alerter = PerformanceAlerts(analytics)
alerts = alerter.check_alerts()
for alert in alerts:
    print(f"⚠️ {alert['template']}: {alert['alert']}")
    print(f"   Value: {alert['value']:.2f}")
import json
from datetime import datetime
def export_metrics_json(analytics: TemplateAnalytics, filepath: str):
    """Export metrics to JSON for external analysis."""
    data = {
        "exported_at": datetime.now().isoformat(),
        "templates": {}
    }
    for template_key, metrics in analytics.get_all_metrics().items():
        data["templates"][template_key] = {
            "total_calls": metrics.total_calls,
            "success_rate": metrics.success_rate,
            "average_latency_ms": metrics.average_latency_ms,
            "p95_latency_ms": metrics.p95_latency_ms,
            "p99_latency_ms": metrics.p99_latency_ms,
            "average_tokens": metrics.average_tokens,
            "total_tokens": metrics.total_tokens,
            "error_breakdown": metrics.get_error_breakdown()
        }
    with open(filepath, "w") as f:
        json.dump(data, f, indent=2)

# Export for analysis
export_metrics_json(analytics, "./metrics_export.json")
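If you prefer a flat file for spreadsheets or BI tools, the same metrics can be written as CSV. A sketch along the same lines, using only the metric fields documented below:
import csv

def export_metrics_csv(analytics: TemplateAnalytics, filepath: str):
    """Write one row per template/version with the headline metrics."""
    fields = ["template", "total_calls", "success_rate",
              "average_latency_ms", "p95_latency_ms", "average_tokens"]
    with open(filepath, "w", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=fields)
        writer.writeheader()
        for template_key, metrics in analytics.get_all_metrics().items():
            writer.writerow({
                "template": template_key,
                "total_calls": metrics.total_calls,
                "success_rate": metrics.success_rate,
                "average_latency_ms": metrics.average_latency_ms,
                "p95_latency_ms": metrics.p95_latency_ms,
                "average_tokens": metrics.average_tokens,
            })

export_metrics_csv(analytics, "./metrics_export.csv")
API Reference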
TemplateMetrics
@dataclass
class TemplateMetrics:
    template_name: str
    version: str
    total_calls: int = 0
    successful_calls: int = 0
    failed_calls: int = 0
    total_tokens: int = 0
    total_latency_ms: float = 0.0
    retry_counts: List[int] = field(default_factory=list)
    validation_errors: List[str] = field(default_factory=list)
    timestamps: List[datetime] = field(default_factory=list)
    latencies: List[float] = field(default_factory=list)

    @property
    def success_rate(self) -> float:
        """Success rate between 0 and 1."""

    @property
    def average_latency_ms(self) -> float:
        """Average latency in milliseconds."""

    @property
    def average_tokens(self) -> float:
        """Average tokens per call."""

    @property
    def average_retries(self) -> float:
        """Average retries per call."""

    @property
    def p95_latency_ms(self) -> float:
        """95th percentile latency."""

    @property
    def p99_latency_ms(self) -> float:
        """99th percentile latency."""

    def get_error_breakdown(self) -> Dict[str, int]:
        """Breakdown of validation errors."""
TemplateAnalytics
class TemplateAnalytics:
    def __init__(self): ...

    def record_result(
        self,
        template_name: str,
        version: str,
        success: bool,
        tokens_used: int,
        latency_ms: float,
        retry_count: int,
        validation_errors: Optional[List[str]] = None
    ) -> None:
        """Record a template execution result."""

    def get_metrics(
        self,
        template_name: str,
        version: Optional[str] = None
    ) -> Optional[TemplateMetrics]:
        """
        Get metrics for a template.

        If version is specified, returns metrics for that version.
        If version is None, returns aggregated metrics across all versions.
        """

    def get_all_metrics(self) -> Dict[str, TemplateMetrics]:
        """Get metrics for all templates and versions."""

    def compare_versions(self, template_name: str) -> Dict[str, TemplateMetrics]:
        """Compare metrics across all versions of a template."""

    def get_best_performing_version(
        self,
        template_name: str,
        metric: str = "success_rate"
    ) -> Optional[TemplateMetrics]:
        """Find the best performing version for a given metric."""