You can't improve what you don't measure. Platform engineering requires a clear understanding of how the platform impacts developer productivity, system reliability, and business outcomes. The challenge is choosing the right metrics: measure the wrong things and teams end up gaming the numbers instead of genuinely improving.
In my experience, the most effective platform metrics focus on outcomes (what developers achieve) rather than outputs (what the platform does). The four DORA metrics (deployment frequency, lead time for changes, change failure rate, and time to restore service) have become the industry standard because they correlate strongly with organizational performance.
In Article 12: Building Platform Teams, we'll explore how to structure and scale platform teams, manage stakeholders, and apply the platform-as-product mindset.
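Before we get there, the rest of this article works through the measurement side in code. The first sketch below is a minimal DORA calculator: it derives the four metrics from deployment and incident records and maps each onto a performance band. The thresholds are simplified approximations of the State of DevOps bands rather than official definitions, and the data model assumes you can tag each deployment with its commit timestamp and link incidents back to the deployment that caused them.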
"""
DORA metrics calculation and reporting.
"""
from dataclasses import dataclass, field
from datetime import datetime, timedelta
from typing import List, Dict, Optional
from enum import Enum
import statistics
class PerformanceLevel(Enum):
ELITE = "elite"
HIGH = "high"
MEDIUM = "medium"
LOW = "low"
@dataclass
class Deployment:
"""Represents a production deployment."""
id: str
team: str
service: str
timestamp: datetime
commit_sha: str
commit_timestamp: datetime
success: bool
caused_incident: bool = False
@dataclass
class Incident:
"""Represents a production incident."""
id: str
team: str
service: str
started_at: datetime
resolved_at: Optional[datetime]
caused_by_deployment: Optional[str] = None # Deployment ID
@dataclass
class DORAMetrics:
"""DORA metrics for a team."""
team: str
period_start: datetime
period_end: datetime
deployment_frequency: float # Deployments per day
lead_time_hours: float # Hours from commit to deploy
change_failure_rate: float # Percentage
mttr_hours: float # Mean time to restore in hours
deployment_frequency_level: PerformanceLevel = PerformanceLevel.LOW
lead_time_level: PerformanceLevel = PerformanceLevel.LOW
change_failure_level: PerformanceLevel = PerformanceLevel.LOW
mttr_level: PerformanceLevel = PerformanceLevel.LOW
def __post_init__(self):
"""Calculate performance levels."""
# Deployment Frequency
        if self.deployment_frequency >= 1: # At least daily (on-demand)
self.deployment_frequency_level = PerformanceLevel.ELITE
elif self.deployment_frequency >= 0.14: # Daily-weekly
self.deployment_frequency_level = PerformanceLevel.HIGH
elif self.deployment_frequency >= 0.03: # Weekly-monthly
self.deployment_frequency_level = PerformanceLevel.MEDIUM
else:
self.deployment_frequency_level = PerformanceLevel.LOW
# Lead Time
if self.lead_time_hours < 1:
self.lead_time_level = PerformanceLevel.ELITE
elif self.lead_time_hours < 24 * 7: # Less than a week
self.lead_time_level = PerformanceLevel.HIGH
elif self.lead_time_hours < 24 * 30: # Less than a month
self.lead_time_level = PerformanceLevel.MEDIUM
else:
self.lead_time_level = PerformanceLevel.LOW
# Change Failure Rate
if self.change_failure_rate <= 15:
self.change_failure_level = PerformanceLevel.ELITE
elif self.change_failure_rate <= 30:
self.change_failure_level = PerformanceLevel.HIGH
elif self.change_failure_rate <= 45:
self.change_failure_level = PerformanceLevel.MEDIUM
else:
self.change_failure_level = PerformanceLevel.LOW
# MTTR
if self.mttr_hours < 1:
self.mttr_level = PerformanceLevel.ELITE
elif self.mttr_hours < 24:
self.mttr_level = PerformanceLevel.HIGH
elif self.mttr_hours < 24 * 7:
self.mttr_level = PerformanceLevel.MEDIUM
else:
self.mttr_level = PerformanceLevel.LOW
@property
def overall_level(self) -> PerformanceLevel:
"""Calculate overall performance level."""
levels = [
self.deployment_frequency_level,
self.lead_time_level,
self.change_failure_level,
self.mttr_level,
]
level_values = {
PerformanceLevel.ELITE: 4,
PerformanceLevel.HIGH: 3,
PerformanceLevel.MEDIUM: 2,
PerformanceLevel.LOW: 1,
}
avg = statistics.mean(level_values[l] for l in levels)
if avg >= 3.5:
return PerformanceLevel.ELITE
elif avg >= 2.5:
return PerformanceLevel.HIGH
elif avg >= 1.5:
return PerformanceLevel.MEDIUM
else:
return PerformanceLevel.LOW
class DORACalculator:
"""Calculate DORA metrics from deployment and incident data."""
def __init__(self):
self.deployments: List[Deployment] = []
self.incidents: List[Incident] = []
def add_deployment(self, deployment: Deployment):
"""Add a deployment to the dataset."""
self.deployments.append(deployment)
def add_incident(self, incident: Incident):
"""Add an incident to the dataset."""
self.incidents.append(incident)
def calculate(
self,
team: str,
period_start: datetime,
period_end: datetime,
) -> DORAMetrics:
"""Calculate DORA metrics for a team and period."""
team_deployments = [
d for d in self.deployments
if d.team == team
and period_start <= d.timestamp <= period_end
]
team_incidents = [
i for i in self.incidents
if i.team == team
and period_start <= i.started_at <= period_end
]
# Deployment Frequency
days = (period_end - period_start).days or 1
deployment_frequency = len(team_deployments) / days
# Lead Time for Changes
lead_times = [
(d.timestamp - d.commit_timestamp).total_seconds() / 3600
for d in team_deployments
if d.commit_timestamp
]
lead_time_hours = statistics.mean(lead_times) if lead_times else 0
# Change Failure Rate
failed_deployments = sum(1 for d in team_deployments if d.caused_incident)
change_failure_rate = (
(failed_deployments / len(team_deployments) * 100)
if team_deployments else 0
)
# Mean Time to Restore
restoration_times = [
(i.resolved_at - i.started_at).total_seconds() / 3600
for i in team_incidents
if i.resolved_at
]
mttr_hours = statistics.mean(restoration_times) if restoration_times else 0
return DORAMetrics(
team=team,
period_start=period_start,
period_end=period_end,
deployment_frequency=deployment_frequency,
lead_time_hours=lead_time_hours,
change_failure_rate=change_failure_rate,
mttr_hours=mttr_hours,
)
def generate_report(self, metrics: DORAMetrics) -> str:
"""Generate human-readable report."""
level_emoji = {
            PerformanceLevel.ELITE: "🚀",
            PerformanceLevel.HIGH: "✅",
            PerformanceLevel.MEDIUM: "⚠️",
            PerformanceLevel.LOW: "❌",
}
lines = [
f"DORA Metrics Report: {metrics.team}",
f"Period: {metrics.period_start.date()} to {metrics.period_end.date()}",
"=" * 50,
"",
f"Overall Performance: {level_emoji[metrics.overall_level]} {metrics.overall_level.value.upper()}",
"",
"Metrics:",
f" {level_emoji[metrics.deployment_frequency_level]} Deployment Frequency: {metrics.deployment_frequency:.2f}/day",
f" {level_emoji[metrics.lead_time_level]} Lead Time: {metrics.lead_time_hours:.1f} hours",
f" {level_emoji[metrics.change_failure_level]} Change Failure Rate: {metrics.change_failure_rate:.1f}%",
f" {level_emoji[metrics.mttr_level]} MTTR: {metrics.mttr_hours:.1f} hours",
]
return "\n".join(lines)
# Example usage
calc = DORACalculator()
# Add sample data
base_time = datetime.now() - timedelta(days=30)
for i in range(60): # 60 deployments in 30 days = 2/day
calc.add_deployment(Deployment(
id=f"deploy-{i}",
team="checkout",
service="checkout-api",
timestamp=base_time + timedelta(hours=i * 12),
commit_sha=f"abc{i}",
commit_timestamp=base_time + timedelta(hours=i * 12 - 2), # 2 hour lead time
success=True,
caused_incident=(i % 20 == 0), # 5% failure rate
))
for i in range(3):
calc.add_incident(Incident(
id=f"incident-{i}",
team="checkout",
service="checkout-api",
started_at=base_time + timedelta(days=i * 10),
resolved_at=base_time + timedelta(days=i * 10, hours=1), # 1 hour MTTR
caused_by_deployment=f"deploy-{i * 20}",
))
metrics = calc.calculate(
team="checkout",
period_start=base_time,
period_end=datetime.now(),
)
print(calc.generate_report(metrics))
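Run against the sample data, the checkout team should come out elite overall: two deployments per day and a 5% change failure rate sit in the elite band, while the two-hour lead time and one-hour MTTR land in the high band.

DORA metrics tell you how quickly and safely code reaches production, but they say little about how the platform feels to use. Periodic developer experience surveys complement them with qualitative signal. The sketch below is one way to run such a survey: a fixed question set across six categories, an NPS calculation, per-category and per-team scores, and a ranked list of the lowest-scoring questions to prioritize.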
"""
Developer experience survey and analysis.
"""
from dataclasses import dataclass
from typing import Dict, List, Optional
from enum import Enum
from datetime import datetime
import statistics
class SurveyCategory(Enum):
PLATFORM_USABILITY = "platform_usability"
DOCUMENTATION = "documentation"
DEPLOYMENT_EXPERIENCE = "deployment_experience"
SUPPORT_QUALITY = "support_quality"
SELF_SERVICE = "self_service"
OVERALL_SATISFACTION = "overall_satisfaction"
@dataclass
class SurveyQuestion:
"""A survey question."""
id: str
category: SurveyCategory
text: str
scale_min: int = 1
scale_max: int = 5
@dataclass
class SurveyResponse:
"""A survey response from a developer."""
respondent_id: str # Anonymous
team: str
timestamp: datetime
answers: Dict[str, int] # question_id -> score
comments: Optional[str] = None
class DeveloperExperienceSurvey:
"""Manage developer experience surveys."""
QUESTIONS = [
SurveyQuestion(
"q1", SurveyCategory.PLATFORM_USABILITY,
"How easy is it to deploy a new service?",
),
SurveyQuestion(
"q2", SurveyCategory.PLATFORM_USABILITY,
"How easy is it to find what you need in the developer portal?",
),
SurveyQuestion(
"q3", SurveyCategory.DOCUMENTATION,
"How helpful is the platform documentation?",
),
SurveyQuestion(
"q4", SurveyCategory.DOCUMENTATION,
"How up-to-date is the documentation?",
),
SurveyQuestion(
"q5", SurveyCategory.DEPLOYMENT_EXPERIENCE,
"How confident are you in the deployment process?",
),
SurveyQuestion(
"q6", SurveyCategory.DEPLOYMENT_EXPERIENCE,
"How quickly can you get feedback on deployment issues?",
),
SurveyQuestion(
"q7", SurveyCategory.SUPPORT_QUALITY,
"How satisfied are you with platform team responsiveness?",
),
SurveyQuestion(
"q8", SurveyCategory.SUPPORT_QUALITY,
"How helpful is the platform team in resolving issues?",
),
SurveyQuestion(
"q9", SurveyCategory.SELF_SERVICE,
"How often can you complete tasks without asking for help?",
),
SurveyQuestion(
"q10", SurveyCategory.SELF_SERVICE,
"How satisfied are you with the available golden paths?",
),
SurveyQuestion(
"q11", SurveyCategory.OVERALL_SATISFACTION,
"How likely are you to recommend the platform to a colleague?",
scale_min=0, scale_max=10, # NPS scale
),
SurveyQuestion(
"q12", SurveyCategory.OVERALL_SATISFACTION,
"Overall, how satisfied are you with the developer platform?",
),
]
def __init__(self):
self.responses: List[SurveyResponse] = []
self.questions_by_id = {q.id: q for q in self.QUESTIONS}
def add_response(self, response: SurveyResponse):
"""Add a survey response."""
self.responses.append(response)
def calculate_nps(self) -> Dict[str, float]:
"""Calculate Net Promoter Score."""
nps_question = "q11"
scores = [
r.answers[nps_question]
for r in self.responses
if nps_question in r.answers
]
if not scores:
return {"nps": 0, "promoters": 0, "passives": 0, "detractors": 0}
promoters = sum(1 for s in scores if s >= 9)
passives = sum(1 for s in scores if 7 <= s <= 8)
detractors = sum(1 for s in scores if s <= 6)
total = len(scores)
nps = ((promoters - detractors) / total) * 100
return {
"nps": nps,
"promoters": promoters / total * 100,
"passives": passives / total * 100,
"detractors": detractors / total * 100,
}
def calculate_category_scores(self) -> Dict[SurveyCategory, float]:
"""Calculate average score by category."""
category_scores: Dict[SurveyCategory, List[float]] = {
cat: [] for cat in SurveyCategory
}
for response in self.responses:
for q_id, score in response.answers.items():
if q_id in self.questions_by_id:
q = self.questions_by_id[q_id]
# Normalize to 0-100 scale
normalized = (score - q.scale_min) / (q.scale_max - q.scale_min) * 100
category_scores[q.category].append(normalized)
return {
cat: statistics.mean(scores) if scores else 0
for cat, scores in category_scores.items()
}
def calculate_team_scores(self) -> Dict[str, float]:
"""Calculate average score by team."""
team_scores: Dict[str, List[float]] = {}
for response in self.responses:
if response.team not in team_scores:
team_scores[response.team] = []
# Calculate average for this response
scores = list(response.answers.values())
if scores:
team_scores[response.team].append(statistics.mean(scores))
return {
team: statistics.mean(scores)
for team, scores in team_scores.items()
}
def identify_improvement_areas(self) -> List[tuple]:
"""Identify lowest scoring areas."""
question_scores: Dict[str, List[int]] = {}
for response in self.responses:
for q_id, score in response.answers.items():
if q_id not in question_scores:
question_scores[q_id] = []
question_scores[q_id].append(score)
averages = [
(
q_id,
self.questions_by_id[q_id].text,
statistics.mean(scores),
)
for q_id, scores in question_scores.items()
if q_id in self.questions_by_id
]
# Sort by score ascending (lowest first)
return sorted(averages, key=lambda x: x[2])[:5]
def generate_report(self) -> str:
"""Generate survey analysis report."""
lines = [
"Developer Experience Survey Report",
f"Responses: {len(self.responses)}",
"=" * 50,
"",
]
# NPS
nps = self.calculate_nps()
lines.append(f"Net Promoter Score: {nps['nps']:.0f}")
lines.append(f" Promoters: {nps['promoters']:.1f}%")
lines.append(f" Passives: {nps['passives']:.1f}%")
lines.append(f" Detractors: {nps['detractors']:.1f}%")
lines.append("")
# Category Scores
lines.append("Category Scores (0-100):")
for cat, score in self.calculate_category_scores().items():
            emoji = "🟢" if score >= 70 else "🟡" if score >= 50 else "🔴"
lines.append(f" {emoji} {cat.value}: {score:.1f}")
lines.append("")
# Improvement Areas
lines.append("Top 5 Improvement Areas:")
for q_id, text, score in self.identify_improvement_areas():
lines.append(f" - {text}: {score:.2f}/5")
return "\n".join(lines)
# Example usage
survey = DeveloperExperienceSurvey()
# Add sample responses
for i in range(50):
survey.add_response(SurveyResponse(
respondent_id=f"dev-{i}",
team=["checkout", "payments", "inventory"][i % 3],
timestamp=datetime.now(),
answers={
"q1": 4, "q2": 3, "q3": 4, "q4": 3, "q5": 4,
"q6": 3, "q7": 4, "q8": 4, "q9": 3, "q10": 4,
"q11": 8, "q12": 4,
},
))
print(survey.generate_report())
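With the uniform sample answers above, every respondent gives the recommendation question an 8 and therefore counts as a passive, so the NPS comes out at 0 even though the category scores look reasonably healthy; the improvement list surfaces the questions scored 3 out of 5 (portal findability, documentation freshness, deployment feedback speed, and self-service completion). Trends across repeated survey rounds matter more than any single absolute score.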