Part 7: Capacity Planning, Performance, and Chaos Engineering
Finding Limits Before Users Do
Load Testing with k6
Order Creation Load Test
// tests/load/order-creation.js
import http from 'k6/http';
import { check, sleep } from 'k6';
import { Rate, Trend } from 'k6/metrics';
// Custom metrics fed by the default function on every iteration.
// Trend of order-request durations; the second argument flags the samples as time values.
const orderDuration = new Trend('order_duration', true);
// Rate of iterations whose order-creation checks did not all pass.
const orderErrors = new Rate('order_errors');
// Load profile: climb to the peak VU count over 2 minutes, hold it for 5 minutes,
// then drain back to zero over 1 minute.
const PEAK_VUS = 100;

export const options = {
  // Each [duration, target] pair becomes one k6 stage object.
  stages: [
    ['2m', PEAK_VUS], // Ramp up
    ['5m', PEAK_VUS], // Steady state
    ['1m', 0], // Ramp down
  ].map(([duration, target]) => ({ duration, target })),
  // Pass/fail criteria evaluated over the whole run.
  thresholds: {
    http_req_failed: ['rate<0.01'], // fewer than 1% of HTTP requests may fail
    http_req_duration: ['p(99)<500'], // 99th percentile latency under 500ms
    order_errors: ['rate<0.005'], // fewer than 0.5% of iterations may fail their order checks
  },
};
// Root URL of the API under test. Taken from the BASE_URL environment variable,
// falling back to staging when it is unset or empty. Trailing slashes are stripped
// so that `${BASE_URL}/path` never produces a double slash.
const BASE_URL = (__ENV.BASE_URL || 'https://api.staging.go-reliable.dev').replace(/\/+$/, '');
/**
 * Authenticates once before the load starts and hands the token to every VU.
 *
 * Posts CLIENT_ID / CLIENT_SECRET (from the environment) to `${BASE_URL}/auth/token`.
 *
 * @returns {{ token: string }} Data object passed to the default function.
 * @throws {Error} If the auth endpoint does not answer 200 or the response carries
 *   no usable `access_token`. Failing here stops the run up front instead of
 *   letting every VU send `Bearer undefined` and drown the results in auth errors.
 */
export function setup() {
  const credentials = JSON.stringify({
    client_id: __ENV.CLIENT_ID,
    client_secret: __ENV.CLIENT_SECRET,
  });
  const res = http.post(`${BASE_URL}/auth/token`, credentials, {
    headers: { 'Content-Type': 'application/json' },
  });

  // Still recorded as a check so the outcome shows up in the summary.
  const authenticated = check(res, { 'auth succeeded': (r) => r.status === 200 });
  if (!authenticated) {
    throw new Error(`Authentication failed: POST ${BASE_URL}/auth/token returned status ${res.status}`);
  }

  const token = res.json('access_token');
  if (typeof token !== 'string' || token === '') {
    throw new Error('Authentication response did not contain an access_token');
  }

  return { token };
}
export default function (data) {
const payload = JSON.stringify({
amount: Math.floor(Math.random() * 10000) + 100, // Random amount 100-10100 cents
currency: 'USD',
});
const params = {
headers: {
'Content-Type': 'application/json',
'Authorization': `Bearer ${data.token}`,
},
};
const res = http.post(`${BASE_URL}/api/v1/orders`, payload, params);
const success = check(res, {
'status is 201': (r) => r.status === 201,
'has order id': (r) => r.json('id') !== null,
'latency < 300ms': (r) => r.timings.duration < 300,
});
orderErrors.add(!success);
orderDuration.add(res.timings.duration);
sleep(0.5); // Think time between requests per VU
}What the Load Test Revealed
Profiling Go Services in Production
Capturing Profiles
What pprof Found
Custom HPA for the Notification Worker
Prometheus Adapter Configuration
HPA Using Custom Metric
Chaos Engineering with Chaos Mesh
Installing Chaos Mesh
Experiment 1: Order Service Pod Failure
Experiment 2: Network Partition to Database
Experiment 3: CPU Stress on Notification Worker
Capacity Planning Summary
| Service | Max Sustained RPS | Bottleneck | Mitigation |
Previous: Part 6: Incident Management and On-Call Automation
Next: Part 8: MLOps with Kubeflow — Training Pipelines on Kubernetes
Last updated