CI/CD for ML
Why ML Needs CI/CD
ML Testing Pyramid
Level 1: Unit Tests
# tests/test_preprocessing.py (Python 3.12)
import pytest
import numpy as np
import pandas as pd
from src.preprocessing import clean_data, engineer_features
def test_clean_data_removes_nulls():
    """Check that clean_data leaves no nulls and keeps only complete rows."""
    raw = pd.DataFrame({
        'feature1': [1, 2, None, 4],
        'feature2': [5, None, 7, 8]
    })

    cleaned = clean_data(raw)

    remaining_nulls = cleaned.isnull().sum().sum()
    assert remaining_nulls == 0
    # Rows 0 and 3 are the only ones with a value in both columns.
    assert len(cleaned) == 2
def test_clean_data_handles_empty_input():
    """Edge case: an empty DataFrame in must give zero rows out."""
    cleaned = clean_data(pd.DataFrame())
    assert len(cleaned) == 0
def test_engineer_features_creates_expected_columns():
    """Check that engineer_features adds total_value equal to price * quantity."""
    frame = pd.DataFrame({
        'price': [100, 200, 300],
        'quantity': [2, 3, 4]
    })

    featured = engineer_features(frame)

    assert 'total_value' in featured.columns
    # Computed after the call, from the input frame, exactly as the check needs.
    expected_total = frame['price'] * frame['quantity']
    assert (featured['total_value'] == expected_total).all()
def test_engineer_features_handles_zero_values():
"""Test division by zero handling."""
df = pd.DataFrame({
'numerator': [10, 20],
'denominator': [0, 5]
})
result = engineer_features(df)
assert not result['ratio'].isnull().any()Level 2: Integration Tests
Level 3: Model Validation Tests
Level 4: Data Validation Tests
GitHub Actions CI/CD Pipeline
Workflow File
Pre-commit Hooks
Best Practices
1. Test Data Fixtures
2. Separate Test Requirements
3. Use Environment Variables for Config
4. Version Everything
Key Takeaways
Next Steps
Last updated