# pipelines/training_pipeline.py
from kfp import dsl
from kfp.dsl import Output, Model, Dataset, Metrics
@dsl.component(
    base_image='python:3.12-slim',
    packages_to_install=[
        'pandas==2.1.0',
        'scikit-learn==1.3.0',
        'joblib==1.3.2'
    ]
)
def load_and_validate_data(
    data_url: str,
    output_data: Output[Dataset]
):
    """Load, validate, and clean the raw sentiment dataset.

    Args:
        data_url: URL or path readable by ``pandas.read_csv``.
        output_data: Output artifact that receives the cleaned CSV.

    Raises:
        ValueError: If required columns are missing, or no rows
            remain after cleaning.
    """
    import pandas as pd
    from pathlib import Path

    # Download data
    df = pd.read_csv(data_url)

    # Validate schema explicitly. A bare `assert` is stripped under
    # `python -O`, so raise a real exception naming the missing columns.
    required_columns = ['text', 'sentiment']
    missing = [col for col in required_columns if col not in df.columns]
    if missing:
        raise ValueError(f"Input data missing required columns: {missing}")

    # Clean: drop duplicate texts and empty/whitespace-only texts.
    df = df.drop_duplicates(subset=['text'])
    df = df[df['text'].str.strip() != '']
    if df.empty:
        # Fail fast rather than handing downstream training an empty CSV.
        raise ValueError("No rows remaining after cleaning")

    # Save
    output_path = Path(output_data.path)
    df.to_csv(output_path, index=False)
    print(f"Processed {len(df)} rows")
@dsl.component(
    base_image='python:3.12-slim',
    packages_to_install=['pandas==2.1.0', 'scikit-learn==1.3.0', 'joblib==1.3.2']
)
def train_model(
    input_data: dsl.Input[Dataset],
    model_output: Output[Model],
    metrics: Output[Metrics],
    C: float = 1.0
):
    """Train a TF-IDF + logistic-regression sentiment classifier.

    Args:
        input_data: Cleaned dataset with 'text' and 'sentiment' columns.
        model_output: Artifact receiving the joblib-dumped bundle of
            model and fitted vectorizer.
        metrics: KFP metrics artifact for accuracy logging.
        C: Inverse regularization strength for LogisticRegression
            (kept capitalized for backward compatibility with callers).
    """
    import pandas as pd
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import train_test_split
    from sklearn.metrics import accuracy_score
    import joblib
    from pathlib import Path

    # Load data
    df = pd.read_csv(input_data.path)

    # Features: unigrams + bigrams with a capped vocabulary.
    vectorizer = TfidfVectorizer(max_features=10000, ngram_range=(1, 2))
    X = vectorizer.fit_transform(df['text'])
    y = df['sentiment']

    # Split (stratified so class balance is preserved in both splits).
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    # Train
    model = LogisticRegression(C=C, max_iter=1000, random_state=42)
    model.fit(X_train, y_train)

    # Evaluate. Cast to plain float: accuracy_score can return numpy
    # scalars, which are not JSON-serializable when KFP persists
    # metrics and artifact metadata.
    train_acc = float(accuracy_score(y_train, model.predict(X_train)))
    test_acc = float(accuracy_score(y_test, model.predict(X_test)))
    print(f"Train Accuracy: {train_acc:.3f}")
    print(f"Test Accuracy: {test_acc:.3f}")

    # Log metrics
    metrics.log_metric('train_accuracy', train_acc)
    metrics.log_metric('test_accuracy', test_acc)
    metrics.log_metric('n_samples', len(df))

    # Save model and vectorizer together so inference uses the exact
    # vocabulary the model was trained on.
    model_data = {
        'model': model,
        'vectorizer': vectorizer
    }
    joblib.dump(model_data, Path(model_output.path))

    # Metadata consumed downstream by validate_model.
    model_output.metadata['accuracy'] = test_acc
    model_output.metadata['framework'] = 'sklearn'
@dsl.component(
    base_image='python:3.12-slim',
    packages_to_install=['scikit-learn==1.3.0', 'joblib==1.3.2']
)
def validate_model(
    model: dsl.Input[Model],
    accuracy_threshold: float = 0.85
) -> str:
    """Gate the pipeline on model quality.

    Reads the accuracy recorded in the model artifact's metadata and
    fails the step when it falls below ``accuracy_threshold``.
    """
    recorded_accuracy = model.metadata.get('accuracy', 0)
    if recorded_accuracy >= accuracy_threshold:
        print(f"Model validation passed: {recorded_accuracy:.3f}")
        return "PASSED"
    raise ValueError(
        f"Model accuracy {recorded_accuracy:.3f} below threshold {accuracy_threshold}"
    )
@dsl.pipeline(
    name='sentiment-training-pipeline',
    description='End-to-end sentiment model training'
)
def training_pipeline(
    data_url: str = 's3://my-bucket/reviews.csv',
    model_C: float = 1.0,
    accuracy_threshold: float = 0.85
) -> Model:
    """Complete training pipeline: load -> train -> validate.

    Args:
        data_url: Location of the raw reviews CSV.
        model_C: Inverse regularization strength passed to train_model.
        accuracy_threshold: Minimum test accuracy for validation to pass.

    Returns:
        The trained model artifact produced by train_model.
    """
    # Load and validate data
    data_task = load_and_validate_data(data_url=data_url)

    # Train model
    train_task = train_model(
        input_data=data_task.outputs['output_data'],
        C=model_C
    )

    # Validate model quality. NOTE(review): this gate runs as a sibling
    # of the returned output — it fails the run but does not block the
    # model artifact from being surfaced; confirm that is intended.
    validate_model(
        model=train_task.outputs['model_output'],
        accuracy_threshold=accuracy_threshold
    )

    # The KFP v2 compiler requires a return annotation (added above as
    # `-> Model`) for a pipeline that surfaces an output; the previous
    # un-annotated bare return failed compilation.
    return train_task.outputs['model_output']
if __name__ == '__main__':
    # Compile the pipeline definition into a deployable YAML package.
    from kfp import compiler

    pipeline_compiler = compiler.Compiler()
    pipeline_compiler.compile(
        pipeline_func=training_pipeline,
        package_path='sentiment_pipeline.yaml'
    )
    print("Pipeline compiled successfully")