# Part 3: OOP, Dataclasses and Protocols

## Introduction

`ansible-inspec` has two major subsystems that need to share a common interface: the **Ansible adapter** (which builds playbooks and inventories) and the **InSpec adapter** (which parses Ruby-based profiles). Both must be swappable for testing purposes — I can inject a fake adapter in tests without touching real inventory files or Ruby parsers.

That design rests on three Python OOP features: `@dataclass`, `Protocol`, and standard class inheritance. This part covers each in turn.

***

## Classes and `__init__`

The fundamental building block:

```python
class ProfileConverter:
    """Converts an InSpec profile directory into an Ansible collection.

    Controls are gathered from every ``.rb`` file found under
    ``profile_path`` and handed back by :meth:`convert`.
    """

    # Class variable — shared across all instances, so treat it as read-only
    supported_resources: list[str] = [
        "file", "service", "package",
        "security_policy", "registry_key", "audit_policy",
    ]

    def __init__(self, profile_path: str, namespace: str = "local") -> None:
        """Store the settings; nothing is read from disk until ``load()``.

        Args:
            profile_path: Root directory of the InSpec profile.
            namespace: Namespace reported in the converted structure.
        """
        self.profile_path = profile_path
        self.namespace = namespace
        self._controls: list[dict] = []   # private by convention

    def load(self) -> None:
        """Parse controls from the profile directory.

        Calling this again re-reads the directory instead of appending a
        second copy of every control, and files are visited in sorted order
        so the resulting control list is stable between runs.
        """
        import os

        # Rebind rather than clear(): a list already returned by convert()
        # must not change under the caller's feet.
        self._controls = []
        for root, dirs, files in os.walk(self.profile_path):
            dirs.sort()  # in-place sort tells os.walk which order to descend in
            for name in sorted(files):
                if name.endswith(".rb"):
                    self._parse_file(os.path.join(root, name))

    def _parse_file(self, path: str) -> None:
        """Internal — parse a single .rb control file."""
        # Explicit encoding: the platform default is locale-dependent.
        with open(path, encoding="utf-8") as f:
            content = f.read()
        # Simplified: real impl does Ruby AST parsing
        self._controls.append({"path": path, "raw": content})

    def convert(self) -> dict:
        """Return the Ansible collection structure as a dict.

        Loads the profile first if no controls have been parsed yet.
        """
        if not self._controls:
            self.load()
        return {
            "namespace": self.namespace,
            "controls": self._controls,
        }
```

***

## Inheritance

The converter has multiple output targets. Inheritance lets us share the parsing logic:

```python
class BaseConverter:
    """Shared loading logic for converters; subclasses supply ``convert()``."""

    def __init__(self, profile_path: str) -> None:
        """Remember the profile directory and start with no parsed controls."""
        self.profile_path = profile_path
        self._controls: list[dict] = []

    def load(self) -> None:
        """Read every ``.rb`` file under ``profile_path`` into ``_controls``.

        A repeated call re-reads the directory rather than appending
        duplicates, and files are visited in sorted order so the control
        list is the same on every run.
        """
        import os

        # Rebind instead of clear() so lists handed out earlier stay intact.
        self._controls = []
        for root, dirs, files in os.walk(self.profile_path):
            dirs.sort()  # in-place sort fixes the order os.walk descends in
            for name in sorted(files):
                if name.endswith(".rb"):
                    self._parse_file(os.path.join(root, name))

    def _parse_file(self, path: str) -> None:
        """Append one control file's path and raw text to ``_controls``."""
        # Explicit encoding: the platform default is locale-dependent.
        with open(path, encoding="utf-8") as f:
            self._controls.append({"path": path, "raw": f.read()})

    def convert(self) -> dict:
        """Produce the converted structure; must be overridden.

        Raises:
            NotImplementedError: always, on the base class.
        """
        raise NotImplementedError("Subclasses must implement convert()")


class AnsibleCollectionConverter(BaseConverter):
    """Describes the parsed controls as an Ansible collection."""

    def __init__(self, profile_path: str, namespace: str, collection_name: str) -> None:
        # The base class sets up profile_path and the empty control list.
        super().__init__(profile_path)
        self.collection_name = collection_name
        self.namespace = namespace

    def convert(self) -> dict:
        """Return the collection description, loading controls on first use."""
        nothing_loaded = not self._controls
        if nothing_loaded:
            self.load()
        collection: dict = {"type": "ansible_collection"}
        collection["namespace"] = self.namespace
        collection["name"] = self.collection_name
        collection["roles"] = self._build_roles()
        return collection

    def _build_roles(self) -> list[dict]:
        """Wrap all controls in a single ``compliance_check`` role."""
        role = {"name": "compliance_check", "tasks": self._controls}
        return [role]


class JSONExporter(BaseConverter):
    """Exposes the parsed controls as a plain ``json_export`` dict."""

    def convert(self) -> dict:
        """Return the export dict, loading controls on first use."""
        if self._controls:
            controls = self._controls
        else:
            self.load()
            controls = self._controls
        return {"type": "json_export", "controls": controls}
```

### `super()` — calling parent methods

```python
class TimedConverter(AnsibleCollectionConverter):
    """Adds the time spent converting to the parent's result."""

    def convert(self) -> dict:
        """Run the parent's ``convert()`` and record how long it took.

        Returns:
            The parent's dict with an extra ``conversion_time_seconds``
            float entry.
        """
        # perf_counter() is monotonic; datetime.now() can jump when the
        # system clock is adjusted and would then report a bogus duration.
        from time import perf_counter

        start = perf_counter()
        result = super().convert()           # call parent's convert()
        result["conversion_time_seconds"] = perf_counter() - start
        return result
```

***

## `@dataclass`

`@dataclass` auto-generates `__init__`, `__repr__`, and `__eq__` from field annotations. I use dataclasses for lightweight value objects that don't need Pydantic's validation overhead:

```python
from dataclasses import dataclass, field
from datetime import datetime

@dataclass
class ControlResult:
    control_id: str
    title: str
    status: str
    message: str | None = None

@dataclass
class HostReport:
    """Control results collected for a single host."""

    hostname: str
    # default_factory gives every report its own list and timestamp
    controls: list[ControlResult] = field(default_factory=list)
    checked_at: datetime = field(default_factory=datetime.now)

    def _with_status(self, wanted: str) -> list[ControlResult]:
        """Collect the controls whose status equals ``wanted``, in order."""
        matches = []
        for result in self.controls:
            if result.status == wanted:
                matches.append(result)
        return matches

    def passed(self) -> list[ControlResult]:
        """Controls whose status is ``"passed"``."""
        return self._with_status("passed")

    def failed(self) -> list[ControlResult]:
        """Controls whose status is ``"failed"``."""
        return self._with_status("failed")

    def summary(self) -> dict[str, int]:
        """Count passed, failed and total controls.

        ``total`` includes controls that are neither passed nor failed.
        """
        passed_count = len(self.passed())
        failed_count = len(self.failed())
        return {
            "passed": passed_count,
            "failed": failed_count,
            "total": len(self.controls),
        }


# Usage
report = HostReport(hostname="web-01")
# The default_factory list starts empty, so results are added afterwards.
report.controls.extend([
    ControlResult("sshd-01", "PermitRootLogin", "passed"),
    ControlResult("pkg-01", "telnet not installed", "failed",
                  message="telnetd 0.17 is installed"),
])
print(report.summary())  # {'passed': 1, 'failed': 1, 'total': 2}
```

### `@dataclass(frozen=True)` — immutable data

```python
from dataclasses import dataclass

@dataclass(frozen=True)
class ConnectionKey:
    """Immutable key used for connection pooling.

    ``frozen=True`` blocks attribute assignment and, together with the
    generated ``__eq__``, makes instances hashable.
    """

    host: str
    port: int
    user: str

    def uri(self) -> str:
        """Render the key as ``ssh://user@host:port``."""
        authority = f"{self.user}@{self.host}"
        return f"ssh://{authority}:{self.port}"

key = ConnectionKey(host="web-01", port=22, user="deploy")
# key.host = "web-02"  # raises FrozenInstanceError
```

### `@dataclass` vs Pydantic

|                | `@dataclass`               | `Pydantic BaseModel`                        |
| -------------- | -------------------------- | ------------------------------------------- |
| Validation     | None (you add it manually) | Automatic on instantiation (on assignment only with `validate_assignment=True`) |
| JSON serialise | Manual                     | `.model_dump_json()`                        |
| Performance    | Very fast                  | Slightly more overhead (still fast with v2) |
| Use for        | Internal value objects     | API request/response, config                |

***

## `Protocol` — Structural Subtyping

`Protocol` is Python's duck-typing formalised. Instead of requiring classes to inherit from a common base, you define the *interface* they must match:

```python
from typing import Protocol, runtime_checkable

# Any class with these methods satisfies the protocol
@runtime_checkable
class InventoryLoader(Protocol):
    """Structural interface for inventory sources.

    ``@runtime_checkable`` enables ``isinstance`` checks against this
    protocol; those checks only confirm that the methods exist, not that
    their signatures match.
    """

    def load(self, path: str) -> dict[str, list[str]]:
        """Return the inventory for ``path`` as lists of host names by key."""
        ...

    def validate(self) -> bool:
        """Report whether the loader is fit to be used."""
        ...


class AnsibleInventoryLoader:
    """Reads Ansible YAML inventory."""

    def load(self, path: str) -> dict[str, list[str]]:
        """Return ``{"hosts": [...]}`` with the names listed under ``all.hosts``.

        An empty file, or an inventory whose ``all`` / ``hosts`` entry is
        missing or null, yields an empty host list instead of raising.
        Only the top-level ``all.hosts`` mapping is read.
        """
        import yaml
        # Explicit encoding: the platform default is locale-dependent.
        with open(path, encoding="utf-8") as f:
            raw = yaml.safe_load(f)
        return {"hosts": self._host_names(raw)}

    @staticmethod
    def _host_names(raw) -> list[str]:
        """Extract the ``all.hosts`` keys from a parsed inventory document."""
        # safe_load() returns None for an empty document, and a YAML key
        # with no value ("hosts:") also parses to None — hence the "or {}".
        all_group = (raw or {}).get("all") or {}
        hosts = all_group.get("hosts") or {}
        return list(hosts.keys())

    def validate(self) -> bool:
        """Always reports the loader as valid (simplified)."""
        return True  # simplified


class StaticInventoryLoader:
    """Test double — serves a fixed host list and never touches the disk."""

    def __init__(self, hosts: list[str]) -> None:
        self._hosts = hosts  # kept by reference, not copied

    def load(self, path: str) -> dict[str, list[str]]:
        """Return the fixed inventory; ``path`` is accepted but ignored."""
        inventory = {}
        inventory["hosts"] = self._hosts
        return inventory

    def validate(self) -> bool:
        """Valid only when at least one host was supplied."""
        host_count = len(self._hosts)
        return host_count > 0


# A function that accepts any InventoryLoader
def run_compliance(
    loader: InventoryLoader,
    inventory_path: str,
    profile: str,
) -> dict:
    """Validate the loader, load the inventory and pair it with ``profile``.

    Returns:
        ``{"profile": profile, "hosts": <the loader's "hosts" entry>}``.

    Raises:
        ValueError: if ``loader.validate()`` is falsy; nothing is loaded.
    """
    loader_ok = loader.validate()
    if loader_ok:
        hosts = loader.load(inventory_path)["hosts"]
        return {"profile": profile, "hosts": hosts}
    raise ValueError("Invalid inventory loader")


# Both work — Protocol is satisfied structurally, no inheritance needed
real_loader = AnsibleInventoryLoader()
test_loader = StaticInventoryLoader(hosts=["web-01", "web-02"])

# Neither class inherits from InventoryLoader; having both methods is enough.
print(isinstance(real_loader, InventoryLoader))   # True (runtime_checkable)
print(isinstance(test_loader, InventoryLoader))   # True
```

This is exactly how `ansible-inspec` separates the real Ansible adapter from test doubles.

***

## Abstract Base Classes (ABC)

When you want to *enforce* that subclasses implement certain methods — so that an incomplete subclass fails as soon as it is instantiated, not later when the missing method is first called — use `ABC`:

```python
from abc import ABC, abstractmethod

class BaseReporter(ABC):
    """Base for reporters: subclasses supply ``render`` and ``save``."""

    @abstractmethod
    def render(self, results: list[HostReport]) -> str:
        """Turn the host reports into the text to be output."""

    @abstractmethod
    def save(self, output: str, path: str) -> None:
        """Persist already-rendered ``output`` at ``path``."""

    # Concrete method shared by all reporters
    def report(self, results: list[HostReport], path: str) -> None:
        """Render ``results`` and pass the text straight to ``save``."""
        self.save(self.render(results), path)


class JSONReporter(BaseReporter):
    """Reporter that writes one JSON object per host to a file."""

    def render(self, results: list[HostReport]) -> str:
        """Serialise each host's name and summary as indented JSON."""
        import json
        payload = []
        for host_report in results:
            payload.append(
                {"host": host_report.hostname, "summary": host_report.summary()}
            )
        return json.dumps(payload, indent=2)

    def save(self, output: str, path: str) -> None:
        """Write ``output`` to ``path``, replacing any existing file."""
        with open(path, "w") as handle:
            handle.write(output)


class CLIReporter(BaseReporter):
    """Reporter that prints one summary line per host."""

    def render(self, results: list[HostReport]) -> str:
        """Build the newline-joined ``host: N passed, M failed`` lines."""
        def describe(host_report: HostReport) -> str:
            counts = host_report.summary()
            return (
                f"{host_report.hostname}: {counts['passed']} passed, "
                f"{counts['failed']} failed"
            )

        return "\n".join(describe(host_report) for host_report in results)

    def save(self, output: str, path: str) -> None:
        """Print ``output``; ``path`` is ignored."""
        print(output)   # CLI reporter just prints
```

### ABC vs Protocol

|                      | `ABC`                                              | `Protocol`                                          |
| -------------------- | -------------------------------------------------- | --------------------------------------------------- |
| Enforcement          | At instantiation (`TypeError` if an abstract method is missing) | At type-check time (mypy/pyright)          |
| Inheritance required | Yes                                                | No — structural match is enough                     |
| Runtime check        | `isinstance(x, MyABC)`                             | `isinstance(x, MyProtocol)` with `@runtime_checkable` |
| Use for              | Shared base with concrete helpers                  | Interface contract without coupling                 |

***

## Class Methods and Static Methods

```python
from dataclasses import dataclass
from datetime import datetime
import json

@dataclass
class JobTemplate:
    """A named compliance job: which profile to run and its timeout."""

    name: str
    profile: str
    timeout: int = 300

    @classmethod
    def from_dict(cls, data: dict) -> "JobTemplate":
        """Alternative constructor from a raw dict.

        Raises:
            KeyError: if ``name`` or ``profile`` is missing. ``timeout`` is
                optional and falls back to 300.
        """
        return cls(
            name=data["name"],
            profile=data["profile"],
            timeout=data.get("timeout", 300),
        )

    @classmethod
    def from_json(cls, json_str: str) -> "JobTemplate":
        """Alternative constructor from a JSON object string."""
        return cls.from_dict(json.loads(json_str))

    @staticmethod
    def is_valid_profile_name(name: str) -> bool:
        """Utility that doesn't need self or cls.

        A valid name is non-empty and made up only of word characters,
        ``-`` and ``/``.
        """
        import re
        # fullmatch instead of match(r"^...$"): "$" also matches just before
        # a trailing newline, so "name\n" used to be accepted.
        return re.fullmatch(r"[\w\-/]+", name) is not None


# "timeout" is absent from the dict, so from_dict falls back to 300.
t = JobTemplate.from_dict(dict(name="ssh-baseline", profile="dev-sec/ssh-baseline"))
print(JobTemplate.is_valid_profile_name("dev-sec/linux-baseline"))  # True
print(JobTemplate.is_valid_profile_name("bad name!"))               # False
```

***

## Properties

```python
from dataclasses import dataclass, field
from datetime import datetime

@dataclass
class JobExecution:
    job_id: str
    started_at: datetime | None = None
    finished_at: datetime | None = None
    _status: str = field(default="pending", repr=False)

    @property
    def status(self) -> str:
        return self._status

    @status.setter
    def status(self, value: str) -> None:
        allowed = {"pending", "running", "success", "failed"}
        if value not in allowed:
            raise ValueError(f"Invalid status: {value}. Must be one of {allowed}")
        self._status = value

    @property
    def duration(self) -> float | None:
        if self.started_at and self.finished_at:
            return (self.finished_at - self.started_at).total_seconds()
        return None


job = JobExecution("abc-123")
job.started_at = datetime.now()
job.status = "running"   # goes through the validating setter
# job.status = "invalid"  # raises ValueError
```

***

## Putting It Together

Here is a simplified slice of how `ansible-inspec`'s adapter layer is structured:

```python
from typing import Protocol
from dataclasses import dataclass, field

# ---- Domain types ----

@dataclass
class Profile:
    """A compliance profile: its name, location and control entries."""

    name: str
    path: str
    # default_factory gives each Profile its own list
    controls: list[str] = field(default_factory=list)

@dataclass
class CheckResult:
    """Per-host tally of control outcomes."""

    host: str
    # Counts of controls per outcome
    passed: int
    failed: int
    skipped: int

# ---- Protocols (interfaces) ----

class ProfileLoader(Protocol):
    """Interface for anything that can turn a path into a ``Profile``."""

    def load_profile(self, path: str) -> Profile:
        """Return the profile found at ``path``."""
        ...

class ComplianceRunner(Protocol):
    """Interface for anything that can check a profile against hosts."""

    def run(self, profile: Profile, hosts: list[str]) -> list[CheckResult]:
        """Return the check results for ``profile`` on ``hosts``."""
        ...

# ---- Concrete implementations ----

class LocalProfileLoader:
    """Builds a ``Profile`` from a directory on the local file system."""

    def load_profile(self, path: str) -> Profile:
        """Return a Profile named after the directory at ``path``.

        ``controls`` holds the names of the ``.rb`` entries directly inside
        ``path`` (no recursion), in sorted order.
        """
        import os
        # listdir() returns entries in arbitrary order; sort for stable output
        controls = sorted(
            f for f in os.listdir(path) if f.endswith(".rb")
        )
        # basename("profiles/ssh/") is "", so drop any trailing separator first
        name = os.path.basename(os.path.normpath(path))
        return Profile(name=name, path=path, controls=controls)

class AnsibleRunner:
    """Placeholder runner that reports every control as passed."""

    def run(self, profile: Profile, hosts: list[str]) -> list[CheckResult]:
        """Return one all-passed ``CheckResult`` per host, in input order."""
        # In real code this shells out to ansible-playbook
        outcomes = []
        for host in hosts:
            outcomes.append(
                CheckResult(
                    host=host,
                    passed=len(profile.controls),
                    failed=0,
                    skipped=0,
                )
            )
        return outcomes

# ---- Orchestrator that depends only on protocols ----

class ComplianceOrchestrator:
    """Wires a profile loader to a runner; knows only their protocols."""

    def __init__(self, loader: ProfileLoader, runner: ComplianceRunner) -> None:
        self._loader, self._runner = loader, runner

    def execute(self, profile_path: str, hosts: list[str]) -> list[CheckResult]:
        """Load the profile at ``profile_path`` and run it against ``hosts``."""
        return self._runner.run(self._loader.load_profile(profile_path), hosts)


# Production
# Real implementations: reads the profile directory, then runs the checks.
orchestrator = ComplianceOrchestrator(LocalProfileLoader(), AnsibleRunner())

# Test — inject fakes, no file system needed
class FakeLoader:
    """Test double: returns a fixed one-control profile without disk access."""

    def load_profile(self, path: str) -> Profile:
        canned_controls = ["ctrl-01"]
        return Profile(name="test", path=path, controls=canned_controls)

class FakeRunner:
    """Test double: reports one passed control for every host."""

    def run(self, profile: Profile, hosts: list[str]) -> list[CheckResult]:
        return [
            CheckResult(host=host, passed=1, failed=0, skipped=0)
            for host in hosts
        ]

test_orchestrator = ComplianceOrchestrator(loader=FakeLoader(), runner=FakeRunner())
```

***

## Summary

| Concept                   | When to use                                           |
| ------------------------- | ----------------------------------------------------- |
| Plain class               | When you need methods and shared state                |
| `@dataclass`              | Lightweight value objects; auto-generates boilerplate |
| `@dataclass(frozen=True)` | Immutable value objects, safe dict keys               |
| `ABC`                     | Shared base with enforced abstract interface          |
| `Protocol`                | Interface-only contract without inheritance coupling  |
| `@classmethod`            | Alternative constructors                              |
| `@staticmethod`           | Utility functions tied to the class logically         |
| `@property`               | Computed attributes; setters that validate on assignment |

***

## What's Next

[Part 4](https://blog.htunnthuthu.com/getting-started/programming/python-101/python-101-part-4) covers `asyncio`, `async/await`, and how `ansible-inspec`'s FastAPI server handles concurrent compliance job execution without blocking the event loop.
