{
  "id": "edge.26-04-24",
  "name": "edge.26-04-24",
  "summary": "Adds Temporal workflow orchestration, per-schema Vault roles for PostgreSQL clusters, AWS Service Quotas Automatic Management, and a new cluster reset command, alongside extensive reliability improvements to cluster and SSO installation flows.",
  "skip": false,
  "highlights": [
    "New `kube_temporal` module — deploy Temporal workflow orchestration with full Panfactum operational guarantees",
    "New `pf cluster reset` command — safely reset EKS clusters by removing default AWS resources",
    "`aws_account` now enables AWS Service Quotas Automatic Management for proactive quota monitoring and increase requests",
    "`kube_pg_cluster` gains per-schema Vault roles and automatic schema initialization — re-apply to enable new capabilities",
    "`authentik_vault_sso` now filters regex redirect URIs Vault cannot accept — re-apply `authentik_vault_sso` and `vault_auth_oidc` modules",
    "`kube_cilium` operator now runs 2 replicas on all clusters regardless of SLA target — re-apply `kube_cilium` to take effect",
    "`kube_aws_ebs_csi` PodDisruptionBudget switched to server-side apply to fix Helm race condition — re-apply `kube_aws_ebs_csi`",
    "Authentik default login session extended from 8 hours to 30 days — matches industry norms while MFA remains enforced",
    "`pf cluster add` and `pf sso add` receive major reliability improvements including bootstrap anti-affinity bypass, idempotent re-runs, and pre-flight checks"
  ],
  "changes": [
    {
      "id": "d6aa8ca8-2613-424d-9e7e-e5faf480309a",
      "type": "improvement",
      "summary": "Extended default authentication session duration from 8 hours to 30 days in `authentik_core_resources`, keeping recovery flow sessions at 8 hours.",
      "description": "Users were being logged out every 8 hours, requiring multiple daily re-authentications. The 30-day session duration aligns with industry norms (Google, GitHub) while MFA and `terminate_other_sessions` already provide sufficient security. Recovery flow sessions intentionally remain at 8 hours. The implementation splits the shared login stage into separate stages for authentication and recovery flows, with a new `recovery_session_duration` variable to keep both durations independently configurable.",
      "references": [
        {
          "type": "internal-commit",
          "summary": "feat(authentik): extend auth session to 30 days, keep recovery at 8h",
          "link": "https://github.com/Panfactum/stack/commit/5c9b2c00bde6bd370ccb1c1b7544199a6b9620a2"
        },
        {
          "type": "internal-docs",
          "summary": "authentik_core_resources module documentation",
          "link": "/docs/main/modules/authentik_core_resources/overview"
        },
        {
          "type": "external-docs",
          "summary": "Authentik User Login stage — session duration configuration",
          "link": "https://docs.goauthentik.io/add-secure-apps/flows-stages/stages/user_login/"
        }
      ],
      "impacts": [
        {
          "type": "iac-module",
          "component": "authentik_core_resources",
          "summary": "Extended default `session_duration` to 30 days and added `recovery_session_duration` variable for independent control of recovery flow session length"
        }
      ]
    },
    {
      "id": "2163b0c4-c8c8-45b7-be35-37f0bdd9f24a",
      "type": "update",
      "summary": "Updated Panfactum container image tag in `kube_constants` to `e61269a`, refreshing `panfactum`, `vault`, `bastion`, and `pvc-autoresizer` images.",
      "description": "The `kube_constants` module holds the canonical Panfactum image tag that all other infrastructure modules use when referencing Panfactum-built container images (devShell, Vault sidecar, bastion, and pvc-autoresizer). Bumping this tag to `e61269a` picks up CLI refactoring work that encodes devshell requirements in the command type hierarchy, eliminating a class of runtime crashes in CI environments, along with several other bug fixes merged since the previous tag.",
      "references": [
        {
          "type": "internal-commit",
          "summary": "chore(kube_constants): bump kube image tag to e61269a",
          "link": "https://github.com/Panfactum/stack/commit/eaf701cf3b45b28fe4e35bb3e1df1f7aca679e98"
        },
        {
          "type": "internal-docs",
          "summary": "kube_constants module documentation",
          "link": "/docs/main/reference/infrastructure-modules/direct/kubernetes/kube_constants"
        }
      ],
      "impacts": [
        {
          "type": "iac-module",
          "component": "kube_constants",
          "summary": "Updated pinned image tag to `e61269a` for all Panfactum-built container images (panfactum devShell, vault sidecar, bastion, and pvc-autoresizer)"
        }
      ]
    },
    {
      "id": "bf22b964-8004-4622-ae6f-40e7776c8255",
      "type": "fix",
      "summary": "Fixed `wf_dockerfile_build` `scale-buildkit` steps crashing with \"Devshell configuration file does not exist\" inside CI containers without a `panfactum.yaml`.",
      "description": "Several CLI commands that do not require devshell configuration were incorrectly typed as `PanfactumCommand<PanfactumContext>`, which exposes `devshellConfig` on `this.context`. TypeScript therefore accepted accesses to `devshellConfig` inside these commands without complaint. The production failure surfaced in `wf_dockerfile_build`: the `scale-buildkit` Argo Workflows step called `devshellConfig.repo_root` at runtime inside a CI container where no `panfactum.yaml` exists. Argo reported the step as successful, leaving the subsequent `build` step to fail with a confusing \"Devshell configuration file does not exist\" error that was difficult to trace back to its root cause. The fix splits `PanfactumCommand` into two base classes: `PanfactumLightCommand<PanfactumBaseContext>` for commands that do not need devshell config, and `PanfactumCommand<PanfactumContext>` for commands that do. Utility functions that previously read `context.devshellConfig.repo_root` now receive an explicit `workingDirectory` parameter instead. Any future access to `devshellConfig` from a light command is now a compile-time error rather than a silent runtime crash.",
      "references": [
        {
          "type": "internal-commit",
          "summary": "refactor(cli): encode devshell requirement in command type hierarchy",
          "link": "https://github.com/Panfactum/stack/commit/e61269a70c071f8c41f16647f735c90bd372d6de"
        },
        {
          "type": "internal-docs",
          "summary": "`wf_dockerfile_build` module documentation",
          "link": "/docs/main/modules/wf_dockerfile_build/overview"
        }
      ],
      "impacts": [
        {
          "type": "cli",
          "component": "buildkit clear-cache",
          "summary": "Migrated to `PanfactumLightCommand`; no longer has unsafe access to `devshellConfig`"
        },
        {
          "type": "cli",
          "component": "buildkit get-address",
          "summary": "Migrated to `PanfactumLightCommand`; `workingDirectory` now passed explicitly instead of reading from `devshellConfig`"
        },
        {
          "type": "cli",
          "component": "buildkit record-build",
          "summary": "Migrated to `PanfactumLightCommand`; `workingDirectory` now passed explicitly instead of reading from `devshellConfig`"
        },
        {
          "type": "cli",
          "component": "buildkit scale up",
          "summary": "Migrated to `PanfactumLightCommand`; `workingDirectory` now passed explicitly instead of reading from `devshellConfig`"
        },
        {
          "type": "cli",
          "component": "buildkit suspend",
          "summary": "Migrated to `PanfactumLightCommand`; `workingDirectory` now passed explicitly instead of reading from `devshellConfig`"
        },
        {
          "type": "cli",
          "component": "iac update-module-status",
          "summary": "Migrated to `PanfactumLightCommand`; no longer has unsafe access to `devshellConfig`"
        },
        {
          "type": "cli",
          "component": "kube disable-disruptions",
          "summary": "Migrated to `PanfactumLightCommand`; no longer has unsafe access to `devshellConfig`"
        },
        {
          "type": "cli",
          "component": "kube enable-disruptions",
          "summary": "Migrated to `PanfactumLightCommand`; no longer has unsafe access to `devshellConfig`"
        },
        {
          "type": "cli",
          "component": "k8s velero snapshot-gc",
          "summary": "Migrated to `PanfactumLightCommand`; no longer has unsafe access to `devshellConfig`"
        },
        {
          "type": "cli",
          "component": "util get-commit-hash",
          "summary": "Migrated to `PanfactumLightCommand`; `workingDirectory` now passed explicitly instead of reading from `devshellConfig`"
        },
        {
          "type": "cli",
          "component": "util get-module-hash",
          "summary": "Migrated to `PanfactumLightCommand`; `workingDirectory` now passed explicitly instead of reading from `devshellConfig`"
        },
        {
          "type": "cli",
          "component": "wf git-checkout",
          "summary": "Migrated to `PanfactumLightCommand`; `workingDirectory` now passed explicitly instead of reading from `devshellConfig`"
        },
        {
          "type": "cli",
          "component": "wf sops-set-profile",
          "summary": "Migrated to `PanfactumLightCommand`; no longer has unsafe access to `devshellConfig`"
        }
      ]
    },
    {
      "id": "5834f0e7-5fa5-4adf-9464-afc0b2822691",
      "type": "fix",
      "summary": "Fixed `kube_argo` Workflows executor containers hitting memory limits by increasing resource allocations",
      "description": "The Argo Workflows executor (`argoexec`) containers were experiencing out-of-memory (OOM) failures when running workflow tasks that required more memory than the previous `50Mi`/`70Mi` allocation. This caused workflow executions to fail unpredictably, especially for tasks with higher memory requirements. Increasing the memory request to `75Mi` and limit to `100Mi` provides adequate headroom for typical exec container usage patterns while maintaining resource efficiency.",
      "references": [
        {
          "type": "internal-commit",
          "summary": "fix(kube_argo): increase argo exec container memory limits",
          "link": "https://github.com/Panfactum/stack/commit/6821b621005e6a52e5272c7c164ec934ba70ec44"
        },
        {
          "type": "internal-docs",
          "summary": "kube_argo module documentation",
          "link": "/docs/main/reference/infrastructure-modules/kubernetes/kube_argo"
        }
      ],
      "impacts": [
        {
          "type": "iac-module",
          "component": "kube_argo",
          "summary": "Increased executor container memory request from `50Mi` to `75Mi` and limit from `70Mi` to `100Mi` to prevent OOM failures"
        }
      ]
    },
    {
      "id": "1c8dbffd-3d29-47bf-a582-608e0eb3ca89",
      "type": "fix",
      "summary": "Fixed `authentik_vault_sso` module passing regex redirect URIs to `vault_auth_oidc`, causing silent misconfigurations since Vault does not support regex patterns.",
      "description": "When `authentik_vault_sso` was updated to support regex redirect URIs for Authentik's pattern-matching, it began passing all URIs — including regex patterns — directly to `vault_auth_oidc`. Vault's `allowed_redirect_uris` requires exact string matches and does not support regex, so any regex pattern silently caused authentication failures. Two changes close this boundary: the `oidc_redirect_uris` output on `authentik_vault_sso` now returns only strict-mode URIs suitable for Vault, and a new `oidc_redirect_uris_including_regexes` output provides the full set for OIDC providers that do support regex matching (such as those used by ArgoCD or Grafana). Additionally, `vault_auth_oidc` now hardcodes `http://localhost:8250/oidc/callback` internally, since Vault handles port-agnostic localhost matching natively per RFC 8252 — callers no longer need to supply this URI manually.",
      "action_items": [
        "Re-apply the `authentik_vault_sso` module to update redirect URI filtering",
        "Re-apply the `vault_auth_oidc` module to get the built-in localhost callback URI",
        "If you pass `authentik_vault_sso`'s `oidc_redirect_uris` output to any non-Vault OIDC provider (e.g. ArgoCD, Grafana), switch to `oidc_redirect_uris_including_regexes` to continue receiving the full URI set including regex patterns"
      ],
      "references": [
        {
          "type": "internal-commit",
          "summary": "fix(oidc): prevent regex redirect URIs from reaching Vault OIDC",
          "link": "https://github.com/Panfactum/stack/commit/df00050879ea60803aca824155bde37fc8d19e2c"
        },
        {
          "type": "external-docs",
          "summary": "RFC 8252 - OAuth 2.0 for Native Apps: Loopback interface redirection",
          "link": "https://datatracker.ietf.org/doc/html/rfc8252#section-7.3"
        },
        {
          "type": "external-docs",
          "summary": "Vault JWT/OIDC Auth Method — Redirect URIs and allowed_redirect_uris configuration",
          "link": "https://developer.hashicorp.com/vault/docs/auth/jwt#redirect-uris"
        },
        {
          "type": "internal-docs",
          "summary": "authentik_vault_sso module reference",
          "link": "/docs/main/modules/authentik_vault_sso"
        },
        {
          "type": "internal-docs",
          "summary": "vault_auth_oidc module reference",
          "link": "/docs/main/modules/vault_auth_oidc"
        }
      ],
      "impacts": [
        {
          "type": "iac-module",
          "component": "authentik_vault_sso",
          "summary": "`oidc_redirect_uris` output now returns only strict-mode URIs; new `oidc_redirect_uris_including_regexes` output provides the full set including regex patterns for providers that support them"
        },
        {
          "type": "iac-module",
          "component": "vault_auth_oidc",
          "summary": "Hardcodes `http://localhost:8250/oidc/callback` internally so callers no longer need to supply it"
        }
      ]
    },
    {
      "id": "ec222c16-7774-4890-ac5e-22d2894643be",
      "type": "fix",
      "summary": "Fixed Cilium operator only running 1 replica on SLA-1 clusters, causing networking failures during pod disruptions",
      "description": "The Cilium operator is responsible for cluster-wide IPAM — it allocates IP addresses to new nodes as they join. Previously, the operator replica count was gated on `sla_target`, so SLA-1 clusters ran only a single operator pod. Because the operator is a single point of failure for node networking, a routine pod disruption (node eviction, rolling update, spot interruption) could take it offline and prevent new nodes from receiving IP addresses, stalling scale-out and causing networking instability across the cluster. The operator now always runs 2 replicas with Kubernetes leader election so that one replica can take over immediately if the other is disrupted, regardless of the cluster's SLA target.",
      "action_items": [
        "Re-apply the `kube_cilium` module to update the operator deployment configuration"
      ],
      "references": [
        {
          "type": "internal-commit",
          "summary": "fix: ensure cilium operator HA and fix website TS types",
          "link": "https://github.com/Panfactum/stack/commit/c64d99d027fb6c53b366a22cdfd6e10420f14076"
        },
        {
          "type": "external-docs",
          "summary": "Cilium operator proper rolling update support (enables safe HA upgrades)",
          "link": "https://github.com/cilium/cilium/pull/23589"
        },
        {
          "type": "external-docs",
          "summary": "Cilium operator HA mode internals documentation",
          "link": "https://github.com/cilium/cilium/blob/master/Documentation/internals/cilium_operator.rst"
        },
        {
          "type": "internal-docs",
          "summary": "kube_cilium module reference",
          "link": "/modules/kube_cilium"
        }
      ],
      "impacts": [
        {
          "type": "iac-module",
          "component": "kube_cilium",
          "summary": "Operator replica count hardcoded to 2 for all clusters; SLA-1 clusters now run a highly available operator instead of a single replica"
        }
      ]
    },
    {
      "id": "a62d8777-1ab6-4015-b3ba-3946c02bf134",
      "type": "fix",
      "summary": "Fixed `cluster add` command failing to detect partially deployed cluster regions, making automated recovery impossible",
      "description": "When the `setupClusterExtensions` step failed partway through its concurrent module deployments, `pf cluster add` would incorrectly treat the region as fully deployed on subsequent runs and skip it entirely — making automated recovery impossible without manual intervention. The root cause was that `isClusterDeployed` checked only `kube_reloader` as a proxy for cluster completion. Because all extension modules deploy concurrently, `kube_reloader` could reach `deploy_status=success` while sibling modules were still failing. The fix requires all 8 extension modules to have `deploy_status=success` AND all 5 anti-affinity modules to have `bootstrap_mode_enabled !== true` in their `module.yaml` before a region is considered fully deployed, enabling `pf cluster add` to correctly resume interrupted installations.",
      "references": [
        {
          "type": "internal-commit",
          "summary": "fix(cluster): detect partial setupClusterExtensions deployments",
          "link": "https://github.com/Panfactum/stack/commit/55a4bca301fe41b8851b7a4c784af0e530ae8439"
        }
      ],
      "impacts": [
        {
          "type": "cli",
          "component": "cluster add",
          "summary": "Enhanced cluster deployment detection to prevent skipping partially-installed regions"
        }
      ]
    },
    {
      "id": "cdd46545-6a59-483c-87b2-df8e25aef3ef",
      "type": "fix",
      "summary": "Fixed `cluster add` command Vault address becoming permanently stuck on localhost after DNS timeout during ingress setup",
      "description": "When switching Vault to ingress mode, DNS propagation can exceed the health-check timeout, causing the Vault address update step to fail after `KUBE_INGRESS_NGINX` is already deployed. On retry, the `setupInboundNetworking` step would be skipped entirely because the NGINX module showed `deploy_status=success`, leaving `vault_addr` permanently pointing to a local address and preventing cluster setup completion. The fix ensures `setupInboundNetworking` completion requires both NGINX deployment success AND `vault_addr` resolving to a non-local URL, mirroring the existing `setupVault` pattern.",
      "references": [
        {
          "type": "internal-commit",
          "summary": "fix(cluster/add): prevent vault_addr stuck on local after DNS timeout",
          "link": "https://github.com/Panfactum/stack/commit/db90bc63ddedd76053a16a8df49fa2f51418f258"
        },
        {
          "type": "internal-docs",
          "summary": "Bootstrapping guide: Inbound Networking",
          "link": "/docs/main/guides/bootstrapping/inbound-networking"
        }
      ],
      "impacts": [
        {
          "type": "cli",
          "component": "cluster add",
          "summary": "Fixed Vault address remaining on localhost after DNS propagation timeout during setup"
        }
      ]
    },
    {
      "id": "2b08774c-8df4-4687-8181-704021b8bf8b",
      "type": "fix",
      "summary": "Fixed `pf` CLI Nix build failing with `EPERM` errors when `bun` hardlinks read-only files from the Nix store.",
      "description": "The CLI package build was experiencing `EPERM` failures when `bun` attempted to hardlink packages\nduring installation. Two separate root causes were identified and fixed:\n\n1. The `bun-cache` derivation is a `symlinkJoin` whose entries point to read-only Nix store paths.\n   The `bun2nix` hook copied these with `cp -r`, which preserves symlinks, so `chmod -R u+rwx`\n   never reached the underlying read-only files. The fix switches to `cp -rL` to dereference\n   symlinks into real files before applying `chmod`, making the cache writable.\n2. After the first fix, a subsequent `EPERM` emerged from the `bun2nix` build hook itself copying\n   pre-fetched packages directly from the read-only Nix store (mode `444`). The fix adds a\n   `postBunSetInstallCacheDirPhase` that runs `chmod -R u+rwx` after the hook's copy, restoring\n   write permissions before `bun install` attempts hardlinking.\n\nAdditionally fixed a `nixpkgs` hash mismatch caused by GNU `parallel` re-releasing their tarball\nwith different content — a known recurring practice. A `nixpkgs` overlay in `flake.nix` now pins\nthe correct `sha256` hash.\n",
      "references": [
        {
          "type": "internal-commit",
          "summary": "fix(build): fix bun EPERM and parallel hash in nix builds",
          "link": "https://github.com/Panfactum/stack/commit/7b4fe36705dbfdadc48b159772815758e2a450f9"
        },
        {
          "type": "internal-commit",
          "summary": "fix(cli): chmod bun cache after copy from read-only nix store",
          "link": "https://github.com/Panfactum/stack/commit/a689ba3e921f95c0deb7af061a7864fd3e9c5524"
        },
        {
          "type": "issue-report",
          "summary": "bun2nix copies read-only files causing EPERM on hardlink",
          "link": "https://github.com/nix-community/bun2nix/issues/73"
        },
        {
          "type": "external-docs",
          "summary": "bun2nix — Nix build tool for bun packages",
          "link": "https://github.com/nix-community/bun2nix"
        }
      ],
      "impacts": [
        {
          "type": "devshell",
          "component": "pf",
          "summary": "Fixed `EPERM` errors during Nix build caused by read-only `bun` cache files — both the symlink-dereference issue and the `bun2nix` hook copy issue are resolved"
        },
        {
          "type": "devshell",
          "component": "parallel",
          "summary": "Fixed `nixpkgs` overlay hash for `parallel` after GNU re-released the tarball with different content"
        }
      ]
    },
    {
      "id": "a38e4b81-8d72-4ba6-b329-3beafb8be19c",
      "type": "fix",
      "summary": "Fixed Vault OIDC authentication failing when CLI uses dynamic ports by switching Authentik redirect URI from strict to regex matching.",
      "description": "The CLI was recently changed to use a random ephemeral port for the OIDC callback listener instead of the fixed port `8250` to avoid port conflicts on developer machines. However, the `authentik_vault_sso` module still enforced strict matching against `http://localhost:8250/oidc/callback`, causing every local login attempt to be rejected by Authentik. The localhost redirect URI entry now uses regex matching with the pattern `http://localhost:[0-9]+/oidc/callback` so any dynamically assigned port is accepted, bringing the Authentik config back in sync with the CLI's port selection behavior.",
      "references": [
        {
          "type": "internal-commit",
          "summary": "fix(authentik_vault_sso): allow dynamic ports in Vault SSO OIDC callback URI",
          "link": "https://github.com/Panfactum/stack/commit/9fc490ccb99c5a9a56580187e83b47bd628f08b4"
        },
        {
          "type": "external-docs",
          "summary": "OAuth 2.0 provider redirect URI configuration with regex matching",
          "link": "https://docs.goauthentik.io/docs/add-secure-apps/providers/oauth2"
        },
        {
          "type": "issue-report",
          "summary": "Authentik upstream: wildcard regex in port part of redirect URI causes ValueError",
          "link": "https://github.com/goauthentik/authentik/issues/13023"
        },
        {
          "type": "internal-docs",
          "summary": "authentik_vault_sso module reference",
          "link": "/modules/authentik_vault_sso"
        }
      ],
      "impacts": [
        {
          "type": "iac-module",
          "component": "authentik_vault_sso",
          "summary": "Changed localhost redirect URI from strict to regex matching for dynamic port support"
        }
      ]
    },
    {
      "id": "141f48cb-8d69-4332-a4b4-f9b85141f855",
      "type": "improvement",
      "summary": "Replaced `bun` workspace architecture with per-package lockfiles to eliminate Docker build failures and simplify dependency management.",
      "description": "The shared `bun` workspace caused recurring Docker build failures (`--filter` requires all workspace `package.json` files to be present), corrupt shared `node_modules` hoisting and cache, and required a fragile `sed+jq` hack in the CLI Nix build to extract a standalone lockfile from the root `bun.lock`. Since `cli`, `scraper`, and `website` have no cross-package dependencies, the workspace added complexity with zero benefit. Each package now has its own `bun.lock` and `bunfig.toml` with a local `.bun` cache dir, eliminating these failure modes and simplifying builds across Docker, Nix, and devshell environments.",
      "references": [
        {
          "type": "internal-commit",
          "summary": "refactor(bun): replace workspace with per-package lockfiles",
          "link": "https://github.com/Panfactum/stack/commit/6127695e8ea9a5135419804986b9d6359a1b762b"
        },
        {
          "type": "external-docs",
          "summary": "`bun install --frozen-lockfile --filter` fails in Docker when `bun.lock` was generated from the full workspace",
          "link": "https://github.com/oven-sh/bun/issues/28402"
        }
      ],
      "impacts": [
        {
          "type": "devshell",
          "component": "pf",
          "summary": "CLI Nix build simplified by removing `sed+jq` lockfile transformation hack; devshell now runs `bun install` once per package directory instead of a single root-level install"
        }
      ]
    },
    {
      "id": "e2f3c822-d7aa-4200-951d-8871325beb71",
      "type": "addition",
      "summary": "Added `kube_temporal` module for deploying Temporal workflow orchestration with full Panfactum operational guarantees",
      "description": "Provides a self-hosted Temporal workflow orchestration server on Kubernetes following all Panfactum conventions for high availability, vertical pod autoscaling, pod disruption budgets, Linkerd mTLS, and monitoring. Implements 4 separate Deployments (frontend on port 7233, history on port 7234, matching on port 7235, worker on port 7239) for independent scaling and disruption, uses a single `kube_pg_cluster` with dual PostgreSQL schemas (`temporal` and `temporal_visibility`) for persistence and visibility storage, secures the Web UI with Vault OIDC authentication, and ensures correct initialization ordering via schema and namespace setup Jobs. The `num_history_shards` variable is set once at schema initialization and cannot be changed afterward without a full data migration. Teams using Panfactum now have production-grade durable workflow orchestration without manual Temporal setup.",
      "references": [
        {
          "type": "internal-commit",
          "summary": "feat(kube_temporal): add Temporal workflow orchestration module",
          "link": "https://github.com/Panfactum/stack/commit/95d846afa1014c48bdbe6a031b1048bb3b49608b"
        },
        {
          "type": "internal-docs",
          "summary": "`kube_temporal` module reference documentation",
          "link": "/docs/main/modules/kube_temporal/reference"
        },
        {
          "type": "external-docs",
          "summary": "Temporal — durable execution platform official website",
          "link": "https://temporal.io"
        },
        {
          "type": "external-docs",
          "summary": "Temporal server architecture — Frontend, History, Matching, and Worker services",
          "link": "https://docs.temporal.io/server"
        },
        {
          "type": "external-docs",
          "summary": "temporalio/temporal — Temporal server GitHub repository",
          "link": "https://github.com/temporalio/temporal"
        }
      ],
      "impacts": [
        {
          "type": "iac-module",
          "component": "kube_temporal",
          "summary": "New module providing a self-hosted Temporal workflow orchestration server on Kubernetes"
        },
        {
          "type": "iac-module",
          "component": "kube_pg_cluster",
          "summary": "Added `extra_schemas` variable to support initializing multiple PostgreSQL schemas at cluster creation time"
        }
      ]
    },
    {
      "id": "8b00b2b7-cb1a-4d2f-b637-e74182e07e7c",
      "type": "fix",
      "summary": "Fixed `pf wf git-checkout` command failing on private repositories due to incorrect authentication handling",
      "description": "When using `pf wf git-checkout` to check out a private repository in a CI/CD workflow, the command would fail during git reference resolution even when valid credentials were supplied. The TypeScript rewrite of this command had inadvertently diverged from the original bash implementation: git reference resolution was running in the caller's working directory rather than inside the cloned repository, so the credential configuration set up during cloning was not visible. This caused authentication to fail at the hash resolution step. The fix ensures reference resolution runs inside the cloned repository where credentials are properly configured, matching the behavior of the original bash script.",
      "references": [
        {
          "type": "internal-commit",
          "summary": "fix(wf/git-checkout): fix getCommitHash failing on private repos",
          "link": "https://github.com/Panfactum/stack/commit/2725a5c0419aa9e917fd623fddd10f3105996d00"
        },
        {
          "type": "internal-docs",
          "summary": "Checking out Git Repositories guide",
          "link": "/docs/main/guides/cicd/checking-out-code"
        }
      ],
      "impacts": [
        {
          "type": "cli",
          "component": "wf git-checkout",
          "summary": "Now properly handles private repository authentication during git reference resolution"
        }
      ]
    },
    {
      "id": "822ad2a1-f271-4cdd-b7bf-09b772132c55",
      "type": "fix",
      "summary": "Fixed concurrent Vault OIDC authentication failures when multiple processes attempt login simultaneously",
      "description": "Previously, `vault login -method=oidc` bound its OAuth2 callback listener to a fixed port (`8250`), causing resource contention when multiple processes attempted OIDC authentication in parallel, such as during concurrent Terragrunt runs. This resulted in all authentication attempts except the first one failing with port binding conflicts. The fix allocates a dynamic ephemeral port for each authentication session, eliminating the conflict entirely and allowing parallel authentication flows to succeed.",
      "references": [
        {
          "type": "internal-commit",
          "summary": "fix(vault): use dynamic port for OIDC login callback listener",
          "link": "https://github.com/Panfactum/stack/commit/5b0a4cc9a59c841bd558a42f016727b1d74bfa50"
        },
        {
          "type": "issue-report",
          "summary": "Vault OIDC authentication fails when used simultaneously by multiple users on shared machine (port 8250 conflict)",
          "link": "https://github.com/hashicorp/vault/issues/15421"
        },
        {
          "type": "external-docs",
          "summary": "Vault JWT/OIDC auth method — OIDC login CLI port parameter documentation",
          "link": "https://developer.hashicorp.com/vault/docs/auth/jwt"
        }
      ],
      "impacts": [
        {
          "type": "cli",
          "component": "vault get-token",
          "summary": "Now supports concurrent OIDC authentication sessions without port binding conflicts"
        }
      ]
    },
    {
      "id": "58d69c0a-50c3-4a45-9754-7d666844378d",
      "type": "fix",
      "summary": "Fixed `authentik_github_sso` `sso_post_url` output returning the wrong SAML binding URL, breaking GitHub SSO configuration.",
      "description": "The `sso_post_url` output was incorrectly returning `url_sso_redirect` (the HTTP-Redirect binding URL) instead of `url_sso_post` (the HTTP-POST binding URL). GitHub's SAML SSO configuration requires the HTTP-POST binding and uses the `sso_post_url` value as the \"Sign on URL\" in the Authentication security settings. Supplying the redirect URL in this field causes SAML authentication to fail, and the error is not always obvious because the URL format looks superficially correct. This fix ensures the output returns the POST binding URL, which is what GitHub actually validates and uses.",
      "references": [
        {
          "type": "internal-commit",
          "summary": "fix(authentik_github_sso): had the wrong output which caused confusion during setup",
          "link": "https://github.com/Panfactum/stack/commit/bfc83fb51c7a1eac473533c10120ba0cee78cbc9"
        },
        {
          "type": "internal-docs",
          "summary": "authentik_github_sso module documentation",
          "link": "/docs/main/modules/authentik_github_sso/overview"
        },
        {
          "type": "external-docs",
          "summary": "Authentik — Integrate with GitHub Organization (SAML POST binding requirement)",
          "link": "https://docs.goauthentik.io/integrations/services/github-organization/"
        }
      ],
      "impacts": [
        {
          "type": "iac-module",
          "component": "authentik_github_sso",
          "summary": "The `sso_post_url` output now returns the correct SAML HTTP-POST binding URL instead of the HTTP-Redirect URL, enabling successful GitHub SAML SSO configuration"
        }
      ]
    },
    {
      "id": "380c8281-a2a3-474e-8aa5-545b5c0c3f98",
      "type": "fix",
      "summary": "Fixed `pf sso add` crashing on first-time AWS SSO setup when the `authentik_aws_sso` module has not yet been deployed",
      "description": "Running `pf sso add` for the first time fails before displaying any prompts because the `authentik_aws_sso/module.yaml` file does not exist until the module has been deployed at least once. The `setupFederatedAuth` function read this file via `readYAMLFile` without tolerating a missing file, causing the command to throw an error and exit immediately. Treating a missing config file as an empty config allows the first-time setup flow to proceed and prompt the user for the required inputs normally.",
      "references": [
        {
          "type": "internal-commit",
          "summary": "fix(sso): handle missing org module YAML on first SSO setup",
          "link": "https://github.com/Panfactum/stack/commit/a53de6458d3d8502334cdf90a47d78444ce03306"
        },
        {
          "type": "internal-docs",
          "summary": "authentik_aws_sso module documentation",
          "link": "/docs/main/modules/authentik_aws_sso/overview"
        }
      ],
      "impacts": [
        {
          "type": "cli",
          "component": "sso add",
          "summary": "No longer crashes before showing prompts when `authentik_aws_sso` has not yet been deployed"
        }
      ]
    },
    {
      "id": "7f98e554-9a85-4211-acf1-f78651b091c9",
      "type": "fix",
      "summary": "Fixed Authentik restart process in `sso add` command to wait for deployment rollout completion before proceeding to user setup.",
      "description": "Previously the restart task triggered a rollout but returned immediately, allowing subsequent setup steps to run against pods that were still cycling. This could cause flaky failures when the next steps tried to interact with Authentik before it was healthy. Added `kubectl rollout status` with a 10m timeout to block until the deployment is confirmed ready, and added per-substep task title updates so operators can see which phase of the restart is active.",
      "references": [
        {
          "type": "internal-commit",
          "summary": "feat(sso/add): wait for Authentik rollout readiness after restart",
          "link": "https://github.com/Panfactum/stack/commit/09becda2183b2dce0e5091db9774cdd77d4f69a3"
        }
      ],
      "impacts": [
        {
          "type": "cli",
          "component": "sso add",
          "summary": "Authentik restart process now blocks until `kubectl rollout status` confirms the deployment is ready, preventing flaky failures when subsequent setup steps call the Authentik API against pods that are still cycling."
        }
      ]
    },
    {
      "id": "506abc4d-c071-4be7-9a4f-ebfdc48c042d",
      "type": "fix",
      "summary": "Fixed `cluster add` extension setup to prevent Terragrunt provider cache corruption during concurrent module initialization.",
      "description": "Previously, concurrent `terragrunt init` calls during `cluster add` extension setup would each spawn their own provider cache server (or none), causing filesystem corruption in the shared plugin cache directory when 8 extension modules initialized in parallel. This is a known upstream limitation: Terraform's `plugin_cache_dir` is not concurrency-safe when multiple `init` processes write to it simultaneously. The fix adds a dedicated pre-initialization phase that runs `terragrunt init --all` scoped to only the new extension modules, ensuring a single Terragrunt process coordinates all provider downloads through one in-process cache server. Concurrent apply tasks then skip initialization (via `init_status=success`) to avoid re-triggering the race condition.",
      "references": [
        {
          "type": "internal-commit",
          "summary": "feat(cluster): pre-init extension modules to prevent cache corruption",
          "link": "https://github.com/Panfactum/stack/commit/734cabbde022dba65ca5b255406fcfd3aa7b30e1"
        },
        {
          "type": "issue-report",
          "summary": "Allow multiple Terraform instances to write to plugin_cache_dir concurrently",
          "link": "https://github.com/hashicorp/terraform/issues/31964"
        },
        {
          "type": "issue-report",
          "summary": "Terragrunt run-all init concurrency/parallelism issues with shared plugin cache",
          "link": "https://github.com/gruntwork-io/terragrunt/issues/2542"
        }
      ],
      "impacts": [
        {
          "type": "cli",
          "component": "cluster add",
          "summary": "Extension module setup now runs a pre-initialization phase to prevent provider cache corruption during concurrent `terragrunt apply` operations"
        },
        {
          "type": "devshell",
          "component": "terragrunt",
          "summary": "`terragruntInitAll` now accepts an optional `modules` parameter to scope initialization to specific module directories using `--queue-include-dir` and `--queue-strict-include` flags"
        }
      ]
    },
    {
      "id": "32d92099-e42d-4ebd-8d49-18b42efd284a",
      "type": "fix",
      "summary": "Fixed EC2 vCPU quota check to be aware of existing deployments and avoid false failures on `cluster add` re-runs",
      "description": "The quota check previously required 32 vCPUs of headroom unconditionally, causing false failures when re-running `pf cluster add` after `aws_vpc` and/or `aws_eks` were already deployed. Those modules' instances are already reflected in current usage metrics, so they should reduce the required headroom rather than count against it. The fix skips quota checks entirely when `kube_karpenter` is already deployed, subtracts `aws_vpc` vCPUs (6 for SLA 2/3, 2 for SLA 1) from required headroom when `aws_vpc` is deployed, subtracts `aws_eks` vCPUs (6) when `aws_eks` is deployed, and lowers baseline requirements for SLA 1 clusters (16 vCPUs vs 32) since they run in a single AZ with fewer managed nodes.",
      "references": [
        {
          "type": "internal-commit",
          "summary": "fix(cluster/add): make vCPU quota check aware of existing deployments",
          "link": "https://github.com/Panfactum/stack/commit/b3b91a48ce6eff18e40c5ab64d7d8e75ce17d991"
        },
        {
          "type": "external-docs",
          "summary": "Amazon EC2 service quotas — vCPU-based On-Demand Instance limits",
          "link": "https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-resource-limits.html"
        }
      ],
      "impacts": [
        {
          "type": "cli",
          "component": "cluster add",
          "summary": "EC2 vCPU quota check now accounts for existing `aws_vpc`, `aws_eks`, and `kube_karpenter` deployment status to prevent unnecessary quota increase requests on re-runs"
        }
      ]
    },
    {
      "id": "7258c043-8680-4df0-9f05-d958534eadff",
      "type": "fix",
      "summary": "Fixed Vault setup to be idempotent on re-runs by checking initialization status and reading tokens from proper file locations",
      "description": "`pf cluster add` is designed to be resumable — interrupted installations can be re-run and completed steps are skipped. However, the Vault setup phase had several bugs that caused it to fail on re-runs: it attempted to re-initialize an already-initialized Vault (which fails by design), looked for recovery keys in the wrong file (`secrets.yaml` instead of `recovery.yaml`), and never read the vault root token from `region.secrets.yaml` on subsequent runs. Additionally, step completion detection for the `vault_core_resources` module was unreliable because that module is deployed multiple times — the fix now also treats a non-local `vault_addr` as a completion signal. Together these fixes ensure that resuming an interrupted cluster installation does not require manual intervention to recover Vault state.",
      "references": [
        {
          "type": "internal-commit",
          "summary": "fix(cluster/vault): make vault setup idempotent on re-runs",
          "link": "https://github.com/Panfactum/stack/commit/8084b20fbe5a6174adf836a019ec8f69e12f7dd7"
        },
        {
          "type": "internal-docs",
          "summary": "kube_vault module reference",
          "link": "/docs/main/reference/infrastructure-modules/kubernetes/kube_vault"
        },
        {
          "type": "internal-docs",
          "summary": "Bootstrapping guide: Vault setup",
          "link": "/docs/main/guides/bootstrapping/vault"
        }
      ],
      "impacts": [
        {
          "type": "cli",
          "component": "cluster add",
          "summary": "Vault setup phase now properly handles re-runs without failing on already-initialized Vault instances"
        },
        {
          "type": "iac-module",
          "component": "kube_vault",
          "summary": "Step completion detection updated to treat non-local `vault_addr` as a valid completion signal for `vault_core_resources` re-deploys"
        }
      ]
    },
    {
      "id": "72e4b060-121f-4abd-ae71-598889ec6e49",
      "type": "fix",
      "summary": "Restored the `pf env add` command that was accidentally unregistered from the CLI",
      "description": "During a refactor that renamed the internal class from `EnvironmentInstallCommand` to `EnvironmentAddCommand`, the updated import and CLI registration were omitted from `packages/cli/src/index.ts`. As a result, `pf env add` was completely unreachable — running it would produce a \"command not found\" error. The registration has been restored and the command works normally again.",
      "references": [
        {
          "type": "internal-commit",
          "summary": "fix(cli): reregister env add command that was accidentally removed",
          "link": "https://github.com/Panfactum/stack/commit/dc09c52849fa03d5a420f0e9193f3f83b1318c95"
        }
      ],
      "impacts": [
        {
          "type": "cli",
          "component": "env add",
          "summary": "Command was unreachable after a class rename dropped its CLI registration; it is now restored and fully functional"
        }
      ]
    },
    {
      "id": "93102dd3-1bfe-4610-9004-ac0527798158",
      "type": "fix",
      "summary": "Fixes Terragrunt CLI flag compatibility with `v0.85.0`+ by replacing removed `--terragrunt-*` prefixed flags with their modern equivalents.",
      "description": "Terragrunt `v0.85.0` removed all legacy `--terragrunt-*` prefixed flags as part of its CLI redesign roadmap. Before this fix, the Panfactum CLI passed flags like `--terragrunt-non-interactive`, `--terragrunt-no-color`, and `--terragrunt-provider-cache` which Terragrunt silently ignored on `v0.85.0`+, allowing interactive prompts and colorized output to leak into automated contexts. This fix updates all internal Terragrunt utility functions to use the modern equivalents (`--non-interactive`, `--no-color`, `--provider-cache`) and also updates the `providers lock` invocation to use the new `run -- providers lock` form required by the redesigned CLI.",
      "references": [
        {
          "type": "internal-commit",
          "summary": "fix(terragrunt): update flags to new Terragrunt CLI syntax",
          "link": "https://github.com/Panfactum/stack/commit/b01be825db8880f2a5d46a0e01b1357a024ec149"
        },
        {
          "type": "external-docs",
          "summary": "Terragrunt v0.85.0 release notes — removal of legacy `--terragrunt-*` flags",
          "link": "https://github.com/gruntwork-io/terragrunt/releases/tag/v0.85.0"
        },
        {
          "type": "external-docs",
          "summary": "Terragrunt CLI redesign migration guide",
          "link": "https://docs.terragrunt.com/migrate/cli-redesign/"
        }
      ],
      "impacts": [
        {
          "type": "devshell",
          "component": "terragrunt",
          "summary": "Invocations now use `--non-interactive`, `--no-color`, and `--provider-cache` flags, and `providers lock` now uses `run -- providers lock`; compatible with `v0.85.0`+"
        }
      ]
    },
    {
      "id": "27feafcb-30f5-4bc4-9517-83ceefbd695f",
      "type": "fix",
      "summary": "Fixed Vault OIDC browser login silently suppressed and `--silent` not blocking interactive prompts in `pf vault get-token`",
      "description": "There were two distinct bugs. First, `getPanfactumConfig` called `getVaultToken` with `silent=true`, which caused the OIDC browser login to be suppressed whenever Terragrunt loaded region config — even in interactive developer sessions where authentication should be permitted. Second, `performOIDCLogin` did not pass `stdin: 'inherit'` to the `vault login -method=oidc` subprocess, preventing the interactive browser flow from working correctly in terminals. Both are now fixed: `getPanfactumConfig` passes `silent=false`, and `performOIDCLogin` inherits `stdin` and streams `stderr` through the logger. An early `throw` was also added in `getVaultToken` so that callers that explicitly request `silent=true` get a clean `CLIError` instead of falling through to the interactive OIDC prompt.",
      "references": [
        {
          "type": "internal-commit",
          "summary": "fix(vault): fix interactive OIDC login and silent mode behavior",
          "link": "https://github.com/Panfactum/stack/commit/68eb0373a2032ee75a41aa01daa8df140cc9d676"
        }
      ],
      "impacts": [
        {
          "type": "cli",
          "component": "vault get-token",
          "summary": "OIDC authentication now properly respects `--silent` mode and the browser-based login flow works correctly in interactive terminal sessions"
        },
        {
          "type": "devshell",
          "component": "pf",
          "summary": "Terragrunt config loading via `getPanfactumConfig` no longer suppresses interactive Vault OIDC login when running in a developer shell"
        }
      ]
    },
    {
      "id": "7d567255-dc1c-4cc9-b2cd-15d3cc85c070",
      "type": "fix",
      "summary": "Fixed `kube get-token` command to properly require the devshell environment",
      "description": "`pf kube get-token` is invoked by `kubectl` as an exec-plugin credential provider and depends on the `aws` CLI to generate EKS authentication tokens. The command was incorrectly flagged as not requiring the devshell, so users who ran `kubectl` outside the devshell environment received cryptic failures instead of a clear actionable error. Setting `requiresDevshell = true` ensures the CLI exits immediately with an explanation when the devshell is not active.",
      "references": [
        {
          "type": "internal-commit",
          "summary": "fix(cli): require devshell for kube get-token command",
          "link": "https://github.com/Panfactum/stack/commit/6111f6c0ceb4b99881f5a1de54736779c765bb54"
        }
      ],
      "impacts": [
        {
          "type": "cli",
          "component": "kube get-token",
          "summary": "Now correctly requires the devshell and exits with a clear error message when run outside it"
        }
      ]
    },
    {
      "id": "5a60bf64-f9bb-474c-a9bf-4e93fdd6518e",
      "type": "fix",
      "summary": "Fixed `cluster add` and `sso add` commands crashing when resuming a partially completed installation.",
      "description": "When resuming a partially completed cluster or SSO install, setup commands read `module.yaml` files to check prior state. If the install never reached a given step, its `module.yaml` does not exist yet, causing an unhandled crash. The fix passes `throwOnMissing: false` to all `readYAMLFile` calls across the affected setup steps — `setupCertificates`, `setupEKS`, `setupClusterExtensions`, `setupAuthentik`, and `setupFederatedAuth` — so that a missing file is treated as a clean slate rather than a fatal error.",
      "references": [
        {
          "type": "internal-commit",
          "summary": "fix(cli/cluster): prevent crash resuming install at certificates step",
          "link": "https://github.com/Panfactum/stack/commit/fd2a9c8f19cd183bb9ebbcc22bb9b22fb99646dd"
        },
        {
          "type": "internal-commit",
          "summary": "fix(cli/cluster): tolerate missing YAML files when resuming install",
          "link": "https://github.com/Panfactum/stack/commit/faaac6c484140a28353016b4295eecebb20ae039"
        },
        {
          "type": "internal-commit",
          "summary": "fix(cluster/add): allow missing module.yaml during cluster extension setup",
          "link": "https://github.com/Panfactum/stack/commit/90a154bdf13bc1bec435c1620235840146b59caa"
        }
      ],
      "impacts": [
        {
          "type": "cli",
          "component": "cluster add",
          "summary": "No longer crashes when resuming installation at a step whose `module.yaml` has not yet been written"
        },
        {
          "type": "cli",
          "component": "sso add",
          "summary": "No longer crashes when resuming installation at a step whose `module.yaml` has not yet been written"
        }
      ]
    },
    {
      "id": "633b6630-0254-4328-932b-90b95028e54d",
      "type": "fix",
      "summary": "Fixed `default_file_strict` regex in `aws_s3_public_website` to correctly recognize paths with non-extension dots (e.g., date-suffixed paths like `edge.25-04-03`) as directories.",
      "description": "The previous `default_file_strict` regex matched only paths with no dots at all, so paths like `edge.25-04-03` — where the last dot is followed by a hyphen and digits rather than a file extension — were incorrectly treated as file paths and never had `default_file` appended. The new regex defines a file extension as alphanumeric characters after a final dot, so only paths like `style.css` or `app.min.js` are treated as files; paths where the text after the last dot contains any non-alphanumeric character are treated as directory-like and get `default_file` appended as expected. The implementation uses alternation instead of lookaheads because CloudFront Functions (`cloudfront-js-2.0`) does not support them, and uses `[.]` instead of `\\.` to avoid backslash escaping issues in the `jsonencode` to JS string to JSON evaluation chain. The `default_file_strict` variable description has also been updated to reflect the corrected semantics.",
      "references": [
        {
          "type": "internal-commit",
          "summary": "fix(s3_public_website): fix strict mode regex to handle non-extension dots",
          "link": "https://github.com/Panfactum/stack/commit/445862a20bccb8e9b1ef06dd985486aa4d5d389f"
        },
        {
          "type": "internal-docs",
          "summary": "aws_s3_public_website module documentation",
          "link": "/docs/main/modules/aws_s3_public_website/overview"
        }
      ],
      "impacts": [
        {
          "type": "iac-module",
          "component": "aws_s3_public_website",
          "summary": "`default_file_strict` mode now correctly appends `default_file` to directory-like paths containing non-extension dots, not just dot-free paths"
        }
      ]
    },
    {
      "id": "5c275c1d-dd9f-40b7-935e-51678ea74761",
      "type": "breaking_change",
      "summary": "`aws_organization` upgrades to AWS provider v6, enables `SERVICE_CONTROL_POLICY`, `TAG_POLICY`, and greatly expands trusted service access principals by default.",
      "description": "The `aws_organization` module now pins the `hashicorp/aws` provider to `v6.38.0`, a major version bump from 5.x. Upgrading requires running `terraform init -upgrade` to refresh lock files across all management environment modules. Review the v6 upgrade guide for removed resources and changed defaults (OpsWorks, SimpleDB, Worklink, and nullable boolean validation are notable removals).\nBeyond the provider bump, the default configuration has expanded substantially. `SERVICE_CONTROL_POLICY` and `TAG_POLICY` are now enabled by default, meaning existing organizations will have these policy types applied on the next `terraform apply`. Trusted access is enabled for a much broader set of AWS governance services: GuardDuty, Security Hub, AWS Backup, Inspector, CloudTrail, Access Analyzer, Audit Manager, IPAM, License Manager, Network Manager, and others. A new `aws_notifications_organizations_access` resource (available only in provider v6) enables AWS User Notifications trusted access by default.\nNew opt-out variables (`disabled_aws_service_access_principals`, `disabled_enabled_policy_types`, `enable_notifications_access`, `enable_support_service_access`) allow consumers to selectively disable any default without forking the module.",
      "action_items": [
        "Run `terraform init -upgrade` in all management environment module directories to update provider lock files to v6.",
        "Review the AWS Provider v6 Upgrade Guide for breaking changes that may affect other modules in your stack.",
        "Run `terraform plan` on `aws_organization` and review the diff — expect new policy types (`SERVICE_CONTROL_POLICY`, `TAG_POLICY`) and an `aws_notifications_organizations_access` resource to be created.",
        "If any newly-enabled service access principals or policy types are unwanted, set `disabled_aws_service_access_principals` or `disabled_enabled_policy_types` before applying."
      ],
      "references": [
        {
          "type": "internal-commit",
          "summary": "feat(aws_organization): upgrade AWS provider to v6, expand org service access",
          "link": "https://github.com/Panfactum/stack/commit/b5cab6457e1c5ba504e14e063839827285694d9d"
        },
        {
          "type": "external-docs",
          "summary": "AWS Provider v6 Upgrade Guide",
          "link": "https://registry.terraform.io/providers/hashicorp/aws/latest/docs/guides/version-6-upgrade"
        },
        {
          "type": "external-docs",
          "summary": "hashicorp/terraform-provider-aws v6.38.0 release notes",
          "link": "https://github.com/hashicorp/terraform-provider-aws/releases/tag/v6.38.0"
        },
        {
          "type": "external-docs",
          "summary": "AWS services that support trusted access with Organizations",
          "link": "https://docs.aws.amazon.com/organizations/latest/userguide/orgs_integrate_services_list.html"
        },
        {
          "type": "internal-docs",
          "summary": "`aws_organization` module reference",
          "link": "/docs/main/modules/aws_organization/reference"
        }
      ],
      "impacts": [
        {
          "type": "iac-module",
          "component": "aws_organization",
          "summary": "Pins `hashicorp/aws` to v6.38.0; enables `SERVICE_CONTROL_POLICY` and `TAG_POLICY` by default; adds `aws_notifications_organizations_access` resource; greatly expands default trusted service access principals; adds opt-out variables for all new defaults"
        }
      ]
    },
    {
      "id": "1f143057-6331-4bbc-989a-a17c28035f27",
      "type": "fix",
      "summary": "Fixed Vault pods failing to start during cluster bootstrap due to instance-type anti-affinity constraints.",
      "description": "During cluster bootstrap, only the EKS controller node group exists (single `t4g.large` instance type), but `sla_target = 3` clusters require Vault pods on 3 different instance types via anti-affinity rules. This left all 3 Vault pods in `Pending` state indefinitely, blocking the entire bootstrap process. The fix adds a `bootstrap_mode_enabled` parameter to `kube_vault` to temporarily disable instance-type anti-affinity during initial deployment. The CLI sets `bootstrap_mode_enabled = true` in the `setupVault` phase, then re-applies with `bootstrap_mode_enabled = false` in `setupClusterExtensions` once Karpenter and diverse node pools are running. Skip logic ensures the re-apply is idempotent on subsequent runs.",
      "references": [
        {
          "type": "internal-commit",
          "summary": "fix(kube_vault): add bootstrap mode to bypass instance-type anti-affinity",
          "link": "https://github.com/Panfactum/stack/commit/4b2379c6d54f0f0d0b8d4c764427eaea7d7278eb"
        },
        {
          "type": "internal-docs",
          "summary": "kube_vault module documentation",
          "link": "/modules/kube_vault"
        }
      ],
      "impacts": [
        {
          "type": "iac-module",
          "component": "kube_vault",
          "summary": "Adds `bootstrap_mode_enabled` parameter to temporarily bypass instance-type anti-affinity during cluster setup"
        },
        {
          "type": "cli",
          "component": "cluster add",
          "summary": "Automatically sets `bootstrap_mode_enabled = true` during `setupVault` and re-applies with `false` after Karpenter node pools are available"
        }
      ]
    },
    {
      "id": "a4a30f08-b54f-4e06-b366-0dd3474e8155",
      "type": "improvement",
      "summary": "Enhanced `cluster add` to detect and gracefully handle NAT ASG capacity failures with clear error messages",
      "description": "Previously, when AWS exhausts `t4g.nano` capacity in an availability zone during cluster setup, `pf cluster add` would hang indefinitely or fail with an opaque Terraform error, leaving users without guidance. This improvement works around a known Terraform provider bug where `wait_for_capacity_timeout` is ignored on `InsufficientInstanceCapacity` errors. The CLI now polls NAT ASGs after deployment, providing clear capacity failure messages and actionable advice when AWS lacks sufficient instance capacity.",
      "references": [
        {
          "type": "internal-commit",
          "summary": "feat(vpc): detect NAT ASG capacity failures during cluster add",
          "link": "https://github.com/Panfactum/stack/commit/18037313b748444c9a8e28ae40307bcdc0807e21"
        },
        {
          "type": "issue-report",
          "summary": "Terraform provider bug: wait_for_capacity_timeout ignored on InsufficientInstanceCapacity",
          "link": "https://github.com/hashicorp/terraform-provider-aws/issues/29753"
        },
        {
          "type": "internal-docs",
          "summary": "aws_vpc module reference",
          "link": "/docs/main/modules/aws_vpc/overview"
        }
      ],
      "impacts": [
        {
          "type": "iac-module",
          "component": "aws_vpc",
          "summary": "Sets `wait_for_capacity_timeout = \"0\"` on NAT ASGs and exposes `nat_config` output for CLI polling"
        },
        {
          "type": "cli",
          "component": "cluster add",
          "summary": "Adds \"Verify NAT Gateways\" task that polls ASG instances and provides clear capacity failure messages"
        }
      ]
    },
    {
      "id": "da7dc7c0-591a-407b-92e0-c08542cd6b43",
      "type": "improvement",
      "summary": "Optimized git commit verification for the Panfactum stack repository by using the GitHub REST API instead of `git fetch`.",
      "description": "When verifying commit SHAs, the CLI previously used `git fetch` which involves a full remote negotiation and is relatively slow. For the Panfactum stack repository specifically, commit verification now uses the faster unauthenticated GitHub REST API (`GET /repos/panfactum/stack/git/commits/{sha}`) instead. The optimization maintains correctness by falling back to the original `git fetch` method if the API call is inconclusive (e.g., rate-limited or network error).",
      "references": [
        {
          "type": "internal-commit",
          "summary": "perf(cli/git): use GitHub API to verify Panfactum stack commits",
          "link": "https://github.com/Panfactum/stack/commit/cddda88240b583463fc8b0005d6ec662f3247623"
        },
        {
          "type": "external-docs",
          "summary": "GitHub REST API endpoints for Git commits",
          "link": "https://docs.github.com/en/rest/git/commits"
        },
        {
          "type": "issue-report",
          "summary": "Bug: git fetch failures with .netrc parser error when verifying Panfactum stack commits",
          "link": "https://github.com/Panfactum/stack/issues/287"
        }
      ],
      "impacts": [
        {
          "type": "cli",
          "component": "util get-commit-hash",
          "summary": "Commit SHA verification for the Panfactum stack repository is now significantly faster via the GitHub REST API"
        }
      ]
    },
    {
      "id": "7652ac8e-9120-456d-b6bf-fbe45429ce3e",
      "type": "fix",
      "summary": "Fixed cert-manager webhook pods failing to start during SLA-3 cluster bootstrap due to instance-type anti-affinity constraints",
      "description": "During SLA-3 bootstrap, cert-manager webhook pods require instance-type anti-affinity (2 pods on different instance types), but only the EKS controller node group exists until Karpenter deploys at step 10. This left webhook pods in `Pending` state indefinitely, blocking bootstrap. Following the same two-phase pattern used for Vault, both `kube_certificates` and `kube_cert_manager` now support `bootstrap_mode_enabled` to disable webhook anti-affinity during initial deployment, then restore proper pod spread after Karpenter is available.",
      "references": [
        {
          "type": "internal-commit",
          "summary": "fix(cert-manager): add bootstrap_mode_enabled to fix SLA-3 spinup",
          "link": "https://github.com/Panfactum/stack/commit/e65792746f79608e289128bda697e18b64b52239"
        },
        {
          "type": "internal-docs",
          "summary": "kube_cert_manager module documentation",
          "link": "/docs/main/reference/infrastructure-modules/kubernetes/kube_cert_manager"
        },
        {
          "type": "internal-docs",
          "summary": "kube_certificates module documentation",
          "link": "/docs/main/reference/infrastructure-modules/kubernetes/kube_certificates"
        }
      ],
      "impacts": [
        {
          "type": "iac-module",
          "component": "kube_cert_manager",
          "summary": "Adds `bootstrap_mode_enabled` parameter to temporarily bypass webhook instance-type anti-affinity during cluster setup"
        },
        {
          "type": "iac-module",
          "component": "kube_certificates",
          "summary": "Adds `bootstrap_mode_enabled` parameter to temporarily bypass webhook instance-type anti-affinity during cluster setup"
        },
        {
          "type": "cli",
          "component": "cluster add",
          "summary": "Automatically manages cert-manager anti-affinity settings during bootstrap and post-Karpenter phases"
        }
      ]
    },
    {
      "id": "8460ed65-2f32-445b-a92e-d466ce321c15",
      "type": "addition",
      "summary": "`cluster add` now validates EC2 vCPU quota headroom before deployment and auto-submits increase requests when insufficient.",
      "description": "New AWS accounts and regions often have EC2 vCPU quotas too low to support the EKS controller node group plus Karpenter workloads, causing opaque mid-deployment failures. The new `ensureEC2QuotaHeadroom` function checks both the On-Demand Standard vCPU quota (`L-1216C47A`) and the Spot Standard vCPU quota (`L-34B43A08`), using CloudWatch usage metrics to calculate real headroom. Required headroom is 16 vCPUs for SLA-1 clusters and 32 vCPUs for SLA-2/3 clusters, with automatic reductions applied when the `aws_vpc` or `aws_eks` modules are already deployed (since those instances are already reflected in current usage). When headroom is insufficient, the check detects any pending quota increase requests to avoid duplicates, then either warns the user to retry after approval or automatically submits a new increase request. Running this check early in the workflow gives AWS time to approve the increase before the EKS deployment step runs.",
      "references": [
        {
          "type": "internal-commit",
          "summary": "feat(cluster/add): add EC2 vCPU quota headroom check before cluster deploy",
          "link": "https://github.com/Panfactum/stack/commit/a44aa31c65415ef3c6085a129b8689ba8edd9666"
        },
        {
          "type": "external-docs",
          "summary": "Amazon EC2 service quotas — AWS documentation",
          "link": "https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-resource-limits.html"
        }
      ],
      "impacts": [
        {
          "type": "cli",
          "component": "cluster add",
          "summary": "Now checks EC2 On-Demand and Spot Standard vCPU quota headroom before deployment and auto-submits quota increase requests when headroom is insufficient"
        }
      ]
    },
    {
      "id": "a9f87fc5-04c4-49dc-a021-47704aef271c",
      "type": "fix",
      "summary": "Fixed Linkerd pods failing to start during SLA-3 cluster bootstrap due to instance-type anti-affinity constraints",
      "description": "During SLA-3 bootstrap, Linkerd's `destination`, `identity`, and `proxy-injector` pods require instance-type anti-affinity but only the EKS controller node group exists until Karpenter deploys. This left Linkerd pods in `Pending` state indefinitely, blocking the entire service mesh bootstrap process. Following the same two-phase pattern used for Vault and cert-manager, `kube_linkerd` now supports `bootstrap_mode_enabled` to disable instance-type anti-affinity during initial deployment, then restore proper pod spread after Karpenter is available.",
      "references": [
        {
          "type": "internal-commit",
          "summary": "fix(kube_linkerd): disable anti-affinity constraints during cluster bootstrap",
          "link": "https://github.com/Panfactum/stack/commit/619581f64077dbfbf6e98c1b658266d547781134"
        },
        {
          "type": "internal-docs",
          "summary": "Service mesh bootstrapping guide",
          "link": "/docs/main/guides/bootstrapping/service-mesh"
        }
      ],
      "impacts": [
        {
          "type": "iac-module",
          "component": "kube_linkerd",
          "summary": "Adds `bootstrap_mode_enabled` parameter to temporarily bypass instance-type anti-affinity during cluster setup"
        },
        {
          "type": "cli",
          "component": "cluster add",
          "summary": "Automatically manages Linkerd anti-affinity settings during bootstrap and post-Karpenter phases"
        }
      ]
    },
    {
      "id": "a74b231a-9dbe-4fcc-a8dc-9c01a2297e35",
      "type": "fix",
      "summary": "Fixed `prek` hook installation and autofix detection in multi-worktree Git setups and switched Nix formatter to `nixfmt`",
      "description": "Three related issues with the development environment were corrected:\n\n1. `prek` exits non-zero on both autofixes and real errors, making it impossible to tell them apart. The\n   `hook-stop-lint.sh` script now hashes all changed files before running `prek`; if the files are unchanged\n   after a failure it is a real error, and if they changed an autofix occurred and `prek` re-runs to confirm\n   resolution.\n\n2. `prek install -c <abs-path>` baked the installing worktree's config path into the shared bare-repo hook,\n   so commits from other worktrees silently used the wrong `.pre-commit-config.yaml`. The `-c` flag was\n   dropped so `prek` resolves the config from the active worktree at hook-execution time. `core.hooksPath`\n   is also unset to prevent stale local overrides from interfering.\n\n3. The Nix formatter was switched from `nixfmt-rfc-style` to `nixfmt`, and `bun.nix` (a generated file) is\n   now excluded from formatting to avoid noisy spurious reformats on every commit.\n",
      "references": [
        {
          "type": "internal-commit",
          "summary": "fix(dev-tooling): fix prek hooks and worktree install behavior",
          "link": "https://github.com/Panfactum/stack/commit/7d52bf149064f1f8348420a2db8ae77331d48ac4"
        },
        {
          "type": "external-docs",
          "summary": "prek — faster pre-commit alternative used for Panfactum's git hooks",
          "link": "https://github.com/j178/prek"
        }
      ],
      "impacts": [
        {
          "type": "devshell",
          "component": "git",
          "summary": "`prek` hooks now work correctly across multiple Git worktrees and reliably distinguish autofix runs from real errors"
        },
        {
          "type": "devshell",
          "component": "enter-shell-local",
          "summary": "Drops `-c` from `prek install` and unsets `core.hooksPath` so each worktree uses its own `.pre-commit-config.yaml`"
        }
      ]
    },
    {
      "id": "42092845-9963-48ed-83d1-2de41c294c14",
      "type": "fix",
      "summary": "Fixed `kube_ingress_nginx` pods failing to start during cluster bootstrap due to instance-type anti-affinity constraints",
      "description": "During cluster bootstrap, the autoscaler isn't running yet and only the EKS controller node group exists (single `t4g.large` instance type), but SLA-3 clusters require NGINX ingress pods on 3 different instance types via anti-affinity rules. This left ingress pods in `Pending` state indefinitely, blocking the entire inbound networking bootstrap process. The fix adds a `bootstrap_mode_enabled` parameter to `kube_ingress_nginx` to temporarily disable instance-type anti-affinity during initial deployment; the `cluster add` command automatically re-enables it once the cluster is fully operational and diverse node pools are available.",
      "references": [
        {
          "type": "internal-commit",
          "summary": "feat(kube_ingress_nginx): add bootstrap mode to skip instance anti-affinity",
          "link": "https://github.com/Panfactum/stack/commit/e6966a7265c57c351bf4bff88fa8b56cd6e1ce53"
        },
        {
          "type": "issue-report",
          "summary": "Pod Anti-Affinity prevents scale up, requires manual pod deletion",
          "link": "https://github.com/kubernetes/autoscaler/issues/5741"
        },
        {
          "type": "internal-docs",
          "summary": "kube_ingress_nginx module documentation",
          "link": "/docs/main/modules/kube_ingress_nginx/overview"
        }
      ],
      "impacts": [
        {
          "type": "iac-module",
          "component": "kube_ingress_nginx",
          "summary": "Adds `bootstrap_mode_enabled` input to temporarily bypass instance-type anti-affinity during cluster bootstrap"
        },
        {
          "type": "cli",
          "component": "cluster add",
          "summary": "Automatically sets `bootstrap_mode_enabled` on `kube_ingress_nginx` during bootstrap and clears it once the cluster is fully operational"
        }
      ]
    },
    {
      "id": "95948c3b-48e2-43fb-9662-d2f86f22b263",
      "type": "fix",
      "summary": "Fixed `kube_ingress_nginx` replica count calculation during bootstrap mode to prevent pod scheduling failures on clusters with SLA target >= 2.",
      "description": "When `sla_target` is >= 2, `kube_ingress_nginx` previously set `replicaCount` to 6. During bootstrap, however, only the EKS controller node group is present — typically 3 nodes of a single instance type — so 6 replicas could never all be scheduled, leaving the cluster stuck with Pending ingress pods and blocking the entire inbound networking setup. The fix introduces a check on `bootstrap_mode_enabled`: when bootstrap mode is active, `replicaCount` is forced to 3 regardless of `sla_target`, matching the node count actually available at that phase. Once bootstrap mode is disabled and Karpenter has provisioned diverse node pools, the module returns to the SLA-driven replica count.",
      "references": [
        {
          "type": "internal-commit",
          "summary": "fix(kube_ingress_nginx): use 3 replicas in bootstrap mode regardless of SLA",
          "link": "https://github.com/Panfactum/stack/commit/4f3f9e9c5c64e824747fea6f05d55efc1b93d716"
        },
        {
          "type": "internal-docs",
          "summary": "kube_ingress_nginx module reference",
          "link": "/docs/main/modules/kube_ingress_nginx/overview"
        }
      ],
      "impacts": [
        {
          "type": "iac-module",
          "component": "kube_ingress_nginx",
          "summary": "`bootstrap_mode_enabled` now caps `replicaCount` at 3, preventing scheduling failures on SLA >= 2 clusters during initial cluster deployment"
        }
      ]
    },
    {
      "id": "52f831fd-8f33-48c5-9921-f0be17a96537",
      "type": "fix",
      "summary": "Increased default `memory_mb` for `wf_dockerfile_build` workflow pods from 100MB to 150MB to prevent out-of-memory failures during Docker builds.",
      "description": "Docker BuildKit operations can consume significant memory during image builds, especially for larger applications or complex build contexts. The previous 100MB default was insufficient for many real-world builds, causing workflow pods to be killed by the `OOMKiller`. Increasing the default to 150MB provides adequate headroom while remaining resource-efficient for typical builds.",
      "references": [
        {
          "type": "internal-commit",
          "summary": "fix: bump memory on steps in wf_dockerfile_build",
          "link": "https://github.com/Panfactum/stack/commit/5665081a522208fccd77f18c21337d027a270c3b"
        },
        {
          "type": "internal-docs",
          "summary": "wf_dockerfile_build module overview",
          "link": "/docs/main/modules/wf_dockerfile_build"
        }
      ],
      "impacts": [
        {
          "type": "iac-module",
          "component": "wf_dockerfile_build",
          "summary": "Default `memory_mb` for workflow pods increased from 100MB to 150MB to handle typical Docker build memory requirements"
        }
      ]
    },
    {
      "id": "ab5cffb1-c309-4caa-b7da-fd3403ddd67b",
      "type": "addition",
      "summary": "`pf cluster add` gains a CloudFront account verification pre-flight check, preventing 30+ minutes of wasted deployment on unverified AWS accounts.",
      "description": "New AWS accounts are subject to a fraud-prevention hold that blocks all CloudFront resource creation until the user opens an AWS Support case. Without this check, `pf cluster add` would silently waste ~30 minutes deploying EC2, EKS, and Vault before failing deep in the install when modules like `kube_vault`, `kube_authentik`, `kube_grist`, `kube_nocodb`, and `kube_opensearch` attempt to create distributions. The check probes by creating (and immediately deleting) a CloudFront Origin Access Identity — gated by the same verification check as `CreateDistribution` but free and fast. If the account already has any distribution, the probe is skipped entirely as a verified signal. Errors are classified into three actionable buckets: account not yet verified (with step-by-step AWS Support case instructions), missing IAM permissions (with required actions listed), or non-actionable transient failures (debug-logged, install continues).",
      "references": [
        {
          "type": "internal-commit",
          "summary": "feat(cluster): add CloudFront account verification pre-flight check",
          "link": "https://github.com/Panfactum/stack/commit/895d099dcf643dce2fbd9ef5dbb4c58c4a5a8546"
        },
        {
          "type": "issue-report",
          "summary": "AWS CloudFront account verification requirement preventing resource creation",
          "link": "https://repost.aws/questions/QUHAzHD_-nSjiaAxMw7toQ3A/unable-to-create-cloudfront-distribution-account-must-be-verified"
        }
      ],
      "impacts": [
        {
          "type": "cli",
          "component": "cluster add",
          "summary": "New pre-flight check verifies the AWS account is permitted to create CloudFront distributions before any infrastructure is deployed, providing actionable guidance when the account is unverified or IAM permissions are insufficient"
        }
      ]
    },
    {
      "id": "68494dee-5f8e-487b-90c3-682502eaa7ee",
      "type": "improvement",
      "summary": "Replaced static `.pre-commit-config.yaml` with Nix-generated configuration for reproducible development environments.",
      "description": "Previously, the pre-commit configuration used PATH-resolved tool names that could differ across environments, causing inconsistencies in linting and formatting behavior. By generating the configuration from `lint.nix` with exact Nix store paths, all hook binaries are pinned to specific derivations for perfect reproducibility. The configuration is now written to the repo root on shell activation and automatically stays in sync with the devshell. Additionally, this includes new Nix code quality hooks (`deadnix`, `statix`) and improved hook ordering where generators/fixers run before formatters/linters. Dev scripts have also been renamed with a `ds-` prefix to avoid collisions in the shared PATH.",
      "references": [
        {
          "type": "internal-commit",
          "summary": "build(devshell): generate pre-commit config from Nix for reproducibility",
          "link": "https://github.com/Panfactum/stack/commit/c3fbf871e8db75a60787e81c0cf225381125b46d"
        },
        {
          "type": "internal-docs",
          "summary": "Development shell customization guide",
          "link": "/docs/main/guides/development-shell/customizing"
        }
      ],
      "impacts": [
        {
          "type": "devshell",
          "component": "git",
          "summary": "Pre-commit configuration is now Nix-generated from `lint.nix` with exact tool paths for reproducible linting and formatting; also adds `deadnix` and `statix` Nix quality hooks and renames dev scripts with a `ds-` prefix to avoid PATH collisions"
        }
      ]
    },
    {
      "id": "a09371a0-7230-4d62-b29b-428b10df4889",
      "type": "fix",
      "summary": "Fixed CLI template generation for `kube_linkerd` and removed obsolete certificate manager templates.",
      "description": "The `kube_linkerd` template had a duplicate dependency on `kube_certificates` (once as `cert_manager` with `skip_outputs` and once as `cert_issuers`), causing ambiguous dependency names that didn't match the actual module. This fix removes the redundant dependency and renames `cert_issuers` to `kube_certificates` for clarity. Additionally, removes the deprecated `kube_cert_manager` and `kube_cert_issuers` template files since these modules have been consolidated into the unified `kube_certificates` module.",
      "references": [
        {
          "type": "internal-commit",
          "summary": "fix(templates): fix linkerd cert deps and add cert module templates",
          "link": "https://github.com/Panfactum/stack/commit/e25a952b7916898c077dcb704db6604cc07079c7"
        },
        {
          "type": "internal-docs",
          "summary": "kube_linkerd module documentation",
          "link": "/docs/main/modules/kube_linkerd"
        },
        {
          "type": "internal-docs",
          "summary": "kube_certificates module documentation",
          "link": "/docs/main/modules/kube_certificates"
        }
      ],
      "impacts": [
        {
          "type": "cli",
          "component": "cluster add",
          "summary": "`kube_linkerd` template generation now uses the correct `kube_certificates` dependency name and obsolete `kube_cert_manager` and `kube_cert_issuers` templates are removed."
        }
      ]
    },
    {
      "id": "e3db78f5-983e-4279-b78a-b636e2b20b9e",
      "type": "addition",
      "summary": "New `pf cluster reset` command safely resets EKS clusters; `--force` flag skips protection of already-deployed Panfactum-managed workloads.",
      "description": "Exposes cluster reset functionality as a first-class CLI command so operators can re-initialize EKS clusters without invoking internal APIs directly. The `--force` flag guards against a footgun: when resuming a failed installation, `clusterReset` would previously delete Panfactum-managed resources (those carrying the `panfactum.com/workload` label) that were already successfully deployed. Without `--force`, those resources are now skipped, making the reset safe to re-run mid-install. The `--force` flag is only needed for a complete re-installation from scratch.",
      "references": [
        {
          "type": "internal-commit",
          "summary": "feat(cluster): add `pf cluster reset` command with --force flag",
          "link": "https://github.com/Panfactum/stack/commit/cfdc6a1d55b023f8f057fd0135fa8876ba028974"
        },
        {
          "type": "internal-docs",
          "summary": "Kubernetes Cluster bootstrapping guide — Reset EKS Cluster section",
          "link": "/docs/main/guides/bootstrapping/kubernetes-cluster"
        }
      ],
      "impacts": [
        {
          "type": "cli",
          "component": "cluster reset",
          "summary": "New command removes default AWS EKS addons and resources in preparation for Panfactum's hardened replacements; skips resources labeled `panfactum.com/workload` by default to protect already-deployed Panfactum workloads during resumed installations"
        }
      ]
    },
    {
      "id": "836e7992-2125-453c-8ed6-207b5c60baff",
      "type": "fix",
      "summary": "Fixed `kube_aws_lb_controller` webhooks to prevent an unrecoverable bootstrap deadlock by restricting `serviceMutatorWebhook` and `ingressValidationWebhook` scope to `loadbalancer/enabled` namespaces only.",
      "description": "Previously, the `serviceMutatorWebhook` and `ingressValidationWebhook` applied cluster-wide with `failurePolicy: Fail`. When the controller became unavailable due to DNS failures during cluster bootstrap, these webhooks would block `Service` and `Ingress` creation in `kube-system` (including the `kube-dns` service), creating an unrecoverable circular dependency. Both webhooks are now restricted to namespaces labeled `loadbalancer/enabled=true`, matching the existing `webhookNamespaceSelector` policy already in place. This aligns Panfactum with an upstream fix that added `serviceMutatorWebhookConfig.namespaceSelectors` support to the official Helm chart.",
      "references": [
        {
          "type": "internal-commit",
          "summary": "fix(aws-lb-controller): restrict webhooks to lb-enabled namespaces only",
          "link": "https://github.com/Panfactum/stack/commit/14c87e0f28a12809fe103b1a91ad2655fd293833"
        },
        {
          "type": "issue-report",
          "summary": "AWS Load Balancer Controller webhook deadlock preventing resource creation",
          "link": "https://github.com/kubernetes-sigs/aws-load-balancer-controller/issues/4140"
        },
        {
          "type": "issue-report",
          "summary": "fix: Allowing namespace selectors for mservice webhook (upstream PR #4646)",
          "link": "https://github.com/kubernetes-sigs/aws-load-balancer-controller/pull/4646"
        },
        {
          "type": "internal-docs",
          "summary": "kube_aws_lb_controller module documentation",
          "link": "/modules/kube_aws_lb_controller/overview"
        }
      ],
      "impacts": [
        {
          "type": "iac-module",
          "component": "kube_aws_lb_controller",
          "summary": "`serviceMutatorWebhook` and `ingressValidationWebhook` are now scoped to `loadbalancer/enabled` namespaces only, preventing `kube-system` service creation deadlocks during cluster bootstrap"
        }
      ]
    },
    {
      "id": "290a2d7a-c3e7-4996-832e-ba0ec78cfec1",
      "type": "fix",
      "summary": "Fixed Cilium pods failing to start on nodes during Karpenter disruption by tolerating both `karpenter.sh/disruption` and `karpenter.sh/disrupted` taints.",
      "description": "Karpenter uses two distinct taints during node lifecycle management: `karpenter.sh/disruption` is applied early in the disruption process to signal that a node is being considered for removal, while `karpenter.sh/disrupted` is applied to nodes that are actively undergoing disruption and draining. `kube_cilium` already tolerated `karpenter.sh/disruption` but was missing the toleration for `karpenter.sh/disrupted`. Without it, Cilium pods could be evicted from actively draining nodes at exactly the moment cluster networking is needed to gracefully migrate workloads, causing a networking outage during node lifecycle events such as consolidation, drift, or expiration.",
      "references": [
        {
          "type": "internal-commit",
          "summary": "fix(kube_cilium): tolerate karpenter.sh/disrupted taint on cilium pods",
          "link": "https://github.com/Panfactum/stack/commit/f083d4346bd5baf413f140b2b57d177cc4225e11"
        },
        {
          "type": "issue-report",
          "summary": "Deadlock: disrupted taint blocks DaemonSets when do-not-disrupt prevents node disruption",
          "link": "https://github.com/kubernetes-sigs/karpenter/issues/2704"
        },
        {
          "type": "external-docs",
          "summary": "Karpenter disruption lifecycle documentation",
          "link": "https://karpenter.sh/docs/concepts/disruption/"
        },
        {
          "type": "internal-docs",
          "summary": "kube_cilium module documentation",
          "link": "/modules/kube_cilium/overview"
        }
      ],
      "impacts": [
        {
          "type": "iac-module",
          "component": "kube_cilium",
          "summary": "Cilium pods now tolerate the `karpenter.sh/disrupted` taint to prevent networking outages during active node draining"
        }
      ]
    },
    {
      "id": "fa15b91c-8eda-4a11-95fe-62078721886b",
      "type": "improvement",
      "summary": "Removed overly prescriptive Vault recovery key storage guidance to allow teams to follow their own security policies",
      "description": "Previously, the `cluster add` success message advised against storing Vault recovery keys in a company password vault, which contradicts common enterprise security practices. This guidance was removed to let organizations follow their own established security policies for root access keys without the CLI steering them toward potentially unsafe alternatives. The warning about storing keys from `recovery.yaml` securely is preserved; only the specific prohibition against company password vaults was removed.",
      "references": [
        {
          "type": "internal-commit",
          "summary": "fix(cluster/add): remove overly prescriptive Vault key storage guidance",
          "link": "https://github.com/Panfactum/stack/commit/91f0bc927c03b1a37e3b9e873e43b66425115d5d"
        },
        {
          "type": "internal-docs",
          "summary": "Vault bootstrapping guide covering recovery key setup and storage decisions",
          "link": "/docs/main/guides/bootstrapping/vault"
        }
      ],
      "impacts": [
        {
          "type": "cli",
          "component": "cluster add",
          "summary": "Success message no longer prohibits storing Vault recovery keys in company password vaults"
        }
      ]
    },
    {
      "id": "59736296-2c0f-4bac-b8c7-a849ad0a486e",
      "type": "improvement",
      "summary": "`sso add` now automates Authentik user setup entirely via API, eliminating the two-browser-tab workflow and enabling fully headless deployment.",
      "description": "Previously, the `sso add` command required manual browser interaction at two points: opening a password-recovery\nlink in one tab and copy-pasting an API token from the Authentik UI in another. This was fragile, difficult to\nreproduce, and completely incompatible with CI or headless environments.\n\nThe rewrite replaces both manual steps with direct Authentik API calls:\n\n1. `coreUsersSetPasswordCreate` is called with a `crypto.randomBytes`-generated password, bypassing the recovery-link flow.\n2. `coreTokensCreate` creates a stable token under the identifier `local-pf-installer-token`, and `coreTokensViewKeyRetrieve`\n   fetches the plaintext key automatically — no user paste required.\n3. The new token is verified via `coreUsersList` before the bootstrap token is deleted, preventing lockout if\n   token creation fails partway through.\n4. The installer API token is now stored in `global.user.yaml` (gitignored, user-local) instead of the\n   region-scoped `region.secrets.yaml`, keeping credentials out of environment state.\n5. A new `getAuthentikBootstrapToken()` utility deduplicates bootstrap-token retrieval across multiple tasks.\n\nThe result is a fully automated SSO deployment path that works in CI pipelines and headless servers while\nmaintaining the same security guarantees.\n",
      "references": [
        {
          "type": "internal-commit",
          "summary": "feat(authentik): automate user setup without browser interaction",
          "link": "https://github.com/Panfactum/stack/commit/a4626526d92176e71713c13835045616c0efeb59"
        },
        {
          "type": "external-docs",
          "summary": "Authentik API - Set password for user (coreUsersSetPasswordCreate)",
          "link": "https://docs.goauthentik.io/docs/developer-docs/api/reference/core-users-set-password-create"
        },
        {
          "type": "external-docs",
          "summary": "Authentik API - Token management (coreTokensCreate / coreTokensViewKeyRetrieve)",
          "link": "https://docs.goauthentik.io/docs/developer-docs/api/reference/core-tokens-create"
        },
        {
          "type": "internal-docs",
          "summary": "kube_authentik module reference",
          "link": "/docs/main/modules/kube_authentik"
        }
      ],
      "impacts": [
        {
          "type": "cli",
          "component": "sso add",
          "summary": "Authentik user setup is now fully automated via API — no browser tabs, no manual token paste, and no incompatibility with headless or CI environments."
        }
      ]
    },
    {
      "id": "04c2ccfb-b18a-4dc7-ba71-346a21392653",
      "type": "fix",
      "summary": "Fixed `authentik_core_resources` deployment failing due to missing `AUTHENTIK_TOKEN` environment variable during `sso add` setup.",
      "description": "The `authentik_core_resources` module uses the Authentik Terraform provider, which requires the `AUTHENTIK_TOKEN` environment variable for authentication. During the `sso add` flow, the bootstrap token was retrieved earlier in the process but never forwarded to the module deployment step, causing the Terraform provider to fail authentication silently — leaving users with a broken SSO setup and no clear error message. The token is now correctly passed into the deployment environment so `panfactum.hcl` can supply it to the provider via `get_env(\"AUTHENTIK_TOKEN\")`.",
      "references": [
        {
          "type": "internal-commit",
          "summary": "fix(sso): forward AUTHENTIK_TOKEN to core resources deployment",
          "link": "https://github.com/Panfactum/stack/commit/2adceae0c0722bfc3973d2a33fed5e84fed4a239"
        },
        {
          "type": "internal-docs",
          "summary": "authentik_core_resources module reference",
          "link": "/docs/main/modules/authentik_core_resources/overview"
        }
      ],
      "impacts": [
        {
          "type": "cli",
          "component": "sso add",
          "summary": "`authentik_core_resources` deployment step now properly forwards the authentication token, preventing silent Terraform provider failures during SSO setup"
        }
      ]
    },
    {
      "id": "27fecb51-a22e-4cbd-8553-6c68e1441973",
      "type": "improvement",
      "summary": "Streamlined SSO setup by removing unnecessary bootstrap admin email prompt and hardcoding it based on SSO domain.",
      "description": "The bootstrap `akadmin` user created by Authentik is immediately disabled after setup completes, so prompting users for its email address provided no real benefit while adding unnecessary friction to the setup process. The prompt and associated `authentikRootEmail` context field have been removed, with the `akadmin` email now deterministically generated as `akadmin@<sso_subdomain>.<ancestor_domain>` directly in the deployment task. This reduces user interaction requirements while maintaining the same functional outcome.",
      "references": [
        {
          "type": "internal-commit",
          "summary": "refactor(sso): hardcode akadmin email, remove unnecessary prompt",
          "link": "https://github.com/Panfactum/stack/commit/900fd7f3dc3bd611be5d4967a1a2ed81d92fed28"
        },
        {
          "type": "internal-docs",
          "summary": "Identity provider bootstrapping guide — akadmin_email and akadmin user setup",
          "link": "/docs/main/guides/bootstrapping/identity-provider"
        },
        {
          "type": "internal-docs",
          "summary": "kube_authentik module documentation",
          "link": "/docs/main/modules/kube_authentik/overview"
        }
      ],
      "impacts": [
        {
          "type": "cli",
          "component": "sso add",
          "summary": "Removes `akadmin_email` prompt and hardcodes the bootstrap admin email based on SSO domain configuration"
        }
      ]
    },
    {
      "id": "2f0a5c89-1e91-4736-b40f-55c3ef136350",
      "type": "fix",
      "summary": "Fixed fragile skip condition for disabling default Authentik resources in `sso add` command",
      "description": "The `Disabling default Authentik resources` task was incorrectly skipped whenever `authentik_url` was present in config, regardless of whether the brand-disabling step had actually run. This caused silent skips when the URL was set by any other means, leaving the default Authentik brand enabled. Removed the skip condition and added a guard so the brand update API call only fires when the brand's `_default` field is still `true`, making the step idempotent without relying on the fragile completion marker. This improves the reliability of Authentik SSO setup by ensuring proper brand configuration occurs consistently.",
      "references": [
        {
          "type": "internal-commit",
          "summary": "fix(sso): fix fragile skip for disabling default Authentik resources",
          "link": "https://github.com/Panfactum/stack/commit/84d74a048c1a183fbf1648297c441fa77b266435"
        },
        {
          "type": "external-docs",
          "summary": "Authentik Identity Provider Documentation",
          "link": "https://docs.goauthentik.io/"
        },
        {
          "type": "internal-docs",
          "summary": "kube_authentik module documentation",
          "link": "/docs/main/modules/kube_authentik/overview"
        }
      ],
      "impacts": [
        {
          "type": "cli",
          "component": "sso add",
          "summary": "Default Authentik resources disabling step is now properly idempotent and won't be silently skipped inappropriately"
        }
      ]
    },
    {
      "id": "cafe2ba3-5f03-4a9c-baf6-22fceab75ef0",
      "type": "addition",
      "summary": "Added automated AWS SES production access request during `pf sso add` command to streamline email functionality setup",
      "description": "AWS SES production access cannot be requested via Terraform since no provider resource exists and manual requests through the console required additional context about email usage. The CLI automation uses context already collected during SSO setup to submit the request with pre-filled values: transactional mail type, Authentik URL as the website, and organization-specific use case description. The task polls `GetAccountCommand` to handle different states (already enabled, pending review, denied, or needs request) and surfaces AWS Support case IDs for tracking. Post-task messaging provides clear guidance about review timelines and email sending limitations until approval.",
      "references": [
        {
          "type": "internal-commit",
          "summary": "feat(sso): automate SES sandbox-to-production promotion",
          "link": "https://github.com/Panfactum/stack/commit/668538efd8dcb78d88b5317f3c31a5d71a936ee2"
        },
        {
          "type": "external-docs",
          "summary": "AWS SES production access request documentation",
          "link": "https://docs.aws.amazon.com/ses/latest/dg/request-production-access.html"
        }
      ],
      "impacts": [
        {
          "type": "cli",
          "component": "sso add",
          "summary": "Now automatically requests AWS SES production access and provides status updates on the review process"
        }
      ]
    },
    {
      "id": "2edfead6-cd09-4c40-b1de-d097b74dee06",
      "type": "fix",
      "summary": "Fixed `sso add` command to throw actionable error messages when Authentik domain is missing from module configuration",
      "description": "Previously, when the `kube_authentik` module configuration was missing or had malformed domain settings, the SSO setup would produce cryptic schema validation errors or silently return undefined, leaving users without clear guidance on how to fix the issue. This fix improves error handling by making the domain field optional in the schema to survive partial configs, then throwing a targeted CLIError with the exact configuration path and remediation steps when the domain cannot be resolved. Users now receive a clear error message that specifically identifies the missing configuration and provides actionable remediation steps.",
      "references": [
        {
          "type": "internal-commit",
          "summary": "fix(sso): throw actionable error when authentik domain is missing",
          "link": "https://github.com/Panfactum/stack/commit/7901a721ce816bd99f2f5eadb488a8d8e9f6a826"
        },
        {
          "type": "internal-docs",
          "summary": "kube_authentik module reference documentation",
          "link": "/modules/kube_authentik/reference"
        }
      ],
      "impacts": [
        {
          "type": "cli",
          "component": "sso add",
          "summary": "Now provides clear error messages and remediation guidance when `kube_authentik` domain configuration is missing or malformed"
        }
      ]
    },
    {
      "id": "100d2aff-ec58-4daa-aada-e4785076d6b3",
      "type": "improvement",
      "summary": "Enhanced `sso add` command with inline validation for confirmation prompts and automatic IAM Identity Center verification",
      "description": "Users needed explicit instructions to enable AWS IAM Identity Center before the portal URL step, since the workflow previously jumped straight to URL configuration without verifying the service was active. All confirm prompts in the SSO add flow would throw a `CLIError` on a \"no\" answer, crashing the task with no opportunity to retry. Inline validation provides a better UX by keeping the user in the prompt loop. Added a pre-flight step that calls `ListInstances` to verify IAM Identity Center is active; if it is not, the step guides users to enable it via a direct console link and loops until the API confirms the service is active. Replaced all `confirm → if (!confirmed) throw CLIError` patterns with a `validate` callback that re-displays the prompt inline on a \"no\" answer, matching the UX of `@inquirer/input`.",
      "references": [
        {
          "type": "internal-commit",
          "summary": "feat(sso): add inline confirm validation to SSO federated auth setup",
          "link": "https://github.com/Panfactum/stack/commit/7ddf20ded72f5807243954fd43b9e4046aad84c9"
        },
        {
          "type": "external-docs",
          "summary": "AWS IAM Identity Center ListInstances API Documentation",
          "link": "https://docs.aws.amazon.com/singlesignon/latest/APIReference/API_ListInstances.html"
        },
        {
          "type": "external-docs",
          "summary": "Enable IAM Identity Center - AWS Documentation",
          "link": "https://docs.aws.amazon.com/singlesignon/latest/userguide/enable-identity-center.html"
        }
      ],
      "impacts": [
        {
          "type": "cli",
          "component": "sso add",
          "summary": "Enhanced confirmation prompts with inline validation and added automatic IAM Identity Center verification"
        }
      ]
    },
    {
      "id": "f32b0d83-64d6-4cb9-a4f4-52fb0ce73e2f",
      "type": "improvement",
      "summary": "Automated SAML metadata download in `sso add` command to eliminate manual Authentik UI navigation steps",
      "description": "Eliminates a manual step where users had to download the SAML metadata XML from the Authentik Web UI during SSO setup. The system now automatically retrieves the metadata using the `saml_metadata` Terraform output from the deployed `authentik_aws_sso` module and saves it directly to disk. Users are now shown the saved file path for the AWS upload step instead of being prompted to manually navigate and download from the Authentik admin interface. This improvement reduces setup complexity and eliminates potential user errors during the federated authentication configuration process.",
      "references": [
        {
          "type": "internal-commit",
          "summary": "feat(sso): automate SAML metadata download in SSO setup",
          "link": "https://github.com/Panfactum/stack/commit/d686a88e72714378a164dfcaf24b06102bf048a4"
        },
        {
          "type": "internal-docs",
          "summary": "authentik_aws_sso module documentation",
          "link": "/docs/main/reference/infrastructure-modules/authentik_aws_sso"
        }
      ],
      "impacts": [
        {
          "type": "cli",
          "component": "sso add",
          "summary": "No longer requires manual metadata download from Authentik UI"
        },
        {
          "type": "iac-module",
          "component": "authentik_aws_sso",
          "summary": "SAML metadata output now automatically consumed during SSO setup"
        }
      ]
    },
    {
      "id": "f8f7746c-256a-4add-a823-68a4b62661de",
      "type": "fix",
      "summary": "Fixed Docker Hub PAT validation in `cluster enable` ECR setup to use proper registry token endpoint",
      "description": "Docker deprecated using a PAT directly as a Bearer token to hub.docker.com, which now returns 403 with no clear error message. The old validation flow was therefore always rejecting valid tokens during ECR setup. Switched to the proper Docker registry token flow (GET auth.docker.io/token with Basic Auth), which is the same authentication flow that ECR pull-through cache uses internally. This eliminates false validation failures and ensures the ECR setup works reliably with current Docker Hub token formats.",
      "references": [
        {
          "type": "internal-commit",
          "summary": "fix(cli): fix Docker Hub PAT validation and tsc in pre-commit hook",
          "link": "https://github.com/Panfactum/stack/commit/eeed85350ada15d10ca67eb32e91a6317e65cdf0"
        },
        {
          "type": "external-docs",
          "summary": "Docker registry authentication flow documentation",
          "link": "https://docs.docker.com/reference/api/registry/auth/"
        },
        {
          "type": "external-docs",
          "summary": "Docker Hub personal access tokens guide",
          "link": "https://docs.docker.com/security/access-tokens/"
        },
        {
          "type": "internal-docs",
          "summary": "Kubernetes cluster bootstrapping guide covering Docker Hub credentials",
          "link": "/docs/main/guides/bootstrapping/kubernetes-cluster"
        }
      ],
      "impacts": [
        {
          "type": "cli",
          "component": "cluster enable",
          "summary": "No longer incorrectly rejects valid Docker Hub PATs during ECR pull-through cache setup"
        }
      ]
    },
    {
      "id": "c98f3e13-889e-449d-bf76-60d89007cb54",
      "type": "improvement",
      "summary": "Enhanced CLI build configuration with structured options to support smaller Docker image builds",
      "description": "Replaced the boolean `withPFCLI` parameter with a structured `cli` configuration object that allows callers to specify both whether the CLI should be included and whether to use Bun's `--smol` compilation mode. The Docker image now builds with `smol = true` to produce smaller binaries and reduce image size, while development shells default to the full binary for optimal performance. This change makes the build system more extensible for future CLI build options and enables teams to optimize their container images for size when deploying the Panfactum toolkit.",
      "references": [
        {
          "type": "internal-commit",
          "summary": "refactor(cli): replace withPFCLI bool with structured cli config",
          "link": "https://github.com/Panfactum/stack/commit/2c27bbd2eb8071b414ce97c8ae432b455604fb42"
        },
        {
          "type": "external-docs",
          "summary": "Bun --smol compilation mode documentation",
          "link": "https://bun.sh/docs/bundler/executables"
        },
        {
          "type": "internal-docs",
          "summary": "mkDevShell customization guide",
          "link": "/docs/main/guides/development-shell/customizing"
        }
      ],
      "impacts": [
        {
          "type": "devshell",
          "component": "pf",
          "summary": "Now supports configurable CLI build options including smaller binary compilation mode"
        },
        {
          "type": "configuration",
          "component": "flake.nix",
          "summary": "Updated to use structured `cli` configuration instead of boolean parameter"
        }
      ]
    },
    {
      "id": "970a1157-71b9-44cb-9845-693c4b11fa61",
      "type": "improvement",
      "summary": "Enhanced Terragrunt backend bootstrapping to be explicit and compatible with Terragrunt >= 0.87.0",
      "description": "Previously, Terragrunt auto-provisioned S3 and DynamoDB during init, performing infrastructure side-effects during what appeared to be a read-only operation. This obscured the critical bootstrapping dependency in `pf env add` where the `tf_bootstrap_resources` module creates the state bucket but its own init relies on that bucket existing. Additionally, Terragrunt >= 0.87.0 disables auto-bootstrap by default, which would cause silent failures on version upgrades. The enhancement adds explicit backend bootstrap operations and fail-fast validation to make the dependency clear and prevent silent failures. This change ensures forward compatibility with newer Terragrunt versions while making infrastructure bootstrapping operations more transparent and predictable.",
      "references": [
        {
          "type": "internal-commit",
          "summary": "feat(terragrunt): make backend bootstrap explicit, require pre-existing",
          "link": "https://github.com/Panfactum/stack/commit/98330d5cea30ddcbd554e32b947184ff50bd2810"
        },
        {
          "type": "external-docs",
          "summary": "Terragrunt v0.87.0 release notes - Breaking change: Automatic Backend Provisioning Removed",
          "link": "https://github.com/gruntwork-io/terragrunt/releases/tag/v0.87.0"
        },
        {
          "type": "internal-docs",
          "summary": "tf_bootstrap_resources module documentation",
          "link": "/docs/main/modules/tf_bootstrap_resources/overview"
        }
      ],
      "impacts": [
        {
          "type": "cli",
          "component": "env add",
          "summary": "Backend bootstrapping dependency is now explicit and self-documenting"
        },
        {
          "type": "installer",
          "component": "install.sh",
          "summary": "Environment setup now explicitly handles Terragrunt backend initialization"
        },
        {
          "type": "iac-module",
          "component": "tf_bootstrap_resources",
          "summary": "Module initialization now uses explicit backend bootstrap to resolve circular dependency"
        }
      ]
    },
    {
      "id": "48f6b5cb-b12b-4d91-bc56-7d8646f403e5",
      "type": "improvement",
      "summary": "Increased minimum PostgreSQL memory allocation in Authentik deployment from 500MB to 1GB for better performance and stability",
      "description": "The previous 500MB floor was insufficient for normal Authentik workloads and could cause memory pressure issues affecting authentication service reliability. This change provides adequate memory headroom for the PostgreSQL instance supporting Authentik operations, addressing known issues with recent Authentik versions that require higher memory allocation for stable database performance.",
      "references": [
        {
          "type": "internal-commit",
          "summary": "perf(kube_authentik): increase Authentik postgres min memory to 1GB",
          "link": "https://github.com/Panfactum/stack/commit/d243b6130426aa2b62bf65b8f40f175f8a7be5fd"
        },
        {
          "type": "internal-docs",
          "summary": "kube_authentik module documentation",
          "link": "/docs/main/modules/kube_authentik"
        },
        {
          "type": "issue-report",
          "summary": "Significant PostgreSQL DB load on Authentik 2025.10.x causing issues",
          "link": "https://github.com/goauthentik/authentik/issues/19302"
        },
        {
          "type": "issue-report",
          "summary": "2026.2 worker memory usage doubled vs 2025.12 (~500Mi → ~1020Mi)",
          "link": "https://github.com/goauthentik/authentik/issues/20537"
        }
      ],
      "impacts": [
        {
          "type": "iac-module",
          "component": "kube_authentik",
          "summary": "PostgreSQL minimum memory increased from 500MB to 1GB"
        }
      ]
    },
    {
      "id": "efbc7606-942f-4976-a806-5a531cde59eb",
      "type": "fix",
      "summary": "Fixed `kube_policies` ECR pull-through cache to route both `docker.io` and `index.docker.io` registry hostnames to the Docker Hub cache",
      "description": "The Kyverno mutating policy that rewrites image registries to the ECR pull-through cache only handled `docker.io`, not `index.docker.io` — the canonical Docker Hub hostname used by Docker's registry API. Images referenced with `index.docker.io` (such as those in `kube_temporal`) bypassed the cache entirely, causing direct Docker Hub pulls and potential rate-limiting issues. This fix ensures both hostnames are consistently routed through the pull-through cache.",
      "references": [
        {
          "type": "internal-commit",
          "summary": "fix(kube_policies): route index.docker.io through ECR pull-through cache",
          "link": "https://github.com/Panfactum/stack/commit/8ee8165a8db8ed21096e14df04cfecde0312898d"
        },
        {
          "type": "external-docs",
          "summary": "Docker Hub Registry Hostnames and API Endpoints",
          "link": "https://stackoverflow.com/questions/34198392/docker-official-registry-docker-hub-url"
        },
        {
          "type": "external-docs",
          "summary": "Kyverno Policy Examples for Registry Rewriting",
          "link": "https://blog.oponomarov.com/posts/rewriting-docker-image-registries-with-kyverno/"
        }
      ],
      "impacts": [
        {
          "type": "iac-module",
          "component": "kube_policies",
          "summary": "Added index.docker.io → docker-hub/ mapping to registry_replacements"
        }
      ]
    },
    {
      "id": "a0921450-11d2-45a4-a159-bdfb677585b2",
      "type": "addition",
      "summary": "Added configurable pre-commit hooks (`tf-fmt` and `hcl-fmt`) to `mkDevShell` with automatic merge support for custom user hooks",
      "description": "Devshells now include opt-in pre-commit hooks for Terraform and HCL formatting using `prek` (faster pre-commit replacement). The `mkDevShell` function accepts a `precommit` parameter with configurable `enable`, `tf_fmt`, and `hcl_fmt` boolean options (all default to true). At shell activation, Panfactum generates `.pre-commit-config.yaml` by merging its hooks with any user-defined hooks from `.pre-commit-config.custom.yaml`, preserving user configurations while adding automated infrastructure formatting checks. The generated config is automatically gitignored to avoid conflicts.",
      "references": [
        {
          "type": "internal-commit",
          "summary": "feat(devshell): add tf-fmt and hcl-fmt pre-commit hooks to mkDevShell",
          "link": "https://github.com/Panfactum/stack/commit/6cf4816914e8cd69e2bfb01749e6402d3c21fd91"
        },
        {
          "type": "external-docs",
          "summary": "prek - Faster pre-commit alternative built in Rust",
          "link": "https://github.com/j178/prek"
        },
        {
          "type": "internal-docs",
          "summary": "Development Shell Customization Guide",
          "link": "/docs/main/guides/development-shell/customizing"
        }
      ],
      "impacts": [
        {
          "type": "devshell",
          "component": "enter-shell-local",
          "summary": "Added configurable pre-commit hooks for infrastructure formatting with automatic merge of custom user configurations"
        },
        {
          "type": "cli",
          "component": "precommit install",
          "summary": "Added `precommit install` command to merge Panfactum and user-defined pre-commit configurations"
        }
      ]
    },
    {
      "id": "f93a6395-49e3-4e9b-b6ca-b646fa90d25f",
      "type": "fix",
      "summary": "Fixed EBS CSI controller `PodDisruptionBudget` failing to update during Helm upgrades due to race condition with other controllers",
      "description": "Helm's atomic upgrade used optimistic concurrency (`resourceVersion`) when patching the `ebs-csi-controller` `PodDisruptionBudget`. If `Kyverno`, `VPA`, or any other controller mutated the `PDB` between Helm's read and write, Kubernetes rejected the update with a `409` conflict, rolling back the entire release intermittently. This caused EBS CSI driver upgrades to fail unpredictably when multiple controllers were active on the cluster. The fix replaces Helm's `PDB` management with a `kubectl_manifest` resource using server-side apply, which uses field ownership rather than `resourceVersion` and is immune to this race condition. This ensures reliable cluster storage upgrades in environments with active policy engines and resource controllers.",
      "action_items": [
        "Re-apply the `kube_aws_ebs_csi` module to update the PDB management configuration"
      ],
      "references": [
        {
          "type": "internal-commit",
          "summary": "fix(kube_aws_ebs_csi): use SSA for EBS CSI controller PDB to avoid 409s",
          "link": "https://github.com/Panfactum/stack/commit/be0b6ea7c643b3efd786a98c78a9eafac2e26aa3"
        },
        {
          "type": "external-docs",
          "summary": "Kubernetes Server-Side Apply documentation",
          "link": "https://kubernetes.io/docs/reference/using-api/server-side-apply/"
        },
        {
          "type": "issue-report",
          "summary": "AWS EBS CSI driver race condition issue in volume creation",
          "link": "https://github.com/kubernetes-sigs/aws-ebs-csi-driver/issues/1951"
        }
      ],
      "impacts": [
        {
          "type": "iac-module",
          "component": "kube_aws_ebs_csi",
          "summary": "Disabled Helm-managed `PodDisruptionBudget` and replaced with `kubectl_manifest` using server-side apply to prevent upgrade conflicts"
        }
      ]
    },
    {
      "id": "97a4b9c7-9b92-4f81-92fd-b356a19988fd",
      "type": "improvement",
      "summary": "Removed PostHog analytics telemetry from the CLI to eliminate user tracking and reduce dependency surface area",
      "description": "The CLI previously collected anonymous usage analytics through PostHog, including command executions and user identification. This telemetry has been completely removed to respect user privacy and eliminate third-party data egress entirely. This change improves security posture by eliminating external data collection and ensures the CLI operates with full local autonomy. Additionally, removing the `posthog-node` dependency and its transitive dependencies (`axios`, `follow-redirects`, `proxy-from-env`) reduces the overall dependency footprint and potential attack surface.",
      "references": [
        {
          "type": "internal-commit",
          "summary": "feat(cli): remove PostHog analytics telemetry from CLI",
          "link": "https://github.com/Panfactum/stack/commit/951c8a1f552d7bc61f01a895ad3779a155a816c9"
        },
        {
          "type": "external-docs",
          "summary": "PostHog analytics platform website",
          "link": "https://posthog.com/"
        }
      ],
      "impacts": [
        {
          "type": "devshell",
          "component": "pf",
          "summary": "Removed all PostHog analytics tracking calls and dependencies"
        }
      ]
    },
    {
      "id": "a2374748-de42-4e88-b8b3-5470fb411ab7",
      "type": "fix",
      "summary": "Fixed `pf buildkit suspend` command crashing when running inside its Kubernetes pod due to git repository lookup failures",
      "description": "The `pf buildkit suspend` command was failing when executing inside its Kubernetes pod environment because the CLI startup routine attempts to locate the git repository root via `git rev-parse --show-toplevel`, which fails in containerized environments where no git repository is present. This fix enables BuildKit scaling operations to function correctly in containerized contexts, ensuring proper cost optimization capabilities when BuildKit pods need to scale themselves down during periods of inactivity. The change aligns the command with its sibling `pf buildkit resume` which already handled this scenario correctly.",
      "references": [
        {
          "type": "internal-commit",
          "summary": "fix(buildkit/suspend): prevent git lookup crash when running in-cluster",
          "link": "https://github.com/Panfactum/stack/commit/ce7399b51922537b02f7951a5d5052724c3a5dae"
        },
        {
          "type": "internal-docs",
          "summary": "BuildKit concepts and architecture",
          "link": "/docs/main/concepts/buildkit"
        }
      ],
      "impacts": [
        {
          "type": "cli",
          "component": "buildkit suspend",
          "summary": "Fixed crash when running in containerized environments, enabling proper cost optimization scaling"
        }
      ]
    },
    {
      "id": "e9c20b14-8e10-441b-9745-b7a81361e4bd",
      "type": "addition",
      "summary": "Added AWS Service Quotas Automatic Management to `aws_account` module for proactive quota monitoring and automatic increase requests",
      "description": "AWS Service Quotas are one of the most common causes of unexpected production incidents. The previous approach was purely reactive: operators only learned a quota mattered after a failed Terraform apply, EKS scale-out, or workload error. This enhancement opts every account into AWS Service Quotas Automatic Management with `NotifyAndAdjust` as the default, so AWS proactively monitors utilization and submits increase requests before limits become an incident. When `operations_contact` is set, a User Notifications pipeline routes `SERVICEQUOTAS` threshold events to the ops email address. Auto-management still runs without a contact configured — notifications are additive.",
      "action_items": [
        "Review new module inputs: `quota_auto_management_opt_in_type`, `quota_auto_management_regions`, and `quota_auto_management_exclusion_list`",
        "Configure `operations_contact` if you want quota threshold notifications sent to your operations team",
        "Optionally customize the `quota_auto_management_regions` list to target specific AWS regions"
      ],
      "references": [
        {
          "type": "internal-commit",
          "summary": "feat(aws_account): enable Service Quotas Automatic Management",
          "link": "https://github.com/Panfactum/stack/commit/cef868ff0510bf116b0bac5c15c504b89cc39d06"
        },
        {
          "type": "internal-docs",
          "summary": "aws_account module reference",
          "link": "/modules/aws_account/reference"
        },
        {
          "type": "external-docs",
          "summary": "AWS Service Quotas Automatic Management Documentation",
          "link": "https://docs.aws.amazon.com/servicequotas/latest/userguide/automatic-management.html"
        },
        {
          "type": "external-commit",
          "summary": "AWS Provider v6.40.0 release with aws_servicequotas_auto_management resource",
          "link": "https://github.com/hashicorp/terraform-provider-aws/releases/tag/v6.40.0"
        }
      ],
      "impacts": [
        {
          "type": "iac-module",
          "component": "aws_account",
          "summary": "Added Service Quotas Automatic Management with configurable regions, exclusion lists, and optional notification pipeline"
        }
      ]
    },
    {
      "id": "c9612edf-7744-4de5-9d4b-761226b02700",
      "type": "update",
      "summary": "Updated AWS Terraform provider from `v6.38.0` to `v6.40.0` across all infrastructure modules",
      "description": "The provider bump was required to unlock the `aws_servicequotas_auto_management` Terraform resource, which landed in upstream PR `#45968`. This upgrade enables AWS Service Quotas Automatic Management functionality, allowing proactive quota monitoring and automatic quota increase requests to prevent production incidents caused by hitting service limits. This update affects all 60 AWS infrastructure modules in the stack.",
      "references": [
        {
          "type": "internal-commit",
          "summary": "feat(aws_account): enable Service Quotas Automatic Management",
          "link": "https://github.com/Panfactum/stack/commit/cef868ff0510bf116b0bac5c15c504b89cc39d06"
        },
        {
          "type": "external-docs",
          "summary": "AWS Terraform Provider v6.40.0 Release Notes",
          "link": "https://github.com/hashicorp/terraform-provider-aws/releases/tag/v6.40.0"
        },
        {
          "type": "issue-report",
          "summary": "feat: add service quotas automatic management",
          "link": "https://github.com/hashicorp/terraform-provider-aws/pull/45968"
        },
        {
          "type": "internal-docs",
          "summary": "`aws_account` module documentation",
          "link": "/docs/main/modules/aws_account/overview"
        }
      ],
      "impacts": [
        {
          "type": "iac-provider",
          "component": "aws",
          "summary": "Updated from `v6.38.0` to `v6.40.0` across all infrastructure modules"
        }
      ]
    },
    {
      "id": "d4693f4f-f543-4ba7-8601-b72c1aa390a2",
      "type": "improvement",
      "summary": "Enhanced `kube_pg_cluster` module with per-schema Vault roles, automatic schema initialization, and PgBouncer `search_path` support",
      "description": "CNPG's `postInitApplicationSQL` only runs at cluster bootstrap, so schemas added to `extra_schemas` after initial setup were silently ignored. The module now runs idempotent schema SQL on every apply via a Kubernetes Job, creating schemas and per-schema base roles (`reader_<schema>`, `writer_<schema>`) with appropriate grants. PgBouncer cannot set `search_path` per-connection in transaction/session pooling mode, so per-schema Vault roles now use `ALTER ROLE SET search_path` to pre-configure the correct schema, removing the need for clients to set it at connection time. Added `pgbouncer_ignore_startup_parameters` variable to prevent PgBouncer from rejecting connections from JDBC drivers and other clients that automatically send `search_path` as a startup parameter.",
      "references": [
        {
          "type": "internal-commit",
          "summary": "feat(kube_pg_cluster): add per-schema roles, schema init job, PgBouncer fix",
          "link": "https://github.com/Panfactum/stack/commit/9053ab836b67d438d2d1587c7624f63c53adf45b"
        },
        {
          "type": "external-docs",
          "summary": "CloudNativePG PostgreSQL Operator",
          "link": "https://github.com/cloudnative-pg/cloudnative-pg"
        },
        {
          "type": "external-docs",
          "summary": "PgBouncer Configuration Documentation - ignore_startup_parameters",
          "link": "https://github.com/pgbouncer/pgbouncer/blob/master/doc/config.md"
        },
        {
          "type": "external-docs",
          "summary": "Vault PostgreSQL Database Secrets Engine",
          "link": "https://developer.hashicorp.com/vault/docs/secrets/databases/postgresql"
        },
        {
          "type": "internal-docs",
          "summary": "kube_pg_cluster Module Documentation",
          "link": "/docs/main/modules/kube_pg_cluster/overview"
        }
      ],
      "impacts": [
        {
          "type": "iac-module",
          "component": "kube_pg_cluster",
          "summary": "Added per-schema Vault roles with pre-configured `search_path`, schema initialization Job for existing clusters, and `pgbouncer_ignore_startup_parameters` variable for JDBC compatibility"
        }
      ]
    }
  ],
  "on_upgrade_path": true,
  "list_url": "/docs/changelog/edge.json",
  "llm_txt_url": "/docs/changelog/edge.26-04-24/llm.txt",
  "prev": "/docs/changelog/edge.26-04-05.json"
}