diff --git a/ECS_RGO_DAG.md b/ECS_RGO_DAG.md
new file mode 100644
index 000000000..dcfc78f84
--- /dev/null
+++ b/ECS_RGO_DAG.md
@@ -0,0 +1,23 @@
+```mermaid
+flowchart TD
+ classDef prov fill:#1f3a5f,stroke:#9ecbff,color:#eaf2ff;
+ classDef pop fill:#3a2f1f,stroke:#ffce9e,color:#fff3e6;
+
+ S1["Stage 1<br/>RgoComputePopulationTotalsSystem<br/>over provinces"]:::prov
+ S2["Stage 2<br/>RgoHireSystem<br/>over provinces"]:::prov
+ S3["Stage 3<br/>RgoProduceAndPlaceOrderSystem<br/>over provinces"]:::prov
+ S4["Stage 4<br/>RgoResolveSellOrderAndOwnerShareSystem<br/>over provinces"]:::prov
+ S5a["Stage 5a<br/>RgoComputeOwnerIncomeSystem<br/>over provinces"]:::prov
+ S5b["Stage 5b<br/>RgoComputeEmployeeIncomeSystem<br/>over provinces"]:::prov
+ S6a["Stage 6a<br/>ApplyEmployeeIncomeToPopsSystem<br/>over pops"]:::pop
+ S6b["Stage 6b<br/>ApplyOwnerIncomeToPopsSystem<br/>over pops"]:::pop
+ S7["Stage 7<br/>AggregatePopIncomeSystem<br/>over pops"]:::pop
+
+ S1 --> S2 --> S3 --> S4
+ S4 --> S5a
+ S4 --> S5b
+ S5b --> S6a
+ S5a --> S6b
+ S6a --> S7
+ S6b --> S7
+```
diff --git a/ECS_RGO_DATAFLOW.md b/ECS_RGO_DATAFLOW.md
new file mode 100644
index 000000000..08f8081c9
--- /dev/null
+++ b/ECS_RGO_DATAFLOW.md
@@ -0,0 +1,85 @@
+```mermaid
+flowchart LR
+ classDef sys fill:#243b53,stroke:#9ecbff,color:#eaf2ff;
+ classDef prov fill:#102a43,stroke:#5b8fc9,color:#d6e6ff;
+ classDef pop fill:#3a2f1f,stroke:#ffce9e,color:#fff3e6;
+ classDef sing fill:#2b2b2b,stroke:#9e9e9e,color:#eee;
+
+ %% Singletons
+ REG["RgoProductionTypeRegistry"]:::sing
+ PRICE["RgoMarketPriceTable"]:::sing
+ RULES["RgoGameRules"]:::sing
+
+ %% Systems
+ S1["RgoComputePopulationTotalsSystem"]:::sys
+ S2["RgoHireSystem"]:::sys
+ S3["RgoProduceAndPlaceOrderSystem"]:::sys
+ S4["RgoResolveSellOrderAndOwnerShareSystem"]:::sys
+ S5a["RgoComputeOwnerIncomeSystem"]:::sys
+ S5b["RgoComputeEmployeeIncomeSystem"]:::sys
+ S6a["ApplyEmployeeIncomeToPopsSystem"]:::sys
+ S6b["ApplyOwnerIncomeToPopsSystem"]:::sys
+ S7["AggregatePopIncomeSystem"]:::sys
+
+ %% Province components
+ CACHE["ProvinceRgoCacheTotals"]:::prov
+ HIRED["ProvinceRgoHired"]:::prov
+ ORDER["ProvinceRgoSellOrder"]:::prov
+ RESULT["ProvinceRgoResult"]:::prov
+ OINC["ProvinceRgoOwnerIncome"]:::prov
+ EINC["ProvinceRgoEmployeeIncome"]:::prov
+
+ %% Pop components
+ PWI["PopWorkerIncome"]:::pop
+ POI["PopOwnerIncome"]:::pop
+ PIT["PopIncomeTotals"]:::pop
+
+ %% Stage 1
+ REG -.->|lookup| S1
+ S1 -->|writes| CACHE
+
+ %% Stage 2
+ CACHE -->|reads| S2
+ REG -.->|lookup| S2
+ S2 -->|writes| HIRED
+
+ %% Stage 3
+ HIRED -->|reads| S3
+ CACHE -->|reads| S3
+ REG -.->|lookup| S3
+ RULES -.->|lookup| S3
+ S3 -->|"writes output_quantity"| RESULT
+ S3 -->|writes| ORDER
+
+ %% Stage 4
+ ORDER -->|reads + clears| S4
+ PRICE -.->|lookup| S4
+ CACHE -->|reads| S4
+ S4 -->|"writes revenue, owner_share,<br/>total_minimum_wage"| RESULT
+ S4 -->|"writes employee min wages"| HIRED
+
+ %% Stage 5
+ RESULT -->|reads| S5a
+ CACHE -->|reads| S5a
+ REG -.->|lookup| S5a
+ S5a -->|writes| OINC
+
+ RESULT -->|reads| S5b
+ HIRED -->|reads| S5b
+ S5b -->|writes| EINC
+
+ %% Stage 6
+ HIRED -.->|reads| S6a
+ EINC -.->|reads| S6a
+ S6a -->|writes| PWI
+
+ RESULT -.->|reads| S6b
+ CACHE -.->|reads| S6b
+ REG -.->|lookup| S6b
+ S6b -->|writes| POI
+
+ %% Stage 7
+ PWI -->|reads| S7
+ POI -->|reads| S7
+ S7 -->|"writes total + cash"| PIT
+```
diff --git a/src/openvic-simulation/ecs/Archetype.hpp b/src/openvic-simulation/ecs/Archetype.hpp
new file mode 100644
index 000000000..f9efd2348
--- /dev/null
+++ b/src/openvic-simulation/ecs/Archetype.hpp
@@ -0,0 +1,293 @@
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "openvic-simulation/ecs/Chunk.hpp"
+#include "openvic-simulation/ecs/ChunkPool.hpp"
+#include "openvic-simulation/ecs/ComponentTypeID.hpp"
+#include "openvic-simulation/ecs/EntityID.hpp"
+
+namespace OpenVic::ecs {
+
+ // Type-erased operations needed to manage a component column without naming the type.
+ // For tag (zero-size, std::is_empty) component types, `size` and `align` are 0 and the
+ // move/destroy thunks are no-ops — the archetype then holds no slab for that column,
+ // only a row count tracked by chunks.
+ struct ColumnVTable {
+ std::size_t size; // sizeof the component type; 0 for tag (empty) types
+ std::size_t align; // alignof the component type; 0 for tag (empty) types
+ void (*move_construct)(void* dst, void* src); // move-constructs *dst from *src, then destroys *src (no-op for tags)
+ void (*destroy)(void* dst); // runs the component's destructor on *dst; does not free storage (no-op for tags)
+ };
+
+ template<typename C> // C: component type; returns the single shared vtable instance for C
+ inline ColumnVTable const& column_vtable_for() {
+     if constexpr (std::is_empty_v<C>) {
+         static ColumnVTable const v { // tag type: no per-row storage, so nothing to move or destroy
+             0,
+             0,
+             [](void*, void*) {},
+             [](void*) {}
+         };
+         return v;
+     } else {
+         static ColumnVTable const v {
+             sizeof(C),
+             alignof(C),
+             [](void* dst, void* src) { // relocate: move-construct into raw storage, then end src's lifetime
+                 ::new (dst) C(std::move(*static_cast<C*>(src)));
+                 static_cast<C*>(src)->~C();
+             },
+             [](void* dst) { // destroy in place; the slab itself is owned by the chunk
+                 static_cast<C*>(dst)->~C();
+             }
+         };
+         return v;
+     }
+ }
+
+ // Sentinel for "tag column has no slab" / "component not in archetype".
+ constexpr std::size_t NO_COLUMN_OFFSET = static_cast<std::size_t>(-1); // largest size_t value: "tag column, no slab"
+ constexpr std::size_t NO_COLUMN_INDEX = static_cast<std::size_t>(-1); // largest size_t value: "component not in archetype"
+
+ // One archetype = one unique sorted set of component type IDs.
+ // Storage is a list of fixed-size 16 KB chunks. Each chunk holds up to `chunk_capacity`
+ // rows; rows fill chunks left-to-right. When the last chunk is full, a fresh chunk is
+ // allocated. Removal swap-pops with the *last* row of the *last* chunk (cross-chunk swap
+ // when needed), and an emptied trailing chunk is dropped.
+ struct Archetype {
+     std::vector<component_type_id_t> signature; // component type ids, sorted ascending (matches_all / matches_none rely on this)
+     std::vector<ColumnVTable const*> vtables; // one vtable per signature entry, indexed by column
+     // Byte offset of each column's slab within a chunk's `data`. Tag columns
+     // (vtable->size == 0) carry NO_COLUMN_OFFSET; their data must never be dereferenced.
+     std::vector<std::size_t> column_offsets;
+     // Per-column monotonic version counter, bumped on every push / swap-pop / migration
+     // touching the column. Replaces the per-Column version stamp from the pre-chunked
+     // design — `World::component_version_in(eid)` reads from here.
+     std::vector<uint64_t> column_versions;
+     std::size_t chunk_capacity = 0; // rows per chunk; constant for the archetype's life
+     std::size_t total_entity_count = 0;
+     std::vector<DataChunk> chunks;
+     // Bitfield prefilter: one bit per component, derived from `id % 63`. Used by
+     // `World::resolve_query_cache` to fast-reject archetypes before the sorted-set walk.
+     uint64_t matcher_hash = 0;
+     // Non-owning pointer to the World's ChunkPool. Set by World when the archetype is
+     // created. Nullable: tests that construct an Archetype outside a World fall through
+     // to ::operator new / ::operator delete in allocate_chunk and the destructor.
+     ChunkPool* chunk_pool = nullptr;
+
+     Archetype() = default;
+     Archetype(Archetype const&) = delete;
+     Archetype& operator=(Archetype const&) = delete;
+     Archetype(Archetype&&) = default;
+     Archetype& operator=(Archetype&&) = default; // NOTE(review): defaulted — does not destroy components or free blocks already held by *this; assign only into an empty archetype
+
+     // Destroy every live component and release each chunk's backing block. With a
+     // ChunkPool wired in, World::~World calls drain_to_pool first and the loop below
+     // then sees an empty `chunks` vector — this destructor is the non-pool fallback
+     // (bare Archetype in tests, or any path that constructs an Archetype directly).
+     ~Archetype() {
+         for (std::size_t ci = 0; ci < chunks.size(); ++ci) {
+             DataChunk& chunk = chunks[ci];
+             for (std::size_t row = 0; row < chunk.count; ++row) {
+                 for (std::size_t col = 0; col < signature.size(); ++col) {
+                     if (column_offsets[col] == NO_COLUMN_OFFSET) {
+                         continue; // tag column: nothing stored, nothing to destroy
+                     }
+                     vtables[col]->destroy(row_in_column(ci, col, row));
+                 }
+             }
+             chunk.count = 0;
+             if (chunk.data != nullptr) {
+                 ::operator delete(chunk.data, std::align_val_t { CHUNK_BLOCK_ALIGN });
+                 chunk.data = nullptr;
+             }
+         }
+     }
+
+     // Returns NO_COLUMN_INDEX if the archetype doesn't carry `id`.
+     std::size_t column_index_for(component_type_id_t id) const {
+         for (std::size_t i = 0; i < signature.size(); ++i) {
+             if (signature[i] == id) {
+                 return i;
+             }
+         }
+         return NO_COLUMN_INDEX;
+     }
+
+     bool has_component(component_type_id_t id) const {
+         return column_index_for(id) != NO_COLUMN_INDEX;
+     }
+
+     // Both inputs sorted ascending; returns true iff `required ⊆ signature`.
+     bool matches_all(std::vector<component_type_id_t> const& required) const {
+         std::size_t i = 0;
+         std::size_t j = 0;
+         while (i < required.size() && j < signature.size()) {
+             if (required[i] == signature[j]) {
+                 ++i;
+                 ++j;
+             } else if (signature[j] < required[i]) {
+                 ++j;
+             } else {
+                 return false; // required[i] is smaller than every remaining signature id, so it is absent
+             }
+         }
+         return i == required.size();
+     }
+
+     // Both inputs sorted ascending; returns true iff `excluded ∩ signature == ∅`.
+     bool matches_none(std::vector<component_type_id_t> const& excluded) const {
+         std::size_t i = 0;
+         std::size_t j = 0;
+         while (i < excluded.size() && j < signature.size()) {
+             if (excluded[i] == signature[j]) {
+                 return false;
+             } else if (signature[j] < excluded[i]) {
+                 ++j;
+             } else {
+                 ++i;
+             }
+         }
+         return true;
+     }
+
+     // Returns the number of bytes of slab in a chunk that store EntityIDs (= chunk_capacity * sizeof(EntityID)).
+     std::size_t entity_slab_bytes() const {
+         return chunk_capacity * sizeof(EntityID);
+     }
+
+     EntityID* entity_array(std::size_t chunk_index) { // the EntityID slab sits at the start of the chunk's block
+         return reinterpret_cast<EntityID*>(chunks[chunk_index].data);
+     }
+     EntityID const* entity_array(std::size_t chunk_index) const {
+         return reinterpret_cast<EntityID const*>(chunks[chunk_index].data);
+     }
+
+     // Returns the base address of column `col`'s slab in the given chunk, or nullptr if
+     // the column is a tag (no slab). Callers must not dereference for tag columns.
+     void* column_array(std::size_t chunk_index, std::size_t col) {
+         if (column_offsets[col] == NO_COLUMN_OFFSET) {
+             return nullptr;
+         }
+         return chunks[chunk_index].data + column_offsets[col];
+     }
+     void const* column_array(std::size_t chunk_index, std::size_t col) const {
+         if (column_offsets[col] == NO_COLUMN_OFFSET) {
+             return nullptr;
+         }
+         return chunks[chunk_index].data + column_offsets[col];
+     }
+
+     // Returns a pointer to the column's slot at (chunk, row), or nullptr for tag columns.
+     void* row_in_column(std::size_t chunk_index, std::size_t col, std::size_t row) {
+         if (column_offsets[col] == NO_COLUMN_OFFSET) {
+             return nullptr;
+         }
+         return chunks[chunk_index].data + column_offsets[col] + row * vtables[col]->size;
+     }
+     void const* row_in_column(std::size_t chunk_index, std::size_t col, std::size_t row) const {
+         if (column_offsets[col] == NO_COLUMN_OFFSET) {
+             return nullptr;
+         }
+         return chunks[chunk_index].data + column_offsets[col] + row * vtables[col]->size;
+     }
+
+     // Allocates a new fresh chunk with no rows and returns its index in `chunks`.
+     // Routes through chunk_pool when set; falls back to ::operator new otherwise (used
+     // by tests that construct an Archetype bare, without a World).
+     std::size_t allocate_chunk() {
+         DataChunk fresh;
+         if (chunk_pool != nullptr) {
+             fresh.data = chunk_pool->acquire();
+         } else {
+             fresh.data = static_cast<unsigned char*>(
+                 ::operator new(CHUNK_BLOCK_BYTES, std::align_val_t { CHUNK_BLOCK_ALIGN })
+             );
+         }
+         chunks.push_back(std::move(fresh));
+         return chunks.size() - 1;
+     }
+
+     // Drops the trailing chunk if it's empty, returning its block to the pool (or to
+     // ::operator delete if no pool is wired). No-op if there are no chunks or the
+     // trailing chunk still holds rows. No retain-one rule — a fully-drained archetype
+     // has chunks.size() == 0, and the next reserve_row pulls a fresh block from the
+     // pool at the same cost as the previous "retained spare" indexing.
+     void drop_empty_trailing_chunk() {
+         if (chunks.empty() || chunks.back().count != 0) {
+             return;
+         }
+         DataChunk& back = chunks.back();
+         if (chunk_pool != nullptr) {
+             chunk_pool->release(back.data);
+         } else if (back.data != nullptr) {
+             ::operator delete(back.data, std::align_val_t { CHUNK_BLOCK_ALIGN });
+         }
+         back.data = nullptr;
+         chunks.pop_back();
+     }
+
+     // Destroys every live component and routes every chunk's block to the pool, then
+     // clears the chunks vector and resets total_entity_count. Called explicitly by
+     // World::~World before the archetypes vector destroys, so the (non-pool fallback)
+     // Archetype destructor sees an empty chunks vector and has nothing to do.
+     void drain_to_pool(ChunkPool& pool) {
+         for (std::size_t ci = 0; ci < chunks.size(); ++ci) {
+             DataChunk& chunk = chunks[ci];
+             for (std::size_t row = 0; row < chunk.count; ++row) {
+                 for (std::size_t col = 0; col < signature.size(); ++col) {
+                     if (column_offsets[col] == NO_COLUMN_OFFSET) {
+                         continue; // tag column: nothing stored, nothing to destroy
+                     }
+                     vtables[col]->destroy(row_in_column(ci, col, row));
+                 }
+             }
+             chunk.count = 0;
+             pool.release(chunk.data);
+             chunk.data = nullptr;
+         }
+         chunks.clear();
+         total_entity_count = 0;
+     }
+
+     // Reserve a new row at the end of storage — uses the trailing chunk if it still has room
+     // (otherwise allocates a new one), bumps that chunk's `count`, and returns (chunk_index, row).
+     // Caller must placement-new component values into each non-tag column at this slot
+     // AFTER calling reserve_row, then push the EntityID via `entity_array(chunk_index)[row] = eid`.
+     // Bumps every column_version and total_entity_count.
+     struct RowLocation {
+         std::size_t chunk_index;
+         std::size_t row;
+     };
+     RowLocation reserve_row() {
+         std::size_t chunk_index;
+         if (chunks.empty() || chunks.back().count >= chunk_capacity) {
+             chunk_index = allocate_chunk();
+         } else {
+             chunk_index = chunks.size() - 1;
+         }
+         std::size_t const row = chunks[chunk_index].count;
+         ++chunks[chunk_index].count;
+         ++total_entity_count;
+         for (uint64_t& v : column_versions) {
+             ++v;
+         }
+         return { chunk_index, row };
+     }
+
+     // Returns the (chunk_index, row) of the global last row, used when swap-popping.
+     // Precondition: total_entity_count > 0 (otherwise both subtractions below wrap).
+     RowLocation last_row_location() const {
+         std::size_t const last_chunk = chunks.size() - 1;
+         std::size_t const last_row = chunks[last_chunk].count - 1;
+         return { last_chunk, last_row };
+     }
+ };
+}
diff --git a/src/openvic-simulation/ecs/CachedRef.hpp b/src/openvic-simulation/ecs/CachedRef.hpp
new file mode 100644
index 000000000..cf0a5c320
--- /dev/null
+++ b/src/openvic-simulation/ecs/CachedRef.hpp
@@ -0,0 +1,71 @@
+#pragma once
+
+#include
+
+#include "openvic-simulation/ecs/EntityID.hpp"
+#include "openvic-simulation/ecs/World.hpp"
+
+namespace OpenVic::ecs {
+
+ // Soft component-pointer that survives across structural mutations of the world by
+ // re-resolving on a per-column version mismatch. Cheaper than calling
+ // `World::get_component` every time — the fast path is one comparison and an
+ // indirection. Storage is the entity ID plus a cached version stamp and pointer.
+ //
+ // `get(world)` returns the latest pointer (refreshing the cache if stale or the entity
+ // has changed archetype), or nullptr if the entity is dead or no longer carries C.
+ //
+ // Lifetime: a CachedRef may be stored across ticks. It's safe to copy. Holding one
+ // after `World::clear_systems` / `end_game_session` is fine but get() may return
+ // nullptr once the entity has been swept.
+ template<typename C>
+ struct CachedRef {
+     EntityID entity_id = INVALID_ENTITY_ID; // entity this reference tracks
+     uint64_t cached_version = 0; // column version at the last successful resolve; 0 = nothing cached
+     C* cached_pointer = nullptr; // component pointer from the last successful resolve
+
+     static CachedRef from(World& world, EntityID id) { // builds a ref for `id` and resolves it once
+         CachedRef ref;
+         ref.entity_id = id;
+         ref.refresh(world);
+         return ref;
+     }
+
+     EntityID entity() const {
+         return entity_id;
+     }
+
+     bool is_valid(World const& world) const { // cached pointer present and entity alive; does not re-check the version stamp
+         return cached_pointer != nullptr && world.is_alive(entity_id);
+     }
+
+     void invalidate() { // drops the cached pointer and stamp; entity_id is kept
+         cached_pointer = nullptr;
+         cached_version = 0;
+     }
+
+     // Returns the current component pointer, re-resolving when the column version reported
+     // for the entity differs from the cached stamp or nothing is cached. Returns nullptr when that version is 0.
+     // NOTE(review): only version stamps are compared; confirm World makes them differ after an archetype change.
+     C* get(World& world) {
+         uint64_t const live_version = world.template component_version_in<C>(entity_id);
+         if (live_version == 0) {
+             // Entity dead, or doesn't carry C in its current archetype.
+             cached_pointer = nullptr;
+             cached_version = 0;
+             return nullptr;
+         }
+         if (live_version != cached_version || cached_pointer == nullptr) {
+             cached_pointer = world.template get_component<C>(entity_id);
+             cached_version = cached_pointer != nullptr ? live_version : 0;
+         }
+         return cached_pointer;
+     }
+
+ private:
+     void refresh(World& world) { // unconditional re-resolve; the stamp is stored only when a pointer was obtained
+         cached_pointer = world.template get_component<C>(entity_id);
+         cached_version = cached_pointer != nullptr ? world.template component_version_in<C>(entity_id) : 0;
+     }
+ };
+}
diff --git a/src/openvic-simulation/ecs/Chunk.hpp b/src/openvic-simulation/ecs/Chunk.hpp
new file mode 100644
index 000000000..d52e3c334
--- /dev/null
+++ b/src/openvic-simulation/ecs/Chunk.hpp
@@ -0,0 +1,85 @@
+#pragma once
+
+#include
+#include
+#include
+
+namespace OpenVic::ecs {
+
+ // Fixed 16 KB chunk size, matching decs's `BLOCK_MEMORY_16K`. Chunks are the unit of
+ // growth — when an archetype runs out of capacity in its current chunks, a fresh chunk
+ // is allocated rather than relocating the existing column data. That's the principal
+ // performance advantage over per-column std::vector storage.
+ constexpr std::size_t CHUNK_BLOCK_BYTES = 16 * 1024;
+
+ // Alignment for the chunk's heap block. Generous enough to cover EntityID and any
+ // reasonable component (cache-line aligned for iteration efficiency).
+ constexpr std::size_t CHUNK_BLOCK_ALIGN = 64;
+
+ // `restrict` for typed pointers that loop bodies read/write. Tells the compiler the
+ // pointee is not reached by any other pointer in the enclosing scope — without this
+ // promise, writes through one column's pointer are assumed to potentially alias reads
+ // through another column's pointer (because both pointers trace back to the same
+ // `unsigned char*` chunk block), which blocks register-promotion and reordering in
+ // the hot inner loops of the system drivers. Honored most reliably when applied to a
+ // local declaration; weaker on function returns. Not standard C++ — each target
+ // compiler spells it differently.
+#if defined(_MSC_VER)
+# define OV_RESTRICT __restrict
+#elif defined(__GNUC__) || defined(__clang__)
+# define OV_RESTRICT __restrict__
+#else
+# define OV_RESTRICT
+#endif
+
+ // Passive holder for one chunk's 16 KB block. Lifecycle is managed explicitly at every
+ // callsite that owns a chunk:
+ // - Archetype::allocate_chunk calls ChunkPool::acquire and stores the result in `data`.
+ // - Archetype::drop_empty_trailing_chunk / World::compact_archetype_after_external_move
+ // call ChunkPool::release(data) and null `data` before pop_back.
+ // - Archetype::drain_to_pool (called from ~World) walks every chunk and releases.
+ // There is intentionally no destructor here — pool routing must be visible at the
+ // callsite, not hidden in RAII. Leaving `data` non-null when a DataChunk is destroyed
+ // is a programmer error; the move-assign assert catches the common case where a moved-
+ // into slot already held a live block, and the Archetype destructor catches the rest
+ // (it ::operator delete's any leftover data as a non-pool fallback for bare-Archetype
+ // test paths — but the pool-driven path drains chunks before Archetype destruction).
+ //
+ // Layout of the block (computed once at archetype creation, identical across every
+ // chunk owned by that archetype):
+ //
+ // [entity_id slab: EntityID[chunk_capacity]]
+ // [component_0 slab: aligned to vtable[0]->align, chunk_capacity * vtable[0]->size]
+ // [component_1 slab: ...]
+ // ...
+ //
+ // `count` is rows-currently-in-this-chunk; `chunk_capacity` is rows-per-chunk for the
+ // owning archetype (constant for the chunk's lifetime). Tag (zero-size) columns get a
+ // sentinel offset (size_t(-1)) and contribute no slab — they are tracked at the
+ // archetype level only via `column_versions`.
+ struct DataChunk {
+     unsigned char* data = nullptr; // backing block; never freed by this struct
+     std::size_t count = 0; // rows currently stored in the block
+
+     DataChunk() = default;
+     DataChunk(DataChunk const&) = delete;
+     DataChunk& operator=(DataChunk const&) = delete;
+
+     DataChunk(DataChunk&& source) noexcept : data { source.data }, count { source.count } {
+         source.data = nullptr; // the moved-from holder is left empty
+         source.count = 0;
+     }
+     DataChunk& operator=(DataChunk&& source) noexcept {
+         if (this == &source) {
+             return *this;
+         }
+         // Must already be drained: overwriting a live block here would silently leak its 16 KB.
+         assert(data == nullptr && "DataChunk move-assign over live block");
+         data = source.data;
+         count = source.count;
+         source.data = nullptr;
+         source.count = 0;
+         return *this;
+     }
+ };
+}
diff --git a/src/openvic-simulation/ecs/ChunkPool.cpp b/src/openvic-simulation/ecs/ChunkPool.cpp
new file mode 100644
index 000000000..aa2097447
--- /dev/null
+++ b/src/openvic-simulation/ecs/ChunkPool.cpp
@@ -0,0 +1,59 @@
+#include "openvic-simulation/ecs/ChunkPool.hpp"
+
+#include
+#include
+
+#include "openvic-simulation/ecs/Chunk.hpp"
+
+using namespace OpenVic::ecs;
+
+ChunkPool::~ChunkPool() {
+    // Drain the free list, handing every cached block back to the allocator.
+    while (!free_blocks_.empty()) {
+        ::operator delete(free_blocks_.back().data, std::align_val_t { CHUNK_BLOCK_ALIGN });
+        ++total_deallocations_;
+        free_blocks_.pop_back();
+    }
+}
+
+unsigned char* ChunkPool::acquire() {
+    if (!free_blocks_.empty()) { // reuse the block at the back of the free list
+        unsigned char* data = free_blocks_.back().data;
+        free_blocks_.pop_back();
+        return data;
+    }
+    // Count the allocation only once ::operator new has succeeded (it throws on failure).
+    void* const block = ::operator new(CHUNK_BLOCK_BYTES, std::align_val_t { CHUNK_BLOCK_ALIGN });
+    ++total_allocations_;
+    return static_cast<unsigned char*>(block);
+}
+
+void ChunkPool::release(unsigned char* data) {
+    if (data == nullptr) {
+        return; // nothing to give back
+    }
+    if (free_blocks_.size() < MAX_POOL_SIZE) {
+        free_blocks_.push_back({ data, current_tick_ }); // cache it, stamped with the release tick
+    } else {
+        ::operator delete(data, std::align_val_t { CHUNK_BLOCK_ALIGN }); // cache full: free immediately
+        ++total_deallocations_;
+    }
+}
+
+void ChunkPool::advance_tick() {
+    ++current_tick_;
+    // Free blocks older than the threshold, compacting the survivors in place so they keep
+    // their release order (acquire pops from the back, i.e. the most recently released block).
+    // free_blocks_.size() is bounded by MAX_POOL_SIZE, so the O(n) scan is trivial.
+    std::size_t kept = 0;
+    for (std::size_t i = 0; i < free_blocks_.size(); ++i) {
+        // released_at_tick <= current_tick_ by construction, so the subtraction cannot wrap.
+        if (current_tick_ - free_blocks_[i].released_at_tick > AGE_THRESHOLD_TICKS) {
+            ::operator delete(free_blocks_[i].data, std::align_val_t { CHUNK_BLOCK_ALIGN });
+            ++total_deallocations_;
+        } else {
+            free_blocks_[kept] = free_blocks_[i];
+            ++kept;
+        }
+    }
+    free_blocks_.resize(kept); // shrink only: drops the slots vacated by compaction
+}
diff --git a/src/openvic-simulation/ecs/ChunkPool.hpp b/src/openvic-simulation/ecs/ChunkPool.hpp
new file mode 100644
index 000000000..181cba859
--- /dev/null
+++ b/src/openvic-simulation/ecs/ChunkPool.hpp
@@ -0,0 +1,73 @@
+#pragma once
+
+#include
+#include
+#include
+
+namespace OpenVic::ecs {
+
+ // Pool of fixed-size 16 KB aligned blocks matching DataChunk's layout (size = CHUNK_BLOCK_BYTES,
+ // alignment = CHUNK_BLOCK_ALIGN — see Chunk.hpp). Owned by World; single-threaded — structural
+ // mutations are serialised on the main tick thread, so no synchronisation here.
+ //
+ // Released blocks are pushed LIFO so a ping-pong archetype reuses warm memory. Aging policy:
+ // blocks whose release tick falls more than AGE_THRESHOLD_TICKS behind the current tick are
+ // freed on the next advance_tick. A working set that keeps acquiring + releasing every tick
+ // refreshes its released_at_tick on each cycle and never ages out. A truly idle archetype's
+ // chunks all drain to the OS after AGE_THRESHOLD_TICKS ticks of disuse.
+ //
+ // MAX_POOL_SIZE caps the cached block count. Releases above the cap go straight to
+ // ::operator delete so a one-off burst can't lock down megabytes for the aging window.
+ class ChunkPool {
+ public:
+     static constexpr std::size_t MAX_POOL_SIZE = 64; // most blocks the free list will cache
+     static constexpr uint64_t AGE_THRESHOLD_TICKS = 256; // cached blocks older than this are freed by advance_tick
+
+     ChunkPool() = default;
+     ChunkPool(ChunkPool const&) = delete;
+     ChunkPool& operator=(ChunkPool const&) = delete;
+     ChunkPool(ChunkPool&&) = delete;
+     ChunkPool& operator=(ChunkPool&&) = delete;
+     ~ChunkPool();
+
+     // Returns a CHUNK_BLOCK_BYTES-sized, CHUNK_BLOCK_ALIGN-aligned block. Pops from the
+     // free list if any block is cached; otherwise calls ::operator new and increments
+     // total_allocations_.
+     unsigned char* acquire();
+
+     // Returns a block to the pool. If the free list is at MAX_POOL_SIZE, frees the block
+     // immediately via ::operator delete and increments total_deallocations_. Passing
+     // nullptr is a no-op.
+     void release(unsigned char* data);
+
+     // Increments the tick counter and frees any cached block whose release tick is
+     // older than AGE_THRESHOLD_TICKS. Called once per World tick from tick_systems.
+     void advance_tick();
+
+     // Test / diagnostic accessors. Used by ChunkPool tests to assert pool behaviour and
+     // by integration tests to verify allocator round-trips through the pool.
+     std::size_t pooled_count() const {
+         return free_blocks_.size();
+     }
+     uint64_t total_allocations() const {
+         return total_allocations_;
+     }
+     uint64_t total_deallocations() const {
+         return total_deallocations_;
+     }
+     uint64_t current_tick() const {
+         return current_tick_;
+     }
+
+ private:
+     struct PooledBlock {
+         unsigned char* data; // the cached block
+         uint64_t released_at_tick; // value of current_tick_ when the block was released
+     };
+
+     std::vector<PooledBlock> free_blocks_; // acquire pops from the back, release pushes to the back
+     uint64_t current_tick_ = 0;
+     uint64_t total_allocations_ = 0;
+     uint64_t total_deallocations_ = 0;
+ };
+}
diff --git a/src/openvic-simulation/ecs/ChunkSystem.hpp b/src/openvic-simulation/ecs/ChunkSystem.hpp
new file mode 100644
index 000000000..cc6a05ea2
--- /dev/null
+++ b/src/openvic-simulation/ecs/ChunkSystem.hpp
@@ -0,0 +1,60 @@
+#pragma once
+
+#include
+#include
+
+#include "openvic-simulation/ecs/ChunkView.hpp"
+#include "openvic-simulation/ecs/SystemImpl.hpp"
+
+namespace OpenVic::ecs {
+
+ // CRTP chunk-exec base. Derived class implements:
+ // void tick_chunk(ChunkView view, TickContext const& ctx);
+ // and inherits as `: ChunkSystem`.
+ //
+ // Useful for tight inner loops over large archetypes — slabs are contiguous, so the
+ // inner per-row loop avoids per-element function-call overhead. The decs analogue is
+ // `PureSystem`.
+ template // NOTE(review): the template parameter list here and the <...> argument lists below are missing from this text — restore before compiling
+ struct ChunkSystem {
+ // Compile-time access set is computed from Cs... directly (each `C const` becomes
+ // Read, `C` becomes Write — same semantics as System's tick-signature
+ // inference). The pack expansion yields one ComponentAccess per Cs... entry, in declared order.
+ static constexpr auto declared_access() {
+ return std::array { ComponentAccess {
+ component_type_id_of>(),
+ std::is_const_v> ? AccessMode::Read : AccessMode::Write
+ }... };
+ }
+
+ static constexpr system_type_id_t type_id() {
+ return system_type_id_of();
+ }
+
+ static constexpr std::array declared_run_after() { return {}; } // returns an empty array
+ static constexpr std::array declared_run_before() { return {}; } // returns an empty array
+ static constexpr std::array extra_reads() { return {}; } // returns an empty array
+ static constexpr bool is_threaded = false;
+
+ // Sorted-unique component ids defining the iteration query. ChunkSystem doesn't
+ // derive from System<>, so it needs its own version — but the result is the same
+ // shape: just Cs... folded through component_type_id_of, sorted, deduped.
+ // Consumed by the scheduler's query-cache prewarm for multi-system stages.
+ static std::vector compute_tick_query_require_ids() {
+ std::vector ids = {
+ component_type_id_of>()...
+ };
+ std::sort(ids.begin(), ids.end()); // ascending order
+ ids.erase(std::unique(ids.begin(), ids.end()), ids.end()); // drop duplicate ids
+ return ids;
+ }
+
+ void tick_all(World& world, TickContext const& ctx) { // calls Derived::tick_chunk(view, ctx) for each view World::for_each_chunk passes to the callback
+ Derived& self = static_cast(*this);
+ world.template for_each_chunk...>(
+ [&](ChunkView...> view) {
+ self.tick_chunk(view, ctx);
+ });
+ }
+ };
+}
diff --git a/src/openvic-simulation/ecs/ChunkView.hpp b/src/openvic-simulation/ecs/ChunkView.hpp
new file mode 100644
index 000000000..60b004eb3
--- /dev/null
+++ b/src/openvic-simulation/ecs/ChunkView.hpp
@@ -0,0 +1,77 @@
+#pragma once
+
+#include
+#include
+#include
+
+#include "openvic-simulation/ecs/Chunk.hpp"
+#include "openvic-simulation/ecs/EntityID.hpp"
+
+namespace OpenVic::ecs {
+
+ // Lightweight view passed to `for_each_chunk` lambdas. Wraps a single chunk's worth of
+ // component data: an EntityID array and one raw component-array pointer per Cs... in the
+ // caller's argument list. All arrays share the same length, `count()`. Tag (zero-size)
+ // component arrays are nullptr — callers must not dereference them.
+ //
+ // The view is valid only inside the `for_each_chunk` callback — the underlying chunk
+ // data may be relocated by any subsequent structural mutation of the World.
+ //
+ // `entities()` and `array()` return OV_RESTRICT pointers — the compiler is told they
+ // do not alias one another within this view's chunk. For the strongest effect, also bind
+ // each typed slab into an OV_RESTRICT local at the top of `tick_chunk` (return-type
+ // qualifiers are honored less reliably than locals):
+ // auto* OV_RESTRICT pos = view.array();
+ // auto* OV_RESTRICT vel = view.array();
+ template<typename... Cs>
+ struct ChunkView {
+     std::size_t row_count = 0; // length shared by eids and every non-null component array
+     EntityID* eids = nullptr;
+     // One raw pointer per Cs... in declared order. Tag types map to nullptr.
+     std::array<void*, sizeof...(Cs)> raw_arrays {};
+
+     std::size_t count() const {
+         return row_count;
+     }
+
+     EntityID* OV_RESTRICT entities() {
+         return eids;
+     }
+     EntityID const* OV_RESTRICT entities() const {
+         return eids;
+     }
+
+     // Returns the component slab for type C — must match exactly one of Cs...
+     // For tag types this returns nullptr (no per-row data is stored).
+     template<typename C>
+     C* OV_RESTRICT array() {
+         constexpr std::size_t idx = index_of<C>();
+         static_assert(idx < sizeof...(Cs), "ChunkView::array: C is not in this view's component list");
+         return static_cast<C*>(raw_arrays[idx]);
+     }
+
+     template<typename C>
+     C const* OV_RESTRICT array() const {
+         constexpr std::size_t idx = index_of<C>();
+         static_assert(idx < sizeof...(Cs), "ChunkView::array: C is not in this view's component list");
+         return static_cast<C const*>(raw_arrays[idx]);
+     }
+
+ private:
+     template<typename C, std::size_t I, typename First, typename... Rest> // I = position of First within Cs...
+     static constexpr std::size_t index_of_impl() {
+         if constexpr (std::is_same_v<C, First>) {
+             return I;
+         } else if constexpr (sizeof...(Rest) == 0) {
+             return sizeof...(Cs); // not found
+         } else {
+             return index_of_impl<C, I + 1, Rest...>();
+         }
+     }
+
+     template<typename C> // position of the first exact match for C in Cs..., or sizeof...(Cs) if absent
+     static constexpr std::size_t index_of() {
+         return index_of_impl<C, 0, Cs...>();
+     }
+ };
+}
diff --git a/src/openvic-simulation/ecs/CommandBuffer.cpp b/src/openvic-simulation/ecs/CommandBuffer.cpp
new file mode 100644
index 000000000..9488160e7
--- /dev/null
+++ b/src/openvic-simulation/ecs/CommandBuffer.cpp
@@ -0,0 +1,329 @@
+#include "openvic-simulation/ecs/CommandBuffer.hpp"
+
+#include
+#include
+
+#include "openvic-simulation/ecs/Archetype.hpp"
+#include "openvic-simulation/ecs/ComponentTypeID.hpp"
+#include "openvic-simulation/ecs/EntityID.hpp"
+#include "openvic-simulation/ecs/World.hpp"
+
+using namespace OpenVic::ecs;
+
+void CommandBuffer::apply(World& world) {
+ // Resolution map for deferred placeholders. Indexed by placeholder local_seq (= op.eid.index
+ // for any deferred eid). Empty when the buffer holds no deferred ops — the common case for
+ // serial-system buffers, where this allocation is a no-op.
+ std::vector placeholder_to_real;
+ if (deferred_count_ > 0) {
+ placeholder_to_real.assign(deferred_count_, INVALID_ENTITY_ID);
+ }
+ auto resolve = [&](EntityID eid) -> EntityID { // real ids pass through; placeholders map via the table
+ if (!eid.is_deferred()) {
+ return eid;
+ }
+ if (eid.index >= placeholder_to_real.size()) {
+ return INVALID_ENTITY_ID; // out-of-range placeholder — shouldn't happen
+ }
+ return placeholder_to_real[eid.index]; // still INVALID_ENTITY_ID if its CreateEntity has not run yet
+ };
+
+ for (Op& op : ops) {
+ switch (op.kind) {
+ case OpKind::CreateEntity: {
+ // In parallel-mode-recorded ops, op.eid is a deferred placeholder. Allocate a
+ // real slot here (single-threaded, deterministic order) and store the mapping
+ // before we finalise — subsequent ops that reference this placeholder resolve
+ // via the map. Serial-mode ops already carry a real reserved EntityID and skip
+ // the allocation step.
+ EntityID real_eid = op.eid;
+ bool const was_deferred = op.eid.is_deferred();
+ if (was_deferred) {
+ real_eid = world.reserve_entity_slot();
+ if (op.eid.index < placeholder_to_real.size()) {
+ placeholder_to_real[op.eid.index] = real_eid;
+ }
+ }
+ // Hand the World move-only payload pointers; finalize_reserved_entity moves
+ // them into the archetype's column slabs. After the call, the payload slots
+ // are moved-from but still need their allocations freed.
+ std::vector raw_slots;
+ raw_slots.reserve(op.create.sorted_values.size());
+ for (PayloadSlot& slot : op.create.sorted_values) {
+ raw_slots.push_back(slot.data);
+ }
+ world.finalize_reserved_entity(
+ real_eid, op.create.sorted_sig, op.create.sorted_vtables, raw_slots
+ );
+ // Free the moved-from payload allocations. We don't call destroy() on them —
+ // the move-construct already destructively transferred the value. Capture
+ // `align` into a local BEFORE `release_data()` — `release_data()` clears the
+ // slot's `vtable` pointer, and the argument-evaluation order for the delete
+ // call below is unspecified, so reading `slot.vtable->align` in the same
+ // expression could dereference the already-cleared (null) pointer.
+ for (PayloadSlot& slot : op.create.sorted_values) {
+ if (slot.data != nullptr && slot.vtable != nullptr && slot.vtable->size > 0) {
+ std::size_t const align = slot.vtable->align;
+ ::operator delete(slot.release_data(), std::align_val_t { align });
+ slot.vtable = nullptr; // redundant with release_data(), which already nulls vtable
+ }
+ }
+ break;
+ }
+ case OpKind::DestroyEntity: {
+ // World::destroy_entity is a no-op on dead entities and on
+ // reserved-but-unfinalised slots (calls drop_reserved_slot internally).
+ world.destroy_entity(resolve(op.eid));
+ break;
+ }
+ case OpKind::AddComponent: {
+ // The typed World::add_component path needs the component type at the call
+ // site, which a recorded op no longer has, so the add is carried out here at
+ // the type-erased level using only ids and vtables:
+ //  * if the entity's current archetype already has a column for the id, the
+ //    stored value is replaced in place. Move-assign isn't available
+ //    type-erased, so the existing value is destroyed first and the new one
+ //    is move-constructed into the same storage;
+ //  * otherwise the target archetype (source signature plus the new id) is
+ //    found or created, a row is reserved in it, and the new component plus
+ //    all existing components are move-constructed over; then
+ //    compact_archetype_after_external_move runs on the source row and the
+ //    entity slot is repointed at the new location.
+ EntityID const eid = resolve(op.eid);
+ if (!eid.is_valid() || eid.is_deferred()) {
+ // Unresolved placeholder (a same-buffer add referencing a placeholder whose
+ // CreateEntity op never ran, e.g. due to allocation failure). Drop silently.
+ break;
+ }
+ if (eid.index >= world.entity_slots.size()) {
+ break;
+ }
+ EntitySlot const& slot = world.entity_slots[eid.index];
+ if (!slot.alive || slot.generation != eid.generation) {
+ break;
+ }
+ if (slot.archetype_index == INVALID_ARCHETYPE) {
+ // Entity is reserved-but-unfinalised — adding a component before the
+ // CreateEntity op has been applied is undefined-by-policy. Ignore.
+ break;
+ }
+ ColumnVTable const* new_vt = op.add.value.vtable;
+ component_type_id_t const new_id = op.add.id;
+ uint32_t const src_idx = slot.archetype_index;
+ uint32_t const src_chunk = slot.chunk_index;
+ uint32_t const src_row = slot.row;
+
+ // In-place replace if already present.
+ {
+ Archetype& src = world.archetypes[src_idx];
+ std::size_t const existing_col = src.column_index_for(new_id);
+ if (existing_col != NO_COLUMN_INDEX) {
+ if (src.column_offsets[existing_col] != NO_COLUMN_OFFSET) {
+ void* dst = src.row_in_column(src_chunk, existing_col, src_row);
+ src.vtables[existing_col]->destroy(dst);
+ src.vtables[existing_col]->move_construct(dst, op.add.value.data);
+ ++src.column_versions[existing_col];
+ }
+ // Free the moved-from payload allocation. Capture `align` BEFORE
+ // `release_data()` clears the vtable pointer — see CreateEntity branch
+ // for the order-of-evaluation rationale.
+ if (op.add.value.data != nullptr && op.add.value.vtable != nullptr
+ && op.add.value.vtable->size > 0) {
+ std::size_t const align = op.add.value.vtable->align;
+ ::operator delete(
+ op.add.value.release_data(), std::align_val_t { align }
+ );
+ op.add.value.vtable = nullptr;
+ }
+ break;
+ }
+ }
+
+ // Build target signature = src.signature ∪ {new_id}, sorted ascending.
+ std::vector target_sig;
+ std::vector target_vtables;
+ {
+ Archetype const& src = world.archetypes[src_idx];
+ target_sig.reserve(src.signature.size() + 1);
+ target_vtables.reserve(src.signature.size() + 1);
+ bool inserted = false;
+ for (std::size_t i = 0; i < src.signature.size(); ++i) {
+ component_type_id_t const sid = src.signature[i];
+ if (!inserted && sid > new_id) {
+ target_sig.push_back(new_id);
+ target_vtables.push_back(new_vt);
+ inserted = true;
+ }
+ target_sig.push_back(sid);
+ target_vtables.push_back(src.vtables[i]);
+ }
+ if (!inserted) {
+ target_sig.push_back(new_id);
+ target_vtables.push_back(new_vt);
+ }
+ }
+
+ uint32_t const target_idx
+ = world.find_or_create_archetype(target_sig, target_vtables.data());
+
+ Archetype::RowLocation target_loc = world.archetypes[target_idx].reserve_row();
+ world.archetypes[target_idx].entity_array(target_loc.chunk_index)[target_loc.row] = eid;
+
+ {
+ Archetype& target = world.archetypes[target_idx]; // looked up only after find_or_create_archetype has returned
+ Archetype& src = world.archetypes[src_idx];
+ for (std::size_t i = 0; i < target.signature.size(); ++i) {
+ component_type_id_t const tid = target.signature[i];
+ if (target.column_offsets[i] == NO_COLUMN_OFFSET) {
+ continue; // tag column — no data
+ }
+ void* dst = target.row_in_column(
+ target_loc.chunk_index, i, target_loc.row
+ );
+ if (tid == new_id) {
+ target.vtables[i]->move_construct(dst, op.add.value.data);
+ } else {
+ std::size_t const src_col_idx = src.column_index_for(tid);
+ void* srcp = src.row_in_column(src_chunk, src_col_idx, src_row);
+ target.vtables[i]->move_construct(dst, srcp);
+ }
+ }
+ }
+
+ world.compact_archetype_after_external_move(src_idx, src_chunk, src_row);
+
+ EntitySlot& mutable_slot = world.entity_slots[eid.index];
+ mutable_slot.archetype_index = target_idx;
+ mutable_slot.chunk_index = static_cast(target_loc.chunk_index);
+ mutable_slot.row = static_cast(target_loc.row);
+
+ // Free the moved-from payload allocation. Capture `align` BEFORE
+ // `release_data()` clears the vtable pointer — see CreateEntity branch
+ // for the order-of-evaluation rationale.
+ if (op.add.value.data != nullptr && op.add.value.vtable != nullptr
+ && op.add.value.vtable->size > 0) {
+ std::size_t const align = op.add.value.vtable->align;
+ ::operator delete(
+ op.add.value.release_data(), std::align_val_t { align }
+ );
+ op.add.value.vtable = nullptr;
+ }
+ break;
+ }
+ case OpKind::RemoveComponent: {
+ EntityID const eid = resolve(op.eid);
+ if (!eid.is_valid() || eid.is_deferred()) {
+ break;
+ }
+ if (eid.index >= world.entity_slots.size()) {
+ break;
+ }
+ EntitySlot const& slot = world.entity_slots[eid.index];
+ if (!slot.alive || slot.generation != eid.generation) {
+ break;
+ }
+ if (slot.archetype_index == INVALID_ARCHETYPE) {
+ break;
+ }
+ uint32_t const src_idx = slot.archetype_index;
+ uint32_t const src_chunk = slot.chunk_index;
+ uint32_t const src_row = slot.row;
+
+ std::size_t drop_col_idx = NO_COLUMN_INDEX;
+ {
+ Archetype const& src = world.archetypes[src_idx];
+ drop_col_idx = src.column_index_for(op.remove_id);
+ if (drop_col_idx == NO_COLUMN_INDEX) {
+ break; // entity doesn't carry it — silent no-op
+ }
+ if (src.signature.size() == 1) {
+ // Removing the sole component is forbidden; mirror World::remove_component.
+ break;
+ }
+ }
+
+ std::vector target_sig; // source signature minus op.remove_id, order preserved
+ std::vector target_vtables;
+ {
+ Archetype const& src = world.archetypes[src_idx];
+ target_sig.reserve(src.signature.size() - 1);
+ target_vtables.reserve(src.signature.size() - 1);
+ for (std::size_t i = 0; i < src.signature.size(); ++i) {
+ if (src.signature[i] == op.remove_id) {
+ continue;
+ }
+ target_sig.push_back(src.signature[i]);
+ target_vtables.push_back(src.vtables[i]);
+ }
+ }
+
+ uint32_t const target_idx
+ = world.find_or_create_archetype(target_sig, target_vtables.data());
+
+ Archetype::RowLocation target_loc = world.archetypes[target_idx].reserve_row();
+ world.archetypes[target_idx].entity_array(target_loc.chunk_index)[target_loc.row] = eid;
+
+ {
+ Archetype& target = world.archetypes[target_idx];
+ Archetype& src = world.archetypes[src_idx];
+ if (src.column_offsets[drop_col_idx] != NO_COLUMN_OFFSET) { // tag columns hold no value to destroy
+ src.vtables[drop_col_idx]->destroy(
+ src.row_in_column(src_chunk, drop_col_idx, src_row)
+ );
+ }
+ for (std::size_t i = 0; i < target.signature.size(); ++i) {
+ component_type_id_t const tid = target.signature[i];
+ if (target.column_offsets[i] == NO_COLUMN_OFFSET) {
+ continue;
+ }
+ std::size_t const src_col_idx = src.column_index_for(tid);
+ void* dst = target.row_in_column(target_loc.chunk_index, i, target_loc.row);
+ void* srcp = src.row_in_column(src_chunk, src_col_idx, src_row);
+ target.vtables[i]->move_construct(dst, srcp);
+ }
+ }
+
+ world.compact_archetype_after_external_move(src_idx, src_chunk, src_row);
+
+ EntitySlot& mutable_slot = world.entity_slots[eid.index];
+ mutable_slot.archetype_index = target_idx;
+ mutable_slot.chunk_index = static_cast(target_loc.chunk_index);
+ mutable_slot.row = static_cast(target_loc.row);
+ break;
+ }
+ }
+ }
+ ops.clear(); // payloads of ops that bailed out early are destroyed by ~PayloadSlot here
+ deferred_count_ = 0;
+}
+
+void CommandBuffer::clear() {
+ // Discards every queued op without applying it. PayloadSlot destructors destroy and free any values still held.
+ ops.clear();
+ deferred_count_ = 0; // no placeholders remain outstanding once the ops are gone
+}
+
+void CommandBuffer::merge_from(CommandBuffer&& other) {
+ if (other.ops.empty()) {
+ // With zero ops, other.deferred_count_ is expected to be 0 already (create_entity, clear, apply
+ // and merge_from all keep the two in step); folding it in regardless is purely defensive.
+ deferred_count_ += other.deferred_count_;
+ other.deferred_count_ = 0;
+ return;
+ }
+ ops.reserve(ops.size() + other.ops.size());
+ uint32_t const base = deferred_count_; // placeholders already owned by this buffer occupy [0, base)
+ // Rebase incoming placeholder local_seqs by `base` so placeholders stay unique post-merge.
+ // CreateEntity ops carry their own placeholder in op.eid; AddComponent / DestroyEntity /
+ // RemoveComponent carry whatever placeholder the recorder passed in. All four kinds get
+ // the same rewrite — `is_deferred()` is a pure flag check, so a real EID survives untouched.
+ for (Op& op : other.ops) {
+ if (op.eid.is_deferred()) {
+ op.eid.index += base;
+ }
+ ops.push_back(std::move(op));
+ }
+ deferred_count_ += other.deferred_count_;
+ other.ops.clear(); // leaves `other` empty, as the header contract promises
+ other.deferred_count_ = 0;
+}
diff --git a/src/openvic-simulation/ecs/CommandBuffer.hpp b/src/openvic-simulation/ecs/CommandBuffer.hpp
new file mode 100644
index 000000000..08a08b4f5
--- /dev/null
+++ b/src/openvic-simulation/ecs/CommandBuffer.hpp
@@ -0,0 +1,302 @@
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "openvic-simulation/ecs/Archetype.hpp"
+#include "openvic-simulation/ecs/ComponentTypeID.hpp"
+#include "openvic-simulation/ecs/EntityID.hpp"
+#include "openvic-simulation/ecs/World.hpp"
+
+namespace OpenVic::ecs {
+
+ // Type-erased holder for one component value queued in a CommandBuffer. Owns the
+ // allocation; destructor cleans up correctly regardless of payload type. Move-only.
+ struct PayloadSlot {
+ void* data = nullptr; // owned storage for the value; null when nothing is held
+ ColumnVTable const* vtable = nullptr; // supplies size / align / destroy for `data`
+
+ PayloadSlot() = default;
+
+ PayloadSlot(PayloadSlot const&) = delete;
+ PayloadSlot& operator=(PayloadSlot const&) = delete;
+
+ PayloadSlot(PayloadSlot&& other) noexcept : data { other.data }, vtable { other.vtable } {
+ other.data = nullptr; // source gives up ownership so its destructor does nothing
+ other.vtable = nullptr;
+ }
+ PayloadSlot& operator=(PayloadSlot&& other) noexcept {
+ if (this != &other) {
+ reset(); // release whatever this slot currently holds before adopting other's payload
+ data = other.data;
+ vtable = other.vtable;
+ other.data = nullptr;
+ other.vtable = nullptr;
+ }
+ return *this;
+ }
+
+ ~PayloadSlot() {
+ reset();
+ }
+
+ void reset() { // destroys the held value (if any), frees its storage, and empties the slot
+ if (data != nullptr && vtable != nullptr && vtable->size > 0) {
+ vtable->destroy(data);
+ ::operator delete(data, std::align_val_t { vtable->align });
+ }
+ data = nullptr;
+ vtable = nullptr;
+ }
+
+ // Allocate aligned, unconstructed storage for one value of vt; the caller placement-news into `data`.
+ // size == 0 (tag) leaves `data` untouched. Any previous `data` is overwritten, not freed — use on an empty slot.
+ void allocate(ColumnVTable const* vt) {
+ vtable = vt;
+ if (vt != nullptr && vt->size > 0) {
+ data = ::operator new(vt->size, std::align_val_t { vt->align });
+ }
+ }
+
+ // Release ownership of the payload (caller must destroy the value or move it
+ // onwards). Used during apply() — the archetype's column move-constructs from
+ // `data`, then we still need to free the now-moved-from allocation.
+ void* release_data() {
+ void* d = data;
+ data = nullptr;
+ vtable = nullptr; // also forgets the vtable: read vtable->align before calling if it is needed to free `d`
+ return d;
+ }
+ };
+
+ struct CommandBuffer {
+ // In **serial mode** (default): reserves a slot in `world` synchronously and returns its
+ // real EntityID. `world.is_alive(eid)` returns false until `apply()` finalises it.
+ // Components are copied / moved into a type-erased PayloadSlot per component.
+ //
+ // In **parallel mode** (`set_parallel_mode(true)` — set by SystemThreaded on every
+ // per-chunk buffer): no World mutation. Returns a *deferred placeholder* EntityID
+ // `{ index = local_seq, generation = DEFERRED_GENERATION_BIT }`. The placeholder
+ // satisfies `is_valid()` and `is_deferred()`, fails `world.is_alive()`, and is accepted
+ // by other ops on the same buffer (`add_component`, `destroy_entity`, `remove_component`).
+ // `apply()` resolves placeholders to real EntityIDs at the stage barrier, on a single
+ // thread, in record order. Allocation order is therefore worker-count-invariant given
+ // the chunk_idx-ascending merge done by `SystemThreaded::tick_all` before apply.
+ template
+ EntityID create_entity(World& world, Cs&&... values);
+
+ inline void destroy_entity(EntityID id) { // records intent only; the World is touched in apply()
+ Op op;
+ op.kind = OpKind::DestroyEntity;
+ op.eid = id;
+ ops.push_back(std::move(op));
+ }
+
+ template
+ void add_component(EntityID id, C&& value);
+
+ template
+ void add_component(EntityID id); // default-construct
+
+ template
+ void remove_component(EntityID id);
+
+ // Drains the buffer onto the world in record order. The scheduler invokes this
+ // once per system per stage in registration_index order. For SystemThreaded, each
+ // chunk has its own CommandBuffer; the system-level pending CommandBuffer is built
+ // by `merge_from`-ing each per-chunk buffer in chunk_idx ascending order before
+ // `apply()` is called.
+ void apply(World& world);
+
+ // Splice `other`'s queued ops onto the end of our op vector. After return, `other`
+ // is empty (op_count() == 0). Used by `SystemThreaded::tick_all` to combine the
+ // per-chunk buffers into the system's pending buffer in chunk_idx ascending order.
+ // PayloadSlot moves are zero-copy (just pointer transfer).
+ void merge_from(CommandBuffer&& other);
+
+ // Resets without applying — every queued payload is destroyed via its vtable. After
+ // clear(), op_count() == 0 and empty() == true.
+ void clear();
+
+ // When set, `create_entity` switches to deferred mode: no World mutation, returns a
+ // placeholder EntityID resolved at apply time. add_component / remove_component /
+ // destroy_entity continue to record op intent (they always defer). Set by
+ // SystemThreaded on every per-chunk buffer; cleared on the system's pending buffer
+ // before merge_from + apply so the resolution path is exercised on a single thread.
+ // Default false.
+ void set_parallel_mode(bool enabled) {
+ parallel_mode_ = enabled;
+ }
+ bool parallel_mode() const {
+ return parallel_mode_;
+ }
+
+ // Number of deferred CreateEntity ops queued (placeholder entities pending resolution).
+ // Bumped by `create_entity` while parallel_mode is set, summed across `merge_from` calls,
+ // reset to 0 by `apply` and `clear`. Used by `apply` to size the placeholder→real map.
+ uint32_t deferred_count() const {
+ return deferred_count_;
+ }
+
+ std::size_t op_count() const {
+ return ops.size();
+ }
+ bool empty() const {
+ return ops.empty();
+ }
+
+ private:
+ enum class OpKind {
+ CreateEntity, // payload: full archetype signature + per-component slots
+ DestroyEntity, // no payload
+ AddComponent, // payload: 1 component slot (tag-aware)
+ RemoveComponent // no payload — only the type id
+ };
+
+ struct CreatePayload {
+ std::vector sorted_sig;
+ std::vector sorted_vtables;
+ std::vector sorted_values; // length == sorted_sig.size()
+ };
+
+ struct AddPayload {
+ component_type_id_t id;
+ PayloadSlot value; // .vtable always set; .data is null when vtable->size == 0 (tag) — default adds of non-empty types do hold a value
+ bool is_default; // true when add_component() with no value; recorded only — apply() as shown never reads it
+ };
+
+ struct Op {
+ OpKind kind;
+ EntityID eid;
+ component_type_id_t remove_id = 0; // RemoveComponent only
+ CreatePayload create; // CreateEntity only
+ AddPayload add; // AddComponent only
+ };
+
+ std::vector ops;
+ bool parallel_mode_ = false;
+ // Count of deferred (placeholder) CreateEntity ops queued in this buffer. When two
+ // buffers are spliced via `merge_from`, the receiver rebases incoming placeholder
+ // `index`es by its current `deferred_count_` so placeholders stay unique post-merge.
+ // `apply` consumes this to size its placeholder→real-EntityID resolution map; `clear`
+ // and `apply` reset it to 0.
+ uint32_t deferred_count_ = 0;
+ };
+
+ // === template definitions ===
+
+ template
+ EntityID CommandBuffer::create_entity(World& world, Cs&&... values) {
+ static_assert(sizeof...(Cs) > 0, "CommandBuffer::create_entity requires at least one component");
+
+ // Build the same sorted signature as World::create_entity does, recording the
+ // vtable pointer alongside each id.
+ component_type_id_t const raw_ids[] = { component_type_id_of>()... };
+ ColumnVTable const* const raw_vtables[] = { &column_vtable_for>()... };
+ constexpr std::size_t const N = sizeof...(Cs);
+
+ component_type_id_t sorted_ids[N];
+ ColumnVTable const* sorted_vtables[N];
+ for (std::size_t i = 0; i < N; ++i) {
+ sorted_ids[i] = raw_ids[i];
+ sorted_vtables[i] = raw_vtables[i];
+ }
+ for (std::size_t i = 0; i < N; ++i) { // exchange sort, ascending by id; ids and vtables are swapped in lockstep
+ for (std::size_t j = i + 1; j < N; ++j) {
+ if (sorted_ids[j] < sorted_ids[i]) {
+ std::swap(sorted_ids[i], sorted_ids[j]);
+ std::swap(sorted_vtables[i], sorted_vtables[j]);
+ }
+ }
+ }
+
+ // In parallel mode (SystemThreaded per-chunk buffers), defer slot reservation: no World
+ // mutation here, just hand back a placeholder EntityID with DEFERRED_GENERATION_BIT set.
+ // `apply()` allocates the real slot at the stage barrier and rewrites the placeholder.
+ // In serial mode, reserve a real slot up-front so callers get a usable EntityID
+ // immediately (e.g. for `cmd.add_component(eid, ...)` later in the same recording).
+ EntityID const eid = parallel_mode_
+ ? EntityID { deferred_count_++, DEFERRED_GENERATION_BIT }
+ : world.reserve_entity_slot();
+
+ Op op;
+ op.kind = OpKind::CreateEntity;
+ op.eid = eid;
+ op.create.sorted_sig.assign(sorted_ids, sorted_ids + N);
+ op.create.sorted_vtables.assign(sorted_vtables, sorted_vtables + N);
+ op.create.sorted_values.resize(N);
+ for (std::size_t i = 0; i < N; ++i) {
+ op.create.sorted_values[i].allocate(sorted_vtables[i]); // raw storage only; constructed by `place` below
+ }
+
+ // Move each value into its sorted slot: the fold below visits the raw (unsorted) pack and
+ // `place` finds the slot by id. NOTE(review): a type repeated in Cs... would resolve to the same slot twice.
+ auto place = [&](C&& value) {
+ using TC = std::remove_cvref_t;
+ component_type_id_t const id = component_type_id_of();
+ std::size_t target = N;
+ for (std::size_t i = 0; i < N; ++i) {
+ if (op.create.sorted_sig[i] == id) {
+ target = i;
+ break;
+ }
+ }
+ if constexpr (!std::is_empty_v) {
+ ::new (op.create.sorted_values[target].data) TC(std::forward(value));
+ } else {
+ (void) value; // empty type: nothing is constructed
+ (void) target;
+ }
+ };
+ (place(std::forward(values)), ...);
+
+ ops.push_back(std::move(op));
+ return eid;
+ }
+
+ template
+ void CommandBuffer::add_component(EntityID id, C&& value) { // queues an AddComponent op carrying `value`
+ using TC = std::remove_cvref_t;
+ Op op;
+ op.kind = OpKind::AddComponent;
+ op.eid = id;
+ op.add.id = component_type_id_of();
+ op.add.is_default = false;
+ op.add.value.allocate(&column_vtable_for()); // vtable is recorded even when no storage is allocated
+ if constexpr (!std::is_empty_v) {
+ ::new (op.add.value.data) TC(std::forward(value)); // copy- or move-construct into the payload storage
+ } else {
+ (void) value; // empty type: nothing is constructed
+ }
+ ops.push_back(std::move(op));
+ }
+
+ template
+ void CommandBuffer::add_component(EntityID id) { // queues an AddComponent op with a value-initialised component
+ using TC = std::remove_cvref_t;
+ Op op;
+ op.kind = OpKind::AddComponent;
+ op.eid = id;
+ op.add.id = component_type_id_of();
+ op.add.is_default = true;
+ op.add.value.allocate(&column_vtable_for());
+ if constexpr (!std::is_empty_v) {
+ ::new (op.add.value.data) TC {}; // non-empty types still get a real payload object
+ }
+ ops.push_back(std::move(op));
+ }
+
+ template
+ void CommandBuffer::remove_component(EntityID id) { // queues a RemoveComponent op; no payload is allocated
+ using TC = std::remove_cvref_t;
+ Op op;
+ op.kind = OpKind::RemoveComponent;
+ op.eid = id;
+ op.remove_id = component_type_id_of(); // only the type id is needed at apply time
+ ops.push_back(std::move(op));
+ }
+}
diff --git a/src/openvic-simulation/ecs/ComponentTypeID.hpp b/src/openvic-simulation/ecs/ComponentTypeID.hpp
new file mode 100644
index 000000000..3608bff31
--- /dev/null
+++ b/src/openvic-simulation/ecs/ComponentTypeID.hpp
@@ -0,0 +1,50 @@
+#pragma once
+
+#include
+#include
+
+namespace OpenVic::ecs {
+ using component_type_id_t = uint64_t;
+
+ // FNV-1a 64-bit. Pure constexpr — the same input string yields the same hash on every
+ // compiler and platform, so component IDs are byte-identical across builds. This is the
+ // foundation OpenVic relies on for cross-platform deterministic protocols (multiplayer,
+ // save sharing, replay logs). Each char is hashed as its unsigned byte value (0..255).
+ constexpr component_type_id_t fnv1a_64(std::string_view s) {
+ constexpr uint64_t FNV_PRIME = 0x100000001b3ULL;
+ constexpr uint64_t FNV_OFFSET = 0xcbf29ce484222325ULL;
+ uint64_t h = FNV_OFFSET;
+ for (char c : s) {
+ h ^= static_cast<uint64_t>(static_cast<unsigned char>(c)); // via unsigned char: plain char may be signed and would sign-extend bytes >= 0x80
+ h *= FNV_PRIME;
+ }
+ return h;
+ }
+
+ // Primary template intentionally undefined. Every component used with World must specialise
+ // this trait — the typical path is the ECS_COMPONENT macro defined below. Failure to register
+ // becomes a clear compile error: "incomplete type ComponentName".
+ template
+ struct ComponentName;
+
+ template
+ constexpr component_type_id_t component_type_id_of() { // id = FNV-1a 64 of the registered name string
+ return fnv1a_64(ComponentName::value);
+ }
+}
+
+// Specialise OpenVic::ecs::ComponentName with a stable string literal that becomes the
+// component's identity across all builds. Must be invoked at global namespace scope: the
+// expansion opens `namespace OpenVic::ecs` itself, so it cannot sit inside another namespace.
+// The literal must be globally unique within the simulation (the id is a hash of it); renames
+// are breaking changes to anything that persists component IDs (saves, replays, network protocol).
+// Example:
+// struct LeaderTemplate { ... };
+// ECS_COMPONENT(OpenVic::LeaderTemplate, "OpenVic::LeaderTemplate")
+#define ECS_COMPONENT(Type, NameLiteral) \
+ namespace OpenVic::ecs { \
+ template<> \
+ struct ComponentName { \
+ static constexpr std::string_view value = NameLiteral; \
+ }; \
+ }
diff --git a/src/openvic-simulation/ecs/EcsThreadPool.cpp b/src/openvic-simulation/ecs/EcsThreadPool.cpp
new file mode 100644
index 000000000..49e251de7
--- /dev/null
+++ b/src/openvic-simulation/ecs/EcsThreadPool.cpp
@@ -0,0 +1,134 @@
+#include "openvic-simulation/ecs/EcsThreadPool.hpp"
+
+#include
+#include
+
+using namespace OpenVic::ecs;
+
+EcsThreadPool::EcsThreadPool(uint32_t worker_count) {
+ uint32_t const n = std::max(1u, worker_count); // a request for 0 workers still yields one
+ workers_.reserve(n);
+ for (uint32_t i = 0; i < n; ++i) {
+ workers_.emplace_back([this, i]() { worker_loop(i); }); // i becomes that thread's worker_id
+ }
+}
+
+EcsThreadPool::~EcsThreadPool() {
+ {
+ std::lock_guard lock(queue_mutex_);
+ stop_ = true; // written under queue_mutex_, the same mutex the workers' wait predicate runs under
+ }
+ cv_.notify_all(); // wake every idle worker so it can observe stop_
+ for (std::thread& t : workers_) {
+ if (t.joinable()) {
+ t.join(); // workers return only once stop_ is set and the queue is empty
+ }
+ }
+}
+
+void EcsThreadPool::worker_loop(uint32_t worker_id) {
+ for (;;) {
+ Job job;
+ bool have_job = false;
+ {
+ std::unique_lock lock(queue_mutex_);
+ cv_.wait(lock, [this]() { return stop_ || !queue_.empty(); });
+ if (stop_ && queue_.empty()) { // exit only after the queue has been drained
+ return;
+ }
+ if (!queue_.empty()) {
+ job = std::move(queue_.back()); // most recently queued job first (LIFO)
+ queue_.pop_back();
+ have_job = true;
+ }
+ }
+ if (!have_job) { // defensive; not reachable given the wait predicate and the stop check above
+ continue;
+ }
+
+ if (job.parallel_body != nullptr) { // parallel_for slice: shared body + this job's chunk index
+ (*job.parallel_body)(job.chunk_idx, worker_id);
+ } else if (job.concurrent_body) { // run_concurrent item: one-shot callable owned by the job
+ job.concurrent_body();
+ }
+
+ // Decrement the dispatch's own DoneState under its mutex so the caller's
+ // predicate (evaluated under the same mutex inside cv.wait) cannot observe
+ // count == 0 until the decrement-and-notify sequence has completed — that
+ // is what allows the caller's DoneState to live on the caller's stack
+ // without lifetime races.
+ DoneState* const done = job.done;
+ std::lock_guard lock(done->mutex);
+ --done->count;
+ if (done->count == 0) {
+ done->cv.notify_all();
+ }
+ }
+}
+
+void EcsThreadPool::run_parallel_for_impl(std::size_t chunk_count, ParallelForBody body) {
+ // Per-call DoneState — separate counter+CV for each dispatch lets a System
+ // dispatched via run_concurrent itself call parallel_for without trampling the
+ // outer dispatch's accounting. Lives on this stack frame; workers hold its
+ // mutex while decrementing so we cannot return (and destroy it) until the
+ // last worker has fully released the mutex.
+ DoneState done;
+ done.count = chunk_count; // one decrement is expected per queued chunk job
+
+ // Push every chunk index as a separate Job into the queue. The `body` lives on the
+ // caller's stack for the duration of this call; jobs hold a non-owning pointer to it.
+ {
+ std::lock_guard lock(queue_mutex_);
+ queue_.reserve(queue_.size() + chunk_count);
+ for (std::size_t i = 0; i < chunk_count; ++i) {
+ Job j;
+ j.parallel_body = &body;
+ j.chunk_idx = i;
+ j.done = &done;
+ queue_.push_back(std::move(j));
+ }
+ }
+ cv_.notify_all();
+
+ // Block until the last job has decremented done.count to zero. The predicate runs
+ // under done.mutex (cv.wait acquires it), serialising with worker_loop's decrement-under-lock.
+ // NOTE(review): this thread only waits, it never runs queued jobs; if every worker is parked here via nested dispatch, nothing drains the queue — confirm callers cannot reach that state.
+ {
+ std::unique_lock lock(done.mutex);
+ done.cv.wait(lock, [&done]() {
+ return done.count == 0;
+ });
+ }
+}
+
+void EcsThreadPool::run_concurrent(std::span const> bodies) {
+ if (bodies.empty()) {
+ return;
+ }
+ if (workers_.size() <= 1 || bodies.size() == 1) { // nothing to overlap: run in order on the calling thread
+ for (auto const& fn : bodies) {
+ fn();
+ }
+ return;
+ }
+ DoneState done; // per-call completion tracker; every job queued below points at it
+ done.count = bodies.size();
+
+ {
+ std::lock_guard lock(queue_mutex_);
+ queue_.reserve(queue_.size() + bodies.size());
+ for (auto const& fn : bodies) {
+ Job j;
+ j.concurrent_body = fn; // copy — the Job owns its callable
+ j.done = &done;
+ queue_.push_back(std::move(j));
+ }
+ }
+ cv_.notify_all();
+ {
+ std::unique_lock lock(done.mutex); // blocks until workers have decremented done.count to zero
+ done.cv.wait(lock, [&done]() {
+ return done.count == 0;
+ });
+ }
+}
diff --git a/src/openvic-simulation/ecs/EcsThreadPool.hpp b/src/openvic-simulation/ecs/EcsThreadPool.hpp
new file mode 100644
index 000000000..9ac23cece
--- /dev/null
+++ b/src/openvic-simulation/ecs/EcsThreadPool.hpp
@@ -0,0 +1,113 @@
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace OpenVic::ecs {
+ // Dedicated thread pool for ECS scheduler dispatch. Intentionally separate from
+ // `OpenVic::ThreadPool` (which serves un-migrated production-tick / market-clearing
+ // code with a different work-bundle shape) so neither side disturbs the other.
+ //
+ // Workers are numbered 0..worker_count-1 with stable identities; each worker passes
+ // its `worker_id` to the body it executes. ECS callers do not depend on worker_id
+ // for determinism — per-chunk CommandBuffers are keyed by chunk_idx, not worker_id —
+ // but it is exposed for diagnostic or thread-local-scratch uses.
+ //
+ // Hard invariants:
+ // * `parallel_for` is blocking — does not return until every chunk's body has run.
+ // * `run_concurrent` is blocking — does not return until every supplied function
+ // has completed.
+ // * No work is ever silently dropped; bodies that throw will std::terminate (we do
+ // not guarantee exception-safety from inside system bodies — they should be noexcept).
+ class EcsThreadPool {
+ public:
+ // Construct with a fixed worker count. worker_count == 0 is treated as 1 (a
+ // degenerate single-thread pool, useful for tests and headless determinism runs).
+ explicit EcsThreadPool(uint32_t worker_count);
+ ~EcsThreadPool();
+
+ EcsThreadPool(EcsThreadPool const&) = delete;
+ EcsThreadPool& operator=(EcsThreadPool const&) = delete;
+ EcsThreadPool(EcsThreadPool&&) = delete;
+ EcsThreadPool& operator=(EcsThreadPool&&) = delete;
+
+ uint32_t worker_count() const noexcept {
+ return static_cast(workers_.size());
+ }
+
+ // Run body(chunk_idx, worker_id) for every chunk_idx in [0, chunk_count). Blocking.
+ // The internal scheduling strategy (work-queue, modulo, stealing) is opaque and
+ // deliberately not exposed — the only externally observable property is "every
+ // chunk's body runs exactly once and parallel_for does not return early".
+ template
+ void parallel_for(std::size_t chunk_count, Body&& body) {
+ if (chunk_count == 0) {
+ return;
+ }
+ if (workers_.size() <= 1 || chunk_count == 1) {
+ // Fast path: runs every chunk in ascending order on the calling thread with worker_id 0.
+ // Same observable behaviour as the parallel path; saves the queue/cv overhead in degenerate cases.
+ for (std::size_t i = 0; i < chunk_count; ++i) {
+ body(i, /*worker_id=*/0u);
+ }
+ return;
+ }
+ run_parallel_for_impl(chunk_count, [&body](std::size_t chunk_idx, uint32_t worker_id) {
+ body(chunk_idx, worker_id);
+ });
+ }
+
+ // Run each supplied function exactly once across the pool — used for inter-system
+ // parallelism within a scheduler stage. Blocking.
+ void run_concurrent(std::span const> bodies);
+
+ private:
+ // Type-erased body for parallel_for. Templated wrapper above forwards to this.
+ using ParallelForBody = std::function;
+
+ // Per-call completion tracker. Stored on the caller's stack inside parallel_for /
+ // run_concurrent and pointed-to by every Job that dispatch submits. Workers
+ // decrement the *job's* DoneState — not a shared pool counter — so a System
+ // dispatched via run_concurrent can itself call parallel_for without the inner
+ // dispatch corrupting the outer dispatch's accounting (pre-fix, both used a
+ // single pool-wide `remaining_` atomic, which caused spurious wakeups and
+ // SIZE_MAX underflow when a `SystemThreaded` shared a stage with another System).
+ // Lifetime is enforced by the worker taking `done->mutex` while decrementing
+ // `done->count` — the caller's `done.cv.wait` predicate is also evaluated under
+ // the same mutex, so the caller can't return (and destroy the DoneState) until
+ // the decrement-and-notify sequence has completed.
+ struct DoneState {
+ std::mutex mutex;
+ std::condition_variable cv;
+ std::size_t count = 0; // Always touched while holding `mutex`.
+ };
+
+ void run_parallel_for_impl(std::size_t chunk_count, ParallelForBody body);
+
+ void worker_loop(uint32_t worker_id);
+
+ std::vector workers_;
+
+ // Work item: either a parallel_for slice or a run_concurrent function.
+ struct Job {
+ // For parallel_for: pointer-back to the shared body + the chunk_idx assigned
+ // to this job. For run_concurrent: a one-shot function to invoke; chunk_idx is 0.
+ ParallelForBody const* parallel_body = nullptr; // borrowed; lives on caller stack
+ std::function concurrent_body; // owned
+ std::size_t chunk_idx = 0;
+ DoneState* done = nullptr; // borrowed; lives on caller stack until count hits 0
+ };
+
+ std::mutex queue_mutex_;
+ std::condition_variable cv_;
+ std::vector queue_; // LIFO: jobs are appended and popped from the back, both under queue_mutex_
+ bool stop_ = false;
+ };
+}
diff --git a/src/openvic-simulation/ecs/EntityID.hpp b/src/openvic-simulation/ecs/EntityID.hpp
new file mode 100644
index 000000000..ffccef3ed
--- /dev/null
+++ b/src/openvic-simulation/ecs/EntityID.hpp
@@ -0,0 +1,53 @@
+#pragma once
+
+#include
+
+namespace OpenVic::ecs {
+ // The top bit of EntityID::generation marks a deferred-create placeholder. In parallel mode
+ // (inside a SystemThreaded body) CommandBuffer::create_entity returns the placeholder
+ // `{ index = local_seq, generation = DEFERRED_GENERATION_BIT }`; the real generation is
+ // assigned when the slot is allocated at apply time. Real generations never carry this bit:
+ // they stay in [1, 0x7FFFFFFF] — World::allocate_entity_slot keeps them inside that range.
+ inline constexpr uint32_t DEFERRED_GENERATION_BIT = 0x80000000u;
+
+ struct EntityID {
+ uint32_t index = 0; // Slot index; for a deferred placeholder this is the local sequence number.
+ uint32_t generation = 0; // 0 = invalid; DEFERRED_GENERATION_BIT set = deferred placeholder.
+
+ constexpr bool operator==(EntityID const& rhs) const {
+ return index == rhs.index && generation == rhs.generation;
+ }
+
+ constexpr bool operator!=(EntityID const& rhs) const {
+ return !(*this == rhs);
+ }
+
+ // Generation 0 is the invalid sentinel; every valid entity has generation >= 1.
+ // A deferred placeholder also passes is_valid() because DEFERRED_GENERATION_BIT makes its
+ // generation non-zero, yet it is NOT alive in the World. Check is_deferred() to tell them apart.
+ constexpr bool is_valid() const {
+ return generation != 0;
+ }
+
+ // True for a placeholder handed out by CommandBuffer::create_entity in parallel mode and not
+ // yet resolved to a real EntityID by CommandBuffer::apply. Public World accessors treat
+ // deferred IDs as "not present" (return false / nullptr / 0); a placeholder is only usable
+ // as an argument to other ops on the same CommandBuffer recording session.
+ constexpr bool is_deferred() const {
+ return (generation & DEFERRED_GENERATION_BIT) != 0;
+ }
+
+ constexpr uint64_t to_uint64() const { // Packs generation into the high 32 bits, index into the low 32.
+ return (static_cast(generation) << 32) | static_cast(index);
+ }
+
+ static constexpr EntityID from_uint64(uint64_t value) { // Inverse of to_uint64().
+ EntityID eid;
+ eid.index = static_cast(value & 0xFFFFFFFFULL);
+ eid.generation = static_cast(value >> 32);
+ return eid;
+ }
+ };
+
+ inline constexpr EntityID INVALID_ENTITY_ID = {}; // index 0, generation 0, so is_valid() is false.
+}
diff --git a/src/openvic-simulation/ecs/Query.hpp b/src/openvic-simulation/ecs/Query.hpp
new file mode 100644
index 000000000..d7d01a816
--- /dev/null
+++ b/src/openvic-simulation/ecs/Query.hpp
@@ -0,0 +1,46 @@
+#pragma once
+
+#include
+#include
+
+#include "openvic-simulation/ecs/ComponentTypeID.hpp"
+
+namespace OpenVic::ecs {
+
+ // Builder for an archetype-matching query: `with()` lists components an archetype must
+ // have, `exclude()` lists components it must not have. Call `build()` once before handing
+ // the Query to a `for_each` overload — `build()` sorts and deduplicates both lists so the
+ // World can compare them against canonical sorted archetype signatures with a two-pointer
+ // scan, and so they hash stably for the query cache.
+ //
+ // `with()` and `exclude()` may each be called repeatedly; the lists accumulate. A built
+ // Query can be reused for as long as no further `with`/`exclude` calls are made on it.
+ struct Query {
+ std::vector require_ids; // Component ids that must be present.
+ std::vector exclude_ids; // Component ids that must be absent.
+
+ template
+ Query& with() {
+ (require_ids.push_back(component_type_id_of()), ...); // Comma fold: one push_back per pack element.
+ return *this;
+ }
+
+ template
+ Query& exclude() {
+ (exclude_ids.push_back(component_type_id_of()), ...); // Comma fold: one push_back per pack element.
+ return *this;
+ }
+
+ Query& build() { // Safe to call again: sorting/deduplicating an already-built list changes nothing.
+ std::sort(require_ids.begin(), require_ids.end());
+ require_ids.erase(std::unique(require_ids.begin(), require_ids.end()), require_ids.end());
+ std::sort(exclude_ids.begin(), exclude_ids.end());
+ exclude_ids.erase(std::unique(exclude_ids.begin(), exclude_ids.end()), exclude_ids.end());
+ return *this;
+ }
+
+ bool operator==(Query const& other) const { // Order-sensitive list comparison; meaningful on built queries.
+ return require_ids == other.require_ids && exclude_ids == other.exclude_ids;
+ }
+ };
+}
diff --git a/src/openvic-simulation/ecs/Reductions.hpp b/src/openvic-simulation/ecs/Reductions.hpp
new file mode 100644
index 000000000..c613c4c2c
--- /dev/null
+++ b/src/openvic-simulation/ecs/Reductions.hpp
@@ -0,0 +1,66 @@
+#pragma once
+
+#include
+#include
+#include
+
+#include "openvic-simulation/ecs/EcsThreadPool.hpp"
+
+namespace OpenVic::ecs::reductions {
+ // Deterministic parallel reductions: per-chunk worker bodies write into a
+ // chunk_idx-indexed std::vector; after the parallel section joins, the per-chunk
+ // results are folded sequentially in ascending chunk_idx order. For a given chunk_count
+ // and a deterministic body, the result is bit-identical regardless of worker_count: the
+ // only order-sensitive step is that final sequential fold, which is independent of the pool.
+
+ // Evaluates body(chunk_idx) -> T once per chunk via the pool, then sums the results
+ // sequentially. `init` is the starting value of the sum; per-chunk values are added to it
+ // in ascending chunk_idx order. For integer/fixed_point types, where addition is
+ // associative, the result is bit-identical across worker counts.
+ template
+ T parallel_sum(EcsThreadPool& pool, std::size_t chunk_count, T init, Body&& body) {
+ std::vector per_chunk(chunk_count); // One slot per chunk; each invocation writes only its own slot.
+ pool.parallel_for(chunk_count, [&](std::size_t chunk_idx, uint32_t /*worker_id*/) {
+ per_chunk[chunk_idx] = body(chunk_idx);
+ });
+ T acc = init;
+ for (std::size_t i = 0; i < chunk_count; ++i) { // Sequential fold in fixed left-to-right order.
+ acc = acc + per_chunk[i];
+ }
+ return acc; // Still `init` if the fold loop never ran (chunk_count == 0).
+ }
+
+ // Evaluates body(chunk_idx) -> T once per chunk via the pool, then takes the running
+ // minimum, seeded with `init`. The fold runs sequentially in ascending chunk_idx order and
+ // only replaces the accumulator on a strict `<`, so among equal values the leftmost wins;
+ // the result depends only on the chunk count and per-chunk values, not the worker schedule.
+ template
+ T parallel_min(EcsThreadPool& pool, std::size_t chunk_count, T init, Body&& body) {
+ std::vector per_chunk(chunk_count); // One slot per chunk; each invocation writes only its own slot.
+ pool.parallel_for(chunk_count, [&](std::size_t chunk_idx, uint32_t /*worker_id*/) {
+ per_chunk[chunk_idx] = body(chunk_idx);
+ });
+ T acc = init; // `init` takes part in the comparison, so the result is never above it.
+ for (std::size_t i = 0; i < chunk_count; ++i) {
+ if (per_chunk[i] < acc) {
+ acc = per_chunk[i];
+ }
+ }
+ return acc;
+ }
+
+ template
+ T parallel_max(EcsThreadPool& pool, std::size_t chunk_count, T init, Body&& body) { // Max counterpart of parallel_min.
+ std::vector per_chunk(chunk_count); // One slot per chunk; each invocation writes only its own slot.
+ pool.parallel_for(chunk_count, [&](std::size_t chunk_idx, uint32_t /*worker_id*/) {
+ per_chunk[chunk_idx] = body(chunk_idx);
+ });
+ T acc = init; // Running maximum, seeded with `init`, so the result is never below it.
+ for (std::size_t i = 0; i < chunk_count; ++i) { // Sequential fold in ascending chunk_idx order.
+ if (per_chunk[i] > acc) { // Strict '>' keeps the leftmost of equal values.
+ acc = per_chunk[i];
+ }
+ }
+ return acc;
+ }
+}
diff --git a/src/openvic-simulation/ecs/System.hpp b/src/openvic-simulation/ecs/System.hpp
new file mode 100644
index 000000000..a73a2f3e2
--- /dev/null
+++ b/src/openvic-simulation/ecs/System.hpp
@@ -0,0 +1,300 @@
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "openvic-simulation/ecs/ComponentTypeID.hpp"
+#include "openvic-simulation/ecs/EntityID.hpp"
+#include "openvic-simulation/ecs/SystemAccess.hpp"
+#include "openvic-simulation/ecs/SystemTypeID.hpp"
+#include "openvic-simulation/types/Date.hpp"
+
+namespace OpenVic::ecs {
+ struct World;
+ struct CommandBuffer;
+ class EcsThreadPool;
+
+ // (archetype_idx, chunk_idx) pair identifying one chunk of a matched archetype. The
+ // scheduler's multi-system parallel branch uses it to build a flat work-item list covering
+ // every SystemThreaded in a stage, dispatched via one outer parallel_for. Declared at
+ // namespace scope (not in detail) so SystemRegistration can store a function pointer
+ // returning std::vector without circular includes.
+ struct ChunkLocation {
+ uint32_t archetype_idx; // Matched archetype the chunk belongs to. No default initializer.
+ uint32_t chunk_idx; // Chunk within that archetype. No default initializer.
+ };
+
+ // Per-tick context handed to every System. It carries what a System may need to know
+ // about the simulation state at the moment of the tick, plus the system's CommandBuffer
+ // for deferred structural mutations. For SystemThreaded, `cmd` refers to the per-chunk
+ // CommandBuffer that the driver allocates for the chunk containing the row being
+ // ticked.
+ struct TickContext {
+ World& world; // World being ticked.
+ Date today; // Date of the current tick.
+ CommandBuffer& cmd; // Destination for deferred structural mutations (see above).
+ };
+
+ // Stable handle returned by `register_system`. `unregister_system` bumps the generation,
+ // so a stale handle reliably fails an `is_valid` / `unregister_system` check instead of
+ // silently mutating whichever system now occupies the same slot.
+ struct SystemHandle {
+ uint32_t index = 0; // Slot index of the registered system.
+ uint32_t generation = 0; // 0 = invalid sentinel; bumped on unregister_system.
+
+ constexpr bool operator==(SystemHandle const& rhs) const {
+ return index == rhs.index && generation == rhs.generation;
+ }
+ constexpr bool operator!=(SystemHandle const& rhs) const {
+ return !(*this == rhs);
+ }
+ // Generation 0 is the invalid sentinel; every valid handle has generation >= 1.
+ constexpr bool is_valid() const {
+ return generation != 0;
+ }
+ };
+
+ inline constexpr SystemHandle INVALID_SYSTEM_HANDLE = {}; // index 0, generation 0, so is_valid() is false.
+
+ // === Member-function-trait machinery for extracting the component pack from
+ // `&Derived::tick`. ===
+
+ namespace detail {
+ // Member-function traits keyed on the function-pointer *type*. Use as `fn_traits`.
+ // MSVC doesn't accept `auto` non-type template params for member-function pointers
+ // with variadic Args, so the traits go through the function-pointer type instead.
+ template
+ struct fn_traits; // Primary template is declared only; the specializations below provide the members.
+
+ template
+ struct fn_traits {
+ using class_type = C;
+ using return_type = R;
+ using args_tuple = std::tuple;
+ static constexpr std::size_t arg_count = sizeof...(Args); // Number of parameters in the signature.
+ };
+
+ template
+ struct fn_traits {
+ using class_type = C;
+ using return_type = R;
+ using args_tuple = std::tuple;
+ static constexpr std::size_t arg_count = sizeof...(Args); // Number of parameters in the signature.
+ };
+
+ // Drops the leading `TickContext const&` (and the optional `EntityID` after it) from a tick
+ // args tuple: `components` is the remaining component pack, `takes_entity` says which form matched.
+ template
+ struct strip_context_and_entity; // Primary template is declared only.
+
+ template
+ struct strip_context_and_entity> {
+ using components = std::tuple;
+ static constexpr bool takes_entity = true; // Matched signature includes the EntityID parameter.
+ };
+
+ template
+ struct strip_context_and_entity> {
+ using components = std::tuple;
+ static constexpr bool takes_entity = false; // Matched signature has no EntityID parameter.
+ };
+
+ template
+ using tick_args_tuple_t =
+ typename fn_traits::args_tuple; // Parameter tuple reported by fn_traits for the tick signature.
+
+ template
+ using component_pack_t =
+ typename strip_context_and_entity>::components; // Component parameters only.
+
+ template
+ constexpr bool tick_takes_entity_v =
+ strip_context_and_entity>::takes_entity; // True when tick also receives the EntityID.
+
+ // Builds a static array of ComponentAccess entries from a component pack tuple, one entry per component.
+ template
+ struct access_set_from_tuple; // Primary template is declared only.
+
+ template
+ struct access_set_from_tuple> {
+ static constexpr std::size_t N = sizeof...(Cs); // Number of components in the pack.
+ static constexpr std::array value() {
+ return { ComponentAccess {
+ component_type_id_of>(),
+ std::is_const_v> ? AccessMode::Read : AccessMode::Write // const component => Read, otherwise Write
+ }... };
+ }
+ };
+
+ template
+ constexpr auto compute_access_set() { // Convenience wrapper around access_set_from_tuple::value().
+ return access_set_from_tuple>::value();
+ }
+
+ // Builds a sorted, duplicate-free vector of component_type_id_t from a tick parameter pack.
+ // The scheduler's query-cache prewarm pass uses it: it captures the iteration query
+ // (tick params only, NOT extra_reads) so the scheduler can populate query_cache on
+ // the main thread before dispatching workers.
+ template
+ struct require_ids_from_tuple; // Primary template is declared only.
+
+ template
+ struct require_ids_from_tuple> {
+ static std::vector compute() {
+ std::vector ids = {
+ component_type_id_of>()... // One id per component in the pack.
+ };
+ std::sort(ids.begin(), ids.end());
+ ids.erase(std::unique(ids.begin(), ids.end()), ids.end()); // Drop adjacent duplicates left by the sort.
+ return ids;
+ }
+ };
+
+ // Iteration drivers: only declared here; the definitions live in SystemImpl.hpp (which
+ // transitively includes World.hpp + CommandBuffer.hpp + EcsThreadPool.hpp). They are templates
+ // so the definitions are picked up at instantiation, which breaks the include cycle.
+ template
+ void dispatch_serial(Derived& self, World& world, TickContext const& ctx);
+
+ template
+ void dispatch_threaded(
+ Derived& self, World& world, TickContext const& ctx,
+ EcsThreadPool& pool, std::vector& per_chunk_cmds,
+ CommandBuffer& pending_cmd
+ );
+ }
+
+ // === System base — CRTP, non-virtual tick. ===
+
+ template
+ struct System {
+ // Contract for Derived — it must provide a non-virtual member:
+ // void tick(TickContext const& ctx, [EntityID,] Cs&... components);
+ // where each Cs states its access intent: `C const` = Read, `C` = Write.
+
+ static constexpr bool is_threaded = false; // SystemThreaded redeclares this as true.
+
+ // Access set computed at compile time from the signature of &Derived::tick.
+ static constexpr auto declared_access() {
+ return detail::compute_access_set();
+ }
+
+ // Returns the system_type_id_t of Derived, which requires an ECS_SYSTEM(Type)
+ // declaration for Derived at namespace scope.
+ static constexpr system_type_id_t type_id() {
+ return system_type_id_of();
+ }
+
+ // Dependency lists, empty by default. To add some, declare on the derived class
+ // static constexpr std::array declared_run_after();
+ // (or `_before` / `extra_reads`) for explicit ordering or cross-archetype reads.
+ static constexpr std::array declared_run_after() { return {}; }
+ static constexpr std::array declared_run_before() { return {}; }
+ static constexpr std::array extra_reads() { return {}; }
+
+ // Sorted-unique component ids that define this system's iteration query — the tick
+ // parameter pack only, NOT extra_reads. The scheduler reads them at registration time
+ // and again per-tick to prewarm the World's query cache before a multi-system stage
+ // dispatches workers, so resolve_query_cache never has to mutate its hashmap from a
+ // worker thread.
+ static std::vector compute_tick_query_require_ids() {
+ return detail::require_ids_from_tuple>::compute();
+ }
+
+ // Drives serial iteration; the scheduler calls it once per tick. Defined inline
+ // because dispatch_serial is a template — it is instantiated at the point a derived
+ // system's tick_all_fn is taken (which requires SystemImpl.hpp to be visible).
+ void tick_all(World& world, TickContext const& ctx) {
+ detail::dispatch_serial(static_cast(*this), world, ctx); // CRTP downcast to the concrete system.
+ }
+ };
+
+ template
+ struct SystemThreaded : System {
+ static constexpr bool is_threaded = true;
+
+ // Drives chunk-parallel iteration via the World's EcsThreadPool. Per-chunk
+ // CommandBuffers (indexed by chunk_idx) are allocated, populated, then merged into
+ // the system's pending CommandBuffer in chunk_idx ascending order — making the
+ // apply order deterministic and identical across all worker counts.
+ //
+ // Used by the scheduler ONLY for single-system stages. For multi-system stages the
+ // scheduler instead invokes collect_chunks + tick_one_chunk per chunk inside one
+ // outer parallel_for that mixes work from every system in the stage — avoiding
+ // nested parallel_for and the current_system_registration_ race.
+ void tick_all(World& world, TickContext const& ctx);
+
+ // Multi-system-stage entry points. The scheduler calls collect_chunks once on the
+ // main thread to enumerate matched chunks (in (arch_idx, chunk_idx) ascending order),
+ // then dispatches one work item per chunk via the outer parallel_for. Each work item
+ // invokes tick_one_chunk with that chunk's per_chunk_cmds_ slot as TickContext::cmd.
+ static std::vector collect_chunks(World& world);
+ static void tick_one_chunk(
+ Derived& self, World& world, TickContext const& ctx,
+ uint32_t archetype_idx, uint32_t chunk_idx
+ );
+
+ // Pooled across ticks to avoid per-tick allocator churn.
+ std::vector