Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 1 addition & 4 deletions activitysim/abm/tables/households.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,10 +99,7 @@ def households(state: workflow.State) -> pd.DataFrame:
if households_sample_size == 0:
sample_rate = 1
else:
# TODO: do not round, keep full precision to avoid creating 0 sample_rate for small samples
# Existing CI tests will fail when performing bitwise comparisons with the unrounded sample_rate
# We should update the CI tests
sample_rate = round(households_sample_size / tot_households, 3)
sample_rate = households_sample_size / tot_households

df["sample_rate"] = sample_rate

Expand Down
55 changes: 55 additions & 0 deletions activitysim/abm/test/test_misc/test_sample_rate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# ActivitySim
# See full license in LICENSE.txt.
import pandas as pd

import pytest

from activitysim.core import workflow


@pytest.fixture(scope="session")
def example_root(tmp_path_factory):
    """Return a temporary ``example`` directory with empty ``configs`` and ``data`` folders.

    The directory is created through pytest's ``tmp_path_factory`` and, being
    session-scoped, is built once and shared by every test that requests it.
    """
    base_dir = tmp_path_factory.mktemp("example")

    # Same creation order as before: configs first, then data.
    for subdir_name in ("configs", "data"):
        (base_dir / subdir_name).mkdir()

    return base_dir


@pytest.fixture(scope="module")
def state(example_root) -> workflow.State:
    """Build a default workflow state over a synthetic 100,000-household input.

    Writes ``configs/settings.yaml`` (requesting ``households_sample_size: 2``)
    and ``data/households.csv`` beneath ``example_root``, then returns the
    state produced by ``workflow.State.make_default(example_root)``.
    """
    # Function-scope import so the fixture does not depend on the module's
    # import block.
    from textwrap import dedent

    # dedent() lets the YAML stay indented alongside the code while the file
    # written to disk keeps ``filename`` and ``index_col`` nested under the
    # ``- tablename`` list entry rather than at the top level.
    settings = dedent(
        """\
        input_table_list:
          - tablename: households
            filename: households.csv
            index_col: household_id

        households_sample_size: 2
        """
    )

    settings_file = example_root / "configs" / "settings.yaml"
    settings_file.write_text(settings)

    # A sample of 2 out of 100,000 households gives a rate of 2e-05, which
    # would collapse to 0.0 if rounded to three decimals.
    n_households = 100_000
    households = pd.DataFrame(
        {
            "household_id": range(1, n_households + 1),
            "home_zone_id": [1, 2] * (n_households // 2),
        }
    )
    households.to_csv(example_root / "data" / "households.csv", index=False)

    return workflow.State.make_default(example_root)


def test_sample_rate_calculation(state):
    """Check that the households table carries the unrounded sample rate.

    The ``state`` fixture requests 2 households out of 100,000, so the
    expected rate is 2e-05.
    """
    households_df = state.get_dataframe("households")
    sample_rate = households_df["sample_rate"].iloc[0]

    # Compare with a tolerance instead of exact float equality; the default
    # pytest.approx tolerance is far smaller than 2e-05, so a rate that was
    # rounded down to 0.0 still fails this assertion.
    assert sample_rate == pytest.approx(
        0.00002
    ), f"Expected sample rate of 0.00002, but got {sample_rate}"
Loading