Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/crawlee/storage_clients/_redis/_client_mixin.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,8 +220,8 @@ async def _create_metadata_and_storage(self, storage_name: str, metadata: dict)
async def _drop(self, extra_keys: list[str]) -> None:
async with self._get_pipeline() as pipe:
await pipe.delete(self.metadata_key)
await pipe.delete(f'{self._MAIN_KEY}:id_to_name', self._storage_id)
await pipe.delete(f'{self._MAIN_KEY}:name_to_id', self._storage_name)
await await_redis_response(pipe.hdel(f'{self._MAIN_KEY}:id_to_name', self._storage_id))
await await_redis_response(pipe.hdel(f'{self._MAIN_KEY}:name_to_id', self._storage_name))
await pipe.delete(f'{self._MAIN_KEY}:{self._storage_name}:created_signal')
for key in extra_keys:
await pipe.delete(key)
Expand Down
25 changes: 25 additions & 0 deletions tests/unit/storage_clients/_redis/test_redis_dataset_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,31 @@ async def test_drop_removes_records(dataset_client: RedisDatasetClient) -> None:
assert items_after_drop is None


async def test_drop_preserves_sibling_index_entries(
    dataset_client: RedisDatasetClient,
    redis_client: FakeAsyncRedis,
) -> None:
    """Verify that dropping one dataset leaves another dataset's shared index entries untouched."""
    # Create a second, named dataset on the same Redis instance as the fixture dataset.
    client_factory = RedisStorageClient(redis=redis_client)
    other_client = await client_factory.create_dataset_client(name='sibling_dataset')
    other_metadata = await other_client.get_metadata()

    await dataset_client.drop()

    # Look the sibling up in both shared hashes (id -> name and name -> id) after the drop.
    stored_name = await await_redis_response(redis_client.hget('datasets:id_to_name', other_metadata.id))
    stored_id = await await_redis_response(redis_client.hget('datasets:name_to_id', 'sibling_dataset'))

    # The values are compared as text; decode them first when they come back as bytes.
    assert stored_name is not None
    name_text = stored_name.decode() if isinstance(stored_name, bytes) else stored_name
    assert name_text == 'sibling_dataset'

    assert stored_id is not None
    id_text = stored_id.decode() if isinstance(stored_id, bytes) else stored_id
    assert id_text == other_metadata.id

    # The sibling must still be reachable through its ID.
    reopened_client = await client_factory.create_dataset_client(id=other_metadata.id)
    reopened_metadata = await reopened_client.get_metadata()
    assert reopened_metadata.id == other_metadata.id


async def test_metadata_record_updates(dataset_client: RedisDatasetClient) -> None:
"""Test that metadata record is updated correctly after operations."""
# Record initial timestamps
Expand Down
Loading