# --- src/crawlee/storage_clients/_redis/_client_mixin.py ---
async def _drop(self, extra_keys: list[str]) -> None:
    """Delete this storage's Redis keys and its entries in the shared index hashes.

    Args:
        extra_keys: Additional Redis keys to delete together with the metadata key.
    """
    async with self._get_pipeline() as pipe:
        await pipe.delete(self.metadata_key)
        # `id_to_name` and `name_to_id` are hashes shared with sibling storages, so only
        # this storage's field may be removed (HDEL). DEL would drop the whole hash and
        # with it the index entries of every other storage.
        await await_redis_response(pipe.hdel(f'{self._MAIN_KEY}:id_to_name', self._storage_id))
        await await_redis_response(pipe.hdel(f'{self._MAIN_KEY}:name_to_id', self._storage_name))
        await pipe.delete(f'{self._MAIN_KEY}:{self._storage_name}:created_signal')
        for key in extra_keys:
            await pipe.delete(key)


# --- tests/unit/storage_clients/_redis/test_redis_dataset_client.py ---
async def test_drop_preserves_sibling_index_entries(
    dataset_client: RedisDatasetClient,
    redis_client: FakeAsyncRedis,
) -> None:
    """Test that dropping a dataset does not wipe the shared index entries of other datasets."""
    storage_client = RedisStorageClient(redis=redis_client)
    sibling_client = await storage_client.create_dataset_client(name='sibling_dataset')
    sibling_metadata = await sibling_client.get_metadata()

    await dataset_client.drop()

    # The sibling's entries in the shared id_to_name and name_to_id hashes must remain intact.
    sibling_name = await await_redis_response(redis_client.hget('datasets:id_to_name', sibling_metadata.id))
    sibling_id = await await_redis_response(redis_client.hget('datasets:name_to_id', 'sibling_dataset'))

    assert sibling_name is not None
    assert (sibling_name.decode() if isinstance(sibling_name, bytes) else sibling_name) == 'sibling_dataset'
    assert sibling_id is not None
    assert (sibling_id.decode() if isinstance(sibling_id, bytes) else sibling_id) == sibling_metadata.id

    # Opening the sibling by ID must still work.
    reopened = await storage_client.create_dataset_client(id=sibling_metadata.id)
    assert (await reopened.get_metadata()).id == sibling_metadata.id


async def test_metadata_record_updates(dataset_client: RedisDatasetClient) -> None:
    """Test that metadata record is updated correctly after operations."""
    # Record initial timestamps