From a50dc861b46d2a47dfacbcbfdd1b02b56825bf54 Mon Sep 17 00:00:00 2001 From: Haiyuan Cao Date: Fri, 24 Apr 2026 11:24:13 -0700 Subject: [PATCH 1/2] fix: auto-detect dataset location for BigQuery analytics view creation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The plugin previously passed a configured location (default "US") to bigquery.Client(location=...). This only affects query job routing, not table CRUD or the Storage Write API. When the dataset lives in a non-US region, view creation DDL queries silently fail with a location mismatch — data writes succeed but analytics views are missing. The fix auto-detects the dataset's actual location via get_dataset() during initialization and passes it explicitly to client.query() for view creation. Falls back to the configured location when dataset metadata cannot be resolved. Fixes #5476 Co-Authored-By: Claude Opus 4.6 (1M context) --- .../bigquery_agent_analytics_plugin.py | 26 +++- .../test_bigquery_agent_analytics_plugin.py | 127 ++++++++++++++++++ 2 files changed, 151 insertions(+), 2 deletions(-) diff --git a/src/google/adk/plugins/bigquery_agent_analytics_plugin.py b/src/google/adk/plugins/bigquery_agent_analytics_plugin.py index ea29d1a18c..e3e932b393 100644 --- a/src/google/adk/plugins/bigquery_agent_analytics_plugin.py +++ b/src/google/adk/plugins/bigquery_agent_analytics_plugin.py @@ -1990,6 +1990,7 @@ def __init__( self._setup_lock = None self._user_credentials = credentials self._credentials = credentials + self._resolved_location: Optional[str] = None self.client = None self._loop_state_by_loop: dict[asyncio.AbstractEventLoop, _LoopState] = {} self._write_stream_name = None # Resolved stream name @@ -2184,11 +2185,28 @@ async def _lazy_setup(self, **kwargs) -> None: self._executor, lambda: bigquery.Client( project=self.project_id, - location=self.location, credentials=self._credentials, ), ) + # Auto-detect the dataset's location so view-creation DDL + # queries are routed to the correct region. Table CRUD and the + # Storage Write API derive location from the dataset server-side, + # but client.query() uses the client's default location for job + # routing — a mismatch causes silent view-creation failure. + if self._resolved_location is None: + dataset_id = f"{self.project_id}.{self.dataset_id}" + try: + dataset = await loop.run_in_executor( + self._executor, + lambda: self.client.get_dataset(dataset_id), + ) + self._resolved_location = dataset.location + except Exception: + # Fallback when dataset metadata cannot be resolved, + # preserving current behavior. + self._resolved_location = self.location + self.full_table_id = f"{self.project_id}.{self.dataset_id}.{self.table_id}" if not self._schema: self._schema = _get_events_schema() @@ -2447,7 +2465,7 @@ def _create_analytics_views(self) -> None: event_type=event_type, ) try: - self.client.query(sql).result() + self.client.query(sql, location=self._resolved_location).result() except cloud_exceptions.Conflict: logger.debug( "View %s was updated concurrently by another process.", @@ -2546,6 +2564,8 @@ def __getstate__(self): state["_startup_error"] = None state["_is_shutting_down"] = False state["_init_pid"] = 0 + # Re-detect dataset location after unpickle. + state["_resolved_location"] = None # _credentials is always runtime-resolved; clear unconditionally. state["_credentials"] = None # Preserve _user_credentials if they are picklable (e.g., @@ -2567,6 +2587,7 @@ def __setstate__(self, state): state.setdefault("_init_pid", 0) state.setdefault("_user_credentials", None) state.setdefault("_credentials", None) + state.setdefault("_resolved_location", None) # Restore _credentials from _user_credentials if available so # _create_loop_state uses the user's identity. When both are # None (non-picklable credentials were dropped), ADC is used. @@ -2616,6 +2637,7 @@ def _reset_runtime_state(self) -> None: # Clear all runtime state. self._setup_lock = None + self._resolved_location = None self.client = None self._loop_state_by_loop = {} self._write_stream_name = None diff --git a/tests/unittests/plugins/test_bigquery_agent_analytics_plugin.py b/tests/unittests/plugins/test_bigquery_agent_analytics_plugin.py index 3b723b4ceb..9d48db4549 100644 --- a/tests/unittests/plugins/test_bigquery_agent_analytics_plugin.py +++ b/tests/unittests/plugins/test_bigquery_agent_analytics_plugin.py @@ -7246,3 +7246,130 @@ async def test_no_a2a_interaction_for_no_metadata( await asyncio.sleep(0.05) assert mock_write_client.append_rows.call_count == 0 + + +# ================================================================ +# TEST CLASS: Dataset location auto-detection (Issue #5476) +# ================================================================ +class TestDatasetLocationDetection: + """Tests for auto-detecting the dataset location.""" + + @pytest.mark.asyncio + async def test_location_detected_from_dataset( + self, + mock_auth_default, + mock_to_arrow_schema, + mock_asyncio_to_thread, + ): + """Resolved location comes from get_dataset, not constructor.""" + mock_dataset = mock.MagicMock() + mock_dataset.location = "europe-west1" + + with mock.patch.object(bigquery, "Client", autospec=True) as mock_bq_cls: + mock_bq_cls.return_value.get_dataset.return_value = mock_dataset + mock_bq_cls.return_value.get_table.side_effect = ( + cloud_exceptions.NotFound("table") + ) + mock_bq_cls.return_value.create_table.return_value = None + + async with managed_plugin( + project_id=PROJECT_ID, + dataset_id=DATASET_ID, + table_id=TABLE_ID, + config=bigquery_agent_analytics_plugin.BigQueryLoggerConfig( + create_views=False, + ), + ) as plugin: + await plugin._ensure_started() + assert plugin._resolved_location == "europe-west1" + + @pytest.mark.asyncio + async def test_location_falls_back_on_get_dataset_failure( + self, + mock_auth_default, + mock_to_arrow_schema, + mock_asyncio_to_thread, + ): + """Falls back to configured location when get_dataset fails.""" + with mock.patch.object(bigquery, "Client", autospec=True) as mock_bq_cls: + mock_bq_cls.return_value.get_dataset.side_effect = Exception("not found") + mock_bq_cls.return_value.get_table.side_effect = ( + cloud_exceptions.NotFound("table") + ) + mock_bq_cls.return_value.create_table.return_value = None + + async with managed_plugin( + project_id=PROJECT_ID, + dataset_id=DATASET_ID, + table_id=TABLE_ID, + location="asia-northeast1", + config=bigquery_agent_analytics_plugin.BigQueryLoggerConfig( + create_views=False, + ), + ) as plugin: + await plugin._ensure_started() + assert plugin._resolved_location == "asia-northeast1" + + @pytest.mark.asyncio + async def test_views_created_with_resolved_location( + self, + mock_auth_default, + mock_to_arrow_schema, + mock_asyncio_to_thread, + ): + """View creation DDL passes resolved location to client.query().""" + mock_dataset = mock.MagicMock() + mock_dataset.location = "europe-west1" + + with mock.patch.object(bigquery, "Client", autospec=True) as mock_bq_cls: + mock_client = mock_bq_cls.return_value + mock_client.get_dataset.return_value = mock_dataset + mock_client.get_table.return_value = mock.MagicMock() + mock_client.query.return_value.result.return_value = None + + async with managed_plugin( + project_id=PROJECT_ID, + dataset_id=DATASET_ID, + table_id=TABLE_ID, + config=bigquery_agent_analytics_plugin.BigQueryLoggerConfig( + create_views=True, + ), + ) as plugin: + await plugin._ensure_started() + + # Verify query() was called with location kwarg + assert mock_client.query.call_count > 0 + for call in mock_client.query.call_args_list: + _, kwargs = call + assert kwargs.get("location") == "europe-west1" + + @pytest.mark.asyncio + async def test_view_error_still_logged( + self, + mock_auth_default, + mock_to_arrow_schema, + mock_asyncio_to_thread, + ): + """View creation errors are logged but not raised.""" + mock_dataset = mock.MagicMock() + mock_dataset.location = "US" + + with mock.patch.object(bigquery, "Client", autospec=True) as mock_bq_cls: + mock_client = mock_bq_cls.return_value + mock_client.get_dataset.return_value = mock_dataset + mock_client.get_table.return_value = mock.MagicMock() + mock_client.query.return_value.result.side_effect = Exception( + "view error" + ) + + # Should not raise + async with managed_plugin( + project_id=PROJECT_ID, + dataset_id=DATASET_ID, + table_id=TABLE_ID, + config=bigquery_agent_analytics_plugin.BigQueryLoggerConfig( + create_views=True, + ), + ) as plugin: + await plugin._ensure_started() + assert plugin._started From c6f3960c556c39eb4445a76109d83b882c0b282d Mon Sep 17 00:00:00 2001 From: Haiyuan Cao Date: Fri, 24 Apr 2026 12:34:04 -0700 Subject: [PATCH 2/2] simplify: remove _resolved_location, let BQ infer location server-side Per review feedback: when bigquery.Client is created without a location parameter, client.query() sends no location field in the API request. BigQuery infers the job location from the dataset referenced in the DDL statement. This is simpler than auto-detecting via get_dataset(). The only production change from origin/main is removing location=self.location from the bigquery.Client() constructor call. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../bigquery_agent_analytics_plugin.py | 25 +------ .../test_bigquery_agent_analytics_plugin.py | 67 ++++++------------- 2 files changed, 20 insertions(+), 72 deletions(-) diff --git a/src/google/adk/plugins/bigquery_agent_analytics_plugin.py b/src/google/adk/plugins/bigquery_agent_analytics_plugin.py index e3e932b393..a4468c0231 100644 --- a/src/google/adk/plugins/bigquery_agent_analytics_plugin.py +++ b/src/google/adk/plugins/bigquery_agent_analytics_plugin.py @@ -1990,7 +1990,6 @@ def __init__( self._setup_lock = None self._user_credentials = credentials self._credentials = credentials - self._resolved_location: Optional[str] = None self.client = None self._loop_state_by_loop: dict[asyncio.AbstractEventLoop, _LoopState] = {} self._write_stream_name = None # Resolved stream name @@ -2189,24 +2188,6 @@ async def _lazy_setup(self, **kwargs) -> None: ), ) - # Auto-detect the dataset's location so view-creation DDL - # queries are routed to the correct region. Table CRUD and the - # Storage Write API derive location from the dataset server-side, - # but client.query() uses the client's default location for job - # routing — a mismatch causes silent view-creation failure. - if self._resolved_location is None: - dataset_id = f"{self.project_id}.{self.dataset_id}" - try: - dataset = await loop.run_in_executor( - self._executor, - lambda: self.client.get_dataset(dataset_id), - ) - self._resolved_location = dataset.location - except Exception: - # Fallback when dataset metadata cannot be resolved, - # preserving current behavior. - self._resolved_location = self.location - self.full_table_id = f"{self.project_id}.{self.dataset_id}.{self.table_id}" if not self._schema: self._schema = _get_events_schema() @@ -2465,7 +2446,7 @@ def _create_analytics_views(self) -> None: event_type=event_type, ) try: - self.client.query(sql, location=self._resolved_location).result() + self.client.query(sql).result() except cloud_exceptions.Conflict: logger.debug( "View %s was updated concurrently by another process.", @@ -2564,8 +2545,6 @@ def __getstate__(self): state["_startup_error"] = None state["_is_shutting_down"] = False state["_init_pid"] = 0 - # Re-detect dataset location after unpickle. - state["_resolved_location"] = None # _credentials is always runtime-resolved; clear unconditionally. state["_credentials"] = None # Preserve _user_credentials if they are picklable (e.g., @@ -2587,7 +2566,6 @@ def __setstate__(self, state): state.setdefault("_init_pid", 0) state.setdefault("_user_credentials", None) state.setdefault("_credentials", None) - state.setdefault("_resolved_location", None) # Restore _credentials from _user_credentials if available so # _create_loop_state uses the user's identity. When both are # None (non-picklable credentials were dropped), ADC is used. @@ -2637,7 +2615,6 @@ def _reset_runtime_state(self) -> None: # Clear all runtime state. self._setup_lock = None - self._resolved_location = None self.client = None self._loop_state_by_loop = {} self._write_stream_name = None diff --git a/tests/unittests/plugins/test_bigquery_agent_analytics_plugin.py b/tests/unittests/plugins/test_bigquery_agent_analytics_plugin.py index 9d48db4549..72947124ef 100644 --- a/tests/unittests/plugins/test_bigquery_agent_analytics_plugin.py +++ b/tests/unittests/plugins/test_bigquery_agent_analytics_plugin.py @@ -7249,50 +7249,26 @@ async def test_no_a2a_interaction_for_no_metadata( # ================================================================ -# TEST CLASS: Dataset location auto-detection (Issue #5476) +# TEST CLASS: Dataset location handling (Issue #5476) # ================================================================ -class TestDatasetLocationDetection: - """Tests for auto-detecting the dataset location.""" +class TestDatasetLocationHandling: + """Tests that BQ client is created without a default location. - @pytest.mark.asyncio - async def test_location_detected_from_dataset( - self, - mock_auth_default, - mock_to_arrow_schema, - mock_asyncio_to_thread, - ): - """Resolved location comes from get_dataset, not constructor.""" - mock_dataset = mock.MagicMock() - mock_dataset.location = "europe-west1" - - with mock.patch.object(bigquery, "Client", autospec=True) as mock_bq_cls: - mock_bq_cls.return_value.get_dataset.return_value = mock_dataset - mock_bq_cls.return_value.get_table.side_effect = ( - cloud_exceptions.NotFound("table") - ) - mock_bq_cls.return_value.create_table.return_value = None - - async with managed_plugin( - project_id=PROJECT_ID, - dataset_id=DATASET_ID, - table_id=TABLE_ID, - config=bigquery_agent_analytics_plugin.BigQueryLoggerConfig( - create_views=False, - ), - ) as plugin: - await plugin._ensure_started() - assert plugin._resolved_location == "europe-west1" + When location is omitted from bigquery.Client(), client.query() + sends no location field in the API request, letting BigQuery + infer location from the referenced dataset. This prevents + silent view-creation failures for non-US datasets. + """ @pytest.mark.asyncio - async def test_location_falls_back_on_get_dataset_failure( + async def test_client_created_without_location( self, mock_auth_default, mock_to_arrow_schema, mock_asyncio_to_thread, ): - """Falls back to configured location when get_dataset fails.""" + """bigquery.Client is created without a location parameter.""" with mock.patch.object(bigquery, "Client", autospec=True) as mock_bq_cls: - mock_bq_cls.return_value.get_dataset.side_effect = Exception("not found") mock_bq_cls.return_value.get_table.side_effect = ( cloud_exceptions.NotFound("table") ) @@ -7302,28 +7278,27 @@ async def test_location_falls_back_on_get_dataset_failure( project_id=PROJECT_ID, dataset_id=DATASET_ID, table_id=TABLE_ID, - location="asia-northeast1", + location="europe-west1", config=bigquery_agent_analytics_plugin.BigQueryLoggerConfig( create_views=False, ), ) as plugin: await plugin._ensure_started() - assert plugin._resolved_location == "asia-northeast1" + + mock_bq_cls.assert_called_once() + _, kwargs = mock_bq_cls.call_args + assert "location" not in kwargs @pytest.mark.asyncio - async def test_views_created_with_resolved_location( + async def test_view_query_omits_location( self, mock_auth_default, mock_to_arrow_schema, mock_asyncio_to_thread, ): - """View creation DDL passes resolved location to client.query().""" - mock_dataset = mock.MagicMock() - mock_dataset.location = "europe-west1" - + """View creation DDL queries do not pass an explicit location.""" with mock.patch.object(bigquery, "Client", autospec=True) as mock_bq_cls: mock_client = mock_bq_cls.return_value - mock_client.get_dataset.return_value = mock_dataset mock_client.get_table.return_value = mock.MagicMock() mock_client.query.return_value.result.return_value = None @@ -7337,11 +7312,11 @@ async def test_views_created_with_resolved_location( ) as plugin: await plugin._ensure_started() - # Verify query() was called with location kwarg assert mock_client.query.call_count > 0 for call in mock_client.query.call_args_list: _, kwargs = call - assert kwargs.get("location") == "europe-west1" + # No explicit location — BQ infers from dataset + assert "location" not in kwargs @pytest.mark.asyncio async def test_view_error_still_logged( @@ -7351,12 +7326,8 @@ async def test_view_error_still_logged( mock_asyncio_to_thread, ): """View creation errors are logged but not raised.""" - mock_dataset = mock.MagicMock() - mock_dataset.location = "US" - with mock.patch.object(bigquery, "Client", autospec=True) as mock_bq_cls: mock_client = mock_bq_cls.return_value - mock_client.get_dataset.return_value = mock_dataset mock_client.get_table.return_value = mock.MagicMock() mock_client.query.return_value.result.side_effect = Exception( "view error"