From 320823a49d48bd27c216264f9b128ed49fd1ac53 Mon Sep 17 00:00:00 2001
From: ben
Date: Wed, 22 Apr 2026 22:37:31 -0700
Subject: [PATCH] GH-43574: [Python] do not add partition columns from file path when reading single file

Reverts a small portion of bd444106af494b3d4c6cce0af88f6ce2a6a327eb
---
NOTE(review): this patch was restored from a whitespace-collapsed copy.
Line breaks, context-line indentation and the extent of the `with` body in
the new test were reconstructed from Python syntax, not from the original
bytes. Confirm the context lines against python/pyarrow/parquet/core.py
before applying (or apply with `git apply --ignore-whitespace`).

 python/pyarrow/parquet/core.py               |  2 ++
 python/pyarrow/tests/parquet/test_dataset.py | 21 ++++++++++++++++++++
 2 files changed, 23 insertions(+)

diff --git a/python/pyarrow/parquet/core.py b/python/pyarrow/parquet/core.py
index 19d8250d5101..5234976a921b 100644
--- a/python/pyarrow/parquet/core.py
+++ b/python/pyarrow/parquet/core.py
@@ -1446,6 +1446,8 @@ def __init__(self, path_or_paths, filesystem=None, schema=None, *, filters=None,
                     path_or_paths, filesystem, memory_map=memory_map
                 )
                 finfo = filesystem.get_file_info(path_or_paths)
+                if finfo.is_file:
+                    single_file = path_or_paths
                 if finfo.type == FileType.Directory:
                     self._base_dir = path_or_paths
             else:
diff --git a/python/pyarrow/tests/parquet/test_dataset.py b/python/pyarrow/tests/parquet/test_dataset.py
index d3e9cda73018..5f04a4889391 100644
--- a/python/pyarrow/tests/parquet/test_dataset.py
+++ b/python/pyarrow/tests/parquet/test_dataset.py
@@ -1250,6 +1250,27 @@ def test_parquet_dataset_new_filesystem(tempdir):
     assert result.equals(table)
 
 
+def test_parquet_dataset_partitions_not_loaded_for_single_file(tempdir):
+    # Ensure single-file reads do not include partitions from higher levels of the path
+    table = pa.table({'a': [1, 2, 3]})
+    path = tempdir / 'p=a' / 'data.parquet'
+    path.parent.mkdir()
+    pq.write_table(table, path)
+    # read using a path object
+    dataset = pq.ParquetDataset(path)
+    path_schema = dataset.schema
+    result = dataset.read()
+    assert result.equals(table)
+    # read using a file object; expect same result
+    with path.open("rb") as file:
+        dataset = pq.ParquetDataset(file)
+        file_schema = dataset.schema
+        result = dataset.read()
+    assert result.equals(table)
+    # schemas should match
+    assert path_schema.equals(file_schema)
+
+
 def test_parquet_dataset_partitions_piece_path_with_fsspec(tempdir):
     # ARROW-10462 ensure that on Windows we properly use posix-style paths
     # as used by fsspec