Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions python/pyarrow/parquet/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1446,6 +1446,8 @@ def __init__(self, path_or_paths, filesystem=None, schema=None, *, filters=None,
path_or_paths, filesystem, memory_map=memory_map
)
finfo = filesystem.get_file_info(path_or_paths)
if finfo.is_file:
single_file = path_or_paths
if finfo.type == FileType.Directory:
self._base_dir = path_or_paths
else:
Expand Down
21 changes: 21 additions & 0 deletions python/pyarrow/tests/parquet/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -1250,6 +1250,27 @@ def test_parquet_dataset_new_filesystem(tempdir):
assert result.equals(table)


def test_parquet_dataset_partitions_not_loaded_for_single_file(tempdir):
    # A single-file dataset must not pick up partition keys from parent
    # directories of the path (here the hive-style 'p=a' segment).
    expected = pa.table({'a': [1, 2, 3]})
    parquet_path = tempdir / 'p=a' / 'data.parquet'
    parquet_path.parent.mkdir()
    pq.write_table(expected, parquet_path)

    # Open the dataset by filesystem path.
    by_path = pq.ParquetDataset(parquet_path)
    schema_from_path = by_path.schema
    assert by_path.read().equals(expected)

    # Open the dataset from an already-opened file object; the result
    # must be identical to the path-based read.
    with parquet_path.open("rb") as handle:
        by_handle = pq.ParquetDataset(handle)
        schema_from_handle = by_handle.schema
        assert by_handle.read().equals(expected)

    # Both entry points must expose the same schema.
    assert schema_from_path.equals(schema_from_handle)


def test_parquet_dataset_partitions_piece_path_with_fsspec(tempdir):
# ARROW-10462 ensure that on Windows we properly use posix-style paths
# as used by fsspec
Expand Down