Skip to content

Commit 0618558

Browse files
author
Pavan Kumar Jamanjyothi
committed
added tests to assert metadata is not lost after adding indices
1 parent 23d1e95 commit 0618558

File tree

1 file changed

+30
-0
lines changed

1 file changed

+30
-0
lines changed

kartothek/io/testing/index.py

+30
Original file line numberDiff line numberDiff line change
@@ -183,3 +183,33 @@ def test_empty_partitions(store_factory, metadata_version, bound_build_dataset_i
183183
# Assert indices are properly created
184184
dataset_factory = DatasetFactory(dataset_uuid, store_factory, load_all_indices=True)
185185
assert_index_dct_equal(expected, dataset_factory.indices["p"].index_dct)
186+
187+
188+
def test_build_indices_doesnot_drop_metadata(
    store_factory, bound_build_dataset_indices
):
    """Check that user metadata is still present after indices are built.

    A two-partition dataset is stored together with a custom metadata
    entry and without any secondary index. An index on column ``p`` is
    then built through ``bound_build_dataset_indices``, the dataset is
    reloaded, and its metadata (ignoring ``creation_time``) is compared
    with what was originally stored.
    """
    uuid = "dataset_uuid"
    expected_metadata = {"test_key": {"key": "some_val"}}

    first_partition = {
        "label": "cluster_1",
        "data": [("core", pd.DataFrame({"p": [1, 2]}))],
    }
    second_partition = {
        "label": "cluster_2",
        "data": [("core", pd.DataFrame({"p": [2, 3]}))],
    }

    # Precondition: the freshly stored dataset carries no indices yet.
    stored = store_dataframes_as_dataset(
        dfs=[first_partition, second_partition],
        store=store_factory,
        dataset_uuid=uuid,
        metadata={"test_key": {"key": "some_val"}},
    )
    stored = stored.load_all_indices(store=store_factory)
    assert not stored.indices

    # Build the index on column "p".
    bound_build_dataset_indices(store_factory, uuid, columns=["p"])

    # Reload the dataset and inspect the metadata it reports now.
    reloaded = DatasetFactory(uuid, store_factory, load_all_indices=True)
    actual_metadata = reloaded.dataset_metadata.metadata
    # creation_time is excluded from the comparison.
    actual_metadata.pop("creation_time", None)

    assert actual_metadata == expected_metadata

0 commit comments

Comments
 (0)