Coverage for tests/unit/processing/test_collect_gen_metadata.py: 100%
17 statements
« prev ^ index » next coverage.py v7.6.12, created at 2025-03-24 00:33 -0600
« prev ^ index » next coverage.py v7.6.12, created at 2025-03-24 00:33 -0600
1from zanj import ZANJ
3from maze_dataset import MazeDataset, MazeDatasetConfig
4from maze_dataset.dataset.maze_dataset import SERIALIZE_MINIMAL_THRESHOLD
def test_remove_duplicates() -> None:
    """Run the `remove_duplicates` filter on a small generated dataset.

    Builds a dataset from a config with ``grid_n=5`` and ``n_mazes=10``
    (passing ``load_local=False`` and ``save_local=True``), applies
    ``filter_by.remove_duplicates``, and checks that the filtered dataset is
    not larger than the one it was derived from.
    """
    cfg: MazeDatasetConfig = MazeDatasetConfig(
        name="test_collect",
        grid_n=5,
        n_mazes=10,
    )

    dataset: MazeDataset = MazeDataset.from_config(
        cfg,
        load_local=False,
        save_local=True,
        local_base_path="tests/_temp/test_collect/",
        verbose=True,
        zanj=ZANJ(external_list_threshold=1000),
    )
    n_generated: int = len(dataset)
    print(f"Generated {n_generated} mazes")

    deduped: MazeDataset = dataset.filter_by.remove_duplicates(
        minimum_difference_connection_list=0,
        minimum_difference_solution=1,
    )
    n_unique: int = len(deduped)
    print(f"After removing duplicates, we have {n_unique} mazes")

    # A filter can only drop mazes; growing the dataset would indicate a bug.
    assert n_unique <= n_generated, (
        f"remove_duplicates grew the dataset: {n_generated} -> {n_unique}"
    )
def test_remove_duplicates_large() -> None:
    """Run the `remove_duplicates` filter on a dataset above the minimal-serialization threshold.

    Same flow as the small-dataset test, but ``n_mazes`` is set to
    ``SERIALIZE_MINIMAL_THRESHOLD + 1``.
    NOTE(review): presumably this size is chosen so that saving takes the
    "minimal" serialization path -- confirm against `MazeDataset`.

    Also prints the dataset-level ``generation_metadata_collected`` and the
    first maze's ``generation_meta`` for inspection, then checks that the
    filtered dataset is not larger than the generated one.
    """
    cfg: MazeDatasetConfig = MazeDatasetConfig(
        name="test_collect",
        grid_n=5,
        n_mazes=SERIALIZE_MINIMAL_THRESHOLD + 1,
    )

    dataset: MazeDataset = MazeDataset.from_config(
        cfg,
        load_local=False,
        save_local=True,
        local_base_path="tests/_temp/test_collect/",
        verbose=True,
        zanj=ZANJ(external_list_threshold=1000),
    )
    n_generated: int = len(dataset)
    print(f"Generated {n_generated} mazes")

    # Debug output only; the `=` specifier echoes the expression text, so the
    # variable name `dataset` is part of what gets printed.
    print(f"\t{dataset.generation_metadata_collected = }")
    print(f"\t{dataset.mazes[0].generation_meta = }")

    deduped: MazeDataset = dataset.filter_by.remove_duplicates(
        minimum_difference_connection_list=0,
        minimum_difference_solution=1,
    )
    n_unique: int = len(deduped)
    print(f"After removing duplicates, we have {n_unique} mazes")

    # A filter can only drop mazes; growing the dataset would indicate a bug.
    assert n_unique <= n_generated, (
        f"remove_duplicates grew the dataset: {n_generated} -> {n_unique}"
    )