Coverage for tests/unit/processing/test_collect_gen_metadata.py: 100%

17 statements  

« prev     ^ index     » next       coverage.py v7.6.12, created at 2025-03-24 00:33 -0600

1from zanj import ZANJ 

2 

3from maze_dataset import MazeDataset, MazeDatasetConfig 

4from maze_dataset.dataset.maze_dataset import SERIALIZE_MINIMAL_THRESHOLD 

5 

6 

def test_remove_duplicates():
    """Smoke test: build a 10-maze dataset and run `remove_duplicates` on it.

    No assertions are made; the test passes as long as dataset creation and
    the duplicate filter complete without raising. Dataset sizes before and
    after filtering are printed for manual inspection.
    """
    config = MazeDatasetConfig(name="test_collect", grid_n=5, n_mazes=10)

    # NOTE(review): load_local=False / save_local=True presumably force a fresh
    # generation that is then written under local_base_path -- confirm against
    # `MazeDataset.from_config`.
    zanj_serializer = ZANJ(external_list_threshold=1000)
    dataset = MazeDataset.from_config(
        config,
        load_local=False,
        save_local=True,
        local_base_path="tests/_temp/test_collect/",
        verbose=True,
        zanj=zanj_serializer,
    )
    print(f"Generated {len(dataset)} mazes")

    # thresholds forwarded unchanged to the `remove_duplicates` filter
    dedup_thresholds = {
        "minimum_difference_connection_list": 0,
        "minimum_difference_solution": 1,
    }
    dataset = dataset.filter_by.remove_duplicates(**dedup_thresholds)
    print(f"After removing duplicates, we have {len(dataset)} mazes")

29 

30 

def test_remove_duplicates_large():
    """Smoke test: run `remove_duplicates` on a dataset one maze above the threshold.

    The dataset is requested with `SERIALIZE_MINIMAL_THRESHOLD + 1` mazes. No
    assertions are made; the test passes as long as creation and filtering
    complete without raising. Sizes and generation metadata are printed for
    manual inspection.
    """
    maze_count = SERIALIZE_MINIMAL_THRESHOLD + 1
    config = MazeDatasetConfig(name="test_collect", grid_n=5, n_mazes=maze_count)

    # NOTE(review): load_local=False / save_local=True presumably force a fresh
    # generation that is then written under local_base_path -- confirm against
    # `MazeDataset.from_config`.
    zanj_serializer = ZANJ(external_list_threshold=1000)
    dataset = MazeDataset.from_config(
        config,
        load_local=False,
        save_local=True,
        local_base_path="tests/_temp/test_collect/",
        verbose=True,
        zanj=zanj_serializer,
    )
    print(f"Generated {len(dataset)} mazes")

    # The `=` specifier echoes the expression text into the output, so the
    # local must stay named `dataset` to keep the printed lines unchanged.
    print(f"\t{dataset.generation_metadata_collected = }")
    print(f"\t{dataset.mazes[0].generation_meta = }")

    # thresholds forwarded unchanged to the `remove_duplicates` filter
    dedup_thresholds = {
        "minimum_difference_connection_list": 0,
        "minimum_difference_solution": 1,
    }
    dataset = dataset.filter_by.remove_duplicates(**dedup_thresholds)
    print(f"After removing duplicates, we have {len(dataset)} mazes")