docs for maze-dataset v1.3.2

maze_dataset.benchmark.speed

benchmark the speed of maze generation
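
A minimal entry point, sketched here with an illustrative output path (the filename is an arbitrary example, not part of the API):

from maze_dataset.benchmark.speed import run_benchmark

# sweep the default generators over the default grid sizes and maze counts,
# saving one JSON-lines record per configuration; the path is an example
run_benchmark(save_path="benchmark_results.jsonl")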


  1"benchmark the speed of maze generation"
  2
  3import functools
  4import random
  5import timeit
  6from pathlib import Path
  7from typing import Any, Sequence
  8
  9from tqdm import tqdm
 10
 11from maze_dataset import MazeDataset, MazeDatasetConfig
 12from maze_dataset.generation.default_generators import DEFAULT_GENERATORS
 13from maze_dataset.generation.generators import GENERATORS_MAP
 14
 15_BASE_CFG_KWARGS: dict = dict(
 16	grid_n=None,
 17	n_mazes=None,
 18)
 19
 20_GENERATE_KWARGS: dict = dict(
 21	gen_parallel=False,
 22	pool_kwargs=None,
 23	verbose=False,
 24	# do_generate = True,
 25	# load_local = False,
 26	# save_local = False,
 27	# zanj = None,
 28	# do_download = False,
 29	# local_base_path = "INVALID",
 30	# except_on_config_mismatch = True,
 31	# verbose = False,
 32)
 33
 34
 35def time_generation(
 36	base_configs: list[tuple[str, dict]],
 37	grid_n_vals: list[int],
 38	n_mazes_vals: list[int],
 39	trials: int = 10,
 40	verbose: bool = False,
 41) -> list[dict[str, Any]]:
 42	"time the generation of mazes for various configurations"
 43	# assemble configs
 44	configs: list[MazeDatasetConfig] = list()
 45
 46	for b_cfg in base_configs:
 47		for grid_n in grid_n_vals:
 48			for n_mazes in n_mazes_vals:
 49				configs.append(
 50					MazeDatasetConfig(
 51						name="benchmark",
 52						grid_n=grid_n,
 53						n_mazes=n_mazes,
 54						maze_ctor=GENERATORS_MAP[b_cfg[0]],
 55						maze_ctor_kwargs=b_cfg[1],
 56					),
 57				)
 58
 59	# shuffle configs (in place) (otherwise progress bar is annoying)
 60	random.shuffle(configs)
 61
 62	# time generation for each config
 63	times: list[dict[str, Any]] = list()
 64	total: int = len(configs)
 65	for idx, cfg in tqdm(
 66		enumerate(configs),
 67		desc="Timing generation",
 68		unit="config",
 69		total=total,
 70		disable=verbose,
 71	):
 72		if verbose:
 73			print(f"Timing generation for config {idx + 1}/{total}\n{cfg}")
 74
 75		t: float = (
 76			timeit.timeit(
 77				stmt=functools.partial(MazeDataset.generate, cfg, **_GENERATE_KWARGS),  # type: ignore[arg-type]
 78				number=trials,
 79			)
 80			/ trials
 81		)
 82
 83		if verbose:
 84			print(f"avg time: {t:.3f} s")
 85
 86		times.append(
 87			dict(
 88				cfg_name=cfg.name,
 89				grid_n=cfg.grid_n,
 90				n_mazes=cfg.n_mazes,
 91				maze_ctor=cfg.maze_ctor.__name__,
 92				maze_ctor_kwargs=cfg.maze_ctor_kwargs,
 93				trials=trials,
 94				time=t,
 95			),
 96		)
 97
 98	return times
 99
100
101def run_benchmark(
102	save_path: str,
103	base_configs: list[tuple[str, dict]] | None = None,
104	grid_n_vals: Sequence[int] = (2, 3, 4, 5, 8, 10, 16, 25, 32),
105	n_mazes_vals: Sequence[int] = tuple(range(1, 12, 2)),
106	trials: int = 10,
107	verbose: bool = True,
108) -> "pd.DataFrame":  # type: ignore[name-defined] # noqa: F821
109	"run the benchmark and save the results to a file"
110	import pandas as pd
111
112	if base_configs is None:
113		base_configs = DEFAULT_GENERATORS
114
115	times: list[dict] = time_generation(
116		base_configs=base_configs,
117		grid_n_vals=list(grid_n_vals),
118		n_mazes_vals=list(n_mazes_vals),
119		trials=trials,
120		verbose=verbose,
121	)
122
123	df: pd.DataFrame = pd.DataFrame(times)
124
125	# print the whole dataframe contents to console as csv
126	print(df.to_csv())
127
128	# save to file
129	Path(save_path).parent.mkdir(parents=True, exist_ok=True)
130	df.to_json(save_path, orient="records", lines=True)
131
132	return df

def time_generation( base_configs: list[tuple[str, dict]], grid_n_vals: list[int], n_mazes_vals: list[int], trials: int = 10, verbose: bool = False) -> list[dict[str, typing.Any]]:

Time maze generation for every combination of base generator config, grid size, and maze count, averaging each measurement over the given number of trials. Returns one result record per configuration, carrying the config name, grid_n, n_mazes, generator name and kwargs, trial count, and average time in seconds.
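
For finer control, time_generation can be called directly. The sketch below assumes "gen_dfs" is a key in GENERATORS_MAP (the package's generator registry); the parameter values are arbitrary examples:

from maze_dataset.benchmark.speed import time_generation

# one base config: a generator name (assumed key in GENERATORS_MAP) plus its
# keyword arguments; timings cover every (config, grid_n, n_mazes) combination
results = time_generation(
	base_configs=[("gen_dfs", {})],
	grid_n_vals=[4, 8],
	n_mazes_vals=[1, 5],
	trials=3,
	verbose=True,
)

# each record carries the fields assembled in the timing loop
for rec in results:
	print(rec["maze_ctor"], rec["grid_n"], rec["n_mazes"], f"{rec['time']:.4f} s")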

def run_benchmark( save_path: str, base_configs: list[tuple[str, dict]] | None = None, grid_n_vals: Sequence[int] = (2, 3, 4, 5, 8, 10, 16, 25, 32), n_mazes_vals: Sequence[int] = (1, 3, 5, 7, 9, 11), trials: int = 10, verbose: bool = True) -> 'pd.DataFrame':

Run the benchmark over the given generator configurations (defaulting to DEFAULT_GENERATORS), print the full results to the console as CSV, and save them to save_path as JSON lines, creating parent directories as needed. Returns the results as a pandas DataFrame.
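
Because the results are written with orient="records" and lines=True, they can be reloaded with pandas in a later session. The path and the reduced sweep below are illustrative:

import pandas as pd

from maze_dataset.benchmark.speed import run_benchmark

# a reduced sweep keeps runtime manageable; the defaults cover a much larger grid
df = run_benchmark(
	save_path="data/benchmark/speed.jsonl",  # example path; parent dirs are created
	grid_n_vals=(4, 8),
	n_mazes_vals=(1, 5),
	trials=3,
)

# reload later; matches the JSON-lines format written above
df_loaded = pd.read_json("data/benchmark/speed.jsonl", lines=True)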