Coverage for tests/unit/tokenization/test_vocab.py: 100%
21 statements
« prev ^ index » next coverage.py v7.6.12, created at 2025-03-24 00:33 -0600
« prev ^ index » next coverage.py v7.6.12, created at 2025-03-24 00:33 -0600
1import pytest
3from maze_dataset.constants import (
4 SPECIAL_TOKENS,
5 VOCAB,
6 VOCAB_LIST,
7 VOCAB_TOKEN_TO_INDEX,
8)
def test_special_tokens_base():
    """Exercise the mapping-style interface exposed by ``SPECIAL_TOKENS``.

    Covers item lookup (including the missing-key error), length,
    membership, and the ``values`` / ``items`` / ``keys`` accessors.
    """
    # __getitem__: a known key yields its token string ...
    assert SPECIAL_TOKENS["ADJLIST_START"] == "<ADJLIST_START>"

    # ... and an unknown key raises KeyError
    with pytest.raises(KeyError):
        SPECIAL_TOKENS["NON_EXISTENT_KEY"]

    # __len__
    assert len(SPECIAL_TOKENS) == 11

    # __contains__
    assert "ADJLIST_START" in SPECIAL_TOKENS
    assert "NON_EXISTENT_KEY" not in SPECIAL_TOKENS

    # values()
    assert "<ADJLIST_START>" in SPECIAL_TOKENS.values()

    # items()
    assert ("ADJLIST_START", "<ADJLIST_START>") in SPECIAL_TOKENS.items()

    # keys(): call it explicitly -- a bare `in SPECIAL_TOKENS` would only
    # repeat the __contains__ check above and leave keys() unexercised
    assert "ADJLIST_START" in SPECIAL_TOKENS.keys()  # noqa: SIM118
def test_vocab():
    """Pin the vocabulary size and a sample of known token/index pairs."""
    assert len(VOCAB) == 4096

    # VOCAB is an instance of a dynamic dataclass, so type checkers cannot
    # see its attributes; silence the resulting attr-defined error.
    assert VOCAB.CTT_10 == "10"  # type: ignore[attr-defined]

    # index -> token lookups
    for index, token in ((0, "<ADJLIST_START>"), (706, "&")):
        assert VOCAB_LIST[index] == token

    # token -> index lookups
    expected_indices = {
        "<UNK>": 19,
        "0": 320,
        "-1": 703,
        "(0,0)": 1596,
    }
    for token, index in expected_indices.items():
        assert VOCAB_TOKEN_TO_INDEX[token] == index