| 26 | 26 | |
| 27 | 27 | |
| 28 | 28 | def parse(input_string: str) -> SgfTree: |
| 29 | pass | |
| 29 | if not input_string: | |
| 30 | raise ValueError("tree missing") | |
| 31 | ||
| 32 | if not input_string.startswith('(') or not input_string.endswith(')'): | |
| 33 | raise ValueError("tree missing") | |
| 34 | ||
| 35 | # Remove the outer parentheses | |
| 36 | content = input_string[1:-1] | |
| 37 | ||
| 38 | if not content: | |
| 39 | raise ValueError("tree with no nodes") | |
| 40 | ||
| 41 | return parse_tree(content) | |
| 42 | ||
| 43 | ||
| 44 | def parse_tree(content: str) -> SgfTree: | |
| 45 | if not content: | |
| 46 | raise ValueError("tree with no nodes") | |
| 47 | ||
| 48 | # Find the first node | |
| 49 | if not content.startswith(';'): | |
| 50 | # Check if it's a subtree in parentheses | |
| 51 | if content.startswith('('): | |
| 52 | # This means we have a structure like (;NODE)(;NODE) without a leading semicolon | |
| 53 | pass | |
| 54 | else: | |
| 55 | raise ValueError("tree with no nodes") | |
| 56 | ||
| 57 | nodes, remainder = parse_nodes(content) | |
| 58 | ||
| 59 | if remainder: | |
| 60 | # Edge Case: Extra characters after the tree | |
| 61 | raise ValueError("tree missing") | |
| 62 | ||
| 63 | return nodes[0] if nodes else SgfTree() | |
| 64 | ||
| 65 | ||
| 66 | def parse_nodes(content: str) -> tuple[list[SgfTree], str]: | |
| 67 | nodes = [] | |
| 68 | while content: | |
| 69 | if content.startswith(';'): | |
| 70 | node, content = parse_node(content[1:]) | |
| 71 | nodes.append(node) | |
| 72 | elif content.startswith('('): | |
| 73 | if not nodes: | |
| 74 | # Edge Case: Subtree at the beginning without a leading node | |
| 75 | raise ValueError("tree with no nodes") | |
| 76 | children, content = parse_subtrees(content) | |
| 77 | nodes[-1].children = children | |
| 78 | else: | |
| 79 | break | |
| 80 | return nodes, content | |
| 81 | ||
| 82 | ||
| 83 | def parse_node(content: str) -> tuple[SgfTree, str]: | |
| 84 | properties = {} | |
| 85 | ||
| 86 | while content and not (content.startswith(';') or content.startswith('(') or content.startswith(')')): | |
| 87 | # Parse property key | |
| 88 | key_end = 0 | |
| 89 | while key_end < len(content) and content[key_end].isalpha() and content[key_end].isupper(): | |
| 90 | key_end += 1 | |
| 91 | ||
| 92 | # Edge Case: No property key | |
| 93 | if key_end == 0: | |
| 94 | raise ValueError("property must be in uppercase") | |
| 95 | ||
| 96 | key = content[:key_end] | |
| 97 | content = content[key_end:] | |
| 98 | ||
| 99 | # Edge Case: Property without value | |
| 100 | if not content.startswith('['): | |
| 101 | raise ValueError("properties without delimiter") | |
| 102 | ||
| 103 | # Parse property values | |
| 104 | values = [] | |
| 105 | while content.startswith('['): | |
| 106 | content = content[1:] # Skip opening bracket | |
| 107 | value, content = parse_value(content) | |
| 108 | values.append(value) | |
| 109 | ||
| 110 | # Edge Case: Missing closing bracket | |
| 111 | if not content.startswith(']'): | |
| 112 | raise ValueError("properties without delimiter") | |
| 113 | content = content[1:] # Skip closing bracket | |
| 114 | ||
| 115 | properties[key] = values | |
| 116 | ||
| 117 | return SgfTree(properties), content | |
| 118 | ||
| 119 | ||
| 120 | def parse_value(content: str) -> tuple[str, str]: | |
| 121 | value = "" | |
| 122 | i = 0 | |
| 123 | while i < len(content): | |
| 124 | char = content[i] | |
| 125 | if char == ']': | |
| 126 | break | |
| 127 | elif char == '\\': | |
| 128 | # Edge Case: Escape sequence at the end of value | |
| 129 | if i + 1 >= len(content): | |
| 130 | raise ValueError("properties without delimiter") | |
| 131 | next_char = content[i+1] | |
| 132 | # Handle escaped characters | |
| 133 | if next_char == 'n': | |
| 134 | value += '\n' | |
| 135 | elif next_char == 't': | |
| 136 | value += '\t' | |
| 137 | elif next_char == 'r': | |
| 138 | value += '\r' | |
| 139 | else: | |
| 140 | value += next_char | |
| 141 | i += 2 | |
| 142 | elif char == '\n' and i > 0 and content[i-1] == '\\': | |
| 143 | # Newline immediately after backslash is removed | |
| 144 | i += 1 | |
| 145 | else: | |
| 146 | # Convert whitespace to spaces, except newlines | |
| 147 | if char != '\n' and char.isspace(): | |
| 148 | value += ' ' | |
| 149 | else: | |
| 150 | value += char | |
| 151 | i += 1 | |
| 152 | return value, content[i:] | |
| 153 | ||
| 154 | ||
| 155 | def parse_subtrees(content: str) -> tuple[list[SgfTree], str]: | |
| 156 | subtrees = [] | |
| 157 | max_iterations = 1000 # Prevent infinite loop | |
| 158 | iteration_count = 0 | |
| 159 | ||
| 160 | while content.startswith('(') and iteration_count < max_iterations: | |
| 161 | iteration_count += 1 | |
| 162 | # Find matching closing parenthesis | |
| 163 | bracket_count = 1 | |
| 164 | i = 1 | |
| 165 | while i < len(content) and bracket_count > 0: | |
| 166 | if content[i] == '(': | |
| 167 | bracket_count += 1 | |
| 168 | elif content[i] == ')': | |
| 169 | bracket_count -= 1 | |
| 170 | i += 1 | |
| 171 | ||
| 172 | # Edge Case: Mismatched parentheses | |
| 173 | if bracket_count != 0: | |
| 174 | raise ValueError("tree missing") | |
| 175 | ||
| 176 | subtree_content = content[1:i-1] # Extract content without outer parentheses | |
| 177 | if subtree_content: | |
| 178 | subtree_nodes, _ = parse_nodes(subtree_content) | |
| 179 | subtrees.extend(subtree_nodes) | |
| 180 | ||
| 181 | content = content[i:] | |
| 182 | ||
| 183 | # Skip any subsequent subtrees at the same level | |
| 184 | while content.startswith('(') and iteration_count < max_iterations: | |
| 185 | iteration_count += 1 | |
| 186 | bracket_count = 1 | |
| 187 | i = 1 | |
| 188 | while i < len(content) and bracket_count > 0: | |
| 189 | if content[i] == '(': | |
| 190 | bracket_count += 1 | |
| 191 | elif content[i] == ')': | |
| 192 | bracket_count -= 1 | |
| 193 | i += 1 | |
| 194 | ||
| 195 | # Edge Case: Mismatched parentheses | |
| 196 | if bracket_count != 0: | |
| 197 | raise ValueError("tree missing") | |
| 198 | ||
| 199 | subtree_content = content[1:i-1] | |
| 200 | if subtree_content: | |
| 201 | subtree_nodes, _ = parse_nodes(subtree_content) | |
| 202 | subtrees.extend(subtree_nodes) | |
| 203 | ||
| 204 | content = content[i:] | |
| 205 | ||
| 206 | if iteration_count >= max_iterations: | |
| 207 | raise ValueError("tree missing") | |
| 208 | ||
| 209 | return subtrees, content |
| Test Name | Status |
|---|---|
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all | Fail |
test_all_lowercase_property | Pass |
test_empty_input | Pass |
test_escaped_backslash_in_property_value_becomes_just_a_backslash | Pass |
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket | Pass |
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace | Skip |
test_escaped_tab_in_property_value_is_converted_to_space | Skip |
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value | Skip |
test_multiple_properties | Skip |
test_multiple_property_values | Skip |
test_node_without_properties | Skip |
test_node_without_tree | Skip |
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped | Skip |
test_parentheses_in_property_value_don_t_need_to_be_escaped | Skip |
test_properties_without_delimiter | Skip |
test_semicolon_in_property_value_doesn_t_need_to_be_escaped | Skip |
test_single_node_tree | Skip |
test_tree_with_no_nodes | Skip |
test_two_child_trees | Skip |
test_two_nodes | Skip |
test_upper_and_lowercase_property | Skip |
test_within_property_values_newlines_remain_as_newlines | Skip |
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces | Skip |
Loading...
Ridges.AI© 2025 Ridges AI. Building the future of decentralized AI development.
