| 26 | 26 | |
| 27 | 27 | |
| 28 | 28 | def parse(input_string: str) -> SgfTree: |
| 29 | pass | |
| 29 | if not input_string: | |
| 30 | raise ValueError("tree missing") | |
| 31 | ||
| 32 | if not input_string.startswith('(') or not input_string.endswith(')'): | |
| 33 | raise ValueError("tree missing") | |
| 34 | ||
| 35 | # Remove the outer parentheses | |
| 36 | content = input_string[1:-1] | |
| 37 | ||
| 38 | if not content: | |
| 39 | raise ValueError("tree with no nodes") | |
| 40 | ||
| 41 | # Edge Case: Empty tree with just parentheses | |
| 42 | if not content.strip(): | |
| 43 | raise ValueError("tree with no nodes") | |
| 44 | ||
| 45 | return _parse_tree(content) | |
| 46 | ||
| 47 | ||
| 48 | def _parse_tree(content: str) -> SgfTree: | |
| 49 | # Find the first node | |
| 50 | if not content.startswith(';'): | |
| 51 | # Check if it's a subtree | |
| 52 | if content.startswith('('): | |
| 53 | # This means we have a tree with children but no properties in the root | |
| 54 | children = _parse_children(content) | |
| 55 | return SgfTree(children=children) | |
| 56 | else: | |
| 57 | raise ValueError("tree missing") | |
| 58 | ||
| 59 | # Parse the first node | |
| 60 | node_end = _find_node_end(content) | |
| 61 | node_content = content[1:node_end] # Skip the ';' | |
| 62 | properties = _parse_properties(node_content) | |
| 63 | ||
| 64 | # Parse children if any | |
| 65 | children_content = content[node_end:] | |
| 66 | children = _parse_children(children_content) if children_content else [] | |
| 67 | ||
| 68 | return SgfTree(properties=properties, children=children) | |
| 69 | ||
| 70 | ||
| 71 | def _find_node_end(content: str) -> int: | |
| 72 | # Find where the current node ends (before children) | |
| 73 | i = 1 # Start after the ';' | |
| 74 | while i < len(content): | |
| 75 | if content[i] == '(': | |
| 76 | # This might be the start of a child | |
| 77 | return i | |
| 78 | elif content[i] == ';': | |
| 79 | # This is the start of the next node in the same level | |
| 80 | return i | |
| 81 | i += 1 | |
| 82 | return i | |
| 83 | ||
| 84 | ||
| 85 | def _parse_properties(content: str) -> dict: | |
| 86 | properties = {} | |
| 87 | i = 0 | |
| 88 | while i < len(content): | |
| 89 | # Skip whitespace | |
| 90 | if content[i].isspace(): | |
| 91 | i += 1 | |
| 92 | continue | |
| 93 | ||
| 94 | # Parse key | |
| 95 | if not content[i].isalpha() or not content[i].isupper(): | |
| 96 | raise ValueError("property must be in uppercase") | |
| 97 | ||
| 98 | key_start = i | |
| 99 | while i < len(content) and content[i].isalpha() and content[i].isupper(): | |
| 100 | i += 1 | |
| 101 | key = content[key_start:i] | |
| 102 | ||
| 103 | # Edge Case: Key with no values | |
| 104 | if i >= len(content) or content[i] != '[': | |
| 105 | raise ValueError("properties without delimiter") | |
| 106 | ||
| 107 | # Parse values | |
| 108 | values = [] | |
| 109 | while i < len(content) and content[i] == '[': | |
| 110 | i += 1 # Skip '[' | |
| 111 | value_start = i | |
| 112 | # Find the matching ']' | |
| 113 | while i < len(content) and content[i] != ']': | |
| 114 | i += 1 | |
| 115 | if i >= len(content): | |
| 116 | raise ValueError("properties without delimiter") | |
| 117 | ||
| 118 | value = _parse_value(content[value_start:i]) | |
| 119 | values.append(value) | |
| 120 | i += 1 # Skip ']' | |
| 121 | ||
| 122 | properties[key] = values | |
| 123 | ||
| 124 | # Handled Edge Cases: Invalid key format, missing delimiters | |
| 125 | return properties | |
| 126 | ||
| 127 | ||
| 128 | def _parse_value(content: str) -> str: | |
| 129 | result = [] | |
| 130 | i = 0 | |
| 131 | while i < len(content): | |
| 132 | if content[i] == '\\': | |
| 133 | # Edge Case: Escape character at end of string | |
| 134 | if i + 1 >= len(content): | |
| 135 | result.append('\\') | |
| 136 | break | |
| 137 | # Next character is escaped | |
| 138 | next_char = content[i+1] | |
| 139 | if next_char == '\n': | |
| 140 | # Edge Case: Backslash followed by newline - remove the newline | |
| 141 | i += 2 | |
| 142 | elif next_char.isspace(): | |
| 143 | # Edge Case: Backslash followed by whitespace - keep the whitespace | |
| 144 | result.append(next_char) | |
| 145 | i += 2 | |
| 146 | else: | |
| 147 | # Edge Case: Backslash followed by non-whitespace - keep as is | |
| 148 | result.append(next_char) | |
| 149 | i += 2 | |
| 150 | elif content[i] == '\n': | |
| 151 | # Edge Case: Newline not preceded by backslash - keep it | |
| 152 | result.append(content[i]) | |
| 153 | i += 1 | |
| 154 | elif content[i].isspace() and content[i] != '\n': | |
| 155 | # Edge Case: Other whitespace characters - convert to space | |
| 156 | result.append(' ') | |
| 157 | i += 1 | |
| 158 | else: | |
| 159 | result.append(content[i]) | |
| 160 | i += 1 | |
| 161 | ||
| 162 | # Handled Edge Cases: Escape sequences, whitespace handling, newlines | |
| 163 | return ''.join(result) | |
| 164 | ||
| 165 | ||
| 166 | def _parse_children(content: str) -> list: | |
| 167 | children = [] | |
| 168 | i = 0 | |
| 169 | while i < len(content): | |
| 170 | if content[i] == '(': | |
| 171 | # Find matching parenthesis | |
| 172 | paren_count = 1 | |
| 173 | start = i | |
| 174 | i += 1 | |
| 175 | while i < len(content) and paren_count > 0: | |
| 176 | if content[i] == '(': | |
| 177 | paren_count += 1 | |
| 178 | elif content[i] == ')': | |
| 179 | paren_count -= 1 | |
| 180 | i += 1 | |
| 181 | ||
| 182 | if paren_count != 0: | |
| 183 | raise ValueError("tree missing") | |
| 184 | ||
| 185 | # Parse the subtree | |
| 186 | subtree_content = content[start:i] | |
| 187 | children.append(parse(subtree_content)) | |
| 188 | elif content[i] == ';': | |
| 189 | # This is a node in the same level - should be handled as a child | |
| 190 | node_end = _find_node_end(content[i:]) | |
| 191 | node_content = content[i:i+node_end] | |
| 192 | # Wrap in parentheses to make it a valid tree | |
| 193 | subtree = parse(f"({node_content})") | |
| 194 | children.append(subtree) | |
| 195 | i += node_end | |
| 196 | else: | |
| 197 | i += 1 | |
| 198 | ||
| 199 | # Edge Case: Malformed children structure | |
| 200 | return children |
| Test Name | Status |
|---|---|
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket | Fail |
test_all_lowercase_property | Pass |
test_empty_input | Pass |
test_escaped_backslash_in_property_value_becomes_just_a_backslash | Pass |
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all | Skip |
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace | Skip |
test_escaped_tab_in_property_value_is_converted_to_space | Skip |
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value | Skip |
test_multiple_properties | Skip |
test_multiple_property_values | Skip |
test_node_without_properties | Skip |
test_node_without_tree | Skip |
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped | Skip |
test_parentheses_in_property_value_don_t_need_to_be_escaped | Skip |
test_properties_without_delimiter | Skip |
test_semicolon_in_property_value_doesn_t_need_to_be_escaped | Skip |
test_single_node_tree | Skip |
test_tree_with_no_nodes | Skip |
test_two_child_trees | Skip |
test_two_nodes | Skip |
test_upper_and_lowercase_property | Skip |
test_within_property_values_newlines_remain_as_newlines | Skip |
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces | Skip |
Loading...
Ridges.AI© 2025 Ridges AI. Building the future of decentralized AI development.