sn62

Finished
2626
2727
def parse(input_string: str) -> SgfTree:
    """
    Parse an SGF string and return a tree structure of properties.

    Args:
        input_string: A string in SGF format, e.g. "(;A[b];C[d])"

    Returns:
        SgfTree: The parsed SGF tree

    Raises:
        ValueError: "tree missing" when the input is empty or is not wrapped
            in a '(' ... ')' pair; "tree with no nodes" when the parentheses
            enclose nothing; any error raised while parsing the nodes.
    """
    # Without both enclosing parentheses there is no tree at all
    # (this also covers the empty string).
    if not (input_string.startswith('(') and input_string.endswith(')')):
        raise ValueError("tree missing")

    # Remove the outer parentheses
    content = input_string[1:-1]

    # "()" is a well-formed but empty tree: it exists, it just has no nodes.
    if not content:
        raise ValueError("tree with no nodes")

    tree, _ = _parse_node_sequence(content, 0)
    return tree
63
64
65def _parse_node(content: str, index: int) -> tuple[SgfTree, int]:
66 """
67 Parse a single node from the content string starting at index.
68
69 Args:
70 content: The SGF content string
71 index: The starting index for parsing (should be at ';')
72
73 Returns:
74 tuple: A tuple containing the parsed SgfTree and the next index to parse
75 """
76 # Edge Case: Content doesn't start with ';'
77 if index >= len(content) or content[index] != ';':
78 raise ValueError("tree with no nodes")
79
80 index += 1 # Skip the ';'
81
82 # Parse properties for this node
83 properties = {}
84 while index < len(content) and content[index].isalpha() and content[index].isupper():
85 # Parse key
86 key_start = index
87 while index < len(content) and content[index].isalpha() and content[index].isupper():
88 index += 1
89
90 key = content[key_start:index]
91
92 # Edge Case: Key is not uppercase
93 if not key.isupper():
94 raise ValueError("property must be in uppercase")
95
96 # Parse values
97 values = []
98 while index < len(content) and content[index] == '[':
99 index += 1 # Skip '['
100 value_start = index
101
102 # Parse value (include all characters as-is)
103 while index < len(content) and content[index] != ']':
104 index += 1
105
106 # Edge Case: Unclosed bracket
107 if index >= len(content) or content[index] != ']':
108 raise ValueError("properties without delimiter")
109
110 value = content[value_start:index]
111 # Process the value according to SGF Text type rules
112 value = _process_text_value(value)
113 values.append(value)
114 index += 1 # Skip ']'
115
116 # Edge Case: No values for the property
117 if not values:
118 raise ValueError("properties without delimiter")
119
120 properties[key] = values
121
122 return SgfTree(properties), index
123
def _parse_node_sequence(content: str, index: int) -> tuple[SgfTree, int]:
    """
    Parse a node sequence and any variations that follow it.

    Consecutive ';' nodes form a linear chain: each node is the single child
    of the node before it. Parenthesised variations that follow the sequence
    become the children of the last node in that chain.

    Args:
        content: The SGF content string
        index: The starting index for parsing

    Returns:
        tuple: A tuple containing the root SgfTree of the sequence and the
            next index to parse

    Raises:
        ValueError: "tree with no nodes" if index is past the end of content;
            "tree missing" if the sequence does not start with ';' or a
            variation is not closed by ')'; any error raised while parsing
            an individual node.
    """
    if index >= len(content):
        raise ValueError("tree with no nodes")

    if content[index] != ';':
        raise ValueError("tree missing")

    root, index = _parse_node(content, index)

    # Each further ';' node is the only child of the node parsed before it.
    last = root
    while index < len(content) and content[index] == ';':
        node, index = _parse_node(content, index)
        last.children = [node]
        last = node

    # Variations branch off the end of the sequence, not off its first node.
    variations = []
    while index < len(content) and content[index] == '(':
        subtree, index = _parse_node_sequence(content, index + 1)  # Skip '('

        # Every variation must be closed by its own ')'
        if index >= len(content) or content[index] != ')':
            raise ValueError("tree missing")
        index += 1

        variations.append(subtree)

    # Only assign when there is something to attach, leaving the node's
    # default children untouched otherwise.
    if variations:
        last.children = variations

    return root, index
169
170
171def _process_text_value(value: str) -> str:
172 """
173 Process a text value according to SGF Text type rules.
174
175 Args:
176 value: The raw text value
177
178 Returns:
179 str: The processed text value
180 """
181 result = []
182 i = 0
183 while i < len(value):
184 if value[i] == '\\':
185 i += 1
186 if i < len(value):
187 # If next character is newline, remove the backslash and newline
188 if value[i] == '\n':
189 i += 1
190 continue
191 # Any character after \ should be preserved as-is (both backslash and character)
192 # Since we're already processing the escaped character, just append it
193 # The backslash was already consumed, so we just append the character
194 result.append(value[i])
195 i += 1
196 else:
197 # Edge Case: Escape at end of string
198 result.append('\\')
199 else:
200 # All whitespace characters other than newline are converted to spaces
201 if value[i].isspace() and value[i] != '\n':
202 result.append(' ')
203 else:
204 result.append(value[i])
205 i += 1
206
207 return ''.join(result)
208
209# Handled Edge Cases: Empty input, missing opening parenthesis, missing closing parenthesis, empty content after removing parentheses, content not starting with semicolon, empty or non-uppercase key, unclosed bracket, no values for property, missing closing parenthesis for children, escape at end of string
Test Name | Status
test_all_lowercase_property
Fail
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Fail
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Fail
test_escaped_tab_in_property_value_is_converted_to_space
Fail
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Fail
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Fail
test_tree_with_no_nodes
Fail
test_upper_and_lowercase_property
Fail
test_empty_input
Pass
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Pass
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_parentheses_in_property_value_don_t_need_to_be_escaped
Pass
test_properties_without_delimiter
Pass
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Pass
test_single_node_tree
Pass
test_two_child_trees
Pass
test_two_nodes
Pass
test_within_property_values_newlines_remain_as_newlines
Pass
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.