corpus

Finished
2626
2727
def parse(input_string: str) -> SgfTree:
    """
    Parse an SGF string and return a tree structure of properties.

    The whole input must be exactly one parenthesized tree: an opening '(',
    at least one node, and a matching closing ')' as the very last character.

    Args:
        input_string: A string containing SGF formatted data

    Returns:
        SgfTree: A tree structure representing the SGF data

    Raises:
        ValueError: "tree missing" if the input is empty, is not wrapped in
            parentheses, lacks the closing ')' of the outer tree, or has
            text left over after that ')'. Errors raised while parsing nodes
            and properties propagate unchanged.
    """
    # Edge Case: Empty input, or input not wrapped in '(' ... ')'
    # (an empty string fails the startswith check as well)
    if not (input_string.startswith('(') and input_string.endswith(')')):
        raise ValueError("tree missing")

    # Parse the root node starting right after the opening '('. Errors from
    # the helpers already carry their final message, so they are not caught.
    tree, index = _parse_tree(input_string, 1)

    # Edge Case: The outer tree's own ')' is mandatory. The endswith check
    # above is not enough on its own: in "(;A[B](;C[D])" the final ')'
    # closes the nested variation, leaving the outer '(' unbalanced.
    if index >= len(input_string) or input_string[index] != ')':
        raise ValueError("tree missing")

    # Edge Case: Anything after the closing ')' means the input is malformed
    if index + 1 != len(input_string):
        raise ValueError("tree missing")

    return tree
69
70
def _parse_tree(s: str, index: int) -> tuple[SgfTree, int]:
    """
    Read one node, together with everything nested below it, from SGF text.

    Args:
        s: The SGF string
        index: Position at which the node's leading ';' is expected

    Returns:
        Tuple of (SgfTree, next_index), where next_index is the position of
        the first character this call did not consume

    Raises:
        ValueError: "tree with no nodes" if there is no ';' at index, or
            "tree missing" if a parenthesized variation is not closed by ')'
    """
    length = len(s)

    # Edge Case: every node has to open with a semicolon
    if not (index < length and s[index] == ';'):
        raise ValueError("tree with no nodes")
    cursor = index + 1

    # Gather this node's properties until a structural delimiter shows up
    props = {}
    while cursor < length and s[cursor] not in ');(':
        name, vals, cursor = _parse_property(s, cursor)
        props[name] = vals

    node = SgfTree(props, [])
    kids = []

    # A ';' right after the properties starts the next node in the sequence,
    # which is stored as a child of this node
    while cursor < length and s[cursor] == ';':
        successor, cursor = _parse_tree(s, cursor)
        kids.append(successor)

    # Every '(' ... ')' group contributes one more child (a variation)
    while cursor < length and s[cursor] == '(':
        variation, cursor = _parse_tree(s, cursor + 1)  # step over the '('
        kids.append(variation)

        # The variation has to be terminated by its own ')'
        if cursor >= length or s[cursor] != ')':
            raise ValueError("tree missing")
        cursor += 1

    node.children = kids
    return node, cursor
120
121
122def _parse_property(s: str, index: int) -> tuple[str, list[str], int]:
123 """
124 Parse a property (key with one or more values) from the SGF string.
125
126 Args:
127 s: The SGF string
128 index: Current position in the string
129
130 Returns:
131 Tuple of (key, values_list, next_index)
132 """
133 # Parse the key (sequence of letters)
134 key_start = index
135 while index < len(s) and s[index].isalpha():
136 index += 1
137
138 # Edge Case: No key found
139 if index == key_start:
140 raise ValueError("properties without delimiter")
141
142 key = s[key_start:index]
143
144 # Edge Case: Property keys must be uppercase
145 if not key.isupper():
146 raise ValueError("property must be in uppercase")
147
148 # Edge Case: Key not followed by '['
149 if index >= len(s) or s[index] != '[':
150 raise ValueError("properties without delimiter")
151
152 # Parse values (there must be at least one)
153 values = []
154 while index < len(s) and s[index] == '[':
155 value, index = _parse_value(s, index + 1) # Skip the '['
156 values.append(value)
157
158 # Skip the ']'
159 if index >= len(s) or s[index] != ']':
160 raise ValueError("properties without delimiter")
161 index += 1
162
163 # Edge Case: No values found
164 if not values:
165 raise ValueError("properties without delimiter")
166
167 return key, values, index
168
169
170def _parse_value(s: str, index: int) -> tuple[str, int]:
171 """
172 Parse a value (text type) from the SGF string.
173
174 Args:
175 s: The SGF string
176 index: Current position in the string (after the opening '[')
177
178 Returns:
179 Tuple of (value_string, next_index)
180 """
181 result = []
182
183 while index < len(s) and s[index] != ']':
184 if s[index] == '\\': # Escape character
185 index += 1 # Skip the backslash
186
187 # Edge Case: Backslash at end of input
188 if index >= len(s):
189 raise ValueError("properties without delimiter")
190
191 char = s[index]
192
193 # Handle escaped characters according to SGF text rules
194 if char == '\n':
195 # Newline immediately after backslash is removed
196 pass
197 elif char.isspace():
198 # Other whitespace becomes space
199 result.append(' ')
200 else:
201 # Any non-whitespace character after backslash is inserted as-is
202 result.append(char)
203 elif s[index] == '\n':
204 # Newlines (not after backslash) remain as newlines
205 result.append(s[index])
206 elif s[index].isspace() and s[index] != '\n':
207 # All whitespace other than newline becomes space
208 result.append(' ')
209 else:
210 # Regular character
211 result.append(s[index])
212
213 index += 1
214
215 # Edge Case: End of input reached before closing ']'
216 if index >= len(s):
217 raise ValueError("properties without delimiter")
218
219 return ''.join(result), index
220# Handled Edge Cases: Empty input, input not starting with '(', missing semicolon for node,
221# property keys not uppercase, no key found, key not followed by '[',
222# no values found, backslash at end of input, end of input before closing ']'
Test Name | Status
test_all_lowercase_property
Pass
test_empty_input
Pass
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Pass
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Pass
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Pass
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Pass
test_escaped_tab_in_property_value_is_converted_to_space
Pass
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Pass
test_parentheses_in_property_value_don_t_need_to_be_escaped
Pass
test_properties_without_delimiter
Pass
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Pass
test_single_node_tree
Pass
test_tree_with_no_nodes
Pass
test_two_child_trees
Pass
test_two_nodes
Pass
test_upper_and_lowercase_property
Pass
test_within_property_values_newlines_remain_as_newlines
Pass
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.