fortitude

Finished
2626
2727
def parse(input_string: str) -> SgfTree:
    """
    Parse an SGF string and return a tree structure of properties.

    Args:
        input_string: A string in SGF format. It must be wrapped in a single
            pair of parentheses, e.g. ``"(;A[b])"``.

    Returns:
        SgfTree: The parsed SGF tree.

    Raises:
        ValueError: With message ``"tree missing"`` when the input is empty
            or is not wrapped in ``(`` ... ``)``; with message
            ``"tree with no nodes"`` when the parentheses enclose nothing.
            Errors raised by ``_parse_tree`` while reading the content
            propagate unchanged.
    """
    # An empty string fails startswith(), so it needs no separate check.
    if not (input_string.startswith('(') and input_string.endswith(')')):
        raise ValueError("tree missing")

    # Strip the outer parentheses; what remains must hold at least one node.
    content = input_string[1:-1]
    if not content:
        raise ValueError("tree with no nodes")

    # NOTE(review): the end index returned by _parse_tree is discarded, so
    # any text left over after the root tree is silently ignored. Confirm
    # whether trailing content should be rejected instead.
    tree, _ = _parse_tree(content, 0)
    return tree
60
61
def _parse_tree(content: str, index: int) -> tuple[SgfTree, int]:
    """
    Parse one node, and every node reachable from it, starting at ``index``.

    A node is a ``;`` followed by zero or more properties. Each property is
    an uppercase key followed by one or more ``[value]`` groups. After the
    properties, a directly following ``;`` node becomes a child, and each
    parenthesised ``( ... )`` variation becomes a further child.

    Args:
        content: The SGF content string (outer parentheses already removed).
        index: The position in ``content`` at which the node's ``;`` is
            expected.

    Returns:
        tuple: The parsed ``SgfTree`` and the index of the first character
        that was not consumed.

    Raises:
        ValueError: ``"tree with no nodes"`` if there is no ``;`` at
            ``index``; ``"property must be in uppercase"`` if a property key
            contains any letter that is not uppercase;
            ``"properties without delimiter"`` if a key is not followed by a
            ``[value]``, a value is never closed, or a non-letter appears
            where a key is expected; ``"tree missing"`` if a variation is
            not closed by ``)``.
    """
    length = len(content)

    if index >= length or content[index] != ';':
        raise ValueError("tree with no nodes")
    index += 1  # Skip the ';'

    properties = {}
    while index < length and content[index] not in '();':
        # Anything other than a letter here cannot start a property key.
        if not content[index].isalpha():
            raise ValueError("properties without delimiter")

        # Read the whole alphabetic run before validating its case. Stopping
        # at the first lowercase letter would turn a key such as "Aa" into
        # "A" and misreport the problem as a missing delimiter.
        key_start = index
        while index < length and content[index].isalpha():
            index += 1
        key = content[key_start:index]
        if not all(letter.isupper() for letter in key):
            raise ValueError("property must be in uppercase")

        # Collect every consecutive "[...]" group as a value of this key.
        values = []
        while index < length and content[index] == '[':
            index += 1  # Skip '['
            value_start = index

            # Advance to the closing ']'; a backslash protects the character
            # after it, so an escaped ']' does not end the value.
            while index < length and content[index] != ']':
                index += 2 if content[index] == '\\' else 1

            # Running off the end means the value was never closed.
            if index >= length:
                raise ValueError("properties without delimiter")

            values.append(_process_text(content[value_start:index]))
            index += 1  # Skip ']'

        if not values:
            raise ValueError("properties without delimiter")

        properties[key] = values

    children = []

    # A node that follows directly (no parentheses) is a single child; the
    # recursive call also consumes everything that hangs off that child.
    if index < length and content[index] == ';':
        child, index = _parse_tree(content, index)
        children.append(child)

    # Each parenthesised variation contributes one more child.
    while index < length and content[index] == '(':
        index += 1  # Skip '('
        child, index = _parse_tree(content, index)
        children.append(child)
        if index < length and content[index] == ')':
            index += 1  # Skip ')'
        else:
            raise ValueError("tree missing")

    return SgfTree(properties, children), index
152
153
def _process_text(text: str) -> str:
    """
    Process text according to SGF Text type rules.

    Rules applied, scanning left to right:
      * A backslash escapes the character after it: an escaped newline is
        dropped, any other escaped whitespace becomes a single space, and
        any other escaped character is kept as-is (so ``\\]`` yields ``]``
        and ``\\\\`` yields one backslash). The backslash itself is never
        emitted.
      * A backslash that is the last character of ``text`` is dropped.
      * An unescaped newline is kept; every other unescaped whitespace
        character becomes a single space.

    Args:
        text: The raw text found between ``[`` and ``]``.

    Returns:
        str: The processed text.
    """
    result = []
    i = 0
    while i < len(text):
        char = text[i]
        if char == '\\':
            # Step onto the escaped character, if the backslash is not last.
            i += 1
            if i < len(text):
                escaped = text[i]
                if escaped == '\n':
                    pass  # Escaped newline: removed entirely
                elif escaped.isspace():
                    result.append(' ')
                else:
                    result.append(escaped)
        elif char != '\n' and char.isspace():
            # Whitespace other than a newline is normalised to a space.
            result.append(' ')
        else:
            result.append(char)
        i += 1

    return ''.join(result)
Test Name | Status
test_upper_and_lowercase_property
Fail
test_all_lowercase_property
Pass
test_empty_input
Pass
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Pass
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Pass
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Pass
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Pass
test_escaped_tab_in_property_value_is_converted_to_space
Pass
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Pass
test_parentheses_in_property_value_don_t_need_to_be_escaped
Pass
test_properties_without_delimiter
Pass
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Pass
test_single_node_tree
Pass
test_tree_with_no_nodes
Pass
test_two_child_trees
Pass
test_two_nodes
Pass
test_within_property_values_newlines_remain_as_newlines
Pass
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.