Konoha

Finished
2626
2727
2828def parse(input_string: str) -> SgfTree:
29 pass
29 """
30 Parse an SGF (Smart Game Format) string and return a tree structure of properties.
31
32 Args:
33 input_string: A string containing SGF formatted data
34
35 Returns:
36 SgfTree: A tree structure representing the SGF data
37
38 Raises:
39 ValueError: If the input string is invalid according to SGF format rules
40 """
41 # Edge Case: Empty input or doesn't start with '('
42 if not input_string or not input_string.startswith('('):
43 raise ValueError("tree missing")
44
45 # Edge Case: Doesn't end with ')'
46 if not input_string.endswith(')'):
47 raise ValueError("tree missing")
48
49 # Remove the outer parentheses
50 content = input_string[1:-1]
51
52 # Edge Case: Empty content after removing parentheses
53 if not content:
54 raise ValueError("tree with no nodes")
55
56 return _parse_tree(content)
57
58
59def _parse_tree(content: str) -> SgfTree:
60 """
61 Parse the content of a tree (without outer parentheses).
62
63 Args:
64 content: The content inside the outer parentheses
65
66 Returns:
67 SgfTree: A tree structure representing the content
68 """
69 # Find the first node
70 if not content.startswith(';'):
71 raise ValueError("tree with no nodes")
72
73 # Parse the first node
74 node_end = 1 # Start after the ';'
75 properties = {}
76
77 # Parse properties of the first node
78 while node_end < len(content) and content[node_end] != '(' and content[node_end] != ';':
79 # Parse key
80 key_start = node_end
81 while node_end < len(content) and content[node_end].isalpha():
82 node_end += 1
83
84 # Edge Case: Key is not all uppercase
85 key = content[key_start:node_end]
86 if not key.isupper():
87 raise ValueError("property must be in uppercase")
88
89 # Edge Case: No key found
90 if not key:
91 raise ValueError("properties without delimiter")
92
93 # Parse values
94 values = []
95 while node_end < len(content) and content[node_end] == '[':
96 node_end += 1 # Skip '['
97 value_start = node_end
98
99 # Parse value, handling escapes
100 while node_end < len(content) and content[node_end] != ']':
101 if content[node_end] == '\\':
102 node_end += 2 # Skip escape and next character
103 else:
104 node_end += 1
105
106 # Edge Case: Unclosed bracket
107 if node_end >= len(content) or content[node_end] != ']':
108 raise ValueError("properties without delimiter")
109
110 value = content[value_start:node_end]
111 # Process the value according to SGF Text type rules
112 value = _process_text(value)
113 values.append(value)
114 node_end += 1 # Skip ']'
115
116 # Edge Case: No values for a key
117 if not values:
118 raise ValueError("properties without delimiter")
119
120 properties[key] = values
121
122 children = []
123
124 # Parse children
125 while node_end < len(content) and content[node_end] == '(':
126 # Find matching closing parenthesis
127 paren_count = 1
128 child_start = node_end + 1
129 node_end += 1
130
131 while node_end < len(content) and paren_count > 0:
132 if content[node_end] == '(':
133 paren_count += 1
134 elif content[node_end] == ')':
135 paren_count -= 1
136 node_end += 1
137
138 # Edge Case: Unmatched parenthesis
139 if paren_count != 0:
140 raise ValueError("tree missing")
141
142 child_content = content[child_start:node_end-1]
143 children.append(_parse_tree(child_content))
144
145 # Check for additional nodes at the same level
146 if node_end < len(content) and content[node_end] == ';':
147 # This is a shorthand - the next node is a child of the current node
148 remaining_content = content[node_end:]
149 children.append(_parse_tree(remaining_content))
150
151 return SgfTree(properties, children)
152
153
154def _process_text(text: str) -> str:
155 """
156 Process text according to SGF Text type rules.
157
158 Args:
159 text: Raw text value from SGF
160
161 Returns:
162 str: Processed text
163 """
164 result = []
165 i = 0
166 while i < len(text):
167 if text[i] == '\\':
168 # Handle escape character
169 i += 1
170 if i < len(text):
171 char = text[i]
172 if char == 'n' and (i+1 >= len(text) or text[i+1] != ']'):
173 # Special case: \n not followed by ] is treated as newline
174 # But newlines are removed if they come immediately after \
175 # So we check if there's a newline
176 if i+1 < len(text) and text[i+1] == '\n':
177 i += 1 # Skip the actual newline
178 # Otherwise just add 'n'
179 result.append(char)
180 elif char == '\n':
181 # Newline immediately after \ is removed
182 pass
183 elif char.isspace():
184 # Whitespace after \ follows normal rules
185 if char != '\n':
186 result.append(' ')
187 else:
188 # Any non-whitespace character after \ is inserted as-is
189 result.append(char)
190 # If escape is at end of string, just ignore it
191 else:
192 char = text[i]
193 if char == '\n':
194 # Newlines remain as newlines unless immediately after \
195 result.append(char)
196 elif char.isspace() and char != '\n':
197 # All whitespace other than newline converted to spaces
198 result.append(' ')
199 else:
200 result.append(char)
201 i += 1
202
203 return ''.join(result)
204# Handled Edge Cases: Empty input, missing opening parenthesis, missing closing parenthesis, empty content after removing parentheses, missing semicolon for node, key not uppercase, no key found, unclosed bracket, no values for key, unmatched parenthesis
205# Handled Edge Cases: Key not uppercase, no key found, unclosed bracket, no values for key
206# Handled Edge Cases: Empty input or doesn't start with '(', doesn't end with ')', empty content after removing parentheses, missing semicolon for node, unmatched parenthesis, additional nodes at same level
Test NameStatus
test_all_lowercase_property
Pass
test_empty_input
Pass
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Pass
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Pass
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Pass
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Pass
test_escaped_tab_in_property_value_is_converted_to_space
Pass
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Pass
test_parentheses_in_property_value_don_t_need_to_be_escaped
Pass
test_properties_without_delimiter
Pass
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Pass
test_single_node_tree
Pass
test_tree_with_no_nodes
Pass
test_two_child_trees
Pass
test_two_nodes
Pass
test_upper_and_lowercase_property
Pass
test_within_property_values_newlines_remain_as_newlines
Pass
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.