xas

Finished
2626
2727
def parse(input_string: str) -> SgfTree:
    """Parse an SGF string and return a tree structure of properties.

    Args:
        input_string: A string in SGF format.

    Returns:
        SgfTree: The parsed SGF tree.

    Raises:
        ValueError: "tree missing" when the input is empty or is not wrapped
            in '(' ... ')'; "tree with no nodes" when the wrapped content
            does not begin with ';'. Errors raised while parsing the content
            (see `_parse_tree`) propagate unchanged.
    """
    # An empty string fails the same way as a string without the outer
    # parentheses: there is no tree to parse.
    if not input_string or not (
        input_string.startswith('(') and input_string.endswith(')')
    ):
        raise ValueError("tree missing")

    # Strip the outermost parentheses; what remains must begin with a node.
    content = input_string[1:-1]

    # Covers both "()" (empty content) and content that does not open a node.
    if not content.startswith(';'):
        raise ValueError("tree with no nodes")

    return _parse_tree(content)
65
66
def _find_value_end(content: str, start: int) -> int:
    """Return the index of the unescaped ']' closing a property value.

    Args:
        content: The text being parsed.
        start: Index of the first character after the opening '['.

    Returns:
        int: Index of the closing ']', or ``len(content)`` when the value is
        never closed.
    """
    i = start
    n = len(content)
    while i < n:
        ch = content[i]
        if ch == ']':
            return i
        # A backslash escapes the next character, so step over both; this is
        # what keeps an escaped ']' from terminating the value.
        i += 2 if ch == '\\' else 1
    return n


def _find_subtree_end(content: str, start: int) -> int:
    """Return the index of the ')' that closes a child tree.

    Parentheses that occur inside property values are not structural and are
    skipped, so a value such as ``[(x]`` does not disturb the matching.

    Args:
        content: The text being parsed.
        start: Index of the first character after the child's opening '('.

    Returns:
        int: Index of the matching ')'.

    Raises:
        ValueError: "tree missing" when no matching ')' exists.
    """
    depth = 1
    i = start
    n = len(content)
    while i < n:
        ch = content[i]
        if ch == '[':
            value_end = _find_value_end(content, i + 1)
            # Jump to the closing ']' of a well-formed value. An unclosed
            # value is scanned character by character instead, leaving the
            # recursive parse of the child to report the missing delimiter.
            if value_end < n:
                i = value_end
        elif ch == '(':
            depth += 1
        elif ch == ')':
            depth -= 1
            if depth == 0:
                return i
        i += 1
    raise ValueError("tree missing")


def _parse_tree(content: str) -> SgfTree:
    """Parse a tree from its content string.

    The content must start with ';'. The first node's properties are read,
    then any parenthesised child trees, and finally a following ';' sequence
    is parsed as one more child (shorthand for a single-child chain).

    Args:
        content: The content of the tree (without outer parentheses).

    Returns:
        SgfTree: The parsed tree.

    Raises:
        ValueError: "tree with no nodes" when the content is empty or does not
            start with ';'; "property must be in uppercase" when a property
            key is not made only of uppercase letters; "properties without
            delimiter" when a key is not followed by '[' or a value is never
            closed; "tree missing" when a child tree's '(' has no matching ')'.
    """
    if not content or not content.startswith(';'):
        raise ValueError("tree with no nodes")

    n = len(content)
    i = 1  # Skip the initial ';'
    properties = {}

    # Parse the properties of the current node.
    while i < n and content[i] not in '(;':
        key_start = i
        while i < n and content[i].isalpha():
            i += 1

        # An empty key (no letters at all) also fails this check.
        key = content[key_start:i]
        if not key.isupper():
            raise ValueError("property must be in uppercase")

        # A key must be followed by at least one bracketed value.
        if i >= n or content[i] != '[':
            raise ValueError("properties without delimiter")

        values = []
        while i < n and content[i] == '[':
            value_start = i + 1
            value_end = _find_value_end(content, value_start)
            if value_end >= n:
                raise ValueError("properties without delimiter")
            values.append(_unescape_value(content[value_start:value_end]))
            i = value_end + 1  # Skip ']'

        properties[key] = values

        # Only another key (a letter) continues the property list; anything
        # else ends it.
        if not (i < n and content[i].isalpha()):
            break

    children = []

    # Each parenthesised group is a separate child tree.
    while i < n and content[i] == '(':
        close = _find_subtree_end(content, i + 1)
        children.append(_parse_tree(content[i + 1:close]))
        i = close + 1

    # Shorthand notation: a following ';' starts a single-child sequence made
    # of the remainder of the content.
    if i < n and content[i] == ';':
        children.append(_parse_tree(content[i:]))

    return SgfTree(properties, children)
184
185
186def _unescape_value(value: str) -> str:
187 """
188 Unescape an SGF text value according to SGF specification.
189
190 Args:
191 value: The escaped SGF text value
192
193 Returns:
194 str: The unescaped value
195 """
196 result = []
197 i = 0
198 while i < len(value):
199 if value[i] == '\\':
200 i += 1
201 if i < len(value):
202 # If next character is newline, remove both \ and newline
203 if value[i] == '\n':
204 i += 1
205 else:
206 # Any non-whitespace character after \ is inserted as-is
207 # Any whitespace character after \ is converted to space
208 if value[i].isspace() and value[i] != '\n':
209 result.append(' ')
210 i += 1
211 else:
212 result.append(value[i])
213 i += 1
214 else:
215 # Handle trailing backslash
216 result.append('\\')
217 else:
218 # All whitespace characters other than newline are converted to spaces
219 if value[i].isspace() and value[i] != '\n':
220 result.append(' ')
221 else:
222 result.append(value[i])
223 i += 1
224
225 return ''.join(result)
226
# Handled Edge Cases: Empty input, Input doesn't start with '(', Input doesn't end with ')',
# Empty tree with no nodes, Content doesn't start with ';', Property key is not in uppercase,
# No property values, Unclosed property value, Mismatched parentheses
Test Name | Status
test_all_lowercase_property
Pass
test_empty_input
Pass
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Pass
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Pass
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Pass
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Pass
test_escaped_tab_in_property_value_is_converted_to_space
Pass
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Pass
test_parentheses_in_property_value_don_t_need_to_be_escaped
Pass
test_properties_without_delimiter
Pass
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Pass
test_single_node_tree
Pass
test_tree_with_no_nodes
Pass
test_two_child_trees
Pass
test_two_nodes
Pass
test_upper_and_lowercase_property
Pass
test_within_property_values_newlines_remain_as_newlines
Pass
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.