yahoo

Finished
2626
2727
2828def parse(input_string: str) -> SgfTree:
29 pass
29 """
30 Parse an SGF string and return a tree structure of properties.
31
32 Args:
33 input_string: A string in SGF format
34
35 Returns:
36 SgfTree: The parsed SGF tree
37
38 Raises:
39 ValueError: If the input is invalid according to SGF format rules
40 """
41 # Edge Case: Empty input
42 if not input_string:
43 raise ValueError("tree missing")
44
45 # Edge Case: Input doesn't start with '('
46 if not input_string.startswith('('):
47 raise ValueError("tree missing")
48
49 # Edge Case: Input doesn't end with ')'
50 if not input_string.endswith(')'):
51 raise ValueError("tree missing")
52
53 # Remove the outer parentheses
54 content = input_string[1:-1]
55
56 # Edge Case: Empty tree with no nodes
57 if not content:
58 raise ValueError("tree with no nodes")
59
60 # Edge Case: Tree doesn't start with ';'
61 if not content.startswith(';'):
62 raise ValueError("tree missing")
63
64 return _parse_tree(content)
65
66
67def _parse_tree(content: str) -> SgfTree:
68 """
69 Parse the content of a tree (without outer parentheses).
70
71 Args:
72 content: The content of the tree
73
74 Returns:
75 SgfTree: The parsed tree
76 """
77 # Edge Case: Content is empty
78 if not content:
79 raise ValueError("tree with no nodes")
80
81 # Edge Case: Content doesn't start with ';'
82 if not content.startswith(';'):
83 raise ValueError("tree missing")
84
85 # Find the first node and any subsequent variations
86 i = 1 # Skip the initial ';'
87
88 # Parse properties of the first node
89 properties = {}
90 while i < len(content) and content[i] != '(':
91 # Check if we've reached the start of a new node (semicolon not in property value)
92 if content[i] == ';':
93 # This is the start of a new node, stop parsing properties for current node
94 # In SGF, semicolons always start new nodes, not separate properties within the same node
95 # Multiple properties in the same node are concatenated without semicolons
96 break
97 # Parse a property
98 # Find the property key
99 key_start = i
100 # Parse the key character by character to check for uppercase
101 while i < len(content) and content[i].isupper():
102 i += 1
103
104 # Edge Case: Property key is empty
105 if i == key_start:
106 # Check if we're at the end of properties section
107 if i < len(content) and content[i] in ';(':
108 # This is the end of properties, not an error
109 break
110 else:
111 # Invalid character at start of property - not uppercase and not a delimiter
112 raise ValueError("property must be in uppercase" if content[i].isalpha() and not content[i].isupper() else "properties without delimiter")
113
114 key = content[key_start:i]
115
116 # Edge Case: Property key is not in uppercase
117 # Check that all characters in the key are uppercase
118 if not all(c.isupper() for c in key):
119 raise ValueError("property must be in uppercase")
120
121 # Parse property values
122 values = []
123 while i < len(content) and content[i] == '[':
124 i += 1 # Skip '['
125 value_start = i
126
127 # Parse the value, handling escapes
128 value_chars = []
129 while i < len(content) and content[i] != ']':
130 if content[i] == '\\':
131 i += 1
132 if i >= len(content):
133 raise ValueError("properties without delimiter")
134
135 # Handle escaped characters according to SGF text rules
136 # The escape character \ is preserved, and the character after it is processed
137 char = content[i]
138 if char == '\n':
139 # Newlines are removed if they come immediately after a \
140 # The \ is preserved, but the newline is removed
141 value_chars.append('\\')
142 elif char in (' ', '\t', '\r'):
143 # All whitespace characters other than newline are converted to spaces
144 # The \ is preserved, and the whitespace is converted to space
145 value_chars.append('\\')
146 value_chars.append(' ')
147 else:
148 # Any non-whitespace character after \ is inserted as-is
149 # The \ is preserved, and the character is inserted as-is
150 value_chars.append('\\')
151 value_chars.append(char)
152 elif content[i] == '\n':
153 # Newlines remain as newlines unless escaped
154 value_chars.append(content[i])
155 elif content[i] in (' ', '\t', '\r'):
156 # All whitespace characters other than newline are converted to spaces
157 value_chars.append(' ')
158 else:
159 value_chars.append(content[i])
160 i += 1
161
162 # Edge Case: Unclosed property value
163 if i >= len(content) or content[i] != ']':
164 raise ValueError("properties without delimiter")
165
166 value = ''.join(value_chars)
167 values.append(value)
168 i += 1 # Skip ']'
169
170 # Edge Case: Property has no values
171 if not values:
172 raise ValueError("properties without delimiter")
173
174 properties[key] = values
175
176 # Parse children
177 children = []
178 while i < len(content) and content[i] == '(':
179 # Find the matching parenthesis
180 paren_count = 1
181 start = i + 1
182 i += 1
183
184 while i < len(content) and paren_count > 0:
185 if content[i] == '(':
186 paren_count += 1
187 elif content[i] == ')':
188 paren_count -= 1
189 i += 1
190
191 # Edge Case: Mismatched parentheses
192 if paren_count != 0:
193 raise ValueError("tree missing")
194
195 # Parse the subtree
196 subtree_content = content[start:i-1]
197 child = _parse_tree(subtree_content)
198 children.append(child)
199
200 # Handle shorthand notation - if there's a semicolon, it's a child node in shorthand
201 # In SGF, consecutive semicolons create a chain of nested children
202 if i < len(content) and content[i] == ';':
203 # Parse the rest as a chain of nested children
204 child_content = content[i:]
205 child = _parse_tree(child_content)
206 children.append(child)
207
208 return SgfTree(properties, children)
209
210# Handled Edge Cases: Empty input, Input doesn't start with '(', Input doesn't end with ')', Empty tree with no nodes, Tree doesn't start with ';', Content is empty, Content doesn't start with ';', Property key is empty, Property key is not in uppercase, Unclosed property value, Property has no values, Mismatched parentheses
Test NameStatus
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Fail
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Fail
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Fail
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Fail
test_escaped_tab_in_property_value_is_converted_to_space
Fail
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Fail
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Fail
test_upper_and_lowercase_property
Fail
test_all_lowercase_property
Pass
test_empty_input
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_parentheses_in_property_value_don_t_need_to_be_escaped
Pass
test_properties_without_delimiter
Pass
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Pass
test_single_node_tree
Pass
test_tree_with_no_nodes
Pass
test_two_child_trees
Pass
test_two_nodes
Pass
test_within_property_values_newlines_remain_as_newlines
Pass
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.