@ngel

Finished
2626
2727
2828def parse(input_string: str) -> SgfTree:
29 pass
29 """
30 Parse an SGF string and return a tree structure of properties.
31
32 Args:
33 input_string: A string in SGF format
34
35 Returns:
36 SgfTree: The parsed SGF tree
37
38 Raises:
39 ValueError: If the input is invalid according to SGF format rules
40 """
41 # Edge Case: Empty input
42 if not input_string:
43 raise ValueError("tree missing")
44
45 # Edge Case: Missing opening parenthesis
46 if not input_string.startswith('('):
47 raise ValueError("tree missing")
48
49 # Edge Case: Missing closing parenthesis
50 if not input_string.endswith(')'):
51 raise ValueError("tree missing")
52
53 # Remove the outer parentheses
54 content = input_string[1:-1]
55
56 # Edge Case: Empty tree
57 if not content:
58 raise ValueError("tree with no nodes")
59
60 # Edge Case: Missing semicolon after opening parenthesis
61 if not content.startswith(';'):
62 raise ValueError("tree missing")
63
64 return _parse_tree_content(content)
65
66
67def _parse_tree_content(content: str) -> SgfTree:
68 """
69 Parse the content inside the parentheses of an SGF tree.
70
71 Args:
72 content: The content inside the parentheses
73
74 Returns:
75 SgfTree: The parsed SGF tree
76 """
77 # Skip the initial semicolon
78 index = 1
79
80 # Parse properties of the current node
81 properties = {}
82 while index < len(content) and content[index] not in '(;)' and content[index] != ')':
83 # Parse a property
84 prop_start = index
85
86 # Edge Case: Property key must be uppercase
87 while index < len(content) and content[index].isalpha():
88 if not content[index].isupper():
89 raise ValueError("property must be in uppercase")
90 index += 1
91
92 # Edge Case: Empty property key
93 if index == prop_start:
94 raise ValueError("properties without delimiter")
95
96 key = content[prop_start:index]
97
98 # Edge Case: Missing opening bracket
99 if index >= len(content) or content[index] != '[':
100 raise ValueError("properties without delimiter")
101
102 # Parse values
103 values = []
104 while index < len(content) and content[index] == '[':
105 index += 1 # Skip opening bracket
106 value_start = index
107
108 # Parse value content, handling escapes
109 value_chars = []
110 while index < len(content):
111 if content[index] == ']':
112 # End of value
113 break
114 elif content[index] == '\\':
115 # Handle escape character
116 index += 1
117 if index >= len(content):
118 raise ValueError("properties without delimiter")
119
120 # Any character after \ is inserted as-is, including ]
121 char = content[index]
122 # Apply whitespace conversion rules even for escaped characters
123 if char == 'n' and index >= 1 and content[index-1] == '\\':
124 # Remove actual newlines that come immediately after \
125 # This handles both literal \n in string and actual newlines
126 index += 1
127 continue
128 if char == ']':
129 # Escaped ] should be treated as literal ] and end current value
130 value_chars.append(char)
131 index += 1
132 break # End this value and move to next property
133 elif char in ' \t\r':
134 char = ' '
135 value_chars.append(char)
136 index += 1
137 else:
138 value_chars.append(char)
139 index += 1
140 else:
141 # Normal character
142 char = content[index]
143 # All whitespace characters other than newline are converted to spaces
144 if char in ' \t\r':
145 char = ' '
146 value_chars.append(char)
147 index += 1
148
149 # Edge Case: Missing closing bracket
150 if index >= len(content) or content[index] != ']':
151 raise ValueError("properties without delimiter")
152
153 value = ''.join(value_chars)
154 values.append(value)
155 index += 1 # Skip closing bracket
156
157 properties[key] = values
158
159 # Parse children
160 children = []
161
162 # Handle semicolon shorthand for single child nodes
163 if index < len(content) and content[index] == ';':
164 # This is a shorthand for a single child node
165 child_content = content[index:] # Include the semicolon
166 child_tree = _parse_tree_content(child_content)
167 children.append(child_tree)
168 index = len(content) # We've consumed all content
169
170 # Handle explicit children in parentheses
171 while index < len(content) and content[index] == '(':
172 # Find the matching closing parenthesis for this child
173 paren_count = 1
174 start = index
175 index += 1
176
177 while index < len(content) and paren_count > 0:
178 if content[index] == '(':
179 paren_count += 1
180 elif content[index] == ')':
181 paren_count -= 1
182 index += 1
183
184 # Edge Case: Mismatched parentheses
185 if paren_count != 0:
186 raise ValueError("tree missing")
187
188 # Parse the child tree
189 child_content = content[start+1:index-1]
190
191 # Edge Case: Empty child
192 if not child_content:
193 raise ValueError("tree with no nodes")
194
195 # Edge Case: Missing semicolon in child
196 if not child_content.startswith(';'):
197 raise ValueError("tree missing")
198
199 child_tree = _parse_tree_content(child_content)
200 children.append(child_tree)
201
202 # Edge Case: Unexpected characters after properties and children
203 if index < len(content):
204 raise ValueError("tree missing")
205
206 return SgfTree(properties, children)
207 # Handled Edge Cases: Empty input, missing opening parenthesis, missing closing parenthesis,
208 # empty tree, missing semicolon after opening parenthesis, property key not uppercase,
209 # empty property key, missing opening bracket, missing closing bracket, mismatched parentheses,
210 # empty child, missing semicolon in child, unexpected characters after properties and children
Test NameStatus
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Fail
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Fail
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Fail
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Fail
test_all_lowercase_property
Pass
test_empty_input
Pass
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Pass
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Pass
test_escaped_tab_in_property_value_is_converted_to_space
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_parentheses_in_property_value_don_t_need_to_be_escaped
Pass
test_properties_without_delimiter
Pass
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Pass
test_single_node_tree
Pass
test_tree_with_no_nodes
Pass
test_two_child_trees
Pass
test_two_nodes
Pass
test_upper_and_lowercase_property
Pass
test_within_property_values_newlines_remain_as_newlines
Pass
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.