Finished
2626
2727
def parse(input_string: str) -> SgfTree:
    """Parse an SGF string and return a tree structure of properties.

    The input must be a single tree wrapped in one pair of parentheses
    whose content starts with a node marker (';').

    Args:
        input_string: A string in SGF format.

    Returns:
        SgfTree: The parsed SGF tree.

    Raises:
        ValueError: "tree missing" if the input is empty or is not wrapped
            in parentheses; "tree with no nodes" if the parentheses do not
            enclose at least one ';' node. Errors raised while parsing the
            nodes themselves propagate unchanged.
    """
    # Falsy input (empty string) and input lacking the outer '(' ... ')'
    # are reported with the same message.
    if (
        not input_string
        or not input_string.startswith("(")
        or not input_string.endswith(")")
    ):
        raise ValueError("tree missing")

    # Drop the outer parentheses; what remains must begin with a node.
    content = input_string[1:-1]
    if not content.startswith(";"):
        raise ValueError("tree with no nodes")

    return _parse_tree(content)
65
66
67def _parse_tree(content: str) -> SgfTree:
68 """
69 Parse a tree from its content string.
70
71 Args:
72 content: The content of the tree without outer parentheses
73
74 Returns:
75 SgfTree: The parsed tree
76 """
77 # Edge Case: Content is empty
78 if not content:
79 raise ValueError("tree with no nodes")
80
81 # Edge Case: Content doesn't start with ';'
82 if not content.startswith(';'):
83 raise ValueError("tree with no nodes")
84
85 i = 1 # Skip the initial ';'
86 properties = {}
87 children = []
88
89 # Parse properties
90 while i < len(content) and content[i] != '(' and content[i] != ';':
91 # Parse key
92 key_start = i
93 while i < len(content) and content[i].isupper():
94 i += 1
95
96 # Edge Case: Property key is not in uppercase
97 if i == key_start:
98 raise ValueError("property must be in uppercase")
99
100 key = content[key_start:i]
101
102 # Edge Case: Property has no values
103 if i >= len(content) or content[i] != '[':
104 raise ValueError("properties without delimiter")
105
106 # Parse values
107 values = []
108 while i < len(content) and content[i] == '[':
109 i += 1 # Skip '['
110 value_start = i
111
112 # Parse value with escape sequences
113 value_parts = []
114 while i < len(content):
115 if content[i] == ']':
116 # Check if this bracket is escaped
117 # Count consecutive backslashes immediately before this bracket
118 backslash_count = 0
119 j = i - 1
120 while j >= 0 and content[j] == '\\':
121 backslash_count += 1
122 j -= 1
123 # If odd number of backslashes, the bracket is escaped
124 is_escaped = backslash_count % 2 == 1
125
126 if not is_escaped:
127 # This is a real closing bracket, stop parsing this value
128 break
129 else:
130 # This bracket is escaped, add it to the value
131 value_parts.append(content[i])
132 i += 1
133 else:
134 # For any other character, add it to the value
135 value_parts.append(content[i])
136 i += 1
137
138 # Edge Case: Unclosed value bracket
139 if i >= len(content) or content[i] != ']':
140 raise ValueError("properties without delimiter")
141
142 value = ''.join(value_parts)
143 # Process SGF text type
144 value = _process_sgf_text(value)
145 values.append(value)
146 i += 1 # Skip ']'
147
148 properties[key] = values
149
150 # Parse children
151 while i < len(content) and content[i] == '(':
152 # Find matching parenthesis
153 paren_count = 1
154 start = i + 1
155 i += 1
156
157 while i < len(content) and paren_count > 0:
158 if content[i] == '(':
159 paren_count += 1
160 elif content[i] == ')':
161 paren_count -= 1
162 i += 1
163
164 # Edge Case: Unmatched parenthesis
165 if paren_count != 0:
166 raise ValueError("tree missing")
167
168 child_content = content[start:i-1]
169 children.append(_parse_tree(child_content))
170
171 # Handle shorthand notation (next node without parentheses)
172 if i < len(content) and content[i] == ';':
173 child_content = content[i:]
174 children.append(_parse_tree(child_content))
175
176 return SgfTree(properties, children)
177
178
179def _process_sgf_text(text: str) -> str:
180 """
181 Process SGF Text type according to SGF specification.
182
183 Args:
184 text: Raw text value from SGF
185
186 Returns:
187 str: Processed text
188 """
189 result = []
190 i = 0
191 while i < len(text):
192 if text[i] == '\\':
193 i += 1
194 # Edge Case: Escape at end of string
195 if i >= len(text):
196 break
197
198 # If next character is newline, remove both backslash and newline
199 if text[i] == '\n':
200 i += 1
201 continue
202 else:
203 # Any non-whitespace character after \ is inserted as-is
204 # Any whitespace character after \ is converted according to rules
205 char = text[i]
206 if char in ' \t\r':
207 # Convert whitespace to space
208 result.append(' ')
209 else:
210 result.append(char)
211 i += 1
212 else:
213 char = text[i]
214 if char == '\n':
215 result.append('\n')
216 elif char in ' \t\r':
217 # All whitespace other than newline converted to space
218 result.append(' ')
219 else:
220 result.append(char)
221 i += 1
222
223 return ''.join(result)
# Handled Edge Cases: Empty input, Input doesn't start with '(', Input doesn't end with ')', Empty tree with no nodes, Tree doesn't start with ';', Content is empty, Content doesn't start with ';', Property key is not in uppercase, Property has no values, Unclosed value bracket, Escape at end of string, Unmatched parenthesis
# Handled Edge Cases: Empty input, Input doesn't start with '(', Input doesn't end with ')', Empty tree with no nodes, Tree doesn't start with ';'
# Handled Edge Cases: Content is empty, Content doesn't start with ';', Property key is not in uppercase, Property has no values, Unclosed value bracket, Escape at end of string, Unmatched parenthesis
# Handled Edge Cases: Escape at end of string
Test Name | Status
test_upper_and_lowercase_property
Fail
test_all_lowercase_property
Pass
test_empty_input
Pass
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Pass
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Pass
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Pass
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Pass
test_escaped_tab_in_property_value_is_converted_to_space
Pass
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Pass
test_parentheses_in_property_value_don_t_need_to_be_escaped
Pass
test_properties_without_delimiter
Pass
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Pass
test_single_node_tree
Pass
test_tree_with_no_nodes
Pass
test_two_child_trees
Pass
test_two_nodes
Pass
test_within_property_values_newlines_remain_as_newlines
Pass
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.