Finished
2626
2727
2828def parse(input_string: str) -> SgfTree:
29 pass
29 """
30 Parse an SGF string and return a tree structure of properties.
31
32 Args:
33 input_string: A string in SGF format
34
35 Returns:
36 SgfTree: A tree structure representing the SGF data
37
38 Raises:
39 ValueError: If the input is invalid according to SGF format rules
40 """
41 # Edge Case: Empty input or input that doesn't start with '('
42 if not input_string or not input_string.startswith('('):
43 raise ValueError("tree missing")
44
45 # Edge Case: Input that doesn't end with ')'
46 if not input_string.endswith(')'):
47 raise ValueError("tree missing")
48
49 # Remove the outer parentheses
50 content = input_string[1:-1]
51
52 # Edge Case: Empty content after removing parentheses
53 if not content:
54 raise ValueError("tree with no nodes")
55
56 # Edge Case: Content that doesn't start with ';'
57 if not content.startswith(';'):
58 raise ValueError("tree missing")
59
60 return _parse_tree(content)
61
62
63def _parse_tree(content: str) -> SgfTree:
64 """
65 Parse a tree from SGF content.
66
67 Args:
68 content: SGF content starting with ';'
69
70 Returns:
71 SgfTree: Parsed tree
72 """
73 # Skip the initial ';'
74 content = content[1:]
75
76 # Parse properties of the current node
77 properties, remaining = _parse_properties(content)
78
79 children = []
80
81 # Parse children if any
82 while remaining:
83 if remaining.startswith('('):
84 # Parse variation (child in parentheses)
85 # Find the matching closing parenthesis
86 balance = 1
87 i = 1 # Start after the opening '('
88 while i < len(remaining) and balance > 0:
89 if remaining[i] == '(':
90 balance += 1
91 elif remaining[i] == ')':
92 balance -= 1
93 i += 1
94
95 # Edge Case: Unmatched parentheses
96 if balance != 0:
97 raise ValueError("tree missing")
98
99 # Parse the child tree
100 child_content = remaining[1:i-1]
101 child_tree = _parse_tree(child_content)
102 children.append(child_tree)
103
104 # Move to the next potential child
105 remaining = remaining[i:]
106 elif remaining.startswith(';'):
107 # Parse sequential node (child after semicolon)
108 child_tree = _parse_tree(remaining)
109 children.append(child_tree)
110 remaining = '' # All remaining content consumed by child
111 else:
112 # Invalid content - not a variation or sequential node
113 break
114
115 return SgfTree(properties, children)
116
117
118def _parse_properties(content: str) -> tuple[dict, str]:
119 """
120 Parse properties from SGF content.
121
122 Args:
123 content: SGF content containing properties
124
125 Returns:
126 tuple: (properties dict, remaining content)
127 """
128 properties = {}
129 remaining = content
130
131 while remaining and remaining[0].isalpha():
132 # Parse property key
133 key_end = 0
134 while key_end < len(remaining) and remaining[key_end].isalpha():
135 key_end += 1
136
137 key = remaining[:key_end]
138
139 # Edge Case: Property key is not uppercase
140 if key != key.upper():
141 raise ValueError("property must be in uppercase")
142
143 remaining = remaining[key_end:]
144
145 # Edge Case: Missing opening bracket for property value
146 if not remaining.startswith('['):
147 raise ValueError("properties without delimiter")
148
149 values = []
150
151 # Parse all values for this key
152 while remaining.startswith('['):
153 remaining = remaining[1:] # Skip '['
154
155 # Parse value
156 value, remaining = _parse_value(remaining)
157
158 # Edge Case: Missing closing bracket
159 if not remaining.startswith(']'):
160 raise ValueError("properties without delimiter")
161
162 remaining = remaining[1:] # Skip ']'
163 values.append(value)
164
165 properties[key] = values
166
167 return properties, remaining
168
169
170def _parse_value(content: str) -> tuple[str, str]:
171 """
172 Parse a value from SGF content, handling escape sequences.
173
174 Args:
175 content: SGF content starting with the value
176
177 Returns:
178 tuple: (parsed value, remaining content)
179 """
180 value = []
181 i = 0
182
183 while i < len(content):
184 char = content[i]
185
186 if char == ']':
187 # End of value
188 break
189 elif char == '\\':
190 # Handle escape sequence
191 i += 1
192 if i >= len(content):
193 # Edge Case: Escape character at end of content
194 value.append('\\')
195 break
196
197 next_char = content[i]
198 if next_char == '\n':
199 # Newline is removed if immediately after \, but we add a space
200 value.append(' ')
201 elif next_char in ' \t\r':
202 # Whitespace after escape converted to space
203 value.append(' ')
204 else:
205 # Any non-whitespace character after \ is inserted as-is
206 value.append(next_char)
207 elif char in ' \t\r':
208 # All whitespace characters other than newline are converted to spaces
209 value.append(' ')
210 elif char == '\n':
211 # Newlines remain as newlines unless immediately after \
212 value.append(char)
213 else:
214 # Regular character
215 value.append(char)
216
217 i += 1
218
219 return ''.join(value), content[i:]
220
221# Handled Edge Cases: Empty input, missing opening parenthesis, missing closing parenthesis, empty content after parentheses, missing semicolon, unmatched parentheses, property key not uppercase, missing opening bracket, missing closing bracket, escape character at end of content\n
Test NameStatus
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Fail
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Fail
test_all_lowercase_property
Pass
test_empty_input
Pass
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Pass
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Pass
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Pass
test_escaped_tab_in_property_value_is_converted_to_space
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Pass
test_parentheses_in_property_value_don_t_need_to_be_escaped
Pass
test_properties_without_delimiter
Pass
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Pass
test_single_node_tree
Pass
test_tree_with_no_nodes
Pass
test_two_child_trees
Pass
test_two_nodes
Pass
test_upper_and_lowercase_property
Pass
test_within_property_values_newlines_remain_as_newlines
Pass
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.