future

Finished
2626
2727
def parse(input_string: str) -> SgfTree:
    """
    Parse an SGF string and return a tree structure of properties.

    The whole input must be exactly one tree wrapped in a single pair of
    parentheses; anything left over after that tree is treated as malformed.

    Args:
        input_string: A string in SGF format

    Returns:
        SgfTree: The parsed SGF tree

    Raises:
        ValueError: If the input is malformed or doesn't follow SGF format rules
    """
    # Edge Cases: empty input, or input not wrapped in '(' ... ')'
    if (
        not input_string
        or not input_string.startswith('(')
        or not input_string.endswith(')')
    ):
        raise ValueError("tree missing")

    # Remove the outer parentheses
    content = input_string[1:-1]

    # Edge Case: Empty content after removing parentheses
    if not content:
        raise ValueError("tree with no nodes")

    # Parse the tree; `end` is the index of the first character that
    # _parse_tree did not consume.
    tree, end = _parse_tree(content, 0)

    # Edge Case: _parse_tree stops at the first character it cannot attach to
    # the root (a stray ')' or text following the variations). Ignoring the
    # remainder would silently drop part of the input, so reject it instead.
    if end < len(content):
        raise ValueError("unexpected content after tree")

    return tree
63
64
def _parse_tree(content: str, index: int) -> tuple[SgfTree, int]:
    """
    Parse one node (and, recursively, its descendants) out of ``content``.

    Parsing starts at ``index``. A leading ';' is skipped if present, then
    the node's properties are read, then its children: either parenthesised
    variations or a ';'-introduced follow-up node.

    Args:
        content: The SGF content string
        index: The starting index to parse from

    Returns:
        tuple: The parsed SgfTree and the index of the first character that
        was not consumed

    Raises:
        ValueError: On a lowercase key, a key without a bracketed value, an
            unterminated value, a variation without its closing ')', or
            when ``index`` is already past the end of ``content``
    """
    length = len(content)

    # Nothing left to read: there is no node here.
    if index >= length:
        raise ValueError("tree with no nodes")

    # The node marker is optional: shorthand callers have already skipped it.
    pos = index + 1 if content[index] == ';' else index

    # --- properties -------------------------------------------------------
    node_props = {}
    while pos < length and content[pos] not in '();':
        # Key: a run of letters, every one of which must be uppercase.
        name_begin = pos
        while pos < length and content[pos].isalpha():
            if not content[pos].isupper():
                raise ValueError("property must be in uppercase")
            pos += 1

        if pos == name_begin:
            # Not a letter and not a structural character.
            raise ValueError("properties without delimiter")
        name = content[name_begin:pos]

        # Values: one or more consecutive "[...]" groups.
        collected = []
        while pos < length and content[pos] == '[':
            body_begin = pos + 1
            pos = body_begin

            # Scan forward to the closing ']'. A backslash escapes the
            # character after it, so step over both; this way an escaped
            # ']' is never mistaken for the terminator.
            while pos < length and content[pos] != ']':
                pos += 2 if content[pos] == '\\' else 1

            # Ran off the end without finding an unescaped ']'.
            if pos >= length:
                raise ValueError("properties without delimiter")

            collected.append(_unescape_text(content[body_begin:pos]))
            pos += 1  # step past ']'

        if not collected:
            # A key must be followed by at least one value.
            raise ValueError("properties without delimiter")

        node_props[name] = collected

    # --- children ---------------------------------------------------------
    kids = []
    while pos < length and content[pos] in '(;':
        opener = content[pos]
        subtree, pos = _parse_tree(content, pos + 1)
        kids.append(subtree)

        # Only a parenthesised variation has a closing ')' to consume;
        # a ';' shorthand child simply runs on.
        if opener == '(':
            if pos >= length or content[pos] != ')':
                raise ValueError("tree missing")
            pos += 1

    return SgfTree(node_props, kids), pos
165
166
167def _unescape_text(text: str) -> str:
168 """
169 Unescape SGF text according to SGF specification.
170
171 Args:
172 text: The escaped text
173
174 Returns:
175 str: The unescaped text
176 """
177 result = []
178 i = 0
179 while i < len(text):
180 if text[i] == '\\':
181 i += 1
182 # Edge Case: Escape at end of string
183 if i >= len(text):
184 # According to SGF spec, a \ at the end of a value just disappears
185 break
186
187 char = text[i]
188 # Newlines are removed if they come immediately after a \
189 if char == '\n':
190 # Do nothing, skip the newline
191 pass
192 # All whitespace characters other than newline are converted to spaces
193 elif char.isspace() and char != '\n':
194 result.append(' ')
195 # Any non-whitespace character after \ is inserted as-is
196 else:
197 result.append(char)
198 else:
199 # Regular character - convert whitespace other than newline to spaces
200 if text[i].isspace() and text[i] != '\n':
201 result.append(' ')
202 else:
203 result.append(text[i])
204 i += 1
205
206 return ''.join(result)
207
208# Handled Edge Cases: Empty input, Input doesn't start with '(', Input doesn't end with ')', Empty content after removing parentheses, Unexpected end of content, Node doesn't start with ';', Key is empty, Key is not uppercase, Unclosed value bracket, No values for property, Escape at end of string
Test Name | Status
test_all_lowercase_property
Pass
test_empty_input
Pass
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Pass
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Pass
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Pass
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Pass
test_escaped_tab_in_property_value_is_converted_to_space
Pass
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Pass
test_parentheses_in_property_value_don_t_need_to_be_escaped
Pass
test_properties_without_delimiter
Pass
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Pass
test_single_node_tree
Pass
test_tree_with_no_nodes
Pass
test_two_child_trees
Pass
test_two_nodes
Pass
test_upper_and_lowercase_property
Pass
test_within_property_values_newlines_remain_as_newlines
Pass
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.