chris

Finished
2626
2727
def parse(input_string: str) -> SgfTree:
    """
    Parse an SGF string and return a tree structure of properties.

    Args:
        input_string: A string in SGF format, e.g. "(;FF[4];B[aa])"

    Returns:
        SgfTree: The parsed SGF tree

    Raises:
        ValueError: "tree missing" if the input is empty or not wrapped in
            parentheses; "tree with no nodes" if the parentheses do not
            contain a node starting with ';'. Errors raised while parsing
            the nodes themselves propagate unchanged.
    """
    # A tree is always delimited by '(' ... ')'; anything else (including
    # empty input) is not a tree at all.
    if (
        not input_string
        or not input_string.startswith('(')
        or not input_string.endswith(')')
    ):
        raise ValueError("tree missing")

    # Drop the outer parentheses; what remains must begin with a node (';').
    # An empty remainder fails the same check, so one test covers both cases.
    content = input_string[1:-1]
    if not content.startswith(';'):
        raise ValueError("tree with no nodes")

    return _parse_tree(content)
61
62
def _parse_tree(content: str) -> SgfTree:
    """
    Parse a tree from a content string.

    The content is one node (';' followed by its properties) and then either
    another node (';...'), which becomes the single child of this node, or
    zero or more parenthesised variations ('(...)'), each of which becomes a
    child tree.

    Args:
        content: Content string starting with ';' (outer parentheses removed)

    Returns:
        SgfTree: The parsed tree

    Raises:
        ValueError: "property must be in uppercase" for a key that is not
            entirely uppercase letters; "properties without delimiter" for a
            key with no '[' value or an unterminated value; "tree missing"
            for a child variation whose parentheses are unbalanced.
    """
    length = len(content)
    index = 1  # Skip the initial ';'

    # Parse this node's properties: KEY[value][value]...
    properties = {}
    while index < length and content[index] not in '();':
        key_start = index
        while index < length and content[index].isalpha():
            index += 1

        # An empty key (non-letter character) also fails isupper().
        key = content[key_start:index]
        if not key.isupper():
            raise ValueError("property must be in uppercase")

        if index >= length or content[index] != '[':
            raise ValueError("properties without delimiter")

        values = []
        while index < length and content[index] == '[':
            index += 1  # Skip '['
            value_start = index

            # Scan to the closing ']'. A backslash escapes the following
            # character, so an escaped ']' does not terminate the value.
            # '(', ')', ';' and '[' inside a value need no escaping.
            while index < length and content[index] != ']':
                index += 2 if content[index] == '\\' else 1

            # Ran off the end without finding ']'.
            if index >= length:
                raise ValueError("properties without delimiter")

            values.append(_unescape_value(content[value_start:index]))
            index += 1  # Skip ']'

        properties[key] = values

    # Sequential node: "…;NEXT…" means the rest of the content is a single
    # child chain hanging off this node. The recursive call consumes the
    # remainder, including any variations that follow the last node.
    if index < length and content[index] == ';':
        return SgfTree(properties, [_parse_tree(content[index:])])

    # Variations: each balanced "(...)" group is a separate child tree.
    children = []
    while index < length and content[index] == '(':
        start = index
        depth = 0
        in_value = False
        while index < length:
            char = content[index]
            if in_value:
                # Parentheses inside a property value are plain text and
                # must not affect nesting depth.
                if char == '\\':
                    index += 1  # Skip the escaped character
                elif char == ']':
                    in_value = False
            elif char == '[':
                in_value = True
            elif char == '(':
                depth += 1
            elif char == ')':
                depth -= 1
                if depth == 0:
                    index += 1  # Include the closing ')'
                    break
            index += 1
        else:
            # Reached the end of content without closing the variation.
            raise ValueError("tree missing")

        children.append(parse(content[start:index]))

    # Anything left after the last variation is ignored, as before.
    return SgfTree(properties, children)
145
146
def _unescape_value(value: str) -> str:
    """
    Unescape an SGF text value according to the SGF text rules.

    Rules applied:
      * A backslash followed by a newline removes both characters.
      * A backslash followed by any other character yields that character,
        except that escaped whitespace still follows the whitespace rule.
      * Newlines are kept; every other whitespace character becomes a space.
      * A trailing lone backslash is kept as a literal backslash.

    Args:
        value: The escaped SGF text value (without the enclosing brackets)

    Returns:
        str: The unescaped value
    """
    result = []
    length = len(value)
    i = 0
    while i < length:
        char = value[i]

        if char == '\\':
            i += 1
            if i >= length:
                # Trailing backslash with nothing to escape: keep it.
                result.append('\\')
                break
            char = value[i]
            if char == '\n':
                # Escaped newline is a soft line break: drop it entirely.
                i += 1
                continue

        # Whitespace other than newline becomes a single space. This applies
        # to escaped whitespace too (e.g. backslash + TAB -> ' ').
        if char != '\n' and char.isspace():
            result.append(' ')
        else:
            result.append(char)
        i += 1

    return ''.join(result)
183
# Handled Edge Cases: Empty input, missing outer parentheses, empty content after parentheses removal, missing semicolon, key not in uppercase, key without values, unclosed value bracket, missing closing bracket, unclosed parenthesis, trailing backslash
Test Name | Status
test_escaped_tab_in_property_value_is_converted_to_space
Fail
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Fail
test_parentheses_in_property_value_don_t_need_to_be_escaped
Fail
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Fail
test_two_nodes
Fail
test_all_lowercase_property
Pass
test_empty_input
Pass
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Pass
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Pass
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Pass
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Pass
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_properties_without_delimiter
Pass
test_single_node_tree
Pass
test_tree_with_no_nodes
Pass
test_two_child_trees
Pass
test_upper_and_lowercase_property
Pass
test_within_property_values_newlines_remain_as_newlines
Pass
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.