Finished
2626
2727
def parse(input_string: str) -> SgfTree:
    """
    Parse an SGF string and return a tree structure of properties.

    The input must be a single game tree wrapped in parentheses, e.g.
    ``(;FF[4];B[aa])``.

    Args:
        input_string: A string in SGF format

    Returns:
        SgfTree: The parsed SGF tree

    Raises:
        ValueError: If the input is invalid according to SGF format rules
    """
    # The whole tree must be wrapped in a single pair of parentheses.
    if not input_string or not input_string.startswith('('):
        raise ValueError("tree missing")
    if not input_string.endswith(')'):
        raise ValueError("tree missing")

    # Work on the text between the outer parentheses.
    content = input_string[1:-1]
    if not content:
        raise ValueError("tree with no nodes")

    tree, index = _parse_tree(content, 0)

    # The tree parser stops at the first character it does not understand.
    # Anything left over means the input was malformed; fail loudly instead
    # of silently returning a truncated tree.
    if index != len(content):
        raise ValueError("unexpected content after tree")

    return tree
59
60
61def _parse_tree(content: str, index: int) -> tuple[SgfTree, int]:
62 """
63 Parse a tree from the content string starting at index.
64
65 Args:
66 content: The SGF content string
67 index: The starting index to parse from
68
69 Returns:
70 tuple: A tuple containing the parsed SgfTree and the next index to parse from
71 """
72 # Edge Case: Missing semicolon at the beginning of a node
73 if index >= len(content) or content[index] != ';':
74 raise ValueError("tree with no nodes")
75
76 index += 1 # Skip the semicolon
77
78 # Parse properties of the current node
79 properties = {}
80 while index < len(content) and content[index] not in '();':
81 # Parse key
82 key_start = index
83 while index < len(content) and content[index].isalpha() and content[index].isupper():
84 index += 1
85
86 # Edge Case: Key is not all uppercase
87 if index == key_start:
88 raise ValueError("property must be in uppercase")
89
90 key = content[key_start:index]
91
92 # Edge Case: Missing opening bracket for property value
93 if index >= len(content) or content[index] != '[':
94 raise ValueError("properties without delimiter")
95
96 # Parse values
97 values = []
98 while index < len(content) and content[index] == '[':
99 index += 1 # Skip the opening bracket
100 value, index = _parse_value(content, index)
101
102 # Edge Case: Missing closing bracket
103 if index >= len(content) or content[index] != ']':
104 raise ValueError("properties without delimiter")
105
106 index += 1 # Skip the closing bracket
107 values.append(value)
108
109 properties[key] = values
110
111 # Parse children
112 children = []
113 while index < len(content) and content[index] == '(':
114 child, index = _parse_tree(content, index + 1) # Skip the opening parenthesis
115
116 # Skip the closing parenthesis
117 # Edge Case: Missing closing parenthesis for child
118 if index >= len(content) or content[index] != ')':
119 raise ValueError("tree missing")
120
121 index += 1 # Skip the closing parenthesis
122 children.append(child)
123
124 return SgfTree(properties, children), index
125
126
127def _parse_value(content: str, index: int) -> tuple[str, int]:
128 """
129 Parse a property value according to SGF Text type rules.
130
131 Args:
132 content: The SGF content string
133 index: The starting index to parse from
134
135 Returns:
136 tuple: A tuple containing the parsed value string and the next index to parse from
137 """
138 result = []
139 while index < len(content) and content[index] != ']':
140 if content[index] == '\\':
141 # Handle escape character
142 index += 1
143 # Edge Case: Escape at end of value
144 if index >= len(content) or content[index] == ']':
145 raise ValueError("properties without delimiter")
146
147 char = content[index]
148 if char == 'n' and (index + 1 >= len(content) or content[index + 1] == ']'):
149 # Special case: escaped 'n' at end is just 'n'
150 result.append('n')
151 elif char == 'n' and index + 1 < len(content) and content[index + 1] != ']':
152 # Check if it's actually \n followed by more content
153 result.append('\\n')
154 elif char == 't' and (index + 1 >= len(content) or content[index + 1] == ']'):
155 # Special case: escaped 't' at end is just 't'
156 result.append('t')
157 elif char == 't' and index + 1 < len(content) and content[index + 1] != ']':
158 # Check if it's actually \t followed by more content
159 result.append('\\t')
160 elif char in ' \n\t':
161 # Whitespace after escape
162 if char == ' ':
163 result.append(' ')
164 elif char == '\n':
165 # Newlines are removed if they come immediately after a \
166 pass
167 elif char == '\t':
168 result.append(' ')
169 else:
170 # Any non-whitespace character after \ is inserted as-is
171 result.append(char)
172 elif content[index] == '\n':
173 # Newlines remain as newlines
174 result.append('\n')
175 elif content[index] in ' \t':
176 # All whitespace characters other than newline are converted to spaces
177 result.append(' ')
178 else:
179 result.append(content[index])
180 index += 1
181
182 return ''.join(result), index
183# Handled Edge Cases: Empty input, missing opening parenthesis, missing closing parenthesis, missing semicolon, key not uppercase, missing opening bracket, missing closing bracket, missing closing parenthesis for child, escape at end of value
Test Name | Status
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Fail
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Fail
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Fail
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Fail
test_parentheses_in_property_value_don_t_need_to_be_escaped
Fail
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Fail
test_two_nodes
Fail
test_upper_and_lowercase_property
Fail
test_all_lowercase_property
Pass
test_empty_input
Pass
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Pass
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Pass
test_escaped_tab_in_property_value_is_converted_to_space
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_properties_without_delimiter
Pass
test_single_node_tree
Pass
test_tree_with_no_nodes
Pass
test_two_child_trees
Pass
test_within_property_values_newlines_remain_as_newlines
Pass
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.