| 
					
				 | 
			
			
				@@ -10,44 +10,116 @@ header_pattern = re.compile(r"--- !u!(\d+) &(\S+)") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 # Regex to find and remove the tags for the parser 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 tag_remover_pattern = re.compile(r"!u!\d+\s") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+def preprocess_unity_yaml(yaml_content): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    Preprocesses Unity YAML content to handle various edge cases that can break the parser. 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    lines = yaml_content.split('\n') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    processed_lines = [] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    in_document = False 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    for i, line in enumerate(lines): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        # Check if we're starting a new document 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        if line.startswith('---'): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            in_document = True 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            processed_lines.append(line) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            continue 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        # Skip empty lines and comments 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        if not line.strip() or line.strip().startswith('#'): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            processed_lines.append(line) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            continue 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        if in_document: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            # Handle the case where a key starts at column 0 but should be indented 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            # This often happens with Unity components like RectTransform, Transform, etc. 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            if ':' in line and not line.startswith(' ') and not line.startswith('\t'): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                # Check if the previous line was a document separator or another component 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                if i > 0 and not lines[i-1].startswith('---'): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    # Check if this looks like a Unity component name 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    component_match = re.match(r'^([A-Z][a-zA-Z0-9]*):$', line.strip()) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    if component_match: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        # This is likely a component that should be a key under the main object 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        processed_lines.append(f"  {line.strip()}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        continue 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            # Handle empty key issue (:: or just :) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            if line.strip().startswith(':') and 'Any' in line: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                processed_lines.append(line.replace(':', 'key_for_any:')) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                continue 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            # Handle cases where there might be invalid indentation after colons 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            if ':' in line and not line.strip().endswith(':'): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                # Check for malformed key-value pairs 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                parts = line.split(':', 1) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                if len(parts) == 2 and parts[1].strip() == '': 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    # This is a key with no value, which is fine in YAML 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    processed_lines.append(line) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    continue 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        processed_lines.append(line) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    return '\n'.join(processed_lines) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 def convert_unity_yaml_to_json(yaml_content): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     Parses a Unity YAML file string, preserving fileID references, and returns a JSON string. 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     json_data = [] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-     
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     # First, find all the original headers 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     headers = header_pattern.findall(yaml_content) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-     
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    # Next, remove the problematic tags from the content so the parser doesn't fail 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    # Remove the problematic tags from the content 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     sanitized_content = tag_remover_pattern.sub("", yaml_content) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-     
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    # HACK: Unity sometimes generates invalid YAML with an empty key. 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    # We'll correct this before parsing. 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    lines = sanitized_content.split('\n') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    corrected_lines = [] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    for line in lines: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        if line.strip().startswith(':') and 'Any' in line: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            corrected_lines.append(line.replace(':', 'key_for_any:')) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        else: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            corrected_lines.append(line) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-     
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    corrected_content = "\n".join(corrected_lines) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    # Use the standard SafeLoader, as the tags are now gone 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    documents = list(yaml.safe_load_all(corrected_content)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    # Apply additional preprocessing to handle Unity-specific YAML issues 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    preprocessed_content = preprocess_unity_yaml(sanitized_content) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    # The first document is the file info, which we can often skip if it's empty 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    try: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        # Try to parse with safe_load_all 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        documents = list(yaml.safe_load_all(preprocessed_content)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    except yaml.YAMLError as e: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        print(f"YAML parsing error: {e}", file=sys.stderr) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        print("Attempting to parse each document separately...", file=sys.stderr) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        # If that fails, try to split by document separators and parse each separately 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        document_parts = re.split(r'\n---[^\n]*\n', preprocessed_content) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        documents = [] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        for i, part in enumerate(document_parts): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            if not part.strip(): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                continue 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            try: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                # Add a temporary document separator for parsing 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                if i > 0:  # Skip the first part which might not need a separator 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    part = '---\n' + part 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                doc = yaml.safe_load(part) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                if doc is not None: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    documents.append(doc) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            except yaml.YAMLError as e2: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                print(f"Failed to parse document {i}: {e2}", file=sys.stderr) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                print(f"Document content preview: {part[:200]}...", file=sys.stderr) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                # Skip this document and continue 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                continue 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    # Filter out None documents and empty string documents 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    documents = [doc for doc in documents if doc is not None and doc != ''] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    # Remove the first document if it's just file info 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     if documents and isinstance(documents[0], str) and 'YAML' in documents[0]: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         documents.pop(0) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     if len(headers) != len(documents): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         print(f"Warning: Mismatch between headers found ({len(headers)}) and documents parsed ({len(documents)}).", file=sys.stderr) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        print(f"Headers: {len(headers)}, Documents: {len(documents)}", file=sys.stderr) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    # Match documents with their headers 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     for i, doc in enumerate(documents): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         if i < len(headers): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             type_id, anchor_id = headers[i] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-             
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             structured_doc = { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 'type_id': type_id, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 'anchor_id': anchor_id, 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -59,12 +131,13 @@ def convert_unity_yaml_to_json(yaml_content): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             json_data.append({'data': doc}) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     # Use compact encoding for the final JSON 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    return json.dumps(json_data) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    return json.dumps(json_data, indent=2)  # Changed to indented for better readability 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 def main(): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     parser = argparse.ArgumentParser(description='Convert Unity YAML assets to JSON.') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     parser.add_argument('input_path', type=str, help='Absolute path to the input Unity asset file.') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     parser.add_argument('output_path', type=str, help='Absolute path for the output JSON file.') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    parser.add_argument('--debug', action='store_true', help='Enable debug output') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     args = parser.parse_args() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     input_path = args.input_path 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -76,19 +149,26 @@ def main(): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         if not os.path.exists(output_dir): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             os.makedirs(output_dir) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        with open(input_path, 'r') as f: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        with open(input_path, 'r', encoding='utf-8') as f: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             content = f.read() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        if args.debug: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            print(f"Input file size: {len(content)} characters", file=sys.stderr) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            print(f"First 500 characters:\n{content[:500]}", file=sys.stderr) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         json_output = convert_unity_yaml_to_json(content) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        with open(output_path, 'w') as f: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        with open(output_path, 'w', encoding='utf-8') as f: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             f.write(json_output) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         print(f"Successfully converted '{input_path}' to '{output_path}'") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     except Exception as e: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         print(f"An error occurred: {e}", file=sys.stderr) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        if args.debug: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            import traceback 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            traceback.print_exc() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         sys.exit(1) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 if __name__ == "__main__": 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    main() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    main() 
			 |