"""Convert Unity YAML asset files (scenes, prefabs, ...) to compact JSON."""

import argparse
import json
import os
import re
import sys

import yaml

# Regex to capture the type ID and anchor ID from the document separator
header_pattern = re.compile(r"--- !u!(\d+) &(\S+)")

# Regex to find and remove the tags for the parser
tag_remover_pattern = re.compile(r"!u!\d+\s")

# A YAML document-start marker: '---' at column 0 followed by whitespace or
# the end of the line ('---foo' is ordinary content, not a marker).
_separator_pattern = re.compile(r"---(?:\s.*)?$")

# A bare component name alone on its line, e.g. "RectTransform:".
_component_key_pattern = re.compile(r"^([A-Z][a-zA-Z0-9]*):$")


def preprocess_unity_yaml(yaml_content):
    """Rewrite lines of Unity YAML that are known to break the YAML parser.

    The transformation is line-local and never adds or removes lines. Lines
    before the first ``---`` line, blank lines and comment lines are passed
    through untouched.

    Args:
        yaml_content: YAML text, normally with the ``!u!<n>`` tags already
            removed.

    Returns:
        The text with the following fixes applied inside documents:

        * a bare ``ComponentName:`` line at column 0 that does not directly
          follow a ``---`` line is indented so it becomes a key of the
          enclosing object instead of starting a second top-level mapping;
        * a line whose key is empty (it starts with ``:``) and that mentions
          ``Any`` gets the placeholder key ``key_for_any``.
    """
    processed_lines = []
    in_document = False
    previous_line = None

    for line in yaml_content.split('\n'):
        prior, previous_line = previous_line, line

        if line.startswith('---'):
            in_document = True
            processed_lines.append(line)
            continue

        stripped = line.strip()
        if not in_document or not stripped or stripped.startswith('#'):
            processed_lines.append(line)
            continue

        # `prior` is always a string here: in_document only becomes True on a
        # '---' line, so at least one line precedes the current one.
        if (
            not line.startswith((' ', '\t'))
            and not prior.startswith('---')
            and _component_key_pattern.match(stripped)
        ):
            # Two spaces match the indentation Unity uses for the keys of the
            # enclosing object; a narrower indent cannot line up with those
            # sibling keys and makes the parser reject the document.
            processed_lines.append(f"  {stripped}")
            continue

        if stripped.startswith(':') and 'Any' in line:
            # Only the first colon is the (empty) key separator; colons that
            # belong to the value must be left alone.
            processed_lines.append(line.replace(':', 'key_for_any:', 1))
            continue

        processed_lines.append(line)

    return '\n'.join(processed_lines)


def _split_documents(yaml_content):
    """Split raw text into the preamble and ``(header_line, body)`` pairs.

    The preamble is everything before the first document-start marker
    (usually just the ``%YAML`` / ``%TAG`` directives).
    """
    preamble_lines = []
    documents = []
    current = preamble_lines
    for line in yaml_content.split('\n'):
        if _separator_pattern.match(line):
            current = []
            documents.append((line, current))
        else:
            current.append(line)
    return '\n'.join(preamble_lines), [
        (header_line, '\n'.join(body_lines))
        for header_line, body_lines in documents
    ]


def _load_text(text):
    """Strip Unity tags, preprocess and parse *text* as a single document."""
    sanitized = tag_remover_pattern.sub("", text)
    return yaml.safe_load(preprocess_unity_yaml(sanitized))


def _load_document(header_line, body):
    """Parse one document, retrying without the header's trailing content.

    Raises:
        yaml.YAMLError: if the document cannot be parsed either way.
    """
    try:
        return _load_text(f"{header_line}\n{body}")
    except yaml.YAMLError:
        if header_line.rstrip() == '---':
            raise
        # Headers such as "--- !u!4 &123 stripped" leave a stray scalar on
        # the separator line once the tag is removed; a bare marker parses.
        return _load_text(f"---\n{body}")


def _load_preamble(preamble):
    """Parse content found before the first separator, if there is any.

    Returns None when the preamble holds no data (directives, blank lines),
    when it is only a file-info string, or when it cannot be parsed.
    """
    # Directive lines configure the parser and are not a document themselves.
    content = '\n'.join(
        line for line in preamble.split('\n') if not line.startswith('%')
    )
    if not content.strip():
        return None
    try:
        doc = _load_text(content)
    except yaml.YAMLError as e:
        print(f"Failed to parse content before the first document: {e}", file=sys.stderr)
        print(f"Document content preview: {content[:200]}...", file=sys.stderr)
        return None
    if doc is None or doc == '':
        return None
    # Drop a leading document that is just file info.
    if isinstance(doc, str) and 'YAML' in doc:
        return None
    return doc


def convert_unity_yaml_to_json(yaml_content):
    """Parse a Unity YAML file string and return it as a compact JSON string.

    Every document is parsed together with its own ``--- !u!<type> &<anchor>``
    separator line, so a document that is empty or fails to parse is skipped
    without shifting the headers of the documents that follow it.

    Args:
        yaml_content: Full text of a Unity text-serialized asset.

    Returns:
        A JSON array (compact separators). Each element is
        ``{"type_id", "anchor_id", "data"}`` for a document with a Unity
        header, or ``{"data"}`` for content without one. ``type_id`` and
        ``anchor_id`` are strings, preserving the fileID references exactly.

    Raises:
        TypeError: propagated from ``json.dumps`` if a document contains a
            value JSON cannot represent.
    """
    json_data = []
    preamble, documents = _split_documents(yaml_content)

    preamble_doc = _load_preamble(preamble)
    if preamble_doc is not None:
        json_data.append({'data': preamble_doc})

    for index, (header_line, body) in enumerate(documents):
        try:
            doc = _load_document(header_line, body)
        except yaml.YAMLError as e:
            print(f"Failed to parse document {index}: {e}", file=sys.stderr)
            print(f"Document content preview: {body[:200]}...", file=sys.stderr)
            # Skip this document and continue
            continue

        if doc is None or doc == '':
            continue

        header = header_pattern.match(header_line)
        if header:
            type_id, anchor_id = header.groups()
            json_data.append({
                'type_id': type_id,
                'anchor_id': anchor_id,
                'data': doc,
            })
        else:
            # Should be rare in Unity files.
            print(
                f"Warning: document {index} has no '!u!<type> &<anchor>' header.",
                file=sys.stderr,
            )
            json_data.append({'data': doc})

    # Use most compact encoding
    return json.dumps(json_data, separators=(',', ':'))


def main():
    """Command-line entry point: read a Unity asset and write it as JSON.

    Exits with status 1 if reading, converting or writing fails.
    """
    parser = argparse.ArgumentParser(description='Convert Unity YAML assets to JSON.')
    parser.add_argument('input_path', type=str, help='Absolute path to the input Unity asset file.')
    parser.add_argument('output_path', type=str, help='Absolute path for the output JSON file.')
    parser.add_argument('--debug', action='store_true', help='Enable debug output')

    args = parser.parse_args()

    input_path = args.input_path
    output_path = args.output_path

    try:
        # Ensure the output directory exists. dirname() is '' for a bare
        # file name, and makedirs('') would raise, so only create a real path.
        output_dir = os.path.dirname(output_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        with open(input_path, 'r', encoding='utf-8') as f:
            content = f.read()

        if args.debug:
            print(f"Input file size: {len(content)} characters", file=sys.stderr)
            print(f"First 500 characters:\n{content[:500]}", file=sys.stderr)

        json_output = convert_unity_yaml_to_json(content)

        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(json_output)

        print(f"Successfully converted '{input_path}' to '{output_path}'")

    except Exception as e:
        # Top-level boundary: report the failure and exit non-zero.
        print(f"An error occurred: {e}", file=sys.stderr)
        if args.debug:
            import traceback
            traceback.print_exc()
        sys.exit(1)


if __name__ == "__main__":
    main()