123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190 |
- import yaml
- import json
- import re
- import sys
- import argparse
- import os
# Document separator of a Unity asset, e.g. "--- !u!1 &100".
# Group 1 is the numeric type ID after "!u!", group 2 the anchor ID after "&".
header_pattern = re.compile(r"--- !u!(\d+) &(\S+)")

# The "!u!<digits>" tag plus the single whitespace character that follows it.
# Substituting it away leaves a plain "--- &<anchor>" separator for the parser.
tag_remover_pattern = re.compile(r"!u!\d+\s")
def preprocess_unity_yaml(yaml_content):
    """Rewrite Unity YAML text line by line to work around constructs that trip the parser.

    Lines are never added or removed, so line numbers reported by a later
    parse error still refer to the same lines of the input.

    Nothing is rewritten until the first line starting with ``---`` has been
    seen. After that, two rewrites apply to non-blank, non-comment lines:

    * A bare ``Name:`` key at column 0 (leading capital, alphanumeric, nothing
      after the colon) that does not directly follow a ``---`` line is emitted
      with one leading space and without trailing whitespace.
    * A line whose first non-blank character is ``:`` and that contains
      ``Any`` gets ``key_for_any`` inserted in front of that first colon, so
      the empty key receives a name. Only the first colon is touched; any
      later colon on the line belongs to the value.

    Args:
        yaml_content: The YAML text to preprocess.

    Returns:
        The processed text, with lines joined by newline characters.
    """
    component_key = re.compile(r'^([A-Z][a-zA-Z0-9]*):$')
    lines = yaml_content.split('\n')
    processed_lines = []
    in_document = False

    for i, line in enumerate(lines):
        # A separator line opens a document and is always passed through.
        if line.startswith('---'):
            in_document = True
            processed_lines.append(line)
            continue

        stripped = line.strip()

        # Preamble lines, blank lines and comments are passed through untouched.
        if not in_document or not stripped or stripped.startswith('#'):
            processed_lines.append(line)
            continue

        # Column-0 component key that is not the first key of its document.
        # in_document guarantees an earlier '---' line exists, so i - 1 >= 0.
        if (
            not line.startswith((' ', '\t'))
            and not lines[i - 1].startswith('---')
            and component_key.match(stripped)
        ):
            processed_lines.append(f" {stripped}")
            continue

        # Empty key: name it, replacing only the leading colon.
        if stripped.startswith(':') and 'Any' in line:
            processed_lines.append(line.replace(':', 'key_for_any:', 1))
            continue

        processed_lines.append(line)

    return '\n'.join(processed_lines)
def _parse_whitelist(whitelist):
    """Turn the comma-separated whitelist string into a set, or None when no filtering is requested."""
    if whitelist is None:
        return None
    # Strip each entry so "GameObject, Transform" matches "Transform";
    # an empty string yields an empty set, meaning nothing is whitelisted.
    return {name.strip() for name in whitelist.split(',') if name.strip()}


def _header_from_separator(separator_line, type_by_anchor):
    """Recover (type_id, anchor_id) from a sanitized ``--- &anchor`` line, or None if it is not a known header."""
    match = re.match(r'--- &(\S+)', separator_line)
    if match is None:
        return None
    anchor_id = match.group(1)
    type_id = type_by_anchor.get(anchor_id)
    if type_id is None:
        return None
    return type_id, anchor_id


def _parse_documents_separately(content, headers):
    """Parse each ``---``-delimited chunk on its own and return (header, document) pairs.

    Chunks that fail to parse are reported on stderr and skipped. Each
    surviving document keeps the header found on its own separator line, so a
    skipped chunk cannot shift the headers of the documents after it.
    """
    type_by_anchor = {anchor_id: type_id for type_id, anchor_id in headers}

    # The capture group keeps the separator lines in the result:
    # [preamble, sep1, body1, sep2, body2, ...]
    pieces = re.split(r'\n(---[^\n]*)\n', content)
    bodies = pieces[0::2]
    separators = [None] + pieces[1::2]

    entries = []
    for i, (separator, part) in enumerate(zip(separators, bodies)):
        if not part.strip():
            continue
        if separator is None:
            # The leading chunk has no preceding separator, but it may itself
            # begin with one when the text starts directly with '---'.
            header = _header_from_separator(part.split('\n', 1)[0], type_by_anchor)
        else:
            header = _header_from_separator(separator, type_by_anchor)
            # Add a plain document separator for parsing.
            part = '---\n' + part
        try:
            doc = yaml.safe_load(part)
        except yaml.YAMLError as e2:
            print(f"Failed to parse document {i}: {e2}", file=sys.stderr)
            print(f"Document content preview: {part[:200]}...", file=sys.stderr)
            continue
        if doc is not None:
            entries.append((header, doc))
    return entries


def convert_unity_yaml_to_json(yaml_content, whitelist=None):
    """Parse Unity YAML text and return it as a compact JSON string.

    The result is a JSON array with one object per parsed document. A document
    matched to a ``--- !u!<type> &<anchor>`` header becomes
    ``{"type_id": ..., "anchor_id": ..., "data": ...}``; a document without a
    header becomes ``{"data": ...}``.

    The text is first parsed as one multi-document stream, and documents are
    then matched to headers by position. If that parse raises a YAML error,
    each ``---``-delimited chunk is parsed on its own instead; unparseable
    chunks are reported on stderr and skipped, and each remaining document is
    matched to the header on its own separator line.

    Args:
        yaml_content: Text of the Unity asset file.
        whitelist: Optional comma-separated component names. When given, a
            mapping document whose first key is not listed keeps that key but
            has its value replaced by an empty object. ``None`` disables
            filtering; an empty string filters every component.

    Returns:
        The JSON string, encoded without whitespace between tokens.
    """
    whitelist_set = _parse_whitelist(whitelist)

    # Collect the original headers before the tags are stripped for the parser.
    headers = header_pattern.findall(yaml_content)
    sanitized_content = tag_remover_pattern.sub("", yaml_content)
    preprocessed_content = preprocess_unity_yaml(sanitized_content)

    matched_by_position = True
    try:
        entries = [(None, doc) for doc in yaml.safe_load_all(preprocessed_content)]
    except yaml.YAMLError as e:
        print(f"YAML parsing error: {e}", file=sys.stderr)
        print("Attempting to parse each document separately...", file=sys.stderr)
        entries = _parse_documents_separately(preprocessed_content, headers)
        matched_by_position = False

    # Drop empty documents, then a leading plain-string document mentioning YAML (file info).
    entries = [(header, doc) for header, doc in entries if doc is not None and doc != '']
    if entries and isinstance(entries[0][1], str) and 'YAML' in entries[0][1]:
        entries.pop(0)

    if len(headers) != len(entries):
        print(f"Warning: Mismatch between headers found ({len(headers)}) and documents parsed ({len(entries)}).", file=sys.stderr)

    if matched_by_position:
        # The stream parse does not expose separator lines, so pair by position.
        entries = [
            (headers[i] if i < len(headers) else None, doc)
            for i, (_, doc) in enumerate(entries)
        ]

    json_data = []
    for header, doc in entries:
        if header is None:
            # Extra document without a header (should be rare in Unity files).
            json_data.append({'data': doc})
            continue

        type_id, anchor_id = header
        component_doc = doc
        if whitelist_set is not None and isinstance(doc, dict):
            component_name = next(iter(doc), None)
            if component_name and component_name not in whitelist_set:
                # Not whitelisted: keep the component name, drop its data.
                component_doc = {component_name: {}}
        json_data.append({
            'type_id': type_id,
            'anchor_id': anchor_id,
            'data': component_doc
        })

    return json.dumps(json_data, separators=(',', ':'))
def main():
    """Command-line entry point: convert one Unity YAML asset file to a JSON file.

    Parses ``input_path``, ``output_path``, ``--whitelist`` and ``--debug``
    from the command line, creates the output directory if needed, reads the
    input as UTF-8, converts it and writes the JSON as UTF-8.

    Any exception is reported on stderr (with a traceback under ``--debug``)
    and the process exits with status 1.
    """
    parser = argparse.ArgumentParser(description='Convert Unity YAML assets to JSON.')
    parser.add_argument('input_path', type=str, help='Absolute path to the input Unity asset file.')
    parser.add_argument('output_path', type=str, help='Absolute path for the output JSON file.')
    parser.add_argument('--whitelist', type=str, help='Comma-separated list of component types to include.')
    parser.add_argument('--debug', action='store_true', help='Enable debug output')
    args = parser.parse_args()

    input_path = args.input_path
    output_path = args.output_path

    try:
        # Ensure the output directory exists. dirname is '' for a bare file
        # name, and makedirs('') raises, so only create a real directory.
        output_dir = os.path.dirname(output_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        with open(input_path, 'r', encoding='utf-8') as f:
            content = f.read()

        if args.debug:
            print(f"Input file size: {len(content)} characters", file=sys.stderr)
            print(f"First 500 characters:\n{content[:500]}", file=sys.stderr)

        json_output = convert_unity_yaml_to_json(content, args.whitelist)

        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(json_output)

        print(f"Successfully converted '{input_path}' to '{output_path}'")
    except Exception as e:
        # Top-level boundary: report the failure and exit non-zero.
        print(f"An error occurred: {e}", file=sys.stderr)
        if args.debug:
            import traceback
            traceback.print_exc()
        sys.exit(1)
# Run the converter only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|