# convert_scene.py (6.6 KB)
  1. import yaml
  2. import json
  3. import re
  4. import sys
  5. import argparse
  6. import os
  7. # Regex to capture the type ID and anchor ID from the document separator
  8. header_pattern = re.compile(r"--- !u!(\d+) &(\S+)")
  9. # Regex to find and remove the tags for the parser
  10. tag_remover_pattern = re.compile(r"!u!\d+\s")
  11. def preprocess_unity_yaml(yaml_content):
  12. """
  13. Preprocesses Unity YAML content to handle various edge cases that can break the parser.
  14. """
  15. lines = yaml_content.split('\n')
  16. processed_lines = []
  17. in_document = False
  18. for i, line in enumerate(lines):
  19. # Check if we're starting a new document
  20. if line.startswith('---'):
  21. in_document = True
  22. processed_lines.append(line)
  23. continue
  24. # Skip empty lines and comments
  25. if not line.strip() or line.strip().startswith('#'):
  26. processed_lines.append(line)
  27. continue
  28. if in_document:
  29. # Handle the case where a key starts at column 0 but should be indented
  30. # This often happens with Unity components like RectTransform, Transform, etc.
  31. if ':' in line and not line.startswith(' ') and not line.startswith('\t'):
  32. # Check if the previous line was a document separator or another component
  33. if i > 0 and not lines[i-1].startswith('---'):
  34. # Check if this looks like a Unity component name
  35. component_match = re.match(r'^([A-Z][a-zA-Z0-9]*):$', line.strip())
  36. if component_match:
  37. # This is likely a component that should be a key under the main object
  38. processed_lines.append(f" {line.strip()}")
  39. continue
  40. # Handle empty key issue (:: or just :)
  41. if line.strip().startswith(':') and 'Any' in line:
  42. processed_lines.append(line.replace(':', 'key_for_any:'))
  43. continue
  44. # Handle cases where there might be invalid indentation after colons
  45. if ':' in line and not line.strip().endswith(':'):
  46. # Check for malformed key-value pairs
  47. parts = line.split(':', 1)
  48. if len(parts) == 2 and parts[1].strip() == '':
  49. # This is a key with no value, which is fine in YAML
  50. processed_lines.append(line)
  51. continue
  52. processed_lines.append(line)
  53. return '\n'.join(processed_lines)
  54. def convert_unity_yaml_to_json(yaml_content):
  55. """
  56. Parses a Unity YAML file string, preserving fileID references, and returns a JSON string.
  57. """
  58. json_data = []
  59. # First, find all the original headers
  60. headers = header_pattern.findall(yaml_content)
  61. # Remove the problematic tags from the content
  62. sanitized_content = tag_remover_pattern.sub("", yaml_content)
  63. # Apply additional preprocessing to handle Unity-specific YAML issues
  64. preprocessed_content = preprocess_unity_yaml(sanitized_content)
  65. try:
  66. # Try to parse with safe_load_all
  67. documents = list(yaml.safe_load_all(preprocessed_content))
  68. except yaml.YAMLError as e:
  69. print(f"YAML parsing error: {e}", file=sys.stderr)
  70. print("Attempting to parse each document separately...", file=sys.stderr)
  71. # If that fails, try to split by document separators and parse each separately
  72. document_parts = re.split(r'\n---[^\n]*\n', preprocessed_content)
  73. documents = []
  74. for i, part in enumerate(document_parts):
  75. if not part.strip():
  76. continue
  77. try:
  78. # Add a temporary document separator for parsing
  79. if i > 0: # Skip the first part which might not need a separator
  80. part = '---\n' + part
  81. doc = yaml.safe_load(part)
  82. if doc is not None:
  83. documents.append(doc)
  84. except yaml.YAMLError as e2:
  85. print(f"Failed to parse document {i}: {e2}", file=sys.stderr)
  86. print(f"Document content preview: {part[:200]}...", file=sys.stderr)
  87. # Skip this document and continue
  88. continue
  89. # Filter out None documents and empty string documents
  90. documents = [doc for doc in documents if doc is not None and doc != '']
  91. # Remove the first document if it's just file info
  92. if documents and isinstance(documents[0], str) and 'YAML' in documents[0]:
  93. documents.pop(0)
  94. if len(headers) != len(documents):
  95. print(f"Warning: Mismatch between headers found ({len(headers)}) and documents parsed ({len(documents)}).", file=sys.stderr)
  96. print(f"Headers: {len(headers)}, Documents: {len(documents)}", file=sys.stderr)
  97. # Match documents with their headers
  98. for i, doc in enumerate(documents):
  99. if i < len(headers):
  100. type_id, anchor_id = headers[i]
  101. structured_doc = {
  102. 'type_id': type_id,
  103. 'anchor_id': anchor_id,
  104. 'data': doc
  105. }
  106. json_data.append(structured_doc)
  107. else:
  108. # Append any extra docs without headers (should be rare in Unity files)
  109. json_data.append({'data': doc})
  110. # Use compact encoding for the final JSON
  111. return json.dumps(json_data, indent=2) # Changed to indented for better readability
  112. def main():
  113. parser = argparse.ArgumentParser(description='Convert Unity YAML assets to JSON.')
  114. parser.add_argument('input_path', type=str, help='Absolute path to the input Unity asset file.')
  115. parser.add_argument('output_path', type=str, help='Absolute path for the output JSON file.')
  116. parser.add_argument('--debug', action='store_true', help='Enable debug output')
  117. args = parser.parse_args()
  118. input_path = args.input_path
  119. output_path = args.output_path
  120. try:
  121. # Ensure the output directory exists
  122. output_dir = os.path.dirname(output_path)
  123. if not os.path.exists(output_dir):
  124. os.makedirs(output_dir)
  125. with open(input_path, 'r', encoding='utf-8') as f:
  126. content = f.read()
  127. if args.debug:
  128. print(f"Input file size: {len(content)} characters", file=sys.stderr)
  129. print(f"First 500 characters:\n{content[:500]}", file=sys.stderr)
  130. json_output = convert_unity_yaml_to_json(content)
  131. with open(output_path, 'w', encoding='utf-8') as f:
  132. f.write(json_output)
  133. print(f"Successfully converted '{input_path}' to '{output_path}'")
  134. except Exception as e:
  135. print(f"An error occurred: {e}", file=sys.stderr)
  136. if args.debug:
  137. import traceback
  138. traceback.print_exc()
  139. sys.exit(1)
  140. if __name__ == "__main__":
  141. main()