|
@@ -10,44 +10,116 @@ header_pattern = re.compile(r"--- !u!(\d+) &(\S+)")
|
|
# Regex to find and remove the tags for the parser
|
|
# Regex to find and remove the tags for the parser
|
|
tag_remover_pattern = re.compile(r"!u!\d+\s")
|
|
tag_remover_pattern = re.compile(r"!u!\d+\s")
|
|
|
|
|
|
|
|
def preprocess_unity_yaml(yaml_content):
    """
    Preprocess Unity YAML text to work around constructs that can break the parser.

    Two line-level rewrites are applied; every other line (including blank
    lines, comment lines and ``---`` separators) is passed through unchanged:

    * Once a line starting with ``---`` has been seen, a line that starts at
      column 0, consists solely of a capitalised identifier followed by a
      colon (e.g. ``Transform:``) and does not directly follow a ``---`` line
      is re-emitted with leading indentation.
    * A line whose first non-blank character is ``:`` and which contains
      ``Any`` gets the placeholder key ``key_for_any`` inserted in front of
      that first colon. Only the first colon is touched, so colons inside
      the value are preserved.

    Args:
        yaml_content: The YAML text as a single string.

    Returns:
        The rewritten text, with the lines joined by newline characters.
    """
    # Compiled once per call instead of being looked up for every line.
    component_key = re.compile(r'^([A-Z][a-zA-Z0-9]*):$')

    lines = yaml_content.split('\n')
    processed_lines = []
    in_document = False

    for i, line in enumerate(lines):
        # A document separator switches on the column-0 component handling.
        if line.startswith('---'):
            in_document = True
            processed_lines.append(line)
            continue

        stripped = line.strip()

        # Blank lines and comments are never rewritten.
        if not stripped or stripped.startswith('#'):
            processed_lines.append(line)
            continue

        # A bare "Name:" key at column 0 that is not the first line after a
        # separator is pushed under the preceding top-level object.
        if (
            in_document
            and not line.startswith((' ', '\t'))
            and i > 0
            and not lines[i - 1].startswith('---')
            and component_key.match(stripped)
        ):
            # NOTE(review): the indent width is reproduced from a
            # whitespace-collapsed view of the file -- confirm it matches the
            # indentation used by the surrounding keys.
            processed_lines.append(f" {stripped}")
            continue

        # Empty key (a line that begins with ':'): give it a placeholder name.
        # Replacing only the first colon keeps values such as
        # "{type: Any, id: 3}" intact.
        if stripped.startswith(':') and 'Any' in line:
            processed_lines.append(line.replace(':', 'key_for_any:', 1))
            continue

        processed_lines.append(line)

    return '\n'.join(processed_lines)
|
|
|
|
+
|
|
def convert_unity_yaml_to_json(yaml_content):
|
|
def convert_unity_yaml_to_json(yaml_content):
|
|
"""
|
|
"""
|
|
Parses a Unity YAML file string, preserving fileID references, and returns a JSON string.
|
|
Parses a Unity YAML file string, preserving fileID references, and returns a JSON string.
|
|
"""
|
|
"""
|
|
json_data = []
|
|
json_data = []
|
|
-
|
|
|
|
|
|
+
|
|
# First, find all the original headers
|
|
# First, find all the original headers
|
|
headers = header_pattern.findall(yaml_content)
|
|
headers = header_pattern.findall(yaml_content)
|
|
-
|
|
|
|
- # Next, remove the problematic tags from the content so the parser doesn't fail
|
|
|
|
|
|
+
|
|
|
|
+ # Remove the problematic tags from the content
|
|
sanitized_content = tag_remover_pattern.sub("", yaml_content)
|
|
sanitized_content = tag_remover_pattern.sub("", yaml_content)
|
|
-
|
|
|
|
- # HACK: Unity sometimes generates invalid YAML with an empty key.
|
|
|
|
- # We'll correct this before parsing.
|
|
|
|
- lines = sanitized_content.split('\n')
|
|
|
|
- corrected_lines = []
|
|
|
|
- for line in lines:
|
|
|
|
- if line.strip().startswith(':') and 'Any' in line:
|
|
|
|
- corrected_lines.append(line.replace(':', 'key_for_any:'))
|
|
|
|
- else:
|
|
|
|
- corrected_lines.append(line)
|
|
|
|
-
|
|
|
|
- corrected_content = "\n".join(corrected_lines)
|
|
|
|
|
|
|
|
- # Use the standard SafeLoader, as the tags are now gone
|
|
|
|
- documents = list(yaml.safe_load_all(corrected_content))
|
|
|
|
|
|
+ # Apply additional preprocessing to handle Unity-specific YAML issues
|
|
|
|
+ preprocessed_content = preprocess_unity_yaml(sanitized_content)
|
|
|
|
|
|
- # The first document is the file info, which we can often skip if it's empty
|
|
|
|
|
|
+ try:
|
|
|
|
+ # Try to parse with safe_load_all
|
|
|
|
+ documents = list(yaml.safe_load_all(preprocessed_content))
|
|
|
|
+ except yaml.YAMLError as e:
|
|
|
|
+ print(f"YAML parsing error: {e}", file=sys.stderr)
|
|
|
|
+ print("Attempting to parse each document separately...", file=sys.stderr)
|
|
|
|
+
|
|
|
|
+ # If that fails, try to split by document separators and parse each separately
|
|
|
|
+ document_parts = re.split(r'\n---[^\n]*\n', preprocessed_content)
|
|
|
|
+ documents = []
|
|
|
|
+
|
|
|
|
+ for i, part in enumerate(document_parts):
|
|
|
|
+ if not part.strip():
|
|
|
|
+ continue
|
|
|
|
+
|
|
|
|
+ try:
|
|
|
|
+ # Add a temporary document separator for parsing
|
|
|
|
+ if i > 0: # Skip the first part which might not need a separator
|
|
|
|
+ part = '---\n' + part
|
|
|
|
+ doc = yaml.safe_load(part)
|
|
|
|
+ if doc is not None:
|
|
|
|
+ documents.append(doc)
|
|
|
|
+ except yaml.YAMLError as e2:
|
|
|
|
+ print(f"Failed to parse document {i}: {e2}", file=sys.stderr)
|
|
|
|
+ print(f"Document content preview: {part[:200]}...", file=sys.stderr)
|
|
|
|
+ # Skip this document and continue
|
|
|
|
+ continue
|
|
|
|
+
|
|
|
|
+ # Filter out None documents and empty string documents
|
|
|
|
+ documents = [doc for doc in documents if doc is not None and doc != '']
|
|
|
|
+
|
|
|
|
+ # Remove the first document if it's just file info
|
|
if documents and isinstance(documents[0], str) and 'YAML' in documents[0]:
|
|
if documents and isinstance(documents[0], str) and 'YAML' in documents[0]:
|
|
documents.pop(0)
|
|
documents.pop(0)
|
|
|
|
|
|
if len(headers) != len(documents):
|
|
if len(headers) != len(documents):
|
|
print(f"Warning: Mismatch between headers found ({len(headers)}) and documents parsed ({len(documents)}).", file=sys.stderr)
|
|
print(f"Warning: Mismatch between headers found ({len(headers)}) and documents parsed ({len(documents)}).", file=sys.stderr)
|
|
|
|
+ print(f"Headers: {len(headers)}, Documents: {len(documents)}", file=sys.stderr)
|
|
|
|
|
|
|
|
+ # Match documents with their headers
|
|
for i, doc in enumerate(documents):
|
|
for i, doc in enumerate(documents):
|
|
if i < len(headers):
|
|
if i < len(headers):
|
|
type_id, anchor_id = headers[i]
|
|
type_id, anchor_id = headers[i]
|
|
-
|
|
|
|
|
|
+
|
|
structured_doc = {
|
|
structured_doc = {
|
|
'type_id': type_id,
|
|
'type_id': type_id,
|
|
'anchor_id': anchor_id,
|
|
'anchor_id': anchor_id,
|
|
@@ -59,12 +131,13 @@ def convert_unity_yaml_to_json(yaml_content):
|
|
json_data.append({'data': doc})
|
|
json_data.append({'data': doc})
|
|
|
|
|
|
# Use compact encoding for the final JSON
|
|
# Use compact encoding for the final JSON
|
|
- return json.dumps(json_data)
|
|
|
|
|
|
+ return json.dumps(json_data, indent=2) # Changed to indented for better readability
|
|
|
|
|
|
def main():
|
|
def main():
|
|
parser = argparse.ArgumentParser(description='Convert Unity YAML assets to JSON.')
|
|
parser = argparse.ArgumentParser(description='Convert Unity YAML assets to JSON.')
|
|
parser.add_argument('input_path', type=str, help='Absolute path to the input Unity asset file.')
|
|
parser.add_argument('input_path', type=str, help='Absolute path to the input Unity asset file.')
|
|
parser.add_argument('output_path', type=str, help='Absolute path for the output JSON file.')
|
|
parser.add_argument('output_path', type=str, help='Absolute path for the output JSON file.')
|
|
|
|
+ parser.add_argument('--debug', action='store_true', help='Enable debug output')
|
|
args = parser.parse_args()
|
|
args = parser.parse_args()
|
|
|
|
|
|
input_path = args.input_path
|
|
input_path = args.input_path
|
|
@@ -76,19 +149,26 @@ def main():
|
|
if not os.path.exists(output_dir):
|
|
if not os.path.exists(output_dir):
|
|
os.makedirs(output_dir)
|
|
os.makedirs(output_dir)
|
|
|
|
|
|
- with open(input_path, 'r') as f:
|
|
|
|
|
|
+ with open(input_path, 'r', encoding='utf-8') as f:
|
|
content = f.read()
|
|
content = f.read()
|
|
|
|
|
|
|
|
+ if args.debug:
|
|
|
|
+ print(f"Input file size: {len(content)} characters", file=sys.stderr)
|
|
|
|
+ print(f"First 500 characters:\n{content[:500]}", file=sys.stderr)
|
|
|
|
+
|
|
json_output = convert_unity_yaml_to_json(content)
|
|
json_output = convert_unity_yaml_to_json(content)
|
|
|
|
|
|
- with open(output_path, 'w') as f:
|
|
|
|
|
|
+ with open(output_path, 'w', encoding='utf-8') as f:
|
|
f.write(json_output)
|
|
f.write(json_output)
|
|
|
|
|
|
print(f"Successfully converted '{input_path}' to '{output_path}'")
|
|
print(f"Successfully converted '{input_path}' to '{output_path}'")
|
|
|
|
|
|
except Exception as e:
|
|
except Exception as e:
|
|
print(f"An error occurred: {e}", file=sys.stderr)
|
|
print(f"An error occurred: {e}", file=sys.stderr)
|
|
|
|
+ if args.debug:
|
|
|
|
+ import traceback
|
|
|
|
+ traceback.print_exc()
|
|
sys.exit(1)
|
|
sys.exit(1)
|
|
|
|
|
|
if __name__ == "__main__":
|
|
if __name__ == "__main__":
|
|
- main()
|
|
|
|
|
|
+ main()
|