Bladeren bron

Sujith :) ->
1. Improved low-level parsing
2. Stopped writing the gigantic GUID map created at the mid level to disk

Sujith:) 1 dag geleden
bovenliggende
commit
2fbef7a443

+ 36 - 175
Assets/LLM/source/extract_low_level.py

@@ -1,66 +1,40 @@
 import argparse
 import sys
-import json
-import shutil
+import subprocess
 from pathlib import Path
 
-# Add the utils directory to the Python path
-utils_path = Path(__file__).parent / 'utils'
-sys.path.append(str(utils_path))
-
-from file_utils import find_files_by_extension
-from deep_parser import parse_scene_or_prefab
-from json_utils import write_json
-from yaml_utils import load_unity_yaml, convert_to_plain_python_types
-from hierarchy_utils import HierarchyParser
-
-def copy_scripts(assets_dir, output_assets_dir):
-    """
-    Copies all C# scripts (.cs) to the target directory.
-    """
-    print("\n--- Starting Script Handling ---")
-    cs_files = find_files_by_extension(str(assets_dir), '.cs')
-    print(f"Found {len(cs_files)} C# script files to copy.")
-
-    for script_path_str in cs_files:
-        script_path = Path(script_path_str)
-        relative_path = script_path.relative_to(assets_dir)
-        destination_path = output_assets_dir / relative_path
-        
-        destination_path.parent.mkdir(parents=True, exist_ok=True)
-        
-        try:
-            shutil.copy(script_path, destination_path)
-        except IOError as e:
-            print(f"Error copying {script_path} to {destination_path}: {e}", file=sys.stderr)
+def run_subprocess(script_name, input_dir, output_dir, indent=None):
+    """Run one parser script from the sibling ``parsers`` directory.
+
+    The script is launched with the current interpreter and receives
+    ``--input`` and ``--output``; ``--indent`` is appended only when
+    ``indent`` is not None.
+
+    Args:
+        script_name: File name of the script inside ``parsers``.
+        input_dir: Value passed to the script as ``--input``.
+        output_dir: Value passed to the script as ``--output``.
+        indent: Optional value passed to the script as ``--indent``.
+
+    Returns:
+        True if the script exited with status 0, False otherwise.
+    """
+    script_path = Path(__file__).parent / "parsers" / script_name
+    command = [
+        sys.executable,
+        str(script_path),
+        "--input",
+        str(input_dir),
+        "--output",
+        str(output_dir),
+    ]
+    if indent is not None:
+        command.extend(["--indent", str(indent)])
     
-    print("Script copying complete.")
+    try:
+        result = subprocess.run(command, check=True, text=True, capture_output=True)
+    except subprocess.CalledProcessError as e:
+        # Failures belong on stderr, like the other error messages in this file.
+        print(f"--- ERROR in {script_name} ---", file=sys.stderr)
+        print(e.stdout, file=sys.stderr)
+        print(e.stderr, file=sys.stderr)
+        print("--- End of Error ---", file=sys.stderr)
+        return False
+
+    # The output was captured, so relay it; otherwise the child's progress
+    # messages and per-file error reports would be discarded on success.
+    if result.stdout:
+        print(result.stdout, end="")
+    if result.stderr:
+        print(result.stderr, end="", file=sys.stderr)
+    return True
 
 def main():
     """
-    Main function to run the low-level data extraction process.
+    Main function to orchestrate the low-level data extraction pipeline.
     """
     parser = argparse.ArgumentParser(
-        description="Generates a deeply detailed, per-GameObject breakdown of scenes and prefabs."
-    )
-    parser.add_argument(
-        "--input",
-        type=str,
-        required=True,
-        help="The root directory of the target Unity project."
-    )
-    parser.add_argument(
-        "--output",
-        type=str,
-        required=True,
-        help="The directory where the generated output folder will be saved."
-    )
-    parser.add_argument(
-        "--indent",
-        type=int,
-        default=None,
-        help="Indentation level for JSON output. Defaults to None (compact)."
+        description="Orchestrates a pipeline of parsers for a detailed data breakdown."
     )
+    parser.add_argument("--input", type=str, required=True, help="The root directory of the target Unity project.")
+    parser.add_argument("--output", type=str, required=True, help="The directory where the output folder will be saved.")
+    parser.add_argument("--indent", type=int, default=None, help="Indentation level for JSON output.")
     args = parser.parse_args()
 
     input_dir = Path(args.input).resolve()
@@ -70,131 +44,18 @@ def main():
         print(f"Error: Input path '{input_dir}' is not a valid directory.", file=sys.stderr)
         sys.exit(1)
 
-    # Create the main output folder, named "LowLevel"
     low_level_output_dir = output_dir / "LowLevel"
-    output_assets_dir = low_level_output_dir / "Assets"
-    try:
-        output_assets_dir.mkdir(parents=True, exist_ok=True)
-        print(f"Output will be saved to: {low_level_output_dir}")
-    except OSError as e:
-        print(f"Error: Could not create output directory '{low_level_output_dir}'. {e}", file=sys.stderr)
-        sys.exit(1)
-
-    # --- Orchestration ---
-    assets_dir = input_dir / "Assets"
-    if not assets_dir.is_dir():
-        print(f"Error: 'Assets' directory not found in '{input_dir}'.", file=sys.stderr)
-        return
-
-    # --- Task 1: Copy C# Scripts ---
-    copy_scripts(assets_dir, output_assets_dir)
-
-    # --- Task 2: Process Scenes and Prefabs ---
-    scene_files = find_files_by_extension(str(assets_dir), '.unity')
-    prefab_files = find_files_by_extension(str(assets_dir), '.prefab')
-    files_to_process = scene_files + prefab_files
-    
-    print(f"\nFound {len(files_to_process)} scene/prefab files to process.")
-
-    for file_path_str in files_to_process:
-        file_path = Path(file_path_str)
-        print(f"\nProcessing Scene/Prefab: {file_path.name}")
-
-        # --- Deep Parsing for Individual GameObjects ---
-        gameobject_list = parse_scene_or_prefab(str(file_path))
-
-        # Create the output subdirectory for this asset
-        relative_path = file_path.relative_to(input_dir)
-        asset_output_dir = low_level_output_dir / relative_path
-        try:
-            asset_output_dir.mkdir(parents=True, exist_ok=True)
-        except OSError as e:
-            print(f"Error creating directory {asset_output_dir}: {e}", file=sys.stderr)
-            continue
-
-        if gameobject_list:
-            print(f"Saving {len(gameobject_list)} GameObjects to {asset_output_dir}")
-            for go_data in gameobject_list:
-                file_id = go_data.get('fileID')
-                if file_id:
-                    output_json_path = asset_output_dir / f"{file_id}.json"
-                    try:
-                        write_json(go_data, output_json_path, indent=args.indent)
-                    except Exception as e:
-                        print(f"Error writing to {output_json_path}: {e}", file=sys.stderr)
-        else:
-            print(f"Skipped deep parsing for {file_path.name}.")
-
-        # --- Hierarchy Parsing for Root Object Identification ---
-        try:
-            documents = load_unity_yaml(file_path)
-            if not documents:
-                print(f"Could not load YAML from {file_path.name} for hierarchy parsing.")
-                continue
-
-            raw_object_map = {int(doc.anchor.value): doc for doc in documents if hasattr(doc, 'anchor') and doc.anchor is not None}
-            object_map = {file_id: convert_to_plain_python_types(obj) for file_id, obj in raw_object_map.items()}
-
-            parser = HierarchyParser(object_map)
-            root_object_ids = parser.get_root_object_ids()
-            
-            root_ids_list = [file_id for file_id, _ in root_object_ids]
-
-            if root_ids_list:
-                roots_output_path = asset_output_dir / "root_objects.json"
-                write_json(root_ids_list, roots_output_path, indent=args.indent)
-                print(f"Successfully saved root object list to {roots_output_path}")
-
-        except Exception as e:
-            print(f"Error during hierarchy parsing for {file_path.name}: {e}", file=sys.stderr)
-
-    # --- Task 3: Process .asset files ---
-    asset_files = find_files_by_extension(str(assets_dir), '.asset')
-    print(f"\nFound {len(asset_files)} .asset files to process.")
-
-    for file_path_str in asset_files:
-        file_path = Path(file_path_str)
-        print(f"\nProcessing Asset: {file_path.name}")
-
-        relative_path = file_path.relative_to(input_dir)
-        asset_output_dir = low_level_output_dir / relative_path
-        try:
-            asset_output_dir.mkdir(parents=True, exist_ok=True)
-        except OSError as e:
-            print(f"Error creating directory {asset_output_dir}: {e}", file=sys.stderr)
-            continue
-
-        try:
-            documents = load_unity_yaml(file_path)
-            if not documents:
-                print(f"Skipped {file_path.name} as it's empty or could not be parsed.")
-                continue
-
-            print(f"Saving {len(documents)} objects from {file_path.name} to {asset_output_dir}")
-            for doc in documents:
-                if not hasattr(doc, 'anchor') or doc.anchor is None:
-                    continue
-                
-                file_id = int(doc.anchor.value)
-                obj_data = convert_to_plain_python_types(doc)
-                
-                final_obj_data = {}
-                for key, value in obj_data.items():
-                    if isinstance(value, dict):
-                        new_value = value.copy()
-                        new_value['fileID'] = file_id
-                        final_obj_data[key] = new_value
-                    else:
-                        final_obj_data[key] = value
-
-                output_json_path = asset_output_dir / f"{file_id}.json"
-                write_json(final_obj_data, output_json_path, indent=args.indent)
-
-        except Exception as e:
-            print(f"Error processing asset file {file_path.name}: {e}", file=sys.stderr)
+    low_level_output_dir.mkdir(parents=True, exist_ok=True)
+    print(f"Output will be saved to: {low_level_output_dir}")
 
+    # --- Run Extraction Pipeline ---
+    run_subprocess("copy_scripts.py", input_dir, low_level_output_dir, args.indent)
+    run_subprocess("copy_shaders.py", input_dir, low_level_output_dir, args.indent)
+    run_subprocess("parse_project_settings.py", input_dir, low_level_output_dir, args.indent)
+    run_subprocess("parse_generic_assets.py", input_dir, low_level_output_dir, args.indent)
+    run_subprocess("parse_scenes_and_prefabs.py", input_dir, low_level_output_dir, args.indent)
 
-    print("\nLow-level extraction complete.")
+    print("\nLow-level extraction pipeline complete.")
 
 if __name__ == "__main__":
-    main()
+    main()

+ 2 - 10
Assets/LLM/source/extract_mid_level.py

@@ -124,17 +124,9 @@ def main():
     replicate_directory_structure(str(assets_dir), str(output_assets_dir))
     print("Directory structure replication complete.")
 
-    # --- Task 2: Generate GUID Map ---
-    print("\n--- Generating GUID Map ---")
+    # --- Task 2: Generate GUID Map (in memory) ---
+    # This map is required by the scene processor.
     guid_map = create_guid_to_path_map(str(input_dir))
-    guid_map_path = mid_level_output_dir / "guid_map.json"
-    try:
-        # Use the new centralized utility
-        write_json(guid_map, guid_map_path, indent=args.indent)
-        print(f"Successfully created GUID map: {guid_map_path}")
-    except Exception as e:
-        print(f"Error writing GUID map: {e}", file=sys.stderr)
-        sys.exit(1)
 
     # --- Task 3: Generate Detailed GUID Mappers ---
     generate_guid_mappers(input_dir, mid_level_output_dir, indent=args.indent)

+ 8 - 0
Assets/LLM/source/parsers.meta

@@ -0,0 +1,8 @@
+fileFormatVersion: 2
+guid: d4cd0977a2add405887697ed62393e24
+folderAsset: yes
+DefaultImporter:
+  externalObjects: {}
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 

+ 49 - 0
Assets/LLM/source/parsers/copy_scripts.py

@@ -0,0 +1,49 @@
+import argparse
+import sys
+import shutil
+from pathlib import Path
+
+# Add the utils directory to the Python path
+utils_path = Path(__file__).parent.parent / 'utils'
+sys.path.append(str(utils_path))
+
+from file_utils import find_files_by_extension
+
+def main():
+    """Copy every C# script under ``<input>/Assets`` into ``<output>/Assets``.
+
+    Each file keeps its path relative to ``Assets``. A file that cannot be
+    copied is reported on stderr and the remaining files are still processed.
+    Exits with status 1 when ``<input>/Assets`` is not a directory.
+    """
+    parser = argparse.ArgumentParser(description="Copies all C# scripts (.cs) to the target directory.")
+    parser.add_argument("--input", type=str, required=True, help="The root directory of the target Unity project.")
+    parser.add_argument("--output", type=str, required=True, help="The directory where the output will be saved.")
+    # run_subprocess in extract_low_level.py appends --indent for every script
+    # when the user supplies it; without this argument argparse rejects the
+    # call. Nothing is serialized here, so the value is ignored.
+    parser.add_argument("--indent", type=int, default=None, help="Accepted for pipeline compatibility; unused.")
+    args = parser.parse_args()
+
+    input_dir = Path(args.input).resolve()
+    output_dir = Path(args.output).resolve()
+    assets_dir = input_dir / "Assets"
+    output_assets_dir = output_dir / "Assets"
+
+    if not assets_dir.is_dir():
+        # Report the problem instead of returning silently with success.
+        print(f"Error: 'Assets' directory not found in '{input_dir}'.", file=sys.stderr)
+        sys.exit(1)
+
+    cs_files = find_files_by_extension(str(assets_dir), '.cs')
+    if not cs_files:
+        print("No C# scripts found to copy.")
+        return
+
+    print("\n--- Starting Script Handling ---")
+    print(f"Found {len(cs_files)} C# script files to copy.")
+
+    for script_path_str in cs_files:
+        script_path = Path(script_path_str)
+        relative_path = script_path.relative_to(assets_dir)
+        destination_path = output_assets_dir / relative_path
+
+        try:
+            # Directory creation is inside the try so that one bad
+            # destination does not abort the whole copy.
+            destination_path.parent.mkdir(parents=True, exist_ok=True)
+            shutil.copy(script_path, destination_path)
+        except OSError as e:
+            print(f"Error copying {script_path} to {destination_path}: {e}", file=sys.stderr)
+
+    print("Script copying complete.")
+
+if __name__ == "__main__":
+    main()

+ 7 - 0
Assets/LLM/source/parsers/copy_scripts.py.meta

@@ -0,0 +1,7 @@
+fileFormatVersion: 2
+guid: b6d2b037c68924c9ea94de9412c81e15
+DefaultImporter:
+  externalObjects: {}
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 

+ 52 - 0
Assets/LLM/source/parsers/copy_shaders.py

@@ -0,0 +1,52 @@
+import argparse
+import sys
+import shutil
+from pathlib import Path
+
+# Add the utils directory to the Python path
+utils_path = Path(__file__).parent.parent / 'utils'
+sys.path.append(str(utils_path))
+
+from file_utils import find_files_by_extension
+
+def main():
+    """Copy every shader file under ``<input>/Assets`` into ``<output>/Assets``.
+
+    Files ending in ``.shader`` or ``.shadergraph`` are copied and keep their
+    path relative to ``Assets``. A file that cannot be copied is reported on
+    stderr and the remaining files are still processed. Exits with status 1
+    when ``<input>/Assets`` is not a directory.
+    """
+    parser = argparse.ArgumentParser(description="Copies all shader files (.shader, .shadergraph) to the target directory.")
+    parser.add_argument("--input", type=str, required=True, help="The root directory of the target Unity project.")
+    parser.add_argument("--output", type=str, required=True, help="The directory where the output will be saved.")
+    # run_subprocess in extract_low_level.py appends --indent for every script
+    # when the user supplies it; without this argument argparse rejects the
+    # call. Nothing is serialized here, so the value is ignored.
+    parser.add_argument("--indent", type=int, default=None, help="Accepted for pipeline compatibility; unused.")
+    args = parser.parse_args()
+
+    input_dir = Path(args.input).resolve()
+    output_dir = Path(args.output).resolve()
+    assets_dir = input_dir / "Assets"
+    output_assets_dir = output_dir / "Assets"
+
+    if not assets_dir.is_dir():
+        # Report the problem instead of returning silently with success.
+        print(f"Error: 'Assets' directory not found in '{input_dir}'.", file=sys.stderr)
+        sys.exit(1)
+
+    shader_files = find_files_by_extension(str(assets_dir), '.shader')
+    shader_graph_files = find_files_by_extension(str(assets_dir), '.shadergraph')
+    files_to_copy = shader_files + shader_graph_files
+
+    if not files_to_copy:
+        print("No shader files found to copy.")
+        return
+
+    print("\n--- Starting Shader Handling ---")
+    print(f"Found {len(files_to_copy)} shader files to copy.")
+
+    for file_path_str in files_to_copy:
+        file_path = Path(file_path_str)
+        relative_path = file_path.relative_to(assets_dir)
+        destination_path = output_assets_dir / relative_path
+
+        try:
+            # Directory creation is inside the try so that one bad
+            # destination does not abort the whole copy.
+            destination_path.parent.mkdir(parents=True, exist_ok=True)
+            shutil.copy(file_path, destination_path)
+        except OSError as e:
+            print(f"Error copying {file_path} to {destination_path}: {e}", file=sys.stderr)
+
+    print("Shader copying complete.")
+
+if __name__ == "__main__":
+    main()

+ 7 - 0
Assets/LLM/source/parsers/copy_shaders.py.meta

@@ -0,0 +1,7 @@
+fileFormatVersion: 2
+guid: 1812953ed430e43a78116c405677d3b9
+DefaultImporter:
+  externalObjects: {}
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 

+ 59 - 0
Assets/LLM/source/parsers/parse_generic_assets.py

@@ -0,0 +1,59 @@
+import argparse
+import sys
+from pathlib import Path
+
+# Add the utils directory to the Python path
+utils_path = Path(__file__).parent.parent / 'utils'
+sys.path.append(str(utils_path))
+
+from file_utils import find_files_by_extension
+from json_utils import write_json
+from yaml_utils import load_unity_yaml, convert_to_plain_python_types
+
+def main():
+    """Convert ``.mat``, ``.controller`` and ``.anim`` files under ``<input>/Assets`` to JSON.
+
+    For each file, the first document returned by ``load_unity_yaml`` is
+    converted with ``convert_to_plain_python_types`` and written to the
+    mirrored path under ``<output>/Assets`` with the extension replaced by
+    ``.json``. Per-file failures are reported on stderr and do not stop the
+    run. Exits with status 1 when ``<input>/Assets`` is not a directory.
+    """
+    parser = argparse.ArgumentParser(description="Parses generic YAML-based assets (.mat, .controller, .anim) into JSON.")
+    parser.add_argument("--input", type=str, required=True, help="The root directory of the target Unity project.")
+    parser.add_argument("--output", type=str, required=True, help="The directory where the output will be saved.")
+    parser.add_argument("--indent", type=int, default=None)
+    args = parser.parse_args()
+
+    input_dir = Path(args.input).resolve()
+    output_dir = Path(args.output).resolve()
+    assets_dir = input_dir / "Assets"
+    output_assets_dir = output_dir / "Assets"
+
+    if not assets_dir.is_dir():
+        # Report the problem instead of returning silently with success.
+        print(f"Error: 'Assets' directory not found in '{input_dir}'.", file=sys.stderr)
+        sys.exit(1)
+
+    extensions = ['.mat', '.controller', '.anim']
+    files_to_process = []
+    for ext in extensions:
+        files_to_process.extend(find_files_by_extension(str(assets_dir), ext))
+
+    if not files_to_process:
+        print("No generic asset files (.mat, .controller, .anim) found.")
+        return
+
+    print("\n--- Starting Generic Asset Parsing ---")
+    print(f"Found {len(files_to_process)} files to process.")
+
+    for file_path_str in files_to_process:
+        file_path = Path(file_path_str)
+
+        relative_path = file_path.relative_to(assets_dir)
+        # NOTE(review): replacing the suffix maps e.g. Foo.mat and Foo.anim in
+        # the same folder to the same Foo.json, so the later file overwrites
+        # the earlier one. Confirm whether consumers depend on this naming
+        # before changing it.
+        output_path = (output_assets_dir / relative_path).with_suffix('.json')
+
+        try:
+            # Directory creation is inside the try so that one bad
+            # destination does not abort the whole batch.
+            output_path.parent.mkdir(parents=True, exist_ok=True)
+            documents = load_unity_yaml(file_path)
+            if documents:
+                if len(documents) > 1:
+                    # Only the first document is exported; say so rather than
+                    # dropping the rest without any trace.
+                    print(
+                        f"Warning: {file_path.name} contains {len(documents)} documents; only the first is exported.",
+                        file=sys.stderr,
+                    )
+                data = convert_to_plain_python_types(documents[0])
+                write_json(data, output_path, indent=args.indent)
+        except Exception as e:
+            print(f"Error processing {file_path.name}: {e}", file=sys.stderr)
+
+    print("Generic asset parsing complete.")
+
+if __name__ == "__main__":
+    main()

+ 7 - 0
Assets/LLM/source/parsers/parse_generic_assets.py.meta

@@ -0,0 +1,7 @@
+fileFormatVersion: 2
+guid: 883dc0ad60cf74e769230617581587bd
+DefaultImporter:
+  externalObjects: {}
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 

+ 63 - 0
Assets/LLM/source/parsers/parse_project_settings.py

@@ -0,0 +1,63 @@
+import argparse
+import sys
+from pathlib import Path
+
+# Add the utils directory to the Python path
+utils_path = Path(__file__).parent.parent / 'utils'
+sys.path.append(str(utils_path))
+
+from json_utils import write_json
+from yaml_utils import load_unity_yaml, convert_to_plain_python_types
+
+def main():
+    """Convert a fixed list of ``<input>/ProjectSettings`` files to JSON.
+
+    For each listed file that exists, the first document returned by
+    ``load_unity_yaml`` is converted with ``convert_to_plain_python_types``
+    and written to ``<output>/ProjectSettings/<name>.json``. Per-file
+    failures are reported on stderr and do not stop the run.
+    """
+    parser = argparse.ArgumentParser(description="Parses key ProjectSettings files into JSON.")
+    parser.add_argument("--input", type=str, required=True, help="The root directory of the target Unity project.")
+    parser.add_argument("--output", type=str, required=True, help="The directory where the output will be saved.")
+    parser.add_argument("--indent", type=int, default=None)
+    args = parser.parse_args()
+
+    input_dir = Path(args.input).resolve()
+    output_dir = Path(args.output).resolve()
+    project_settings_dir = input_dir / "ProjectSettings"
+    output_settings_dir = output_dir / "ProjectSettings"
+
+    if not project_settings_dir.is_dir():
+        print("No ProjectSettings directory found.")
+        return
+
+    files_to_parse = [
+        "TagManager.asset", "DynamicsManager.asset", "Physics2DSettings.asset",
+        "EditorBuildSettings.asset", "GraphicsSettings.asset", "QualitySettings.asset",
+        "InputManager.asset", "ProjectSettings.asset"
+    ]
+
+    print("\n--- Starting Project Settings Parsing ---")
+    found_any = False
+
+    for filename in files_to_parse:
+        file_path = project_settings_dir / filename
+        if not file_path.is_file():
+            continue
+
+        found_any = True
+        print(f"Processing: {filename}")
+
+        output_path = (output_settings_dir / filename).with_suffix('.json')
+
+        try:
+            # Directory creation is inside the try so that a failure is
+            # reported like any other per-file error instead of aborting.
+            output_path.parent.mkdir(parents=True, exist_ok=True)
+            documents = load_unity_yaml(file_path)
+            if documents:
+                if len(documents) > 1:
+                    # Only the first document is exported; say so rather than
+                    # dropping the rest without any trace.
+                    print(
+                        f"Warning: {filename} contains {len(documents)} documents; only the first is exported.",
+                        file=sys.stderr,
+                    )
+                data = convert_to_plain_python_types(documents[0])
+                write_json(data, output_path, indent=args.indent)
+        except Exception as e:
+            print(f"Error processing {filename}: {e}", file=sys.stderr)
+
+    if not found_any:
+        print("No project settings files found to parse.")
+    else:
+        print("Project settings parsing complete.")
+
+if __name__ == "__main__":
+    main()

+ 7 - 0
Assets/LLM/source/parsers/parse_project_settings.py.meta

@@ -0,0 +1,7 @@
+fileFormatVersion: 2
+guid: 1062e90f985fe438d8408338a200fd00
+DefaultImporter:
+  externalObjects: {}
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 

+ 84 - 0
Assets/LLM/source/parsers/parse_scenes_and_prefabs.py

@@ -0,0 +1,84 @@
+import argparse
+import sys
+from pathlib import Path
+
+# Add the utils directory to the Python path
+utils_path = Path(__file__).parent.parent / 'utils'
+sys.path.append(str(utils_path))
+
+from file_utils import find_files_by_extension
+from deep_parser import parse_scene_or_prefab
+from json_utils import write_json
+from yaml_utils import load_unity_yaml, convert_to_plain_python_types
+from hierarchy_utils import HierarchyParser
+
+def main():
+    """Break every ``.unity`` and ``.prefab`` file under ``<input>/Assets`` into JSON files.
+
+    For each file a directory is created at ``<output>/<path relative to
+    input>`` (the directory carries the asset's own file name). Every entry
+    returned by ``parse_scene_or_prefab`` that has a truthy ``fileID`` is
+    written there as ``<fileID>.json``, and the IDs returned by
+    ``HierarchyParser.get_root_object_ids`` are written to
+    ``root_objects.json``. Per-file failures are reported on stderr and do
+    not stop the run. Exits with status 1 when ``<input>/Assets`` is not a
+    directory.
+    """
+    parser = argparse.ArgumentParser(description="Parses scenes and prefabs into a per-GameObject breakdown.")
+    parser.add_argument("--input", type=str, required=True, help="The root directory of the target Unity project.")
+    parser.add_argument("--output", type=str, required=True, help="The directory where the output will be saved.")
+    parser.add_argument("--indent", type=int, default=None)
+    args = parser.parse_args()
+
+    input_dir = Path(args.input).resolve()
+    output_dir = Path(args.output).resolve()
+    assets_dir = input_dir / "Assets"
+
+    if not assets_dir.is_dir():
+        # Report the problem instead of returning silently with success.
+        print(f"Error: 'Assets' directory not found in '{input_dir}'.", file=sys.stderr)
+        sys.exit(1)
+
+    scene_files = find_files_by_extension(str(assets_dir), '.unity')
+    prefab_files = find_files_by_extension(str(assets_dir), '.prefab')
+    files_to_process = scene_files + prefab_files
+
+    if not files_to_process:
+        print("No scene or prefab files found.")
+        return
+
+    print("\n--- Starting Scene/Prefab Parsing ---")
+    print(f"Found {len(files_to_process)} files to process.")
+
+    for file_path_str in files_to_process:
+        file_path = Path(file_path_str)
+        print(f"\nProcessing: {file_path.name}")
+
+        gameobject_list = parse_scene_or_prefab(str(file_path))
+
+        # Output paths are relative to the project root, not to Assets.
+        relative_path = file_path.relative_to(input_dir)
+        asset_output_dir = output_dir / relative_path
+        try:
+            asset_output_dir.mkdir(parents=True, exist_ok=True)
+        except OSError as e:
+            # Skip this asset but keep processing the others.
+            print(f"Error creating directory {asset_output_dir}: {e}", file=sys.stderr)
+            continue
+
+        if gameobject_list:
+            print(f"Saving {len(gameobject_list)} GameObjects to {asset_output_dir}")
+            for go_data in gameobject_list:
+                file_id = go_data.get('fileID')
+                if file_id:
+                    output_json_path = asset_output_dir / f"{file_id}.json"
+                    try:
+                        write_json(go_data, output_json_path, indent=args.indent)
+                    except Exception as e:
+                        # One unwritable object must not abort the whole run.
+                        print(f"Error writing to {output_json_path}: {e}", file=sys.stderr)
+        else:
+            print(f"Skipped deep parsing for {file_path.name}.")
+
+        try:
+            documents = load_unity_yaml(file_path)
+            if not documents:
+                continue
+
+            raw_object_map = {int(doc.anchor.value): doc for doc in documents if hasattr(doc, 'anchor') and doc.anchor is not None}
+            object_map = {file_id: convert_to_plain_python_types(obj) for file_id, obj in raw_object_map.items()}
+
+            # Named separately so the ArgumentParser above is not rebound.
+            hierarchy_parser = HierarchyParser(object_map)
+            root_object_ids = hierarchy_parser.get_root_object_ids()
+
+            root_ids_list = [file_id for file_id, _ in root_object_ids]
+
+            if root_ids_list:
+                roots_output_path = asset_output_dir / "root_objects.json"
+                write_json(root_ids_list, roots_output_path, indent=args.indent)
+                print(f"Successfully saved root object list to {roots_output_path}")
+
+        except Exception as e:
+            print(f"Error during hierarchy parsing for {file_path.name}: {e}", file=sys.stderr)
+
+    print("Scene and prefab parsing complete.")
+
+if __name__ == "__main__":
+    main()

+ 7 - 0
Assets/LLM/source/parsers/parse_scenes_and_prefabs.py.meta

@@ -0,0 +1,7 @@
+fileFormatVersion: 2
+guid: 572fb71340063430aa7760a89b58154f
+DefaultImporter:
+  externalObjects: {}
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: