浏览代码

Sujith :) ->
1. Added an editor menu script for triggering data extraction
2. Fixed and modified the high-level export

Sujith:) 7 小时之前
父节点
当前提交
dcf5ea2609

+ 100 - 0
Assets/LLM/Editor/DataExtractorMenu.cs

@@ -0,0 +1,100 @@
+using System.IO;
+using UnityEditor;
+using UnityEngine;
+using System.Diagnostics;
+
+namespace LLM.Editor
+{
+    public static class DataExtractorMenu
+    {
+        private const string HighLevelScript = "Assets/LLM/source/extract_high_level.py";
+        private const string MidLevelScript = "Assets/LLM/source/extract_mid_level.py";
+        private const string LowLevelScript = "Assets/LLM/source/extract_low_level.py";
+
+        [MenuItem("Tools/DataExtractor/High Level Export")]
+        private static void ExportHighLevel()
+        {
+            RunExtractor("High Level", HighLevelScript);
+        }
+
+        [MenuItem("Tools/DataExtractor/Mid Level Export")]
+        private static void ExportMidLevel()
+        {
+            RunExtractor("Mid Level", MidLevelScript);
+        }
+
+        [MenuItem("Tools/DataExtractor/Low Level Export")]
+        private static void ExportLowLevel()
+        {
+            RunExtractor("Low Level", LowLevelScript);
+        }
+
+        private static void RunExtractor(string level, string scriptPath)
+        {
+            var outputPath = EditorUtility.OpenFolderPanel($"Select Output Folder for {level} Export", "", "");
+
+            if (string.IsNullOrEmpty(outputPath))
+            {
+                UnityEngine.Debug.Log($"{level} export cancelled by user.");
+                return;
+            }
+
+            var projectRoot = Path.GetFullPath(Path.Combine(Application.dataPath, ".."));
+            var pythonExecutable = Path.Combine(projectRoot, "venv", "bin", "python3");
+            var fullScriptPath = Path.Combine(projectRoot, scriptPath);
+
+            if (!File.Exists(pythonExecutable))
+            {
+                UnityEngine.Debug.LogError($"Python executable not found at: {pythonExecutable}");
+                EditorUtility.DisplayDialog("Export Error", "The Python virtual environment executable was not found. Please ensure the 'venv' directory is set up correctly.", "OK");
+                return;
+            }
+
+            if (!File.Exists(fullScriptPath))
+            {
+                UnityEngine.Debug.LogError($"Extractor script not found at: {fullScriptPath}");
+                EditorUtility.DisplayDialog("Export Error", $"The Python script for the {level} extractor was not found.", "OK");
+                return;
+            }
+
+            var arguments = $"\"{fullScriptPath}\" --input \"{projectRoot}\" --output \"{outputPath}\"";
+        
+            UnityEngine.Debug.Log($"Running command: \"{pythonExecutable}\" {arguments}");
+
+            var process = new Process
+            {
+                StartInfo = new ProcessStartInfo
+                {
+                    FileName = pythonExecutable,
+                    Arguments = arguments,
+                    RedirectStandardOutput = true,
+                    RedirectStandardError = true,
+                    UseShellExecute = false,
+                    CreateNoWindow = true,
+                    WorkingDirectory = projectRoot
+                }
+            };
+
+            EditorUtility.DisplayProgressBar("Data Extractor", $"Running {level} export...", 0.5f);
+        
+            process.OutputDataReceived += (_, e) => { if (e.Data != null) UnityEngine.Debug.Log($"[Extractor] {e.Data}"); };
+            process.ErrorDataReceived += (_, e) => { if (e.Data != null) UnityEngine.Debug.LogError($"[Extractor ERROR] {e.Data}"); };
+
+            process.Start();
+            process.BeginOutputReadLine();
+            process.BeginErrorReadLine();
+
+            process.WaitForExit();
+            EditorUtility.ClearProgressBar();
+
+            if (process.ExitCode == 0)
+            {
+                EditorUtility.DisplayDialog("Export Complete", $"{level} data export finished successfully.", "OK");
+            }
+            else
+            {
+                EditorUtility.DisplayDialog("Export Failed", $"The {level} data export failed. Check the Unity Console for error messages.", "OK");
+            }
+        }
+    }
+}

+ 11 - 0
Assets/LLM/Editor/DataExtractorMenu.cs.meta

@@ -0,0 +1,11 @@
+fileFormatVersion: 2
+guid: 82d0e0384fe86445cb663eddcc98167c
+MonoImporter:
+  externalObjects: {}
+  serializedVersion: 2
+  defaultReferences: []
+  executionOrder: 0
+  icon: {instanceID: 0}
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 

+ 187 - 98
Assets/LLM/source/extract_high_level.py

@@ -11,101 +11,217 @@ from pathlib import Path
 utils_path = Path(__file__).parent / 'utils'
 sys.path.append(str(utils_path))
 
-# Now we can import our utility modules
-from yaml_utils import load_unity_yaml
-from file_utils import find_files_by_extension
+from yaml_utils import load_unity_yaml, convert_to_plain_python_types
+from file_utils import find_files_by_extension, create_guid_to_path_map
 
-def parse_project_settings(input_dir, output_dir):
def parse_physics_settings(input_dir, project_mode):
    """
    Reads the physics settings asset matching the project mode and returns
    the relevant values as a dict.

    3D projects use ProjectSettings/DynamicsManager.asset; 2D projects use
    ProjectSettings/Physics2DSettings.asset. Missing or unparseable files
    yield an empty dict.
    """
    if project_mode == "3D":
        file_name, root_key = "DynamicsManager.asset", "PhysicsManager"
        field_map = {
            'gravity': 'm_Gravity',
            'sleepThreshold': 'm_SleepThreshold',
            'solverType': 'm_SolverType',
            'layerCollisionMatrix': 'm_LayerCollisionMatrix',
            'autoSimulation': 'm_AutoSimulation',
            'autoSyncTransforms': 'm_AutoSyncTransforms',
        }
    else:  # 2D
        file_name, root_key = "Physics2DSettings.asset", "Physics2DSettings"
        field_map = {
            'gravity': 'm_Gravity',
            'velocityIterations': 'm_VelocityIterations',
            'positionIterations': 'm_PositionIterations',
            'layerCollisionMatrix': 'm_LayerCollisionMatrix',
            'autoSimulation': 'm_AutoSimulation',
            'autoSyncTransforms': 'm_AutoSyncTransforms',
        }

    physics_data = {}
    asset_path = input_dir / "ProjectSettings" / file_name
    if asset_path.is_file():
        docs = load_unity_yaml(str(asset_path))
        if docs:
            settings = convert_to_plain_python_types(docs[0]).get(root_key, {})
            # Insertion order matches the original per-branch assignments.
            for out_key, unity_key in field_map.items():
                physics_data[out_key] = settings.get(unity_key)

    return physics_data
+
def parse_project_settings(input_dir, output_dir):
    """
    Parses the Unity project's settings assets into a single manifest.json.

    Sources (all under <input_dir>/ProjectSettings unless noted):
      - ProjectSettings.asset:      product info, color space, scripting settings.
      - EditorSettings.asset:       2D/3D project mode.
      - GraphicsSettings.asset:     render pipeline (Built-in/URP/HDRP/Scriptable).
      - TagManager.asset:           tags and named layers.
      - EditorBuildSettings.asset:  scenes in the build list.
      - TimeManager.asset:          selected time settings.
      - Physics settings via parse_physics_settings().

    Args:
        input_dir:  Path to the Unity project root.
        output_dir: Path to the directory receiving manifest.json.
    """
    print("\n--- Starting Task 2: Comprehensive Project Settings Parser ---")

    manifest_data = {}
    # GUID -> asset-path map, used below to resolve the render pipeline asset.
    guid_map = create_guid_to_path_map(str(input_dir))

    # --- ProjectSettings.asset ---
    project_settings_path = input_dir / "ProjectSettings" / "ProjectSettings.asset"
    if project_settings_path.is_file():
        docs = load_unity_yaml(str(project_settings_path))
        if docs:
            player_settings = convert_to_plain_python_types(docs[0]).get('PlayerSettings', {})
            manifest_data['productName'] = player_settings.get('productName')
            manifest_data['companyName'] = player_settings.get('companyName')
            manifest_data['bundleVersion'] = player_settings.get('bundleVersion')
            manifest_data['activeColorSpace'] = player_settings.get('m_ActiveColorSpace')
            manifest_data['activeInputHandler'] = player_settings.get('activeInputHandler')
            manifest_data['allowUnsafeCode'] = player_settings.get('allowUnsafeCode')
            manifest_data['scriptingDefineSymbols'] = player_settings.get('scriptingDefineSymbols')

            # Maps from Unity's serialized enum values to human-readable names.
            scripting_backend_map = {0: "Mono", 1: "IL2CPP"}
            api_compatibility_map = {3: ".NET Framework", 6: ".NET Standard 2.1"}

            scripting_backends = player_settings.get('scriptingBackend', {})
            api_levels = player_settings.get('apiCompatibilityLevelPerPlatform', {})

            # --- Deduce configured platforms from per-platform setting keys ---
            configured_platforms = set()
            if 'applicationIdentifier' in player_settings:
                configured_platforms.update(player_settings['applicationIdentifier'].keys())
            if 'scriptingBackend' in player_settings:
                configured_platforms.update(player_settings['scriptingBackend'].keys())
            manifest_data['configuredPlatforms'] = sorted(configured_platforms)

            # Keep only stripping levels for platforms the project actually targets.
            managed_stripping_level = player_settings.get('managedStrippingLevel', {})
            manifest_data['managedStrippingLevel'] = {
                platform: level
                for platform, level in managed_stripping_level.items()
                if platform in manifest_data['configuredPlatforms']
            }

            # Resolve scripting backend / API level for every configured platform,
            # defaulting to Mono and the project's global apiCompatibilityLevel.
            default_api_level = player_settings.get('apiCompatibilityLevel')
            final_scripting_backends = {}
            final_api_levels = {}
            for platform in manifest_data['configuredPlatforms']:
                backend_val = scripting_backends.get(platform, 0)
                final_scripting_backends[platform] = scripting_backend_map.get(backend_val, f"Unknown ({backend_val})")
                level_val = api_levels.get(platform, default_api_level)
                final_api_levels[platform] = api_compatibility_map.get(level_val, f"Unknown ({level_val})")
            manifest_data['scriptingBackend'] = final_scripting_backends
            manifest_data['apiCompatibilityLevel'] = final_api_levels

    # --- EditorSettings.asset for 2D/3D Mode ---
    editor_settings_path = input_dir / "ProjectSettings" / "EditorSettings.asset"
    if editor_settings_path.is_file():
        docs = load_unity_yaml(str(editor_settings_path))
        if docs:
            editor_settings = convert_to_plain_python_types(docs[0]).get('EditorSettings', {})
            # m_DefaultBehaviorMode: 1 == 2D; anything else is treated as 3D.
            manifest_data['projectMode'] = "2D" if editor_settings.get('m_DefaultBehaviorMode') == 1 else "3D"

    # --- GraphicsSettings.asset for Render Pipeline ---
    graphics_settings_path = input_dir / "ProjectSettings" / "GraphicsSettings.asset"
    manifest_data['renderPipeline'] = 'Built-in'
    if graphics_settings_path.is_file():
        docs = load_unity_yaml(str(graphics_settings_path))
        if docs:
            graphics_settings = convert_to_plain_python_types(docs[0]).get('GraphicsSettings', {})
            pipeline_ref = graphics_settings.get('m_CustomRenderPipeline') or graphics_settings.get('m_SRPDefaultSettings', {}).get('UnityEngine.Rendering.Universal.UniversalRenderPipeline')
            if pipeline_ref and pipeline_ref.get('guid'):
                guid = pipeline_ref['guid']
                if guid in guid_map:
                    # Classify the pipeline by the referenced asset's file name.
                    asset_name = Path(guid_map[guid]).name.upper()
                    if "URP" in asset_name:
                        manifest_data['renderPipeline'] = 'URP'
                    elif "HDRP" in asset_name:
                        manifest_data['renderPipeline'] = 'HDRP'
                    else:
                        manifest_data['renderPipeline'] = 'Scriptable'

    # --- TagManager.asset ---
    tag_manager_path = input_dir / "ProjectSettings" / "TagManager.asset"
    if tag_manager_path.is_file():
        docs = load_unity_yaml(str(tag_manager_path))
        if docs:
            tag_manager = convert_to_plain_python_types(docs[0]).get('TagManager', {})
            manifest_data['tags'] = tag_manager.get('tags')
            layers_list = tag_manager.get('layers', [])
            # Only include layers that have a name, preserving their index.
            manifest_data['layers'] = {i: name for i, name in enumerate(layers_list) if name}

    # --- EditorBuildSettings.asset ---
    build_settings_path = input_dir / "ProjectSettings" / "EditorBuildSettings.asset"
    if build_settings_path.is_file():
        docs = load_unity_yaml(str(build_settings_path))
        if docs:
            build_settings = convert_to_plain_python_types(docs[0]).get('EditorBuildSettings', {})
            manifest_data['buildScenes'] = [
                {'path': scene.get('path'), 'enabled': scene.get('enabled') == 1}
                for scene in build_settings.get('m_Scenes', [])
            ]

    # --- TimeManager.asset ---
    time_manager_path = input_dir / "ProjectSettings" / "TimeManager.asset"
    if time_manager_path.is_file():
        docs = load_unity_yaml(str(time_manager_path))
        if docs:
            time_manager = convert_to_plain_python_types(docs[0]).get('TimeManager', {})
            # Cherry-pick only the useful time settings.
            manifest_data['timeSettings'] = {
                'Fixed Timestep': time_manager.get('Fixed Timestep'),
                'Maximum Allowed Timestep': time_manager.get('Maximum Allowed Timestep'),
                'm_TimeScale': time_manager.get('m_TimeScale'),
                'Maximum Particle Timestep': time_manager.get('Maximum Particle Timestep')
            }

    # --- Physics Settings (depends on the project mode detected above) ---
    manifest_data['physicsSettings'] = parse_physics_settings(input_dir, manifest_data.get('projectMode', '3D'))

    # --- Write manifest.json (compact separators to minimize token count) ---
    manifest_output_path = output_dir / "manifest.json"
    try:
        with open(manifest_output_path, 'w', encoding='utf-8') as f:
            json.dump(manifest_data, f, separators=(',', ':'))
        print(f"Successfully created manifest.json at {manifest_output_path}")
    except IOError as e:
        print(f"Error writing to {manifest_output_path}. {e}", file=sys.stderr)
+
 
 def parse_package_manifests(input_dir, output_dir):
     """
-    Parses package manifests and creates a combined packages.json file.
+    Parses the primary package manifest and creates a clean packages.json file.
     """
     print("\n--- Starting Task 3: Package Manifest Extractor ---")
     
     manifest_path = input_dir / "Packages" / "manifest.json"
-    lock_path = input_dir / "Packages" / "packages-lock.json"
     
-    packages_data = {}
-
-    # Read manifest.json
     if manifest_path.is_file():
         try:
             with open(manifest_path, 'r', encoding='utf-8') as f:
-                packages_data['manifest'] = json.load(f)
-        except (IOError, json.JSONDecodeError) as e:
-            print(f"Error reading {manifest_path}: {e}", file=sys.stderr)
-    else:
-        print(f"Warning: {manifest_path} not found.")
+                packages_data = json.load(f)
+            
+            packages_output_path = output_dir / "packages.json"
+            with open(packages_output_path, 'w', encoding='utf-8') as f:
+                json.dump(packages_data, f, separators=(',', ':')) # Compact output
+            print(f"Successfully created packages.json at {packages_output_path}")
 
-    # Read packages-lock.json
-    if lock_path.is_file():
-        try:
-            with open(lock_path, 'r', encoding='utf-8') as f:
-                packages_data['lock'] = json.load(f)
         except (IOError, json.JSONDecodeError) as e:
-            print(f"Error reading {lock_path}: {e}", file=sys.stderr)
+            print(f"Error processing {manifest_path}: {e}", file=sys.stderr)
     else:
-        print(f"Warning: {lock_path} not found.")
-
-    # Write the combined data to packages.json
-    if packages_data:
-        packages_output_path = output_dir / "packages.json"
-        try:
-            with open(packages_output_path, 'w', encoding='utf-8') as f:
-                json.dump(packages_data, f, indent=4)
-            print(f"Successfully created packages.json at {packages_output_path}")
-        except IOError as e:
-            print(f"Error: Could not write to {packages_output_path}. {e}", file=sys.stderr)
+        print(f"Warning: {manifest_path} not found.")
 
 def generate_guid_mappers(input_dir, output_dir):
     """
@@ -120,16 +236,10 @@ def generate_guid_mappers(input_dir, output_dir):
     meta_files = find_files_by_extension(str(assets_dir), '.meta')
     print(f"Found {len(meta_files)} .meta files to process.")
 
-    # Asset type mapping based on file extensions
     asset_type_map = {
-        '.prefab': 'prefabs',
-        '.unity': 'scenes',
-        '.mat': 'materials',
-        '.cs': 'scripts',
-        '.png': 'textures',
-        '.jpg': 'textures',
-        '.jpeg': 'textures',
-        '.asset': 'scriptable_objects',
+        '.prefab': 'prefabs', '.unity': 'scenes', '.mat': 'materials',
+        '.cs': 'scripts', '.png': 'textures', '.jpg': 'textures',
+        '.jpeg': 'textures', '.asset': 'scriptable_objects',
     }
     
     guid_maps = {value: {} for value in asset_type_map.values()}
@@ -138,13 +248,17 @@ def generate_guid_mappers(input_dir, output_dir):
     for meta_file_path_str in meta_files:
         meta_file_path = Path(meta_file_path_str)
         asset_file_path = Path(meta_file_path_str.rsplit('.meta', 1)[0])
-        
+
+        # Skip .meta files whose companion path is a directory: folder assets
+        # have no asset file to map.
+        if not asset_file_path.is_file():
+            continue
+
         guid = None
         try:
             with open(meta_file_path, 'r', encoding='utf-8') as f:
                 for line in f:
                     if line.strip().startswith('guid:'):
-                        guid = line.split(':')[1].strip()
+                        guid = line.strip().split(':')[1].strip()
                         break
         except Exception as e:
             print(f"Warning: Could not read or parse guid from {meta_file_path}. {e}", file=sys.stderr)
@@ -153,25 +267,21 @@ def generate_guid_mappers(input_dir, output_dir):
         if guid:
             asset_ext = asset_file_path.suffix.lower()
             asset_type = asset_type_map.get(asset_ext, 'others')
-            
-            # Make path relative to the input directory for consistency
             relative_path = asset_file_path.relative_to(input_dir).as_posix()
             guid_maps[asset_type][guid] = relative_path
 
-    # Write the GUID maps to separate JSON files
     mappers_dir = output_dir / "GuidMappers"
     try:
         mappers_dir.mkdir(parents=True, exist_ok=True)
         for asset_type, guid_map in guid_maps.items():
-            if guid_map: # Only write files for types that have assets
+            if guid_map:
                 output_path = mappers_dir / f"{asset_type}.json"
                 with open(output_path, 'w', encoding='utf-8') as f:
-                    json.dump(guid_map, f, indent=4)
+                    json.dump(guid_map, f, separators=(',', ':')) # Compact output
         print(f"Successfully created GUID mappers in {mappers_dir}")
     except OSError as e:
         print(f"Error: Could not create GUID mapper directory or files. {e}", file=sys.stderr)
 
-
 def main():
     """
     Main function to run the high-level data extraction process.
@@ -179,21 +289,10 @@ def main():
     parser = argparse.ArgumentParser(
         description="Extracts high-level summary data from a Unity project."
     )
-    parser.add_argument(
-        "--input",
-        type=str,
-        required=True,
-        help="The root directory of the target Unity project."
-    )
-    parser.add_argument(
-        "--output",
-        type=str,
-        required=True,
-        help="The directory where the generated output folder will be saved."
-    )
+    parser.add_argument("--input", type=str, required=True, help="The root directory of the target Unity project.")
+    parser.add_argument("--output", type=str, required=True, help="The directory where the generated output folder will be saved.")
     args = parser.parse_args()
 
-    # --- 1. Validate inputs and set up paths ---
     input_dir = Path(args.input)
     output_dir = Path(args.output)
 
@@ -201,7 +300,6 @@ def main():
         print(f"Error: Input path '{input_dir}' is not a valid directory.", file=sys.stderr)
         sys.exit(1)
 
-    # Create the main output folder, named "HighLevel"
     high_level_output_dir = output_dir / "HighLevel"
     try:
         high_level_output_dir.mkdir(parents=True, exist_ok=True)
@@ -210,21 +308,12 @@ def main():
         print(f"Error: Could not create output directory '{high_level_output_dir}'. {e}", file=sys.stderr)
         sys.exit(1)
 
-    # --- Run Extraction Tasks ---
-    print("\nMilestone 2, Task 1 Complete: Argument parsing and folder creation successful.")
-    
-    # Run Task 2
     parse_project_settings(input_dir, high_level_output_dir)
-
-    # Run Task 3
     parse_package_manifests(input_dir, high_level_output_dir)
-
-    # Run Task 4
     generate_guid_mappers(input_dir, high_level_output_dir)
 
     print("\nHigh-level extraction complete.")
 
-
 if __name__ == "__main__":
     main()
 

+ 1 - 1
Assets/LLM/source/extract_low_level.py

@@ -82,7 +82,7 @@ def main():
                     output_json_path = asset_output_dir / f"{file_id}.json"
                     try:
                         with open(output_json_path, 'w', encoding='utf-8') as f:
-                            json.dump(go_data, f, indent=4)
+                            json.dump(go_data, f, separators=(',', ':'))
                     except IOError as e:
                         print(f"Error writing to {output_json_path}: {e}", file=sys.stderr)
         else:

+ 1 - 1
Assets/LLM/source/extract_mid_level.py

@@ -113,7 +113,7 @@ def main():
             # Save the JSON output
             try:
                 with open(output_path, 'w', encoding='utf-8') as f:
-                    json.dump(virtual_tree, f, indent=4)
+                    json.dump(virtual_tree, f, separators=(',', ':'))
                 print(f"Successfully created: {output_path}")
             except IOError as e:
                 print(f"Error writing to {output_path}: {e}", file=sys.stderr)

二进制
Assets/LLM/source/utils/__pycache__/file_utils.cpython-313.pyc


二进制
Assets/LLM/source/utils/__pycache__/virtual_tree_builder.cpython-313.pyc


二进制
Assets/LLM/source/utils/__pycache__/yaml_utils.cpython-313.pyc


+ 35 - 16
Assets/LLM/source/utils/file_utils.py

@@ -50,25 +50,44 @@ def replicate_directory_structure(source_root, target_root):
         # Create the directory in the target, ignoring if it already exists
         target_path.mkdir(parents=True, exist_ok=True)
 
def create_guid_to_path_map(root_dir):
    """
    Creates a dictionary mapping GUIDs to their corresponding asset file paths,
    scanning both the Assets and Packages directories and ignoring folders.
    """
    guid_map = {}
    # Scan order matters: the first occurrence of a GUID wins.
    search_roots = (
        os.path.join(root_dir, 'Assets'),
        os.path.join(root_dir, 'Packages'),
        os.path.join(root_dir, 'Library', 'PackageCache'),
    )

    for search_root in search_roots:
        if not os.path.isdir(search_root):
            continue

        for meta_file_path in find_files_by_extension(search_root, '.meta'):
            try:
                guid = None
                is_folder = False
                with open(meta_file_path, 'r', encoding='utf-8') as handle:
                    for raw_line in handle:
                        text = raw_line.strip()
                        if text.startswith('guid:'):
                            guid = text.split(':')[1].strip()
                        if text == 'folderAsset: yes':
                            # Folder assets have no mappable file; skip them.
                            is_folder = True
                            break

                if not is_folder and guid and guid not in guid_map:
                    # Strip the trailing '.meta' to get the asset's own path.
                    guid_map[guid] = meta_file_path[:-5]
            except Exception as e:
                print(f"Warning: Could not process meta file {meta_file_path}. {e}", file=sys.stderr)
    return guid_map
 
 if __name__ == '__main__':

+ 1 - 1
Assets/LLM/source/utils/json_reducer.py

@@ -90,7 +90,7 @@ def main():
     # --- 4. Write to the output file ---
     try:
         with open(args.output_file, 'w', encoding='utf-8') as f:
-            json.dump(final_output, f, indent=4) # Using indent for readability, can be removed for max reduction
+            json.dump(final_output, f, separators=(',', ':'))
         print(f"Successfully created tokenized JSON at: {args.output_file}")
     except IOError as e:
         print(f"Error: Could not write to output file {args.output_file}. {e}", file=sys.stderr)

+ 9 - 1
Assets/LLM/source/utils/yaml_utils.py

@@ -10,9 +10,17 @@ warnings.simplefilter('ignore', ruamel.yaml.error.MantissaNoDotYAML1_1Warning)
 def convert_to_plain_python_types(data):
     """
     Recursively converts ruamel.yaml specific types into plain Python types.
+    Also handles converting boolean keys to strings to avoid YAML parsing issues.
     """
     if isinstance(data, ruamel.yaml.comments.CommentedMap):
-        return {convert_to_plain_python_types(k): convert_to_plain_python_types(v) for k, v in data.items()}
+        new_dict = {}
+        for k, v in data.items():
+            key = k
+            if isinstance(k, bool):
+                # This is a specific workaround for the 'y' key in vectors being parsed as True
+                key = 'y' if k is True else str(k)
+            new_dict[key] = convert_to_plain_python_types(v)
+        return new_dict
     elif isinstance(data, ruamel.yaml.comments.CommentedSeq):
         return [convert_to_plain_python_types(i) for i in data]
     elif isinstance(data, ruamel.yaml.scalarstring.ScalarString):