"""Helpers for writing Python objects to JSON files, with optional shrinking."""

import json
import sys
from pathlib import Path

# Ensure the reducer is importable
try:
    from json_reducer import tokenize_from_object
except ImportError:
    print(
        "Warning: json_reducer could not be imported. The 'shrink' feature will be disabled.",
        file=sys.stderr,
    )

    # Define a dummy function if the import fails
    def tokenize_from_object(data):
        """Fallback tokenizer: return ``data`` unchanged with an empty key map."""
        return data, {}


def _remove_empty_keys(data):
    """
    Recursively removes keys from dictionaries that have None, empty list,
    or empty dict values.

    Values are cleaned *before* the emptiness check, so a nested dictionary
    that becomes empty once its own empty keys are removed is dropped as well
    (``{"a": {"b": None}}`` -> ``{}``).

    List items are cleaned recursively but are never removed from their list,
    so positions inside lists are preserved. Other falsy values such as ``0``,
    ``""`` and ``False`` are kept.

    Args:
        data: Any JSON-like Python object.

    Returns:
        A new dict/list for dict/list inputs; any other value is returned as is.
    """
    if isinstance(data, dict):
        cleaned = {}
        for key, value in data.items():
            # Clean first: the emptiness test must see the post-cleaning value,
            # otherwise containers holding only empty entries would survive.
            cleaned_value = _remove_empty_keys(value)
            if cleaned_value is not None and cleaned_value != [] and cleaned_value != {}:
                cleaned[key] = cleaned_value
        return cleaned

    if isinstance(data, list):
        # Recursively process each item in the list.
        return [_remove_empty_keys(item) for item in data]

    # Return all other data types (strings, numbers, etc.) as is.
    return data


def write_json(data, file_path, indent=None, shrink=False, ensure_ascii=False):
    """
    Centralized function to write Python objects to a JSON file.

    The data is serialized to a string before the target file is opened, so
    a serialization failure never truncates or partially overwrites an
    existing file.

    Args:
        data: The Python object (e.g., dict, list) to serialize.
        file_path: The path to the output file.
        indent: The indentation level for pretty-printing. Defaults to None
            (compact). A value of 0 is also treated as compact.
        shrink: If True, tokenizes JSON keys and removes empty values to
            reduce size. The output is then wrapped as
            ``{"key_mapper": ..., "data": ...}`` and always written compact,
            regardless of ``indent``.
        ensure_ascii: Whether to escape non-ASCII characters. Defaults to False.

    Raises:
        TypeError: If the data cannot be serialized to JSON.
        IOError: If the file cannot be opened or written.
    """
    final_data = data
    output_indent = indent

    if shrink:
        # First, remove any keys with empty values from the original data.
        cleaned_data = _remove_empty_keys(data)
        # Then, tokenize the keys of the cleaned data.
        tokenized_data, key_mapper = tokenize_from_object(cleaned_data)
        final_data = {
            "key_mapper": key_mapper,
            "data": tokenized_data
        }
        # When shrinking, always use compact formatting for maximum size reduction.
        output_indent = None

    # If indent is 0, it should be treated as compact (None for json.dumps)
    if output_indent == 0:
        output_indent = None

    # Serialize up front: opening with 'w' truncates the file immediately, so
    # doing this inside the `with` block would destroy existing content
    # whenever the data turns out not to be serializable.
    try:
        if output_indent is None:
            payload = json.dumps(final_data, ensure_ascii=ensure_ascii, separators=(',', ':'))
        else:
            payload = json.dumps(final_data, indent=output_indent, ensure_ascii=ensure_ascii)
    except TypeError as e:
        print(f"Error serializing data to JSON: {e}", file=sys.stderr)
        raise

    try:
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(payload)
    except IOError as e:
        print(f"Error writing JSON to {file_path}: {e}", file=sys.stderr)
        raise