# Utilities for writing Python objects to JSON files, with optional key tokenization and empty-value removal.
- import json
- import sys
- from pathlib import Path
# json_reducer is optional: when it cannot be imported, warn on stderr and
# install a pass-through replacement so callers can still run.
try:
    from json_reducer import tokenize_from_object
except ImportError:
    print(
        "Warning: json_reducer could not be imported. "
        "The 'shrink' feature will be disabled.",
        file=sys.stderr,
    )

    def tokenize_from_object(data):
        """Stand-in tokenizer: return ``data`` unchanged with an empty key map."""
        empty_mapping = {}
        return data, empty_mapping
- def _remove_empty_keys(data):
- """
- Recursively removes keys from dictionaries that have None, empty list, or empty dict values.
- """
- if isinstance(data, dict):
- # Rebuild the dictionary, only including items that don't have empty values.
- # Recursively process the values to clean nested structures.
- return {k: _remove_empty_keys(v) for k, v in data.items() if v is not None and v != [] and v != {}}
- elif isinstance(data, list):
- # Recursively process each item in the list.
- return [_remove_empty_keys(item) for item in data]
- else:
- # Return all other data types (strings, numbers, etc.) as is.
- return data
def write_json(data, file_path, indent=None, shrink=False, ensure_ascii=False):
    """Serialize ``data`` as JSON and write it to ``file_path`` as UTF-8.

    The document is serialized completely in memory before the destination
    file is opened, so a serialization failure never truncates or partially
    overwrites an existing file.

    Args:
        data: The Python object (e.g., dict, list) to serialize.
        file_path: The path to the output file.
        indent: Indentation level for pretty-printing. ``None`` and ``0``
            both produce compact output (``","``/``":"`` separators).
        shrink: If True, ``data`` is passed through ``_remove_empty_keys``
            and then ``tokenize_from_object``; the file then contains
            ``{"key_mapper": ..., "data": ...}`` and is always written
            compactly, regardless of ``indent``.
        ensure_ascii: Whether to escape non-ASCII characters. Defaults to
            False.

    Raises:
        TypeError, ValueError: If the data cannot be serialized to JSON.
        OSError: If the file cannot be opened or written.
    """
    if shrink:
        # Drop empty values first, then tokenize the keys of what is left.
        cleaned_data = _remove_empty_keys(data)
        tokenized_data, key_mapper = tokenize_from_object(cleaned_data)
        final_data = {
            "key_mapper": key_mapper,
            "data": tokenized_data,
        }
        # Shrinking always uses compact formatting for maximum size reduction.
        output_indent = None
    else:
        final_data = data
        # indent=0 is treated as "compact", not as json's newline-only mode.
        output_indent = None if indent == 0 else indent

    # Serialize before touching the file: opening with 'w' truncates it, and
    # a failure halfway through would otherwise leave a corrupt document.
    try:
        if output_indent is None:
            text = json.dumps(
                final_data, ensure_ascii=ensure_ascii, separators=(',', ':')
            )
        else:
            text = json.dumps(
                final_data, indent=output_indent, ensure_ascii=ensure_ascii
            )
    except (TypeError, ValueError) as e:
        print(f"Error serializing data to JSON: {e}", file=sys.stderr)
        raise

    try:
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(text)
    except OSError as e:
        print(f"Error writing JSON to {file_path}: {e}", file=sys.stderr)
        raise
|