Source code for sloth.mmcif.exporter

#!/usr/bin/env python3
"""
JSON Exporter for SLOTH - Export mmCIF data to nested JSON format.

This module exports mmCIF data to nested JSON format using the
RelationshipResolver from serializer.py, working directly from the mmCIF dictionary.
"""

import json
import os
from pathlib import Path
from typing import Dict, Any, Optional, Union
from .models import MMCIFDataContainer
from .common import BaseExporter
from .serializer import (
    RelationshipResolver,
    DictionaryParser,
    MappingGenerator,
    get_cache_manager
)


class JSONExporter(BaseExporter):
    """Exporter that serializes an mmCIF data container as nested JSON.

    Nesting is produced by a ``RelationshipResolver`` that is wired up in
    ``__init__`` from a dictionary parser and a mapping generator.
    """

    def __init__(
        self,
        dict_path: Optional[Union[str, Path]] = None,
        cache_dir: Optional[str] = None,
        quiet: bool = False,
        denormalize: bool = False
    ):
        """Build the resolver pipeline used for JSON export.

        Args:
            dict_path: Location of the mmCIF dictionary file; handed to the
                base class and later used as the dictionary parser source.
            cache_dir: Cache directory; when the base class leaves
                ``self.cache_dir`` falsy, ``~/.sloth_cache`` is used.
            quiet: Passed to the base class; ``self.quiet`` is forwarded to
                the parser and mapping generator and gates the export message.
            denormalize: Forwarded to ``RelationshipResolver.set_denormalize``
                (per the original docs: embed reference/lookup data).
        """
        super().__init__(dict_path, cache_dir, quiet)

        # Pick the cache location: the configured one, else a per-user default.
        cache_location = self.cache_dir
        if not cache_location:
            cache_location = os.path.join(
                os.path.expanduser("~"), ".sloth_cache"
            )
        cache = get_cache_manager(cache_location)

        # The dictionary parser reads from the dictionary path kept by the base class.
        parser = DictionaryParser(cache, self.quiet)
        parser.source = self.dict_path

        # The resolver is driven by mappings generated from the parsed dictionary.
        generator = MappingGenerator(parser, cache, self.quiet)
        self.resolver = RelationshipResolver(generator)
        self.resolver.set_denormalize(denormalize)

    def export_data(
        self,
        mmcif_data: MMCIFDataContainer,
        file_path: Optional[Union[str, Path]] = None,
        indent: Optional[int] = None
    ) -> Optional[str]:
        """Serialize ``mmcif_data`` to nested JSON text.

        Args:
            mmcif_data: Container holding the mmCIF blocks to export.
            file_path: Destination file; when falsy the JSON text is returned
                instead of being written.
            indent: Indentation passed to ``json.dumps`` (``None`` gives
                compact output).

        Returns:
            The JSON string when no ``file_path`` is given, otherwise ``None``.
        """
        payload = json.dumps(
            self._to_nested_json(mmcif_data),
            indent=indent,
            ensure_ascii=False,
        )

        # No destination: hand the text straight back to the caller.
        if not file_path:
            return payload

        with open(file_path, "w", encoding="utf-8") as handle:
            handle.write(payload)

        if not self.quiet:
            print(f"Exported nested JSON to: {file_path}")
        return None

    def _to_nested_json(
        self,
        mmcif_data: MMCIFDataContainer
    ) -> Dict[str, Any]:
        """Convert every block of ``mmcif_data`` into a nested dictionary.

        Each block is resolved on its own so that block boundaries survive in
        the output.

        Args:
            mmcif_data: Container holding the mmCIF blocks to convert.

        Returns:
            Mapping of ``data_``-prefixed block names to dictionaries of
            ``_``-prefixed category names and their resolved data.
        """
        exported: Dict[str, Any] = {}

        for block in mmcif_data:
            # Wrap the block in its own container so it is resolved in isolation.
            isolated = MMCIFDataContainer()
            isolated.data[block.name] = block

            resolved = self.resolver.resolve_relationships(isolated)

            # Category names always carry a leading underscore in the output.
            categories = {
                (name if name.startswith("_") else f"_{name}"): content
                for name, content in resolved.items()
            }

            # Block names always carry the "data_" prefix in the output.
            label = block.name
            key = label if label.startswith("data_") else f"data_{label}"
            exported[key] = categories

        return exported