Advertisement
davidleathers113

Skype Parser

Mar 13th, 2025
106
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 161.67 KB | Source Code | 0 0
  1. # ═════════════════════════════════════════════════════════════════════════════
  2. # ═══════════════════════════ ENHANCEMENT OVERVIEW ═══════════════════════════
  3. # ═════════════════════════════════════════════════════════════════════════════
  4. """
  5. SkypeExporter Enhancements:
  6.  
  7. 1. Basic Mode:
  8.   - Added simplified procedural workflow
  9.   - Streamlined user interaction
  10.   - Direct prompts with clear instructions
  11.  
  12. 2. Enhanced Filename Sanitization:
  13.   - Reserved Windows name handling
  14.   - Cross-platform compatibility
  15.   - Length limits enforcement
  16.   - Special character handling
  17.  
  18. 3. Memory Profiling & Optimization:
  19.   - Memory usage tracking
  20.   - Dynamic batch size adjustment
  21.   - Automated garbage collection
  22.   - System-aware resource allocation
  23.  
  24. 4. PostgreSQL Export:
  25.   - Normalized database schema
  26.   - SQLAlchemy ORM integration
  27.   - Batch insertion optimization
  28.   - Connection pooling and management
  29.   - Configurable database settings
  30. """
  31.  
  32. # ═════════════════════════════════════════════════════════════════════════════
  33. # ═══════════════════════════ IMPORTS AND SETUP ═══════════════════════════════
  34. # ═════════════════════════════════════════════════════════════════════════════
  35.  
  36. import argparse
  37. import asyncio
  38. import concurrent.futures
  39. import dataclasses
  40. import datetime
  41. import fnmatch
  42. import gc
  43. import html
  44. import importlib.metadata
  45. import json
  46. import logging
  47. import os
  48. import platform
  49. import psutil
  50. import re
  51. import shutil
  52. import signal
  53. import sys
  54. import tarfile
  55. import tempfile
  56. import time
  57. import traceback
  58. import uuid
  59. import zipfile
  60. from abc import ABC, abstractmethod
  61. from contextlib import contextmanager
  62. from dataclasses import dataclass, field
  63. from enum import Enum, auto
  64. from pathlib import Path
  65. from typing import (Any, Dict, Generator, List, Optional, Set, Tuple)
  66.  
  67. # Import for SQLAlchemy
  68. try:
  69.     from sqlalchemy import (
  70.         Column, ForeignKey, Integer, String, DateTime, Boolean, Text, create_engine,
  71.         select, func, Index, UniqueConstraint
  72.     )
  73.     from sqlalchemy.orm import relationship, Session, sessionmaker, declarative_base
  74.     from sqlalchemy.ext.declarative import declared_attr
  75.     SQLALCHEMY_AVAILABLE = True
  76. except ImportError:
  77.     SQLALCHEMY_AVAILABLE = False
  78.  
  79. # Import for Rich and other optional libraries
  80. try:
  81.     from rich.console import Console
  82.     from rich.progress import Progress, TextColumn, BarColumn, TimeElapsedColumn, TimeRemainingColumn
  83.     from rich.table import Table
  84.     from rich.panel import Panel
  85.     from rich.markdown import Markdown
  86.     RICH_AVAILABLE = True
  87. except ImportError:
  88.     RICH_AVAILABLE = False
  89.  
  90. try:
  91.     from tqdm import tqdm
  92.     TQDM_AVAILABLE = True
  93. except ImportError:
  94.     TQDM_AVAILABLE = False
  95.  
  96. # ═════════════════════════════════════════════════════════════════════════════
  97. # ═══════════════════════════ CUSTOM EXCEPTIONS ══════════════════════════════
  98. # ═════════════════════════════════════════════════════════════════════════════
  99.  
  100. class SkypeExporterError(Exception):
  101.     """Base exception for all Skype Exporter errors."""
  102.     pass
  103.  
  104. class ConfigError(SkypeExporterError):
  105.     """Error in configuration settings."""
  106.     pass
  107.  
  108. class FileReadError(SkypeExporterError):
  109.     """Error reading input files."""
  110.     pass
  111.  
  112. class FileWriteError(SkypeExporterError):
  113.     """Error writing output files."""
  114.     pass
  115.  
  116. class ParseError(SkypeExporterError):
  117.     """Error parsing Skype data."""
  118.     pass
  119.  
  120. class TimestampError(ParseError):
  121.     """Error parsing timestamps."""
  122.     pass
  123.  
  124. class ExportError(SkypeExporterError):
  125.     """Error exporting conversations."""
  126.     pass
  127.  
  128. class DatabaseError(SkypeExporterError):
  129.     """Error with database operations."""
  130.     pass
  131.  
  132. class MemoryError(SkypeExporterError):
  133.     """Error with memory management."""
  134.     pass
  135.  
  136. # ═════════════════════════════════════════════════════════════════════════════
  137. # ═══════════════════════════ DEPENDENCY MANAGEMENT ═══════════════════════════
  138. # ═════════════════════════════════════════════════════════════════════════════
  139.  
# Minimum acceptable version for each third-party distribution.
# Keys are the distribution names handed to importlib.metadata.version() by
# check_dependencies(); values are the lowest version accepted by
# _compare_versions(). Iteration order is the declaration order below.
REQUIRED_PACKAGES = {
    "beautifulsoup4": "4.9.0",
    "lxml": "4.5.0",
    "colorama": "0.4.3",
    "tqdm": "4.45.0",
    "rich": "10.0.0",
    "jinja2": "3.0.0",
    "markdown": "3.3.0",
    "pyyaml": "6.0.0",
    "psutil": "5.8.0",  # Added for memory monitoring
    "sqlalchemy": "1.4.0",  # Added for PostgreSQL export
    "psycopg2-binary": "2.9.0",  # Added for PostgreSQL connection
    "alembic": "1.7.0",  # Added for database migrations
}
  154.  
  155. def check_dependencies() -> Dict[str, bool]:
  156.     """
  157.    Check if required dependencies are installed and at the correct version.
  158.  
  159.    Returns:
  160.        Dict[str, bool]: Dictionary of package names and whether they're properly installed
  161.    """
  162.     result = {}
  163.  
  164.     for package, min_version in REQUIRED_PACKAGES.items():
  165.         try:
  166.             installed_version = importlib.metadata.version(package)
  167.             version_ok = _compare_versions(installed_version, min_version) >= 0
  168.             result[package] = version_ok
  169.         except importlib.metadata.PackageNotFoundError:
  170.             result[package] = False
  171.  
  172.     return result
  173.  
  174. def _compare_versions(version1: str, version2: str) -> int:
  175.     """
  176.    Compare two version strings.
  177.  
  178.    Args:
  179.        version1: First version string
  180.        version2: Second version string
  181.  
  182.    Returns:
  183.        int: 1 if version1 > version2, 0 if equal, -1 if version1 < version2
  184.    """
  185.     def normalize(v):
  186.         return [int(x) for x in re.sub(r'(\.0+)*$', '', v).split(".")]
  187.  
  188.     v1 = normalize(version1)
  189.     v2 = normalize(version2)
  190.  
  191.     for i in range(max(len(v1), len(v2))):
  192.         n1 = v1[i] if i < len(v1) else 0
  193.         n2 = v2[i] if i < len(v2) else 0
  194.         if n1 > n2:
  195.             return 1
  196.         elif n1 < n2:
  197.             return -1
  198.  
  199.     return 0
  200.  
  201. def install_dependencies() -> None:
  202.     """
  203.    Check for missing dependencies and provide installation instructions.
  204.  
  205.    Instead of automatically installing packages, this now warns the user
  206.    and provides instructions for manual installation.
  207.    """
  208.     dependencies = check_dependencies()
  209.     missing = []
  210.  
  211.     for dep, installed in dependencies.items():
  212.         if not installed:
  213.             missing.append(dep)
  214.  
  215.     if missing:
  216.         print("\nWARNING: The following dependencies are missing:")
  217.         for dep in missing:
  218.             print(f"  - {dep}")
  219.  
  220.         print("\nPlease install them manually with:")
  221.         print(f"  pip install {' '.join(missing)}")
  222.         print("\nContinuing with limited functionality. Some features may not work correctly.")
  223.     else:
  224.         print("All dependencies are installed.")
  225.  
  226. # Import optional dependencies, which may fail
  227. try:
  228.     from bs4 import BeautifulSoup, MarkupResemblesLocatorWarning
  229.     import warnings
  230.     warnings.filterwarnings("ignore", category=MarkupResemblesLocatorWarning)
  231.     BEAUTIFULSOUP_AVAILABLE = True
  232. except ImportError:
  233.     BEAUTIFULSOUP_AVAILABLE = False
  234.  
  235. try:
  236.     from rich.console import Console
  237.     from rich.progress import Progress, TextColumn, BarColumn, TimeElapsedColumn, TimeRemainingColumn
  238.     from rich.panel import Panel
  239.     from rich.table import Table
  240.     from rich.syntax import Syntax
  241.     from rich.logging import RichHandler
  242.     from rich.traceback import install as install_rich_traceback
  243.     from rich.prompt import Prompt, Confirm
  244.     RICH_AVAILABLE = True
  245.     install_rich_traceback()
  246. except ImportError:
  247.     RICH_AVAILABLE = False
  248.  
  249. try:
  250.     from colorama import init as colorama_init
  251.     from colorama import Fore, Back, Style
  252.     COLORAMA_AVAILABLE = True
  253.     colorama_init()
  254. except ImportError:
  255.     COLORAMA_AVAILABLE = False
  256.  
  257. try:
  258.     from tqdm import tqdm
  259.     TQDM_AVAILABLE = True
  260. except ImportError:
  261.     TQDM_AVAILABLE = False
  262.  
  263. try:
  264.     import markdown
  265.     MARKDOWN_AVAILABLE = True
  266. except ImportError:
  267.     MARKDOWN_AVAILABLE = False
  268.  
  269. try:
  270.     import jinja2
  271.     JINJA2_AVAILABLE = True
  272. except ImportError:
  273.     JINJA2_AVAILABLE = False
  274.  
  275. try:
  276.     import yaml
  277.     YAML_AVAILABLE = True
  278. except ImportError:
  279.     YAML_AVAILABLE = False
  280.  
  281. try:
  282.     import sqlalchemy
  283.     from sqlalchemy import create_engine, Column, Integer, String, Text, DateTime, Boolean, ForeignKey
  284.     from sqlalchemy.orm import sessionmaker, relationship, Session, declarative_base
  285.     SQLALCHEMY_AVAILABLE = True
  286. except ImportError:
  287.     SQLALCHEMY_AVAILABLE = False
  288.  
  289. try:
  290.     import psycopg2
  291.     PSYCOPG2_AVAILABLE = True
  292. except ImportError:
  293.     PSYCOPG2_AVAILABLE = False
  294.  
  295. # ═════════════════════════════════════════════════════════════════════════════
  296. # ═══════════════════════════ MEMORY MANAGEMENT ═══════════════════════════════
  297. # ═════════════════════════════════════════════════════════════════════════════
  298.  
  299. class MemoryMonitor:
  300.     """
  301.    Monitor and manage memory usage during processing.
  302.  
  303.    This class provides utilities to track memory usage, optimize batch sizes,
  304.    and trigger garbage collection based on system resources.
  305.    """
  306.  
  307.     def __init__(self, ctx: 'AppContext'):
  308.         """Initialize memory monitor with application context."""
  309.         self.ctx = ctx
  310.         self.logger = ctx.logger.getChild('memory')
  311.         self.process = psutil.Process(os.getpid())  # Initialize the process object
  312.         self.usage_history = []
  313.         self.memory_samples = []  # Restore memory samples list
  314.         self.memory_timestamps = []  # Restore timestamps list
  315.         self.peak_usage = 0
  316.         self.last_memory_percent = None
  317.         self.last_gc_time = time.time()
  318.         self.memory_target = ctx.options.memory_threshold_percent
  319.         self.check_counter = 0  # Counter for adaptive memory checks
  320.  
  321.         # Capture initial memory usage
  322.         self.record_memory_usage()
  323.  
  324.         self.logger.debug(f"Memory monitor initialized with target: {self.memory_target}%")
  325.  
  326.     def get_memory_usage_mb(self) -> float:
  327.         """
  328.        Get current memory usage in megabytes.
  329.  
  330.        Returns:
  331.            Memory usage in MB
  332.        """
  333.         return self.process.memory_info().rss / (1024 * 1024)
  334.  
  335.     def get_memory_percent(self) -> float:
  336.         """
  337.        Get memory usage as percentage of system memory.
  338.  
  339.        Returns:
  340.            Memory usage percentage
  341.        """
  342.         return self.process.memory_percent()
  343.  
  344.     def get_system_memory_mb(self) -> float:
  345.         """
  346.        Get total system memory in megabytes.
  347.  
  348.        Returns:
  349.            Total system memory in MB
  350.        """
  351.         return psutil.virtual_memory().total / (1024 * 1024)
  352.  
  353.     def record_memory_usage(self) -> None:
  354.         """Record current memory usage for tracking."""
  355.         current_usage_mb = self.get_memory_usage_mb()
  356.         current_time = time.time()
  357.  
  358.         # Record in both tracking mechanisms for backward compatibility
  359.         self.usage_history.append(current_usage_mb)
  360.         self.memory_samples.append(current_usage_mb)
  361.         self.memory_timestamps.append(current_time)
  362.  
  363.         # Keep only the last 100 samples in both arrays
  364.         if len(self.memory_samples) > 100:
  365.             self.memory_samples.pop(0)
  366.             self.memory_timestamps.pop(0)
  367.  
  368.         if len(self.usage_history) > 100:
  369.             self.usage_history.pop(0)
  370.  
  371.     def check_memory(self) -> bool:
  372.         """
  373.        Check memory usage and optimize if needed.
  374.  
  375.        Returns:
  376.            True if optimization was performed, False otherwise
  377.        """
  378.         # Use an adaptive check interval based on previous memory usage
  379.         self.check_counter += 1
  380.  
  381.         # Default intervals for memory checks (operations between checks)
  382.         low_usage_interval = 100    # Less frequent checks when memory usage is low
  383.         medium_usage_interval = 25  # Medium frequency checks
  384.         high_usage_interval = 5     # Frequent checks when memory is high
  385.  
  386.         # Determine the check interval based on last measured memory percentage
  387.         if self.last_memory_percent is None:
  388.             check_interval = medium_usage_interval
  389.         elif self.last_memory_percent < 30:
  390.             check_interval = low_usage_interval
  391.         elif self.last_memory_percent < 60:
  392.             check_interval = medium_usage_interval
  393.         else:
  394.             check_interval = high_usage_interval
  395.  
  396.         # Skip check if we haven't reached the interval, unless it's the first check
  397.         if self.check_counter % check_interval != 0 and self.last_memory_percent is not None:
  398.             return False
  399.  
  400.         # Get current memory usage
  401.         memory_percent = self.get_memory_percent()
  402.         memory_usage_mb = self.get_memory_usage_mb()
  403.         self.last_memory_percent = memory_percent
  404.  
  405.         # Record usage for historical tracking
  406.         self.record_memory_usage()
  407.  
  408.         if memory_usage_mb > self.peak_usage:
  409.             self.peak_usage = memory_usage_mb
  410.  
  411.         # Check if memory usage exceeds threshold
  412.         if memory_percent > self.memory_target:
  413.             self.logger.warning(
  414.                 f"Memory usage high: {memory_percent:.1f}% ({memory_usage_mb:.1f} MB), "
  415.                 f"optimizing..."
  416.             )
  417.             self._optimize_memory()
  418.             return True
  419.  
  420.         # Occasionally collect garbage even if memory usage is low
  421.         # but at a lower frequency (every 5000 operations or 60 seconds)
  422.         elif (self.check_counter % 5000 == 0 or
  423.               (time.time() - self.last_gc_time > 60)):
  424.             self.logger.debug(
  425.                 f"Performing routine garbage collection: {memory_percent:.1f}% "
  426.                 f"({memory_usage_mb:.1f} MB)"
  427.             )
  428.             self._collect_garbage()
  429.  
  430.         self.logger.debug(
  431.             f"Memory usage: {memory_percent:.1f}% ({memory_usage_mb:.1f} MB) "
  432.             f"of {self.get_system_memory_mb():.1f} MB"
  433.         )
  434.  
  435.         return False
  436.  
  437.     def _optimize_memory(self) -> None:
  438.         """Optimize memory usage by adjusting batch sizes and collecting garbage."""
  439.         self.logger.info("Optimizing memory usage...")
  440.  
  441.         # Reduce batch size to conserve memory
  442.         current_batch_size = self.ctx.options.batch_size
  443.         new_batch_size = max(100, current_batch_size // 2)
  444.  
  445.         if new_batch_size < current_batch_size:
  446.             self.logger.info(f"Reducing batch size from {current_batch_size} to {new_batch_size}")
  447.             self.ctx.options.batch_size = new_batch_size
  448.  
  449.         # Reduce max workers if memory usage is very high
  450.         if self.get_memory_percent() > 90 and self.ctx.options.max_workers > 2:
  451.             self.logger.warning("Critical memory usage - reducing worker threads")
  452.             self.ctx.options.max_workers = max(1, self.ctx.options.max_workers // 2)
  453.  
  454.         # Force garbage collection
  455.         self._collect_garbage()
  456.  
  457.     def _collect_garbage(self) -> None:
  458.         """Force garbage collection to free memory."""
  459.         self.logger.debug("Running garbage collection...")
  460.  
  461.         before_mb = self.get_memory_usage_mb()
  462.         gc.collect()
  463.         after_mb = self.get_memory_usage_mb()
  464.  
  465.         freed_mb = before_mb - after_mb
  466.         self.logger.debug(f"Garbage collection freed {freed_mb:.2f} MB")
  467.  
  468.     def calculate_optimal_batch_size(self, item_count: int) -> int:
  469.         """
  470.        Calculate optimal batch size based on available system resources.
  471.  
  472.        Args:
  473.            item_count: Total number of items to process
  474.  
  475.        Returns:
  476.            Optimal batch size
  477.        """
  478.         # Get available memory in MB
  479.         available_memory = psutil.virtual_memory().available / (1024 * 1024)
  480.  
  481.         # Estimate memory per item (using exponential moving average if we have samples)
  482.         current_memory = self.get_memory_usage_mb()
  483.         memory_per_item = 0.1  # Default assumption: 100KB per item
  484.  
  485.         # Calculate optimal batch size - aim to use at most 20% of available memory
  486.         max_memory_to_use = available_memory * 0.2
  487.         optimal_batch_size = int(max_memory_to_use / memory_per_item)
  488.  
  489.         # Constrain within reasonable limits
  490.         optimal_batch_size = min(optimal_batch_size, 5000)  # Never go above 5000
  491.         optimal_batch_size = max(optimal_batch_size, 100)   # Never go below 100
  492.  
  493.         # Round to nearest 100 for cleaner numbers
  494.         optimal_batch_size = round(optimal_batch_size / 100) * 100
  495.  
  496.         self.logger.debug(f"Calculated optimal batch size: {optimal_batch_size} "
  497.                         f"(available memory: {available_memory:.2f} MB)")
  498.  
  499.         return optimal_batch_size
  500.  
  501.     def get_memory_report(self) -> Dict[str, Any]:
  502.         """
  503.        Generate a report on memory usage.
  504.  
  505.        Returns:
  506.            Dictionary with memory statistics
  507.        """
  508.         return {
  509.             "current_usage_mb": self.get_memory_usage_mb(),
  510.             "current_usage_percent": self.get_memory_percent(),
  511.             "peak_usage_mb": max(self.memory_samples) if self.memory_samples else self.get_memory_usage_mb(),
  512.             "system_memory_mb": self.get_system_memory_mb(),
  513.             "batch_size": self.ctx.options.batch_size,
  514.             "max_workers": self.ctx.options.max_workers
  515.         }
  516.  
  517. # ═════════════════════════════════════════════════════════════════════════════
  518. # ═══════════════════════════ FILEPATH UTILITIES ═══════════════════════════════
  519. # ═════════════════════════════════════════════════════════════════════════════
  520.  
  521. def sanitize_filename(name: str, max_length: int = 200) -> str:
  522.     """
  523.    Sanitize a string to be used as a filename across all platforms.
  524.  
  525.    Handles invalid characters, reserved Windows names, and length limitations.
  526.  
  527.    Args:
  528.        name: Original name to sanitize
  529.        max_length: Maximum length for the filename
  530.  
  531.    Returns:
  532.        Sanitized filename string safe for all platforms
  533.    """
  534.     if not name:
  535.         return "unnamed"
  536.  
  537.     # Handle file system restrictions
  538.     # 1. Replace invalid characters
  539.     sanitized = re.sub(r'[<>:"/\\|?*\x00-\x1F]', '_', name)
  540.  
  541.     # 2. Check for reserved Windows names (CON, PRN, AUX, etc.)
  542.     reserved_names = {
  543.         'CON', 'PRN', 'AUX', 'NUL',
  544.         'COM1', 'COM2', 'COM3', 'COM4', 'COM5', 'COM6', 'COM7', 'COM8', 'COM9',
  545.         'LPT1', 'LPT2', 'LPT3', 'LPT4', 'LPT5', 'LPT6', 'LPT7', 'LPT8', 'LPT9'
  546.     }
  547.  
  548.     # Check if name matches a reserved name (either exactly or with an extension)
  549.     name_parts = sanitized.split('.')
  550.     if name_parts[0].upper() in reserved_names:
  551.         sanitized = f"_{sanitized}"
  552.  
  553.     # 3. Enforce length limit with smart truncation
  554.     if len(sanitized) > max_length:
  555.         # Keep the extension if present
  556.         if '.' in sanitized:
  557.             extension = '.' + sanitized.split('.')[-1]
  558.             base_name = '.'.join(sanitized.split('.')[:-1])
  559.  
  560.             # Truncate the base name, leaving room for ellipsis and extension
  561.             available_length = max_length - len(extension) - 3  # 3 for "..."
  562.             sanitized = base_name[:available_length] + "..." + extension
  563.         else:
  564.             sanitized = sanitized[:max_length-3] + "..."
  565.  
  566.     # 4. Ensure name doesn't end with space or period (Windows restriction)
  567.     sanitized = sanitized.rstrip(' .')
  568.  
  569.     # 5. If empty after sanitization, provide a fallback
  570.     if not sanitized:
  571.         sanitized = "unnamed_file"
  572.  
  573.     return sanitized
  574.  
  575. def ensure_directory(path: Path) -> Path:
  576.     """
  577.    Ensure a directory exists, creating it if necessary.
  578.  
  579.    Args:
  580.        path: Directory path to ensure
  581.  
  582.    Returns:
  583.        Path to the directory
  584.    """
  585.     path.mkdir(parents=True, exist_ok=True)
  586.     return path
  587.  
  588. def get_unique_filename(directory: Path, base_name: str, extension: str) -> Path:
  589.     """
  590.    Generate a unique filename by appending a counter if needed.
  591.  
  592.    Args:
  593.        directory: Directory path
  594.        base_name: Base filename
  595.        extension: File extension
  596.  
  597.    Returns:
  598.        Path to a unique filename
  599.    """
  600.     # Ensure extension starts with a dot
  601.     if extension and not extension.startswith('.'):
  602.         extension = '.' + extension
  603.  
  604.     # First try the original name
  605.     file_path = directory / f"{base_name}{extension}"
  606.     if not file_path.exists():
  607.         return file_path
  608.  
  609.     # Add counter until we find an unused name
  610.     counter = 1
  611.     while True:
  612.         file_path = directory / f"{base_name}_{counter}{extension}"
  613.         if not file_path.exists():
  614.             return file_path
  615.         counter += 1
  616.  
  617. # ═════════════════════════════════════════════════════════════════════════════
  618. # ═══════════════════════════ CONFIGURATION AND SETUP ═════════════════════════
  619. # ═════════════════════════════════════════════════════════════════════════════
  620.  
class LogLevel(Enum):
    """Log levels with descriptive names.

    Each member's value is the numeric constant of the same name from the
    standard ``logging`` module, so ``member.value`` can be handed directly
    to ``Logger.setLevel`` / ``Handler.setLevel``.
    """
    DEBUG = logging.DEBUG
    INFO = logging.INFO
    WARNING = logging.WARNING
    ERROR = logging.ERROR
    CRITICAL = logging.CRITICAL
  628.  
class OutputFormat(Enum):
    """Supported output formats for exporting conversations.

    Values come from ``auto()``, so they are consecutive integers in
    declaration order; inserting or reordering members changes them.
    """
    TEXT = auto()
    HTML = auto()
    MARKDOWN = auto()
    JSON = auto()
    POSTGRESQL = auto()  # Added support for PostgreSQL export
    ALL = auto()  # presumably selects every format above - confirm against the exporter
  637.  
  638. @dataclass
  639. class DatabaseConfig:
  640.     """Configuration for database connections."""
  641.     engine: str = "postgresql"
  642.     host: str = "localhost"
  643.     port: int = 5432
  644.     database: str = "skype_export"
  645.     username: str = "postgres"
  646.     password: str = ""
  647.     schema: str = "public"
  648.     connection_pool_size: int = 5
  649.     connection_max_overflow: int = 10
  650.     connection_timeout: int = 30
  651.     echo_sql: bool = False
  652.  
  653.     @property
  654.     def connection_string(self) -> str:
  655.         """Generate SQLAlchemy connection string."""
  656.         return (f"{self.engine}://{self.username}:{self.password}@"
  657.                 f"{self.host}:{self.port}/{self.database}")
  658.  
  659. @dataclass
  660. class ExportOptions:
  661.     """Configuration options for the export process."""
  662.     output_dir: Path = Path.cwd() / "skype_exports"
  663.     format: OutputFormat = OutputFormat.TEXT
  664.     anonymize: bool = False
  665.     include_timestamps: bool = True
  666.     use_local_time: bool = True
  667.     include_metadata: bool = True
  668.     include_message_ids: bool = False
  669.     parallel: bool = True
  670.     max_workers: int = max(1, os.cpu_count() or 4)
  671.     batch_size: int = 1000
  672.     timezone: Optional[str] = None
  673.     pretty_print: bool = True
  674.     compress_output: bool = False
  675.     filter_pattern: Optional[str] = None
  676.     date_range: Optional[Tuple[datetime.date, datetime.date]] = None
  677.     include_conversation_stats: bool = True
  678.     media_links: bool = False
  679.     strip_html: bool = True
  680.     debug_mode: bool = False
  681.     basic_mode: bool = False  # Added for basic mode
  682.     enable_memory_optimization: bool = True  # Added for memory optimization
  683.     memory_profile: bool = False  # Added for memory profiling
  684.     memory_threshold_percent: int = 75  # Added for memory monitoring
  685.     database_config: DatabaseConfig = field(default_factory=DatabaseConfig)
  686.  
  687. @dataclass
  688. class AppContext:
  689.     """Application context with shared resources and state."""
  690.     options: ExportOptions = field(default_factory=ExportOptions)
  691.     logger: logging.Logger = field(default_factory=lambda: logging.getLogger("original_scripts.testing"))
  692.     console: Any = field(default=None)
  693.     temp_dir: Optional[Path] = None
  694.     start_time: float = field(default_factory=time.time)
  695.     user_id: Optional[str] = None
  696.     user_display_name: Optional[str] = None
  697.     export_date: Optional[str] = None
  698.     export_time: Optional[str] = None
  699.     total_conversations: int = 0
  700.     total_messages: int = 0
  701.     processed_conversations: int = 0
  702.     processed_messages: int = 0
  703.     errors: List[Dict[str, Any]] = field(default_factory=list)
  704.     cancel_requested: bool = False
  705.     memory_monitor: Optional['MemoryMonitor'] = None
  706.  
  707.     def __post_init__(self):
  708.         """Initialize console based on available libraries."""
  709.         if RICH_AVAILABLE and not self.console:
  710.             self.console = Console()
  711.  
  712.         if self.options.enable_memory_optimization:
  713.             try:
  714.                 self.memory_monitor = MemoryMonitor(self)
  715.             except Exception as e:
  716.                 self.logger.warning(f"Failed to initialize memory monitor: {e}")
  717.  
  718.     @property
  719.     def progress_tracker(self):
  720.         """Get a progress tracker based on available libraries."""
  721.         if RICH_AVAILABLE:
  722.             return Progress(
  723.                 TextColumn("[bold blue]{task.description}"),
  724.                 BarColumn(),
  725.                 "[progress.percentage]{task.percentage:>3.0f}%",
  726.                 TimeElapsedColumn(),
  727.                 TimeRemainingColumn(),
  728.                 console=self.console
  729.             )
  730.         elif TQDM_AVAILABLE:
  731.             return tqdm
  732.         else:
  733.             return None  # Simple text-based progress will be used
  734.  
  735.     @contextmanager
  736.     def create_temp_directory(self) -> Generator[Path, None, None]:
  737.         """Create and manage a temporary directory for processing."""
  738.         try:
  739.             temp_dir = Path(tempfile.mkdtemp(prefix="original_scripts.testing_"))
  740.             self.temp_dir = temp_dir
  741.             yield temp_dir
  742.         finally:
  743.             if self.temp_dir and self.temp_dir.exists():
  744.                 shutil.rmtree(self.temp_dir, ignore_errors=True)
  745.                 self.temp_dir = None
  746.  
  747.     def check_memory(self) -> bool:
  748.         """
  749.        Check memory usage and optimize if needed.
  750.  
  751.        Returns:
  752.            True if optimization was performed, False otherwise
  753.        """
  754.         if self.memory_monitor and self.options.enable_memory_optimization:
  755.             return self.memory_monitor.check_memory()
  756.         return False
  757.  
  758.     def get_memory_report(self) -> Optional[Dict[str, Any]]:
  759.         """
  760.        Get a report on memory usage.
  761.  
  762.        Returns:
  763.            Dictionary with memory statistics or None if monitoring disabled
  764.        """
  765.         if self.memory_monitor:
  766.             return self.memory_monitor.get_memory_report()
  767.         return None
  768.  
  769. def setup_logging(level: LogLevel = LogLevel.INFO, log_file: Optional[Path] = None) -> logging.Logger:
  770.     """
  771.    Configure logging with rich formatting if available.
  772.  
  773.    Args:
  774.        level: Logging level to use
  775.        log_file: Optional file path to write logs to
  776.  
  777.    Returns:
  778.        Configured logger instance
  779.    """
  780.     log_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
  781.  
  782.     # Create logger
  783.     logger = logging.getLogger("original_scripts.testing")
  784.     logger.setLevel(level.value)
  785.     logger.handlers = []  # Clear any existing handlers
  786.  
  787.     # Console handler
  788.     if RICH_AVAILABLE:
  789.         console_handler = RichHandler(rich_tracebacks=True)
  790.         console_handler.setFormatter(logging.Formatter("%(message)s"))
  791.     else:
  792.         console_handler = logging.StreamHandler()
  793.         console_handler.setFormatter(logging.Formatter(log_format))
  794.  
  795.     console_handler.setLevel(level.value)
  796.     logger.addHandler(console_handler)
  797.  
  798.     # File handler if specified
  799.     if log_file:
  800.         log_file.parent.mkdir(parents=True, exist_ok=True)
  801.         file_handler = logging.FileHandler(log_file, encoding='utf-8')
  802.         file_handler.setFormatter(logging.Formatter(log_format))
  803.         file_handler.setLevel(level.value)
  804.         logger.addHandler(file_handler)
  805.  
  806.     return logger
  807.  
  808. def get_logger(name: str, ctx: AppContext) -> logging.Logger:
  809.     """
  810.    Get a consistently configured logger instance.
  811.  
  812.    Args:
  813.        name: Logger name (will be prefixed with original_scripts.testing)
  814.        ctx: Application context with configuration
  815.  
  816.    Returns:
  817.        Configured logger instance
  818.    """
  819.     logger = ctx.logger.getChild(name)
  820.     return logger
  821.  
  822. # ═════════════════════════════════════════════════════════════════════════════
  823. # ═══════════════════════════ DOMAIN MODELS ════════════════════════════════════
  824. # ═════════════════════════════════════════════════════════════════════════════
  825.  
  826. @dataclass
  827. class SkypeMessage:
  828.     """Represents a single message in a Skype conversation."""
  829.     id: str
  830.     timestamp: datetime.datetime
  831.     sender_id: str
  832.     sender_display_name: str
  833.     content: str
  834.     message_type: str
  835.     edited: bool = False
  836.     original_json: Dict[str, Any] = field(default_factory=dict)
  837.  
  838.     @property
  839.     def formatted_timestamp(self) -> str:
  840.         """Format the timestamp for display."""
  841.         return self.timestamp.strftime("%Y-%m-%d %H:%M:%S")
  842.  
  843.     @property
  844.     def date(self) -> datetime.date:
  845.         """Get the date of the message."""
  846.         return self.timestamp.date()
  847.  
  848.     @property
  849.     def time(self) -> datetime.time:
  850.         """Get the time of the message."""
  851.         return self.timestamp.time()
  852.  
  853. @dataclass
  854. class SkypeConversation:
  855.     """Represents a Skype conversation with metadata and messages."""
  856.     id: str
  857.     display_name: str
  858.     messages: List[SkypeMessage] = field(default_factory=list)
  859.     first_timestamp: Optional[datetime.datetime] = None
  860.     last_timestamp: Optional[datetime.datetime] = None
  861.     participants: Dict[str, str] = field(default_factory=dict)
  862.     original_json: Dict[str, Any] = field(default_factory=dict)
  863.  
  864.     def __post_init__(self):
  865.         """Calculate first and last timestamps after initialization."""
  866.         if self.messages:
  867.             message_timestamps = [m.timestamp for m in self.messages]
  868.             self.first_timestamp = min(message_timestamps)
  869.             self.last_timestamp = max(message_timestamps)
  870.  
  871.     @property
  872.     def message_count(self) -> int:
  873.         """Get the total number of messages in the conversation."""
  874.         return len(self.messages)
  875.  
  876.     @property
  877.     def duration(self) -> Optional[datetime.timedelta]:
  878.         """Get the duration of the conversation."""
  879.         if self.first_timestamp and self.last_timestamp:
  880.             return self.last_timestamp - self.first_timestamp
  881.         return None
  882.  
  883.     @property
  884.     def days_active(self) -> Optional[int]:
  885.         """Get the number of days the conversation was active."""
  886.         if self.duration:
  887.             return self.duration.days
  888.         return None
  889.  
  890.     def get_messages_by_date(self, date: datetime.date) -> List[SkypeMessage]:
  891.         """Get all messages from a specific date."""
  892.         return [msg for msg in self.messages if msg.date == date]
  893.  
  894.     def get_message_dates(self) -> Set[datetime.date]:
  895.         """Get all unique dates when messages were sent."""
  896.         return {msg.date for msg in self.messages}
  897.  
  898.     def add_message(self, message: SkypeMessage) -> None:
  899.         """Add a message to the conversation and update timestamps."""
  900.         self.messages.append(message)
  901.  
  902.         # Update first/last timestamps
  903.         if not self.first_timestamp or message.timestamp < self.first_timestamp:
  904.             self.first_timestamp = message.timestamp
  905.  
  906.         if not self.last_timestamp or message.timestamp > self.last_timestamp:
  907.             self.last_timestamp = message.timestamp
  908.  
  909. @dataclass
  910. class SkypeExport:
  911.     """Represents a complete Skype export with metadata and conversations."""
  912.     user_id: str
  913.     export_date: datetime.datetime
  914.     conversations: Dict[str, SkypeConversation] = field(default_factory=dict)
  915.     original_json: Dict[str, Any] = field(default_factory=dict)
  916.  
  917.     @property
  918.     def total_messages(self) -> int:
  919.         """Get the total number of messages across all conversations."""
  920.         return sum(conv.message_count for conv in self.conversations.values())
  921.  
  922.     @property
  923.     def total_conversations(self) -> int:
  924.         """Get the total number of conversations."""
  925.         return len(self.conversations)
  926.  
  927.     def get_conversation_by_id(self, id: str) -> Optional[SkypeConversation]:
  928.         """Get a conversation by its ID."""
  929.         return self.conversations.get(id)
  930.  
  931.     def add_conversation(self, conversation: SkypeConversation) -> None:
  932.         """Add a conversation to the export."""
  933.         self.conversations[conversation.id] = conversation
  934.  
  935.     def filter_conversations(self, pattern: str) -> List[SkypeConversation]:
  936.         """Filter conversations by display name pattern."""
  937.         return [conv for conv in self.conversations.values()
  938.                 if fnmatch.fnmatch(conv.display_name.lower(), pattern.lower())]
  939.  
  940.     def get_conversation_stats(self) -> Dict[str, Any]:
  941.         """Generate statistics about the conversations."""
  942.         stats = {
  943.             "total_conversations": self.total_conversations,
  944.             "total_messages": self.total_messages,
  945.             "conversation_details": []
  946.         }
  947.  
  948.         for conv_id, conv in self.conversations.items():
  949.             # Skip empty conversations
  950.             if not conv.messages:
  951.                 continue
  952.  
  953.             conv_stats = {
  954.                 "id": conv.id,
  955.                 "display_name": conv.display_name,
  956.                 "message_count": conv.message_count,
  957.                 "days_active": conv.days_active,
  958.                 "first_message": conv.first_timestamp.isoformat() if conv.first_timestamp else None,
  959.                 "last_message": conv.last_timestamp.isoformat() if conv.last_timestamp else None,
  960.                 "participants": len(conv.participants),
  961.                 "participants_names": list(conv.participants.values()),
  962.                 "message_types": {}
  963.             }
  964.  
  965.             # Count message types
  966.             for msg in conv.messages:
  967.                 if msg.message_type not in conv_stats["message_types"]:
  968.                     conv_stats["message_types"][msg.message_type] = 0
  969.                 conv_stats["message_types"][msg.message_type] += 1
  970.  
  971.             stats["conversation_details"].append(conv_stats)
  972.  
  973.         return stats
  974.  
  975. # ═════════════════════════════════════════════════════════════════════════════
  976. # ═══════════════════════════ DATABASE MODELS ═════════════════════════════════
  977. # ═════════════════════════════════════════════════════════════════════════════
  978.  
# The ORM models are declared only when SQLALCHEMY_AVAILABLE is true;
# otherwise Base and the Db* classes below are never defined.
if SQLALCHEMY_AVAILABLE:
    # Shared declarative base: each model below registers its table on
    # Base.metadata.
    Base = declarative_base()

    class DbConversation(Base):
        """Database model for Skype conversations (table 'conversations')."""
        __tablename__ = 'conversations'

        # Column declaration order is kept as-is on purpose: it defines the
        # column order of the generated table.
        id = Column(String(255), primary_key=True)
        display_name = Column(String(255), index=True)
        first_timestamp = Column(DateTime, nullable=True, index=True)
        last_timestamp = Column(DateTime, nullable=True, index=True)
        message_count = Column(Integer, default=0)
        days_active = Column(Integer, nullable=True)
        export_date = Column(DateTime, nullable=False)
        # NOTE(review): presumably a JSON-serialized blob of extra metadata;
        # confirm against the code that writes it.
        metadata_json = Column(Text, nullable=True)

        # Relationships: both collections are configured with the
        # "all, delete-orphan" cascade and mirror the `conversation`
        # attribute on the child models.
        messages = relationship("DbMessage", back_populates="conversation",
                                cascade="all, delete-orphan")
        participants = relationship("DbParticipant", back_populates="conversation",
                                   cascade="all, delete-orphan")

    class DbMessage(Base):
        """Database model for Skype messages (table 'messages')."""
        __tablename__ = 'messages'

        id = Column(String(255), primary_key=True)
        # Foreign key to the owning row in 'conversations'
        conversation_id = Column(String(255), ForeignKey('conversations.id'), index=True)
        timestamp = Column(DateTime, nullable=False, index=True)
        sender_id = Column(String(255), index=True)
        sender_display_name = Column(String(255))
        content = Column(Text, nullable=True)
        message_type = Column(String(50), index=True)
        edited = Column(Boolean, default=False)
        # NOTE(review): presumably a JSON-serialized copy of per-message
        # metadata; confirm against the writer.
        metadata_json = Column(Text, nullable=True)

        # Relationships
        conversation = relationship("DbConversation", back_populates="messages")

    class DbParticipant(Base):
        """Database model for conversation participants (table 'participants')."""
        __tablename__ = 'participants'

        # Surrogate integer key; the (conversation_id, user_id) pair is kept
        # unique by the constraint declared in __table_args__ below.
        id = Column(Integer, primary_key=True, autoincrement=True)
        conversation_id = Column(String(255), ForeignKey('conversations.id'), index=True)
        user_id = Column(String(255), index=True)
        display_name = Column(String(255))

        # Relationships
        conversation = relationship("DbConversation", back_populates="participants")

        # Composite unique constraint: one row per user per conversation
        __table_args__ = (
            sqlalchemy.UniqueConstraint('conversation_id', 'user_id', name='uq_participant'),
        )

    class DbExportMeta(Base):
        """Database model for export metadata (table 'export_metadata')."""
        __tablename__ = 'export_metadata'

        id = Column(Integer, primary_key=True, autoincrement=True)
        export_date = Column(DateTime, nullable=False, index=True)
        user_id = Column(String(255), index=True)
        user_display_name = Column(String(255))
        format = Column(String(50))
        total_conversations = Column(Integer, default=0)
        total_messages = Column(Integer, default=0)
        duration_seconds = Column(Integer, default=0)
        metadata_json = Column(Text, nullable=True)
  1048.  
  1049. class DatabaseManager:
  1050.     """Manage database connections and operations."""
  1051.  
  1052.     def __init__(self, ctx: AppContext):
  1053.         """
  1054.        Initialize the database manager.
  1055.  
  1056.        Args:
  1057.            ctx: Application context
  1058.        """
  1059.         self.ctx = ctx
  1060.         self.logger = get_logger('database', ctx)
  1061.         self.engine = None
  1062.         self.session_factory = None
  1063.  
  1064.         # Check required dependencies
  1065.         if not SQLALCHEMY_AVAILABLE:
  1066.             self.logger.error("SQLAlchemy is required for database operations but not installed")
  1067.             raise DatabaseError("SQLAlchemy is required but not installed")
  1068.  
  1069.         if not PSYCOPG2_AVAILABLE and ctx.options.format == OutputFormat.POSTGRESQL:
  1070.             self.logger.error("psycopg2 is required for PostgreSQL export but not installed")
  1071.             raise DatabaseError("psycopg2 is required but not installed")
  1072.  
  1073.     def initialize(self) -> None:
  1074.         """Initialize database connection and create schema if needed."""
  1075.         config = self.ctx.options.database_config
  1076.  
  1077.         try:
  1078.             # Create engine with connection pooling
  1079.             self.engine = create_engine(
  1080.                 config.connection_string,
  1081.                 pool_size=config.connection_pool_size,
  1082.                 max_overflow=config.connection_max_overflow,
  1083.                 pool_timeout=config.connection_timeout,
  1084.                 echo=config.echo_sql
  1085.             )
  1086.  
  1087.             # Create session factory
  1088.             self.session_factory = sessionmaker(bind=self.engine)
  1089.  
  1090.             # Create tables if they don't exist
  1091.             Base.metadata.create_all(self.engine)
  1092.  
  1093.             self.logger.info(f"Connected to database: {config.engine}://{config.host}:{config.port}/{config.database}")
  1094.  
  1095.         except Exception as e:
  1096.             self.logger.error(f"Database initialization error: {e}")
  1097.             raise DatabaseError(f"Failed to initialize database: {e}") from e
  1098.  
  1099.     @contextmanager
  1100.     def session(self) -> Generator[Session, None, None]:
  1101.         """
  1102.        Get a database session with automatic cleanup.
  1103.  
  1104.        Yields:
  1105.            SQLAlchemy session
  1106.        """
  1107.         if not self.session_factory:
  1108.             self.initialize()
  1109.  
  1110.         session = self.session_factory()
  1111.         try:
  1112.             yield session
  1113.             session.commit()
  1114.         except Exception as e:
  1115.             session.rollback()
  1116.             self.logger.error(f"Database session error: {e}")
  1117.             raise
  1118.         finally:
  1119.             session.close()
  1120.  
  1121.     def count_conversations(self) -> int:
  1122.         """
  1123.        Count conversations in the database.
  1124.  
  1125.        Returns:
  1126.            Number of conversations
  1127.        """
  1128.         with self.session() as session:
  1129.             return session.query(DbConversation).count()
  1130.  
  1131.     def count_messages(self) -> int:
  1132.         """
  1133.        Count messages in the database.
  1134.  
  1135.        Returns:
  1136.            Number of messages
  1137.        """
  1138.         with self.session() as session:
  1139.             return session.query(DbMessage).count()
  1140.  
  1141.     def create_export_metadata(self, skype_export: SkypeExport, duration_seconds: int) -> None:
  1142.         """
  1143.        Create export metadata record.
  1144.  
  1145.        Args:
  1146.            skype_export: SkypeExport object
  1147.            duration_seconds: Export duration in seconds
  1148.        """
  1149.         with self.session() as session:
  1150.             meta = DbExportMeta(
  1151.                 export_date=skype_export.export_date,
  1152.                 user_id=skype_export.user_id,
  1153.                 user_display_name=self.ctx.user_display_name,
  1154.                 format=self.ctx.options.format.name,
  1155.                 total_conversations=skype_export.total_conversations,
  1156.                 total_messages=skype_export.total_messages,
  1157.                 duration_seconds=duration_seconds,
  1158.                 metadata_json=json.dumps({
  1159.                     "export_date": self.ctx.export_date,
  1160.                     "export_time": self.ctx.export_time,
  1161.                     "options": {k: str(v) for k, v in dataclasses.asdict(self.ctx.options).items()
  1162.                                if k != 'database_config'}
  1163.                 })
  1164.             )
  1165.             session.add(meta)
  1166.  
  1167. # ═════════════════════════════════════════════════════════════════════════════
  1168. # ═══════════════════════════ CORE PROCESSORS ═════════════════════════════════
  1169. # ═════════════════════════════════════════════════════════════════════════════
  1170.  
  1171. class FileReader(ABC):
  1172.     """Abstract base class for reading different types of input files."""
  1173.  
  1174.     @abstractmethod
  1175.     async def read(self, file_path: Path, ctx: AppContext) -> Dict[str, Any]:
  1176.         """
  1177.        Read and parse input file.
  1178.  
  1179.        Args:
  1180.            file_path: Path to the input file
  1181.            ctx: Application context
  1182.  
  1183.        Returns:
  1184.            Parsed content as dictionary
  1185.        """
  1186.         pass
  1187.  
  1188.     @classmethod
  1189.     def create_reader(cls, file_path: Path) -> 'FileReader':
  1190.         """
  1191.        Factory method to create appropriate reader based on file extension.
  1192.  
  1193.        Args:
  1194.            file_path: Path to input file
  1195.  
  1196.        Returns:
  1197.            Appropriate FileReader instance
  1198.        """
  1199.         suffix = file_path.suffix.lower()
  1200.         if suffix == '.json':
  1201.             return JsonFileReader()
  1202.         elif suffix == '.tar' or suffix == '.gz' or suffix == '.tgz':
  1203.             return TarFileReader()
  1204.         elif suffix == '.zip':
  1205.             return ZipFileReader()
  1206.         else:
  1207.             raise ValueError(f"Unsupported file type: {suffix}")
  1208.  
  1209. class JsonFileReader(FileReader):
  1210.     """Reader for JSON files."""
  1211.  
  1212.     async def read(self, file_path: Path, ctx: AppContext) -> Dict[str, Any]:
  1213.         """Read a regular JSON file."""
  1214.         ctx.logger.debug(f"Reading JSON file: {file_path}")
  1215.         loop = asyncio.get_event_loop()
  1216.  
  1217.         # Check file size - if large, use streaming parser
  1218.         file_size = file_path.stat().st_size
  1219.         large_file_threshold = 100 * 1024 * 1024  # 100 MB
  1220.  
  1221.         if file_size > large_file_threshold:
  1222.             ctx.logger.info(f"Large JSON file detected ({file_size/1024/1024:.2f} MB). Using streaming parser.")
  1223.             try:
  1224.                 # Use ijson for streaming if available
  1225.                 import_result = importlib.util.find_spec("ijson")
  1226.                 if import_result is not None:
  1227.                     import ijson
  1228.                     return await loop.run_in_executor(None, self._read_with_ijson, file_path)
  1229.                 else:
  1230.                     ctx.logger.warning("ijson package not available for streaming. Using standard JSON parser.")
  1231.             except ImportError:
  1232.                 ctx.logger.warning("ijson import failed. Using standard JSON parser.")
  1233.  
  1234.         # Default JSON loading for normal-sized files or if ijson fails
  1235.         try:
  1236.             return await loop.run_in_executor(None, self._read_standard_json, file_path)
  1237.         except json.JSONDecodeError as e:
  1238.             raise ParseError(f"Failed to parse JSON file {file_path}: {e}")
  1239.         except Exception as e:
  1240.             raise FileReadError(f"Failed to read JSON file {file_path}: {e}")
  1241.  
  1242.     def _read_standard_json(self, file_path: Path) -> Dict[str, Any]:
  1243.         """Read a JSON file using the standard json module."""
  1244.         with open(file_path, 'r', encoding='utf-8') as f:
  1245.             return json.load(f)
  1246.  
  1247.     def _read_with_ijson(self, file_path: Path) -> Dict[str, Any]:
  1248.         """Stream parse a large JSON file using ijson."""
  1249.         import ijson
  1250.  
  1251.         result = {}
  1252.         with open(file_path, 'rb') as f:
  1253.             # Read top-level elements
  1254.             for prefix, event, value in ijson.parse(f):
  1255.                 if prefix == '' and event == 'map_key':
  1256.                     current_key = value
  1257.                 elif prefix == '' and event in ('string', 'number', 'boolean'):
  1258.                     result[current_key] = value
  1259.  
  1260.             # Reopen file and stream the conversations array specifically
  1261.             f.seek(0)
  1262.             conversations = []
  1263.             for conversation in ijson.items(f, 'conversations.item'):
  1264.                 conversations.append(conversation)
  1265.  
  1266.             result['conversations'] = conversations
  1267.  
  1268.         return result
  1269.  
  1270. class TarFileReader(FileReader):
  1271.     """Reader for TAR file archives."""
  1272.  
  1273.     async def read(self, file_path: Path, ctx: AppContext) -> Dict[str, Any]:
  1274.         """Read and extract a TAR archive."""
  1275.         ctx.logger.debug(f"Reading TAR file: {file_path}")
  1276.         loop = asyncio.get_event_loop()
  1277.  
  1278.         try:
  1279.             return await loop.run_in_executor(None, self._process_tar, file_path, ctx)
  1280.         except Exception as e:
  1281.             ctx.logger.error(f"Error reading TAR file {file_path}: {e}")
  1282.             raise FileReadError(f"Failed to read TAR file: {e}")
  1283.  
  1284.     def _process_tar(self, file_path: Path, ctx: AppContext) -> Dict[str, Any]:
  1285.         """Process TAR file contents in a separate thread."""
  1286.         with tarfile.open(file_path, 'r:*') as tar:
  1287.             # Extract all files to temporary directory
  1288.             temp_dir = Path(tempfile.mkdtemp(prefix="original_scripts.testing_"))
  1289.             try:
  1290.                 tar.extractall(path=temp_dir)
  1291.                 ctx.logger.debug(f"Extracted TAR contents to {temp_dir}")
  1292.  
  1293.                 # Find JSON files
  1294.                 json_files = list(temp_dir.glob('**/*.json'))
  1295.  
  1296.                 # Check if we found any JSON files
  1297.                 if not json_files:
  1298.                     raise FileReadError(f"No JSON files found in TAR archive: {file_path}")
  1299.  
  1300.                 # Handle multiple JSON files
  1301.                 if len(json_files) > 1:
  1302.                     ctx.logger.warning(f"Multiple JSON files found in archive: {[f.name for f in json_files]}")
  1303.  
  1304.                     # In interactive/basic mode, prompt the user to select
  1305.                     if hasattr(ctx, 'ui') and ctx.options.basic_mode:
  1306.                         print("\nMultiple JSON files found in the archive:")
  1307.                         for i, f in enumerate(json_files, 1):
  1308.                             print(f"  {i}: {f.name} ({f.stat().st_size / 1024 / 1024:.2f} MB)")
  1309.  
  1310.                         try:
  1311.                             selection = input("\nEnter number to select (1-{}) or press Enter for first file: ".format(len(json_files)))
  1312.                             if selection.strip():
  1313.                                 index = int(selection.strip()) - 1
  1314.                                 if 0 <= index < len(json_files):
  1315.                                     json_file = json_files[index]
  1316.                                     ctx.logger.info(f"Selected file: {json_file.name}")
  1317.                                 else:
  1318.                                     ctx.logger.warning(f"Invalid selection, using first file: {json_files[0].name}")
  1319.                                     json_file = json_files[0]
  1320.                             else:
  1321.                                 ctx.logger.info(f"No selection made, using first file: {json_files[0].name}")
  1322.                                 json_file = json_files[0]
  1323.                         except (ValueError, IndexError):
  1324.                             ctx.logger.warning(f"Invalid input, using first file: {json_files[0].name}")
  1325.                             json_file = json_files[0]
  1326.                     else:
  1327.                         # In non-interactive mode, use largest JSON file (likely the main export)
  1328.                         json_file = max(json_files, key=lambda f: f.stat().st_size)
  1329.                         ctx.logger.info(f"Selected largest JSON file: {json_file.name} ({json_file.stat().st_size / 1024 / 1024:.2f} MB)")
  1330.                 else:
  1331.                     json_file = json_files[0]
  1332.                     ctx.logger.debug(f"Found JSON file: {json_file}")
  1333.  
  1334.                 # Read the selected JSON file
  1335.                 with open(json_file, 'r', encoding='utf-8') as f:
  1336.                     data = json.load(f)
  1337.  
  1338.                 return data
  1339.             finally:
  1340.                 # Clean up temporary directory
  1341.                 shutil.rmtree(temp_dir)
  1342.  
  1343. class ZipFileReader(FileReader):
  1344.     """Reader for ZIP file archives."""
  1345.  
  1346.     async def read(self, file_path: Path, ctx: AppContext) -> Dict[str, Any]:
  1347.         """Read and extract a ZIP archive."""
  1348.         ctx.logger.debug(f"Reading ZIP file: {file_path}")
  1349.         loop = asyncio.get_event_loop()
  1350.  
  1351.         try:
  1352.             return await loop.run_in_executor(None, self._process_zip, file_path, ctx)
  1353.         except Exception as e:
  1354.             ctx.logger.error(f"Error reading ZIP file {file_path}: {e}")
  1355.             raise FileReadError(f"Failed to read ZIP file: {e}")
  1356.  
  1357.     def _process_zip(self, file_path: Path, ctx: AppContext) -> Dict[str, Any]:
  1358.         """Process ZIP file contents in a separate thread."""
  1359.         with zipfile.ZipFile(file_path, 'r') as zip_file:
  1360.             # Extract all files to temporary directory
  1361.             temp_dir = Path(tempfile.mkdtemp(prefix="original_scripts.testing_"))
  1362.             try:
  1363.                 zip_file.extractall(path=temp_dir)
  1364.                 ctx.logger.debug(f"Extracted ZIP contents to {temp_dir}")
  1365.  
  1366.                 # Find JSON files
  1367.                 json_files = list(temp_dir.glob('**/*.json'))
  1368.  
  1369.                 # Check if we found any JSON files
  1370.                 if not json_files:
  1371.                     raise FileReadError(f"No JSON files found in ZIP archive: {file_path}")
  1372.  
  1373.                 # Handle multiple JSON files
  1374.                 if len(json_files) > 1:
  1375.                     ctx.logger.warning(f"Multiple JSON files found in archive: {[f.name for f in json_files]}")
  1376.  
  1377.                     # In interactive/basic mode, prompt the user to select
  1378.                     if hasattr(ctx, 'ui') and ctx.options.basic_mode:
  1379.                         print("\nMultiple JSON files found in the archive:")
  1380.                         for i, f in enumerate(json_files, 1):
  1381.                             print(f"  {i}: {f.name} ({f.stat().st_size / 1024 / 1024:.2f} MB)")
  1382.  
  1383.                         try:
  1384.                             selection = input("\nEnter number to select (1-{}) or press Enter for first file: ".format(len(json_files)))
  1385.                             if selection.strip():
  1386.                                 index = int(selection.strip()) - 1
  1387.                                 if 0 <= index < len(json_files):
  1388.                                     json_file = json_files[index]
  1389.                                     ctx.logger.info(f"Selected file: {json_file.name}")
  1390.                                 else:
  1391.                                     ctx.logger.warning(f"Invalid selection, using first file: {json_files[0].name}")
  1392.                                     json_file = json_files[0]
  1393.                             else:
  1394.                                 ctx.logger.info(f"No selection made, using first file: {json_files[0].name}")
  1395.                                 json_file = json_files[0]
  1396.                         except (ValueError, IndexError):
  1397.                             ctx.logger.warning(f"Invalid input, using first file: {json_files[0].name}")
  1398.                             json_file = json_files[0]
  1399.                     else:
  1400.                         # In non-interactive mode, use largest JSON file (likely the main export)
  1401.                         json_file = max(json_files, key=lambda f: f.stat().st_size)
  1402.                         ctx.logger.info(f"Selected largest JSON file: {json_file.name} ({json_file.stat().st_size / 1024 / 1024:.2f} MB)")
  1403.                 else:
  1404.                     json_file = json_files[0]
  1405.                     ctx.logger.debug(f"Found JSON file: {json_file}")
  1406.  
  1407.                 # Read the selected JSON file
  1408.                 with open(json_file, 'r', encoding='utf-8') as f:
  1409.                     data = json.load(f)
  1410.  
  1411.                 return data
  1412.             finally:
  1413.                 # Clean up temporary directory
  1414.                 shutil.rmtree(temp_dir)
  1415.  
  1416. class SkypeExportParser:
  1417.     """Parser for Skype export data."""
  1418.  
  1419.     def __init__(self, ctx: AppContext):
  1420.         """
  1421.        Initialize the parser.
  1422.  
  1423.        Args:
  1424.            ctx: Application context
  1425.        """
  1426.         self.ctx = ctx
  1427.         self.logger = get_logger('parser', ctx)
  1428.  
  1429.     async def parse(self, data: Dict[str, Any]) -> SkypeExport:
  1430.         """
  1431.        Parse raw Skype export data into structured domain objects.
  1432.  
  1433.        Args:
  1434.            data: Raw JSON data from Skype export
  1435.  
  1436.        Returns:
  1437.            Structured SkypeExport object
  1438.        """
  1439.         self.logger.info("Parsing Skype export data...")
  1440.  
  1441.         # Extract basic metadata
  1442.         user_id, export_date = self._extract_metadata(data)
  1443.  
  1444.         # Create export object
  1445.         skype_export = SkypeExport(
  1446.             user_id=user_id,
  1447.             export_date=export_date,
  1448.             original_json=data
  1449.         )
  1450.  
  1451.         # Build ID to display name mapping
  1452.         id_to_display_name = self._build_display_name_map(data)
  1453.  
  1454.         # Process all conversations
  1455.         conversations = data.get('conversations', [])
  1456.         self.ctx.total_conversations = len(conversations)
  1457.  
  1458.         # Optimize batch size if needed
  1459.         self._optimize_batch_size(conversations)
  1460.  
  1461.         # Parse all conversations with progress tracking
  1462.         await self._parse_conversations_with_progress(conversations, id_to_display_name, skype_export)
  1463.  
  1464.         self.logger.info(f"Parsed {skype_export.total_conversations} conversations with {skype_export.total_messages} messages")
  1465.         return skype_export
  1466.  
  1467.     async def _parse_conversations_with_progress(self, conversations: List[Dict[str, Any]],
  1468.                                                 id_to_display_name: Dict[str, str],
  1469.                                                 skype_export: SkypeExport) -> None:
  1470.         """
  1471.        Parse conversations with progress tracking.
  1472.  
  1473.        Args:
  1474.            conversations: List of conversation data
  1475.            id_to_display_name: Mapping of user IDs to display names
  1476.            skype_export: SkypeExport object
  1477.        """
  1478.         progress_tracker = self.ctx.progress_tracker
  1479.         if RICH_AVAILABLE and progress_tracker and not self.ctx.options.basic_mode:
  1480.             with progress_tracker as progress:
  1481.                 task = progress.add_task("[cyan]Parsing conversations...", total=len(conversations))
  1482.                 for i, conv_data in enumerate(conversations):
  1483.                     conversation = await self._parse_conversation(conv_data, id_to_display_name)
  1484.                     skype_export.add_conversation(conversation)
  1485.                     progress.update(task, advance=1)
  1486.  
  1487.                     # Periodically check memory usage
  1488.                     if i % 5 == 0:
  1489.                         self.ctx.check_memory()
  1490.  
  1491.                     # Check for cancellation
  1492.                     if self.ctx.cancel_requested:
  1493.                         self.logger.info("Parsing cancelled by user")
  1494.                         break
  1495.         else:
  1496.             # Simple parsing without rich progress bar
  1497.             for i, conv_data in enumerate(conversations):
  1498.                 if i % 10 == 0:
  1499.                     self.logger.info(f"Parsing conversation {i+1}/{len(conversations)}")
  1500.  
  1501.                 conversation = await self._parse_conversation(conv_data, id_to_display_name)
  1502.                 skype_export.add_conversation(conversation)
  1503.  
  1504.                 # Periodically check memory usage
  1505.                 if i % 5 == 0:
  1506.                     self.ctx.check_memory()
  1507.  
  1508.                 # Check for cancellation
  1509.                 if self.ctx.cancel_requested:
  1510.                     self.logger.info("Parsing cancelled by user")
  1511.                     break
  1512.  
  1513.     async def _parse_conversation(self, conv_data: Dict[str, Any],
  1514.                                 id_to_display_name: Dict[str, str]) -> SkypeConversation:
  1515.         """
  1516.        Parse a single conversation from raw data.
  1517.  
  1518.        Args:
  1519.            conv_data: Raw conversation data
  1520.            id_to_display_name: Mapping of user IDs to display names
  1521.  
  1522.        Returns:
  1523.            Structured SkypeConversation object
  1524.        """
  1525.         conv_id = conv_data.get('id', '')
  1526.         display_name = conv_data.get('displayName', '')
  1527.  
  1528.         # Handle missing display name
  1529.         if not display_name:
  1530.             # Try to extract from ID (typically format is "8:username")
  1531.             try:
  1532.                 display_name = conv_id.split(':')[1]
  1533.             except (IndexError, AttributeError):
  1534.                 display_name = f"Conversation {conv_id}"
  1535.  
  1536.         # Update ID to display name mapping
  1537.         id_to_display_name[conv_id] = display_name
  1538.  
  1539.         # Create conversation object
  1540.         conversation = SkypeConversation(
  1541.             id=conv_id,
  1542.             display_name=display_name,
  1543.             original_json=conv_data
  1544.         )
  1545.  
  1546.         # Parse messages in parallel if enabled
  1547.         message_list = conv_data.get('MessageList', [])
  1548.  
  1549.         if self.ctx.options.parallel and len(message_list) > 100 and not self.ctx.options.basic_mode:
  1550.             # Process messages in batches for large conversations
  1551.             loop = asyncio.get_event_loop()
  1552.  
  1553.             # Use dynamic batch size based on memory constraints
  1554.             batch_size = self.ctx.options.batch_size
  1555.             batches = [message_list[i:i+batch_size] for i in range(0, len(message_list), batch_size)]
  1556.  
  1557.             self.logger.debug(f"Processing {len(message_list)} messages in {len(batches)} batches "
  1558.                            f"(batch size: {batch_size})")
  1559.  
  1560.             with concurrent.futures.ThreadPoolExecutor(
  1561.                 max_workers=self.ctx.options.max_workers
  1562.             ) as executor:
  1563.                 # Process each batch in parallel
  1564.                 tasks = []
  1565.                 for batch in batches:
  1566.                     task = loop.run_in_executor(
  1567.                         executor,
  1568.                         self._process_message_batch,
  1569.                         batch,
  1570.                         id_to_display_name,
  1571.                         conversation
  1572.                     )
  1573.                     tasks.append(task)
  1574.  
  1575.                 # Wait for all batches to complete
  1576.                 completed_count = 0
  1577.                 for completed_task in await asyncio.gather(*tasks):
  1578.                     completed_count += 1
  1579.  
  1580.                     # Periodically check memory usage
  1581.                     if completed_count % 5 == 0:
  1582.                         self.ctx.check_memory()
  1583.         else:
  1584.             # Process messages sequentially for smaller conversations
  1585.             for msg_data in message_list:
  1586.                 message = self._parse_message(msg_data, id_to_display_name)
  1587.                 conversation.add_message(message)
  1588.  
  1589.         # Update participant mapping
  1590.         for message in conversation.messages:
  1591.             if message.sender_id not in conversation.participants:
  1592.                 conversation.participants[message.sender_id] = message.sender_display_name
  1593.  
  1594.         # Sort messages by timestamp
  1595.         conversation.messages.sort(key=lambda msg: msg.timestamp)
  1596.  
  1597.         return conversation
  1598.  
  1599.     def _process_message_batch(self, batch: List[Dict[str, Any]],
  1600.                               id_to_display_name: Dict[str, str],
  1601.                               conversation: SkypeConversation) -> List[SkypeMessage]:
  1602.         """
  1603.        Process a batch of messages in a separate thread.
  1604.  
  1605.        Args:
  1606.            batch: List of raw message data
  1607.            id_to_display_name: Mapping of user IDs to display names
  1608.            conversation: Conversation to add messages to
  1609.  
  1610.        Returns:
  1611.            List of parsed messages
  1612.        """
  1613.         messages = []
  1614.         for msg_data in batch:
  1615.             message = self._parse_message(msg_data, id_to_display_name)
  1616.             conversation.add_message(message)
  1617.             messages.append(message)
  1618.  
  1619.         # Trigger garbage collection for very large batches to manage memory
  1620.         if len(batch) > 5000 and self.ctx.options.enable_memory_optimization:
  1621.             gc.collect()
  1622.  
  1623.         return messages
  1624.  
  1625.     def _parse_message(self, msg_data: Dict[str, Any],
  1626.                        id_to_display_name: Dict[str, str]) -> SkypeMessage:
  1627.         """
  1628.        Parse a single message from raw data.
  1629.  
  1630.        Args:
  1631.            msg_data: Raw message data
  1632.            id_to_display_name: Mapping of user IDs to display names
  1633.  
  1634.        Returns:
  1635.            Structured SkypeMessage object
  1636.        """
  1637.         # Extract basic message data
  1638.         msg_id = msg_data.get('id', str(uuid.uuid4()))
  1639.  
  1640.         # Parse timestamp
  1641.         timestamp_str = msg_data.get('originalarrivaltime', '')
  1642.         try:
  1643.             timestamp = datetime.datetime.fromisoformat(timestamp_str.replace('Z', '+00:00'))
  1644.         except (ValueError, TypeError):
  1645.             self.logger.warning(f"Invalid timestamp format: {timestamp_str}")
  1646.             timestamp = datetime.datetime.now(datetime.timezone.utc)
  1647.  
  1648.         # Convert to local time if requested
  1649.         if self.ctx.options.use_local_time:
  1650.             timestamp = timestamp.astimezone()
  1651.  
  1652.         # Extract sender info
  1653.         sender_id = msg_data.get('from', '')
  1654.         sender_display_name = id_to_display_name.get(sender_id, sender_id)
  1655.  
  1656.         # Extract content and type
  1657.         content = msg_data.get('content', '')
  1658.         msg_type = msg_data.get('messagetype', 'unknown')
  1659.  
  1660.         # Special handling for non-text message types
  1661.         if msg_type != 'RichText':
  1662.             content = self._get_message_type_description(msg_type)
  1663.  
  1664.         # Check for edited messages
  1665.         edited = bool(re.search(r'<e_m.*>', content))
  1666.  
  1667.         # Create message object
  1668.         message = SkypeMessage(
  1669.             id=msg_id,
  1670.             timestamp=timestamp,
  1671.             sender_id=sender_id,
  1672.             sender_display_name=sender_display_name,
  1673.             content=content,
  1674.             message_type=msg_type,
  1675.             edited=edited,
  1676.             original_json=msg_data
  1677.         )
  1678.  
  1679.         return message
  1680.  
  1681.     def _get_message_type_description(self, msg_type: str) -> str:
  1682.         """
  1683.        Convert Skype message type to human-readable description.
  1684.  
  1685.        Args:
  1686.            msg_type: Skype message type
  1687.  
  1688.        Returns:
  1689.            Human-readable description
  1690.        """
  1691.         type_descriptions = {
  1692.             'Event/Call': '***A call started/ended***',
  1693.             'Poll': '***Created a poll***',
  1694.             'RichText/Media_Album': '***Sent an album of images***',
  1695.             'RichText/Media_AudioMsg': '***Sent a voice message***',
  1696.             'RichText/Media_CallRecording': '***Sent a call recording***',
  1697.             'RichText/Media_Card': '***Sent a media card***',
  1698.             'RichText/Media_FlikMsg': '***Sent a moji***',
  1699.             'RichText/Media_GenericFile': '***Sent a file***',
  1700.             'RichText/Media_Video': '***Sent a video message***',
  1701.             'RichText/UriObject': '***Sent a photo***',
  1702.             'RichText/ScheduledCallInvite': '***Scheduled a call***',
  1703.             'RichText/Location': '***Sent a location***',
  1704.             'RichText/Contacts': '***Sent a contact***',
  1705.         }
  1706.  
  1707.         return type_descriptions.get(msg_type, f'***Sent a {msg_type}***')
  1708.  
  1709.     def _optimize_batch_size(self, conversations: List[Dict[str, Any]]) -> None:
  1710.         """
  1711.        Calculate optimal batch size based on data volume.
  1712.  
  1713.        Args:
  1714.            conversations: List of raw conversation data
  1715.        """
  1716.         # Only optimize if memory monitoring is enabled
  1717.         if not (self.ctx.options.enable_memory_optimization and self.ctx.memory_monitor):
  1718.             return
  1719.  
  1720.         # Adjust batch size based on number of conversations and available memory
  1721.         conversation_count = len(conversations)
  1722.         estimated_total_messages = 0
  1723.  
  1724.         # Sample a few conversations to estimate total message count
  1725.         sample_size = min(10, conversation_count)
  1726.         for i in range(sample_size):
  1727.             conv_data = conversations[i]
  1728.             estimated_total_messages += len(conv_data.get('MessageList', []))
  1729.  
  1730.         if sample_size > 0:
  1731.             avg_messages = estimated_total_messages / sample_size
  1732.             estimated_total = avg_messages * conversation_count
  1733.  
  1734.             # Adjust batch size if total is large
  1735.             if estimated_total > 100000:
  1736.                 optimal_batch_size = self.ctx.memory_monitor.calculate_optimal_batch_size(
  1737.                     int(estimated_total)
  1738.                 )
  1739.                 self.logger.info(f"Adjusting batch size to {optimal_batch_size} "
  1740.                              f"based on estimated {estimated_total:.0f} messages")
  1741.                 self.ctx.options.batch_size = optimal_batch_size
  1742.  
  1743.     def _extract_metadata(self, data: Dict[str, Any]) -> Tuple[str, datetime.datetime]:
  1744.         """
  1745.        Extract user ID and export date from the export data.
  1746.  
  1747.        Args:
  1748.            data: Raw JSON data from Skype export
  1749.  
  1750.        Returns:
  1751.            Tuple of (user_id, export_date)
  1752.        """
  1753.         # Default values
  1754.         user_id = "unknown"
  1755.         export_date = datetime.datetime.now()
  1756.  
  1757.         # Try to extract user ID
  1758.         if "userId" in data:
  1759.             user_id = data["userId"]
  1760.         elif "creator" in data:
  1761.             user_id = data["creator"]
  1762.         elif "exportedBy" in data:
  1763.             user_id = data["exportedBy"]
  1764.  
  1765.         # Try to extract export date
  1766.         if "exportDate" in data:
  1767.             try:
  1768.                 if isinstance(data["exportDate"], str):
  1769.                     # Try ISO format first
  1770.                     try:
  1771.                         export_date = datetime.datetime.fromisoformat(data["exportDate"])
  1772.                     except ValueError:
  1773.                         # Try various date formats
  1774.                         for fmt in ["%Y-%m-%d", "%Y/%m/%d", "%d-%m-%Y", "%d/%m/%Y"]:
  1775.                             try:
  1776.                                 export_date = datetime.datetime.strptime(data["exportDate"], fmt)
  1777.                                 break
  1778.                             except ValueError:
  1779.                                 continue
  1780.                 elif isinstance(data["exportDate"], int):
  1781.                     # Assume Unix timestamp (seconds since epoch)
  1782.                     export_date = datetime.datetime.fromtimestamp(data["exportDate"])
  1783.             except Exception as e:
  1784.                 self.logger.warning(f"Failed to parse export date: {e}")
  1785.  
  1786.         # If we still don't have a user ID, try to extract from file metadata
  1787.         if user_id == "unknown" and "personaList" in data:
  1788.             for persona in data["personaList"]:
  1789.                 if "cid" in persona:
  1790.                     user_id = persona["cid"]
  1791.                     break
  1792.  
  1793.         return user_id, export_date
  1794.  
  1795.     def _build_display_name_map(self, data: Dict[str, Any]) -> Dict[str, str]:
  1796.         """
  1797.        Build a mapping from user IDs to display names.
  1798.  
  1799.        Args:
  1800.            data: Raw JSON data from Skype export
  1801.  
  1802.        Returns:
  1803.            Dictionary mapping user IDs to display names
  1804.        """
  1805.         id_to_display_name = {}
  1806.  
  1807.         # Extract from personas list if available
  1808.         if "personaList" in data:
  1809.             for persona in data["personaList"]:
  1810.                 if "cid" in persona and "displayName" in persona:
  1811.                     id_to_display_name[persona["cid"]] = persona["displayName"]
  1812.  
  1813.         # Extract from conversations/chats if available
  1814.         if "conversations" in data:
  1815.             for conv in data["conversations"]:
  1816.                 if "id" in conv and "displayName" in conv:
  1817.                     id_to_display_name[conv["id"]] = conv["displayName"]
  1818.  
  1819.         if "chats" in data:
  1820.             for chat in data["chats"]:
  1821.                 if "id" in chat and "threadProperties" in chat and "topic" in chat["threadProperties"]:
  1822.                     id_to_display_name[chat["id"]] = chat["threadProperties"]["topic"]
  1823.                 elif "id" in chat and "displayName" in chat:
  1824.                     id_to_display_name[chat["id"]] = chat["displayName"]
  1825.  
  1826.         return id_to_display_name
  1827.  
  1828. class ContentFormatter:
  1829.     """Base class for content formatting with rich formatting support."""
  1830.  
  1831.     def __init__(self, ctx: AppContext):
  1832.         """
  1833.        Initialize the formatter.
  1834.  
  1835.        Args:
  1836.            ctx: Application context
  1837.        """
  1838.         self.ctx = ctx
  1839.         self.logger = get_logger('formatter', ctx)
  1840.  
  1841.     def format_timestamp(self, timestamp: datetime.datetime) -> str:
  1842.         """Format timestamp for display."""
  1843.         return timestamp.strftime("%Y-%m-%d %H:%M:%S")
  1844.  
  1845.     def format_message(self, message: SkypeMessage) -> str:
  1846.         """
  1847.        Format a message for display.
  1848.  
  1849.        Args:
  1850.            message: Message to format
  1851.  
  1852.        Returns:
  1853.            Formatted message string
  1854.        """
  1855.         timestamp = ""
  1856.         if self.ctx.options.include_timestamps:
  1857.             timestamp = f"[{self.format_timestamp(message.timestamp)}] "
  1858.  
  1859.         formatted = f"{timestamp}{message.sender_display_name}: {message.content}"
  1860.         return formatted
  1861.  
  1862.     def create_banner(self, conversation: SkypeConversation) -> str:
  1863.         """
  1864.        Create a banner with conversation metadata.
  1865.  
  1866.        Args:
  1867.            conversation: Conversation to create banner for
  1868.  
  1869.        Returns:
  1870.            Banner string
  1871.        """
  1872.         banner = [
  1873.             f"Conversation with: {conversation.display_name} ({conversation.id})",
  1874.             f"Exported on: {self.ctx.export_date}, at: {self.ctx.export_time}",
  1875.         ]
  1876.  
  1877.         if conversation.first_timestamp and conversation.last_timestamp:
  1878.             banner.extend([
  1879.                 f"Conversations From: {self.format_timestamp(conversation.first_timestamp)}",
  1880.                 f"                To: {self.format_timestamp(conversation.last_timestamp)}",
  1881.             ])
  1882.  
  1883.         banner.append("***** All times are in UTC *****" if not self.ctx.options.use_local_time
  1884.                      else "***** All times are in local time *****")
  1885.  
  1886.         return "\n".join(banner)
  1887.  
  1888.     def parse_content(self, content: str) -> str:
  1889.         """
  1890.        Parse and clean message content.
  1891.  
  1892.        Args:
  1893.            content: Raw message content
  1894.  
  1895.        Returns:
  1896.            Cleaned content
  1897.        """
  1898.         if self.ctx.options.strip_html:
  1899.             if BEAUTIFULSOUP_AVAILABLE:
  1900.                 return self._parse_with_beautifulsoup(content)
  1901.             else:
  1902.                 return self._parse_with_regex(content)
  1903.         return content
  1904.  
  1905.     def _parse_with_beautifulsoup(self, content: str) -> str:
  1906.         """
  1907.        Parse content using BeautifulSoup.
  1908.  
  1909.        Args:
  1910.            content: Raw HTML content
  1911.  
  1912.        Returns:
  1913.            Plain text content
  1914.        """
  1915.         soup = BeautifulSoup(content, 'lxml')
  1916.         text = soup.get_text()
  1917.         return self._pretty_quotes(text)
  1918.  
  1919.     def _parse_with_regex(self, content: str) -> str:
  1920.         """
  1921.        Parse content using regex fallback.
  1922.  
  1923.        Args:
  1924.            content: Raw HTML content
  1925.  
  1926.        Returns:
  1927.            Plain text content
  1928.        """
  1929.         tag_pattern = re.compile(r'<.*?>')
  1930.         content = tag_pattern.sub('', content)
  1931.         content = html.unescape(content)
  1932.         return self._pretty_quotes(content)
  1933.  
  1934.     def _pretty_quotes(self, text: str) -> str:
  1935.         """
  1936.        Format quotes for better readability.
  1937.  
  1938.        Args:
  1939.            text: Text with quote markers
  1940.  
  1941.        Returns:
  1942.            Text with formatted quotes
  1943.        """
  1944.         # Replace quote markers with more readable format
  1945.         quote_pattern = re.compile(r'\[[+-]?\d+(?:\.\d+)?\]')
  1946.         text = quote_pattern.sub(r'\n\t*** Quoting the following message: ***\n\t', text)
  1947.  
  1948.         response_pattern = re.compile(r'\<\<\<')
  1949.         text = response_pattern.sub('\t*** And responding with: ***\n\t', text)
  1950.  
  1951.         return text
  1952.  
  1953. class TextExporter:
  1954.     """Exports conversations to plain text format."""
  1955.  
  1956.     def __init__(self, ctx: AppContext):
  1957.         """Initialize text exporter with application context."""
  1958.         self.ctx = ctx
  1959.         self.logger = ctx.logger.getChild('exporter.text')
  1960.         self.formatter = ContentFormatter(ctx)
  1961.         # Check if aiofiles is available
  1962.         self.aiofiles_available = importlib.util.find_spec("aiofiles") is not None
  1963.         if self.aiofiles_available:
  1964.             self.logger.debug("aiofiles is available, will use for async file operations")
  1965.             import aiofiles
  1966.             self.aiofiles = aiofiles
  1967.         else:
  1968.             self.logger.debug("aiofiles not available, using custom async file wrapper")
  1969.  
  1970.     async def export_conversation(self, conversation: SkypeConversation, output_dir: Path) -> Path:
  1971.         """
  1972.        Export a conversation to a text file.
  1973.  
  1974.        Args:
  1975.            conversation: Conversation to export
  1976.            output_dir: Output directory
  1977.  
  1978.        Returns:
  1979.            Path to the exported file
  1980.        """
  1981.         self.logger.info(f"Exporting conversation '{conversation.display_name}' to text")
  1982.  
  1983.         # Create file name from conversation display name
  1984.         file_name = sanitize_filename(conversation.display_name)
  1985.         output_path = get_unique_filename(output_dir, file_name, "txt")
  1986.  
  1987.         # Prepare content
  1988.         content = []
  1989.  
  1990.         # Add banner with conversation info
  1991.         content.append(self.formatter.create_banner(conversation))
  1992.         content.append("")  # Empty line after banner
  1993.  
  1994.         # Group messages by date
  1995.         message_dates = sorted(conversation.get_message_dates())
  1996.  
  1997.         # Process each date
  1998.         for date in message_dates:
  1999.             # Add date header
  2000.             date_str = date.strftime("%A, %B %d, %Y")
  2001.             content.append(f"\n=== {date_str} ===\n")
  2002.  
  2003.             # Add messages for this date
  2004.             messages = conversation.get_messages_by_date(date)
  2005.             for message in messages:
  2006.                 content.append(self.formatter.format_message(message))
  2007.  
  2008.         # Write to file using async I/O
  2009.         try:
  2010.             if self.aiofiles_available:
  2011.                 # Use aiofiles for truly async I/O
  2012.                 async with self.aiofiles.open(output_path, 'w', encoding='utf-8') as f:
  2013.                     await f.write('\n'.join(content))
  2014.             else:
  2015.                 # Fall back to custom async wrapper
  2016.                 with self._async_open(output_path, 'w', encoding='utf-8') as f:
  2017.                     await f.write('\n'.join(content))
  2018.  
  2019.             self.logger.info(f"Exported to {output_path}")
  2020.             return output_path
  2021.  
  2022.         except Exception as e:
  2023.             self.logger.error(f"Failed to write text file: {e}")
  2024.             raise FileWriteError(f"Failed to write text file: {e}")
  2025.  
  2026.     @contextmanager
  2027.     def _async_open(self, file_path: Path, mode: str, **kwargs):
  2028.         """
  2029.        Context manager for async file operations.
  2030.  
  2031.        Args:
  2032.            file_path: Path to file
  2033.            mode: File mode
  2034.            **kwargs: Additional open arguments
  2035.  
  2036.        Yields:
  2037.            AsyncFile object
  2038.        """
  2039.         class AsyncFile:
  2040.             def __init__(self, file_obj):
  2041.                 self.file_obj = file_obj
  2042.  
  2043.             async def write(self, content):
  2044.                 loop = asyncio.get_event_loop()
  2045.                 await loop.run_in_executor(None, self.file_obj.write, content)
  2046.  
  2047.             async def read(self):
  2048.                 loop = asyncio.get_event_loop()
  2049.                 return await loop.run_in_executor(None, self.file_obj.read)
  2050.  
  2051.         file_obj = open(file_path, mode, **kwargs)
  2052.         try:
  2053.             yield AsyncFile(file_obj)
  2054.         finally:
  2055.             file_obj.close()
  2056.  
  2057. class HtmlExporter:
  2058.     """Exporter for HTML format with styling."""
  2059.  
  2060.     def __init__(self, ctx: AppContext):
  2061.         """
  2062.        Initialize the exporter.
  2063.  
  2064.        Args:
  2065.            ctx: Application context
  2066.        """
  2067.         self.ctx = ctx
  2068.         self.formatter = ContentFormatter(ctx)
  2069.         self.logger = get_logger('exporter.html', ctx)
  2070.  
  2071.         # Check for required dependencies
  2072.         if not JINJA2_AVAILABLE:
  2073.             ctx.logger.warning("Jinja2 not installed. HTML export will use basic formatting.")
  2074.  
  2075.     async def export_conversation(self, conversation: SkypeConversation, output_dir: Path) -> Path:
  2076.         """
  2077.        Export a conversation to HTML format.
  2078.  
  2079.        Args:
  2080.            conversation: Conversation to export
  2081.            output_dir: Directory to write output to
  2082.  
  2083.        Returns:
  2084.            Path to the exported file
  2085.        """
  2086.         self.logger.debug(f"Exporting conversation {conversation.display_name} to HTML")
  2087.  
  2088.         # Create filename with enhanced sanitization
  2089.         safe_name = sanitize_filename(conversation.display_name)
  2090.         filename = f"[{self.ctx.export_date}]-{safe_name}.html"
  2091.         output_path = output_dir / filename
  2092.  
  2093.         # Group messages by date
  2094.         message_groups = {}
  2095.         for date in sorted(conversation.get_message_dates()):
  2096.             message_groups[date.isoformat()] = conversation.get_messages_by_date(date)
  2097.  
  2098.         # Generate HTML
  2099.         if JINJA2_AVAILABLE:
  2100.             html_content = self._generate_html_with_jinja(conversation, message_groups)
  2101.         else:
  2102.             html_content = self._generate_basic_html(conversation, message_groups)
  2103.  
  2104.         # Write to file
  2105.         try:
  2106.             loop = asyncio.get_event_loop()
  2107.             await loop.run_in_executor(
  2108.                 None,
  2109.                 lambda: output_path.write_text(html_content, encoding='utf-8')
  2110.             )
  2111.         except Exception as e:
  2112.             self.logger.error(f"Error writing to {output_path}: {e}")
  2113.             raise FileWriteError(f"Failed to write HTML to {output_path}: {e}")
  2114.  
  2115.         self.logger.info(f"Exported {conversation.message_count} messages to {output_path}")
  2116.         return output_path
  2117.  
  2118.     def _generate_html_with_jinja(self, conversation: SkypeConversation,
  2119.                                  message_groups: Dict[str, List[SkypeMessage]]) -> str:
  2120.         """
  2121.        Generate HTML using Jinja2 templates.
  2122.  
  2123.        Args:
  2124.            conversation: Conversation to export
  2125.            message_groups: Messages grouped by date
  2126.  
  2127.        Returns:
  2128.            Generated HTML string
  2129.        """
  2130.         # Create template
  2131.         template_str = """
  2132.        <!DOCTYPE html>
  2133.        <html lang="en">
  2134.        <head>
  2135.            <meta charset="UTF-8">
  2136.            <meta name="viewport" content="width=device-width, initial-scale=1.0">
  2137.            <title>{{ conversation.display_name }} - Skype Chat</title>
  2138.            <style>
  2139.                body {
  2140.                    font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif;
  2141.                    line-height: 1.6;
  2142.                    color: #333;
  2143.                    max-width: 800px;
  2144.                    margin: 0 auto;
  2145.                    padding: 20px;
  2146.                }
  2147.                .header {
  2148.                    background-color: #00aff0;
  2149.                    color: white;
  2150.                    padding: 15px;
  2151.                    border-radius: 5px;
  2152.                    margin-bottom: 20px;
  2153.                }
  2154.                .date-header {
  2155.                    background-color: #e6e6e6;
  2156.                    padding: 8px 15px;
  2157.                    border-radius: 5px;
  2158.                    margin: 25px 0 15px 0;
  2159.                    font-weight: bold;
  2160.                }
  2161.                .message {
  2162.                    margin-bottom: 10px;
  2163.                    padding: 10px;
  2164.                    border-radius: 5px;
  2165.                }
  2166.                .message:nth-child(odd) {
  2167.                    background-color: #f5f5f5;
  2168.                }
  2169.                .timestamp {
  2170.                    color: #777;
  2171.                    font-size: 0.85em;
  2172.                    margin-right: 10px;
  2173.                }
  2174.                .sender {
  2175.                    font-weight: bold;
  2176.                    margin-right: 10px;
  2177.                }
  2178.                .content {
  2179.                    white-space: pre-wrap;
  2180.                }
  2181.                .quote {
  2182.                    border-left: 3px solid #00aff0;
  2183.                    padding-left: 10px;
  2184.                    color: #555;
  2185.                    font-style: italic;
  2186.                }
  2187.                .metadata {
  2188.                    font-size: 0.9em;
  2189.                    color: #777;
  2190.                }
  2191.                .edited {
  2192.                    color: #999;
  2193.                    font-style: italic;
  2194.                    font-size: 0.85em;
  2195.                }
  2196.                .special {
  2197.                    color: #777;
  2198.                    font-style: italic;
  2199.                }
  2200.            </style>
  2201.        </head>
  2202.        <body>
  2203.            <div class="header">
  2204.                <h1>{{ conversation.display_name }}</h1>
  2205.                <div class="metadata">
  2206.                    <p>Exported on: {{ export_date }}, at: {{ export_time }}</p>
  2207.                    {% if conversation.first_timestamp %}
  2208.                    <p>Conversations from: {{ formatter.format_timestamp(conversation.first_timestamp) }}</p>
  2209.                    <p>To: {{ formatter.format_timestamp(conversation.last_timestamp) }}</p>
  2210.                    {% endif %}
  2211.                    <p>{{ time_zone_note }}</p>
  2212.                </div>
  2213.            </div>
  2214.  
  2215.            {% for date, messages in message_groups.items() %}
  2216.                <div class="date-header">Conversations on {{ date }}</div>
  2217.  
  2218.                {% for message in messages %}
  2219.                    <div class="message">
  2220.                        {% if include_timestamps %}
  2221.                        <span class="timestamp">[{{ formatter.format_timestamp(message.timestamp) }}]</span>
  2222.                        {% endif %}
  2223.                        <span class="sender">{{ message.sender_display_name }}:</span>
  2224.  
  2225.                        {% if message.message_type != 'RichText' %}
  2226.                            <span class="special">{{ message.content }}</span>
  2227.                        {% else %}
  2228.                            <span class="content">{{ formatter.parse_content(message.content) }}</span>
  2229.                            {% if message.edited %}
  2230.                            <div class="edited">This message was edited</div>
  2231.                            {% endif %}
  2232.                        {% endif %}
  2233.                    </div>
  2234.                {% endfor %}
  2235.            {% endfor %}
  2236.        </body>
  2237.        </html>
  2238.        """
  2239.  
  2240.         # Create template and render
  2241.         template = jinja2.Template(template_str)
  2242.         return template.render(
  2243.             conversation=conversation,
  2244.             message_groups=message_groups,
  2245.             formatter=self.formatter,
  2246.             export_date=self.ctx.export_date,
  2247.             export_time=self.ctx.export_time,
  2248.             include_timestamps=self.ctx.options.include_timestamps,
  2249.             time_zone_note="All times are in UTC" if not self.ctx.options.use_local_time else "All times are in local time"
  2250.         )
  2251.  
  2252.     def _generate_basic_html(self, conversation: SkypeConversation,
  2253.                             message_groups: Dict[str, List[SkypeMessage]]) -> str:
  2254.         """
  2255.        Generate basic HTML without Jinja2.
  2256.  
  2257.        Args:
  2258.            conversation: Conversation to export
  2259.            message_groups: Messages grouped by date
  2260.  
  2261.        Returns:
  2262.            Generated HTML string
  2263.        """
  2264.         # Create HTML pieces
  2265.         html_parts = [
  2266.             '<!DOCTYPE html>',
  2267.             '<html lang="en">',
  2268.             '<head>',
  2269.             '    <meta charset="UTF-8">',
  2270.             f'    <title>{html.escape(conversation.display_name)} - Skype Chat</title>',
  2271.             '    <style>',
  2272.             '        body { font-family: sans-serif; max-width: 800px; margin: 0 auto; padding: 20px; }',
  2273.             '        .header { background-color: #00aff0; color: white; padding: 15px; }',
  2274.             '        .date-header { background-color: #e6e6e6; padding: 8px; margin: 20px 0 10px 0; }',
  2275.             '        .message { margin-bottom: 10px; padding: 8px; }',
  2276.             '        .message:nth-child(odd) { background-color: #f5f5f5; }',
  2277.             '    </style>',
  2278.             '</head>',
  2279.             '<body>',
  2280.             f'    <div class="header"><h1>{html.escape(conversation.display_name)}</h1>',
  2281.             f'        <p>Exported on: {self.ctx.export_date}, at: {self.ctx.export_time}</p>'
  2282.         ]
  2283.  
  2284.         if conversation.first_timestamp and conversation.last_timestamp:
  2285.             html_parts.extend([
  2286.                 f'        <p>Conversations from: {self.formatter.format_timestamp(conversation.first_timestamp)}</p>',
  2287.                 f'        <p>To: {self.formatter.format_timestamp(conversation.last_timestamp)}</p>'
  2288.             ])
  2289.  
  2290.         time_note = "All times are in UTC" if not self.ctx.options.use_local_time else "All times are in local time"
  2291.         html_parts.append(f'        <p>{time_note}</p>')
  2292.         html_parts.append('    </div>')
  2293.  
  2294.         # Add messages grouped by date
  2295.         for date, messages in message_groups.items():
  2296.             html_parts.append(f'    <div class="date-header">Conversations on {date}</div>')
  2297.  
  2298.             for message in messages:
  2299.                 html_parts.append('    <div class="message">')
  2300.  
  2301.                 if self.ctx.options.include_timestamps:
  2302.                     html_parts.append(f'        <span>[{self.formatter.format_timestamp(message.timestamp)}]</span>')
  2303.  
  2304.                 html_parts.append(f'        <strong>{html.escape(message.sender_display_name)}:</strong> ')
  2305.  
  2306.                 # Handle different message types
  2307.                 if message.message_type != 'RichText':
  2308.                     html_parts.append(f'        <em>{html.escape(message.content)}</em>')
  2309.                 else:
  2310.                     content = self.formatter.parse_content(message.content)
  2311.                     html_parts.append(f'        <span>{html.escape(content)}</span>')
  2312.  
  2313.                     if message.edited:
  2314.                         html_parts.append('        <div><em>This message was edited</em></div>')
  2315.  
  2316.                 html_parts.append('    </div>')
  2317.  
  2318.         html_parts.extend(['</body>', '</html>'])
  2319.         return '\n'.join(html_parts)
  2320.  
  2321. class MarkdownExporter:
  2322.     """Exporter for Markdown format."""
  2323.  
  2324.     def __init__(self, ctx: AppContext):
  2325.         """
  2326.        Initialize the exporter.
  2327.  
  2328.        Args:
  2329.            ctx: Application context
  2330.        """
  2331.         self.ctx = ctx
  2332.         self.formatter = ContentFormatter(ctx)
  2333.         self.logger = get_logger('exporter.markdown', ctx)
  2334.  
  2335.     async def export_conversation(self, conversation: SkypeConversation, output_dir: Path) -> Path:
  2336.         """
  2337.        Export a conversation to Markdown format.
  2338.  
  2339.        Args:
  2340.            conversation: Conversation to export
  2341.            output_dir: Directory to write output to
  2342.  
  2343.        Returns:
  2344.            Path to the exported file
  2345.        """
  2346.         self.logger.debug(f"Exporting conversation {conversation.display_name} to Markdown")
  2347.  
  2348.         # Create filename with enhanced sanitization
  2349.         safe_name = sanitize_filename(conversation.display_name)
  2350.         filename = f"[{self.ctx.export_date}]-{safe_name}.md"
  2351.         output_path = output_dir / filename
  2352.  
  2353.         # Create banner
  2354.         content = [
  2355.             f"# Conversation with {conversation.display_name}",
  2356.             "",
  2357.             "## Metadata",
  2358.             f"- **Exported on:** {self.ctx.export_date}, at: {self.ctx.export_time}"
  2359.         ]
  2360.  
  2361.         if conversation.first_timestamp and conversation.last_timestamp:
  2362.             content.extend([
  2363.                 f"- **First message:** {self.formatter.format_timestamp(conversation.first_timestamp)}",
  2364.                 f"- **Last message:** {self.formatter.format_timestamp(conversation.last_timestamp)}"
  2365.             ])
  2366.  
  2367.         time_note = "All times are in UTC" if not self.ctx.options.use_local_time else "All times are in local time"
  2368.         content.append(f"- **Note:** {time_note}")
  2369.         content.append("")
  2370.  
  2371.         # Group messages by date
  2372.         for date in sorted(conversation.get_message_dates()):
  2373.             date_messages = conversation.get_messages_by_date(date)
  2374.             if date_messages:
  2375.                 content.append(f"## Conversations on {date.isoformat()}")
  2376.                 content.append("")
  2377.  
  2378.                 for message in date_messages:
  2379.                     # Format timestamp
  2380.                     timestamp = ""
  2381.                     if self.ctx.options.include_timestamps:
  2382.                         timestamp = f"**[{self.formatter.format_timestamp(message.timestamp)}]** "
  2383.  
  2384.                     # Format sender
  2385.                     sender = f"**{message.sender_display_name}:** "
  2386.  
  2387.                     # Format content
  2388.                     if message.message_type != 'RichText':
  2389.                         msg_content = f"*{message.content}*"
  2390.                     else:
  2391.                         msg_content = self.formatter.parse_content(message.content)
  2392.                         # Escape markdown characters in content
  2393.                         msg_content = re.sub(r'([_*~`#])', r'\\\1', msg_content)
  2394.  
  2395.                     # Add edited indicator
  2396.                     if message.edited:
  2397.                         msg_content += " *(edited)*"
  2398.  
  2399.                     # Add complete message
  2400.                     content.append(f"{timestamp}{sender}{msg_content}")
  2401.                     content.append("")
  2402.  
  2403.         # Write to file
  2404.         try:
  2405.             loop = asyncio.get_event_loop()
  2406.             await loop.run_in_executor(
  2407.                 None,
  2408.                 lambda: output_path.write_text('\n'.join(content), encoding='utf-8')
  2409.             )
  2410.         except Exception as e:
  2411.             self.logger.error(f"Error writing to {output_path}: {e}")
  2412.             raise FileWriteError(f"Failed to write Markdown to {output_path}: {e}")
  2413.  
  2414.         self.logger.info(f"Exported {conversation.message_count} messages to {output_path}")
  2415.         return output_path
  2416.  
  2417. class JsonExporter:
  2418.     """Exporter for JSON format with full message data."""
  2419.  
  2420.     def __init__(self, ctx: AppContext):
  2421.         """
  2422.        Initialize the exporter.
  2423.  
  2424.        Args:
  2425.            ctx: Application context
  2426.        """
  2427.         self.ctx = ctx
  2428.         self.logger = get_logger('exporter.json', ctx)
  2429.  
  2430.     async def export_conversation(self, conversation: SkypeConversation, output_dir: Path) -> Path:
  2431.         """
  2432.        Export a conversation to JSON format.
  2433.  
  2434.        Args:
  2435.            conversation: Conversation to export
  2436.            output_dir: Directory to write output to
  2437.  
  2438.        Returns:
  2439.            Path to the exported file
  2440.        """
  2441.         self.logger.debug(f"Exporting conversation {conversation.display_name} to JSON")
  2442.  
  2443.         # Create filename with enhanced sanitization
  2444.         safe_name = sanitize_filename(conversation.display_name)
  2445.         filename = f"[{self.ctx.export_date}]-{safe_name}.json"
  2446.         output_path = output_dir / filename
  2447.  
  2448.         # Create serializable data structure
  2449.         data = {
  2450.             "metadata": {
  2451.                 "id": conversation.id,
  2452.                 "display_name": conversation.display_name,
  2453.                 "export_date": self.ctx.export_date,
  2454.                 "export_time": self.ctx.export_time,
  2455.                 "message_count": conversation.message_count,
  2456.                 "first_message": conversation.first_timestamp.isoformat() if conversation.first_timestamp else None,
  2457.                 "last_message": conversation.last_timestamp.isoformat() if conversation.last_timestamp else None,
  2458.                 "participants": conversation.participants,
  2459.                 "timezone": "UTC" if not self.ctx.options.use_local_time else "local"
  2460.             },
  2461.             "messages": []
  2462.         }
  2463.  
  2464.         # Add messages
  2465.         for message in conversation.messages:
  2466.             msg_data = {
  2467.                 "id": message.id,
  2468.                 "timestamp": message.timestamp.isoformat(),
  2469.                 "sender_id": message.sender_id,
  2470.                 "sender_display_name": message.sender_display_name,
  2471.                 "content": message.content,
  2472.                 "message_type": message.message_type,
  2473.                 "edited": message.edited
  2474.             }
  2475.  
  2476.             # Include original JSON if requested
  2477.             if self.ctx.options.include_message_ids:
  2478.                 msg_data["original_json"] = message.original_json
  2479.  
  2480.             data["messages"].append(msg_data)
  2481.  
  2482.         # Write to file with indentation if pretty print is enabled
  2483.         indent = 2 if self.ctx.options.pretty_print else None
  2484.  
  2485.         try:
  2486.             loop = asyncio.get_event_loop()
  2487.             await loop.run_in_executor(
  2488.                 None,
  2489.                 lambda: output_path.write_text(
  2490.                     json.dumps(data, indent=indent, ensure_ascii=False),
  2491.                     encoding='utf-8'
  2492.                 )
  2493.             )
  2494.         except Exception as e:
  2495.             self.logger.error(f"Error writing to {output_path}: {e}")
  2496.             raise FileWriteError(f"Failed to write JSON to {output_path}: {e}")
  2497.  
  2498.         self.logger.info(f"Exported {conversation.message_count} messages to {output_path}")
  2499.         return output_path
  2500.  
  2501. class PostgreSQLExporter:
  2502.     """Exporter for PostgreSQL database with normalized schema."""
  2503.  
  2504.     def __init__(self, ctx: AppContext):
  2505.         """
  2506.        Initialize the exporter.
  2507.  
  2508.        Args:
  2509.            ctx: Application context
  2510.        """
  2511.         self.ctx = ctx
  2512.         self.logger = get_logger('exporter.postgresql', ctx)
  2513.  
  2514.         # Check for required dependencies
  2515.         if not SQLALCHEMY_AVAILABLE:
  2516.             raise ExportError("SQLAlchemy is required for PostgreSQL export but not installed")
  2517.  
  2518.         if not PSYCOPG2_AVAILABLE:
  2519.             raise ExportError("psycopg2 is required for PostgreSQL export but not installed")
  2520.  
  2521.         # Initialize database manager
  2522.         self.db_manager = DatabaseManager(ctx)
  2523.  
  2524.         # Assign DB model classes to instance attributes for use in queries
  2525.         self.DbConversation = DbConversation
  2526.         self.DbMessage = DbMessage
  2527.         self.DbParticipant = DbParticipant
  2528.  
  2529.     async def export_conversation(self, conversation: SkypeConversation, output_dir: Path) -> Path:
  2530.         """
  2531.        Export a conversation to PostgreSQL database.
  2532.  
  2533.        Args:
  2534.            conversation: Conversation to export
  2535.            output_dir: Directory to write output to
  2536.  
  2537.        Returns:
  2538.            Path to a metadata file with export info
  2539.        """
  2540.         self.logger.debug(f"Exporting conversation {conversation.display_name} to PostgreSQL")
  2541.  
  2542.         # Create metadata file
  2543.         safe_name = sanitize_filename(conversation.display_name)
  2544.         filename = f"[{self.ctx.export_date}]-{safe_name}-pg_export_info.json"
  2545.         output_path = output_dir / filename
  2546.  
  2547.         try:
  2548.             # Initialize database connection
  2549.             if not hasattr(self, '_db_initialized'):
  2550.                 self.db_manager.initialize()
  2551.                 self._db_initialized = True
  2552.  
  2553.             # Export conversation to database
  2554.             await self._export_to_database(conversation)
  2555.  
  2556.             # Create a metadata file with export information
  2557.             meta_data = {
  2558.                 "export_type": "PostgreSQL",
  2559.                 "conversation": {
  2560.                     "id": conversation.id,
  2561.                     "display_name": conversation.display_name,
  2562.                     "message_count": conversation.message_count,
  2563.                     "first_message": conversation.first_timestamp.isoformat() if conversation.first_timestamp else None,
  2564.                     "last_message": conversation.last_timestamp.isoformat() if conversation.last_timestamp else None,
  2565.                 },
  2566.                 "database": {
  2567.                     "engine": self.ctx.options.database_config.engine,
  2568.                     "host": self.ctx.options.database_config.host,
  2569.                     "port": self.ctx.options.database_config.port,
  2570.                     "database": self.ctx.options.database_config.database,
  2571.                     "schema": self.ctx.options.database_config.schema,
  2572.                 },
  2573.                 "export_date": self.ctx.export_date,
  2574.                 "export_time": self.ctx.export_time,
  2575.                 "sql_connection_string": self.get_sanitized_connection_string()
  2576.             }
  2577.  
  2578.             loop = asyncio.get_event_loop()
  2579.             await loop.run_in_executor(
  2580.                 None,
  2581.                 lambda: output_path.write_text(
  2582.                     json.dumps(meta_data, indent=2, ensure_ascii=False),
  2583.                     encoding='utf-8'
  2584.                 )
  2585.             )
  2586.  
  2587.             self.logger.info(f"Exported {conversation.message_count} messages to PostgreSQL "
  2588.                           f"and saved metadata to {output_path}")
  2589.             return output_path
  2590.  
  2591.         except Exception as e:
  2592.             self.logger.error(f"Error exporting to PostgreSQL: {e}")
  2593.             raise ExportError(f"Failed to export to PostgreSQL: {e}")
  2594.  
  2595.     async def _export_to_database(self, conversation: SkypeConversation) -> None:
  2596.         """
  2597.        Export conversation data to PostgreSQL database.
  2598.  
  2599.        Args:
  2600.            conversation: Conversation to export
  2601.        """
  2602.         # Use asyncio to run database operations in a thread pool
  2603.         loop = asyncio.get_event_loop()
  2604.         await loop.run_in_executor(
  2605.             None,
  2606.             self._export_conversation_sync,
  2607.             conversation
  2608.         )
  2609.  
  2610.     def _export_conversation_sync(self, conversation: SkypeConversation) -> None:
  2611.         """Export a conversation to PostgreSQL database (synchronous)."""
  2612.         try:
  2613.             # First handle the conversation record in its own transaction
  2614.             with self.db_manager.session() as session:
  2615.                 try:
  2616.                     # Check if conversation already exists
  2617.                     db_conversation = session.query(self.DbConversation).filter_by(
  2618.                         id=conversation.id
  2619.                     ).first()
  2620.  
  2621.                     # Create or update conversation record
  2622.                     if not db_conversation:
  2623.                         db_conversation = self.DbConversation(
  2624.                             id=conversation.id,
  2625.                             display_name=conversation.display_name,
  2626.                             first_timestamp=conversation.first_timestamp,
  2627.                             last_timestamp=conversation.last_timestamp,
  2628.                             message_count=conversation.message_count,
  2629.                             days_active=conversation.days_active,
  2630.                             export_date=datetime.datetime.now(),
  2631.                             metadata_json=json.dumps(conversation.original_json)
  2632.                                             if self.ctx.options.include_metadata else None
  2633.                         )
  2634.                         session.add(db_conversation)
  2635.                     else:
  2636.                         # Update existing conversation
  2637.                         db_conversation.display_name = conversation.display_name
  2638.                         db_conversation.first_timestamp = conversation.first_timestamp
  2639.                         db_conversation.last_timestamp = conversation.last_timestamp
  2640.                         db_conversation.message_count = conversation.message_count
  2641.                         db_conversation.days_active = conversation.days_active
  2642.                         db_conversation.export_date = datetime.datetime.now()
  2643.                         if self.ctx.options.include_metadata:
  2644.                             db_conversation.metadata_json = json.dumps(conversation.original_json)
  2645.  
  2646.                     # Process participants in the same transaction as the conversation
  2647.                     for user_id, display_name in conversation.participants.items():
  2648.                         # Check if participant already exists for this conversation
  2649.                         participant = session.query(self.DbParticipant).filter_by(
  2650.                             conversation_id=conversation.id,
  2651.                             user_id=user_id
  2652.                         ).first()
  2653.  
  2654.                         if not participant:
  2655.                             participant = self.DbParticipant(
  2656.                                 conversation_id=conversation.id,
  2657.                                 user_id=user_id,
  2658.                                 display_name=display_name
  2659.                             )
  2660.                             session.add(participant)
  2661.                         else:
  2662.                             participant.display_name = display_name
  2663.  
  2664.                     # Commit conversation and participants
  2665.                     session.commit()
  2666.                     self.logger.debug(f"Saved conversation record for {conversation.id}")
  2667.  
  2668.                 except Exception as e:
  2669.                     session.rollback()
  2670.                     self.logger.error(f"Failed to save conversation record: {e}")
  2671.                     # Re-raise to abort the whole export for this conversation
  2672.                     raise
  2673.  
  2674.             # Process messages in batches with separate transactions
  2675.             batch_size = self.ctx.options.batch_size
  2676.             total_messages = len(conversation.messages)
  2677.             successful_messages = 0
  2678.             failed_batches = 0
  2679.  
  2680.             # Process messages in batches
  2681.             for i in range(0, len(conversation.messages), batch_size):
  2682.                 batch = conversation.messages[i:i+batch_size]
  2683.  
  2684.                 # Create a new session for each batch
  2685.                 with self.db_manager.session() as session:
  2686.                     try:
  2687.                         for message in batch:
  2688.                             # Check if message already exists
  2689.                             existing_message = session.query(self.DbMessage).filter_by(
  2690.                                 id=message.id
  2691.                             ).first()
  2692.  
  2693.                             if not existing_message:
  2694.                                 # Create new message record
  2695.                                 db_message = self.DbMessage(
  2696.                                     id=message.id,
  2697.                                     conversation_id=conversation.id,
  2698.                                     timestamp=message.timestamp,
  2699.                                     sender_id=message.sender_id,
  2700.                                     sender_display_name=message.sender_display_name,
  2701.                                     content=message.content,
  2702.                                     message_type=message.message_type,
  2703.                                     edited=message.edited,
  2704.                                     metadata_json=json.dumps(message.original_json)
  2705.                                                 if self.ctx.options.include_metadata else None
  2706.                                 )
  2707.                                 session.add(db_message)
  2708.  
  2709.                         # Commit this batch
  2710.                         session.commit()
  2711.                         successful_messages += len(batch)
  2712.                         self.logger.debug(f"Processed message batch {i//batch_size + 1}/{(total_messages-1)//batch_size + 1} "
  2713.                                         f"({len(batch)} messages)")
  2714.  
  2715.                     except Exception as e:
  2716.                         session.rollback()
  2717.                         failed_batches += 1
  2718.                         self.logger.error(f"Failed to process message batch {i//batch_size + 1}: {e}")
  2719.                         # Continue with next batch instead of aborting all
  2720.  
  2721.                 # Check memory after each batch
  2722.                 if self.ctx.check_memory():
  2723.                     self.logger.debug("Memory optimization performed between batches")
  2724.  
  2725.             # Log summary
  2726.             if failed_batches > 0:
  2727.                 self.logger.warning(f"Conversation {conversation.id} export completed with {failed_batches} failed batches. "
  2728.                                  f"Successfully saved {successful_messages}/{total_messages} messages.")
  2729.             else:
  2730.                 self.logger.info(f"Successfully exported conversation {conversation.id} "
  2731.                                f"with {successful_messages} messages.")
  2732.  
  2733.         except Exception as e:
  2734.             self.logger.error(f"Failed to export conversation {conversation.id}: {e}")
  2735.             raise
  2736.  
  2737.     def get_sanitized_connection_string(self) -> str:
  2738.         """Generate SQLAlchemy connection string with password masked for secure logging."""
  2739.         config = self.ctx.options.database_config
  2740.         # Always mask password regardless of its length
  2741.         return (f"{config.engine}://{config.username}:****@"
  2742.                 f"{config.host}:{config.port}/{config.database}")
  2743.  
  2744. class ExportManager:
  2745.     """Manages the export process for all conversation formats."""
  2746.  
  2747.     def __init__(self, ctx: AppContext):
  2748.         """
  2749.        Initialize the export manager.
  2750.  
  2751.        Args:
  2752.            ctx: Application context
  2753.        """
  2754.         self.ctx = ctx
  2755.         self.logger = get_logger('export_manager', ctx)
  2756.  
  2757.         # Create exporters
  2758.         self.exporters = {
  2759.             OutputFormat.TEXT: TextExporter(ctx),
  2760.             OutputFormat.HTML: HtmlExporter(ctx),
  2761.             OutputFormat.MARKDOWN: MarkdownExporter(ctx),
  2762.             OutputFormat.JSON: JsonExporter(ctx),
  2763.             OutputFormat.POSTGRESQL: PostgreSQLExporter(ctx)
  2764.         }
  2765.  
  2766.     async def export_conversations(self, skype_export: SkypeExport,
  2767.                                   conversations: List[SkypeConversation] = None) -> Dict[str, List[Path]]:
  2768.         """
  2769.        Export selected conversations in specified formats.
  2770.  
  2771.        Args:
  2772.            skype_export: Complete Skype export data
  2773.            conversations: Optional list of conversations to export (all if None)
  2774.  
  2775.        Returns:
  2776.            Dictionary mapping format names to lists of exported file paths
  2777.        """
  2778.         self.logger.info("Starting export process...")
  2779.  
  2780.         # Use all conversations if none specified
  2781.         if conversations is None:
  2782.             conversations = list(skype_export.conversations.values())
  2783.  
  2784.         # Filter conversations if pattern specified
  2785.         if self.ctx.options.filter_pattern:
  2786.             pattern = self.ctx.options.filter_pattern
  2787.             filtered = [
  2788.                 c for c in conversations
  2789.                 if fnmatch.fnmatch(c.display_name.lower(), pattern.lower())
  2790.             ]
  2791.  
  2792.             if not filtered:
  2793.                 self.logger.warning(f"No conversations matched pattern '{pattern}'")
  2794.                 if not self.ctx.options.basic_mode:
  2795.                     self.logger.info("Available conversations:")
  2796.                     for conv in conversations[:10]:
  2797.                         self.logger.info(f"- {conv.display_name}")
  2798.                     if len(conversations) > 10:
  2799.                         self.logger.info(f"... and {len(conversations) - 10} more")
  2800.  
  2801.             conversations = filtered
  2802.  
  2803.         # Create output directory
  2804.         output_dir = self.ctx.options.output_dir
  2805.         ensure_directory(output_dir)
  2806.  
  2807.         # Determine which formats to export
  2808.         formats = [self.ctx.options.format]
  2809.         if self.ctx.options.format == OutputFormat.ALL:
  2810.             formats = [f for f in OutputFormat if f != OutputFormat.ALL]
  2811.  
  2812.         # Create format-specific directories
  2813.         format_dirs = {}
  2814.         for format in formats:
  2815.             format_name = format.name.lower()
  2816.             format_dir = output_dir / format_name
  2817.             ensure_directory(format_dir)
  2818.             format_dirs[format] = format_dir
  2819.  
  2820.         # Track exported files
  2821.         exported_files = {format.name: [] for format in formats}
  2822.  
  2823.         # Create progress bar if available
  2824.         progress_tracker = self.ctx.progress_tracker
  2825.         total_exports = len(conversations) * len(formats)
  2826.  
  2827.         if RICH_AVAILABLE and progress_tracker and not self.ctx.options.basic_mode:
  2828.             with progress_tracker as progress:
  2829.                 task = progress.add_task("[green]Exporting conversations...", total=total_exports)
  2830.  
  2831.                 # Export each conversation in each format
  2832.                 for conversation in conversations:
  2833.                     for format in formats:
  2834.                         if self.ctx.cancel_requested:
  2835.                             self.logger.info("Export cancelled by user")
  2836.                             return exported_files
  2837.  
  2838.                         exported_file = await self._export_conversation(
  2839.                             conversation, format, format_dirs[format]
  2840.                         )
  2841.                         exported_files[format.name].append(exported_file)
  2842.                         progress.update(task, advance=1)
  2843.  
  2844.                         # Periodically check memory usage
  2845.                         self.ctx.check_memory()
  2846.         else:
  2847.             # Simple progress tracking
  2848.             processed = 0
  2849.  
  2850.             # Export each conversation in each format
  2851.             for conversation in conversations:
  2852.                 for format in formats:
  2853.                     if self.ctx.cancel_requested:
  2854.                         self.logger.info("Export cancelled by user")
  2855.                         return exported_files
  2856.  
  2857.                     processed += 1
  2858.                     if processed % 5 == 0 or processed == total_exports:
  2859.                         self.logger.info(f"Export progress: {processed}/{total_exports}")
  2860.  
  2861.                     exported_file = await self._export_conversation(
  2862.                         conversation, format, format_dirs[format]
  2863.                     )
  2864.                     exported_files[format.name].append(exported_file)
  2865.  
  2866.                     # Periodically check memory usage
  2867.                     self.ctx.check_memory()
  2868.  
  2869.         # Create stats file if requested
  2870.         if self.ctx.options.include_conversation_stats:
  2871.             await self._export_stats(skype_export, output_dir)
  2872.  
  2873.         # Compress output if requested
  2874.         if self.ctx.options.compress_output:
  2875.             await self._compress_output(output_dir)
  2876.  
  2877.         return exported_files
  2878.  
  2879.     async def _export_conversation(self, conversation: SkypeConversation,
  2880.                                  format: OutputFormat, output_dir: Path) -> Path:
  2881.         """
  2882.        Export a single conversation in specified format.
  2883.  
  2884.        Args:
  2885.            conversation: Conversation to export
  2886.            format: Format to export in
  2887.            output_dir: Directory to write output to
  2888.  
  2889.        Returns:
  2890.            Path to exported file
  2891.        """
  2892.         try:
  2893.             exporter = self.exporters[format]
  2894.             return await exporter.export_conversation(conversation, output_dir)
  2895.         except Exception as e:
  2896.             self.logger.error(f"Error exporting conversation {conversation.display_name} "
  2897.                                  f"in {format.name} format: {e}")
  2898.             self.ctx.errors.append({
  2899.                 "type": "export_error",
  2900.                 "conversation_id": conversation.id,
  2901.                 "format": format.name,
  2902.                 "error": str(e),
  2903.                 "traceback": traceback.format_exc()
  2904.             })
  2905.             # Create a dummy path as fallback
  2906.             return output_dir / f"ERROR-{sanitize_filename(conversation.id)}.failed"
  2907.  
  2908.     async def _export_stats(self, skype_export: SkypeExport, output_dir: Path) -> Path:
  2909.         """
  2910.        Export conversation statistics.
  2911.  
  2912.        Args:
  2913.            skype_export: Complete Skype export data
  2914.            output_dir: Directory to write output to
  2915.  
  2916.        Returns:
  2917.            Path to stats file
  2918.        """
  2919.         stats_file = output_dir / "conversation_stats.json"
  2920.         stats = skype_export.get_conversation_stats()
  2921.  
  2922.         # Add export metadata
  2923.         stats["export_metadata"] = {
  2924.             "export_date": self.ctx.export_date,
  2925.             "export_time": self.ctx.export_time,
  2926.             "user_id": self.ctx.user_id,
  2927.             "user_display_name": self.ctx.user_display_name,
  2928.             "exported_formats": [f.name for f in OutputFormat if f != OutputFormat.ALL],
  2929.             "processed_at": datetime.datetime.now().isoformat()
  2930.         }
  2931.  
  2932.         # Add memory usage if available
  2933.         memory_report = self.ctx.get_memory_report()
  2934.         if memory_report:
  2935.             stats["system_resources"] = memory_report
  2936.  
  2937.         # Write stats file
  2938.         try:
  2939.             loop = asyncio.get_event_loop()
  2940.             await loop.run_in_executor(
  2941.                 None,
  2942.                 lambda: stats_file.write_text(
  2943.                     json.dumps(stats, indent=2, ensure_ascii=False),
  2944.                     encoding='utf-8'
  2945.                 )
  2946.             )
  2947.  
  2948.             self.logger.info(f"Exported conversation statistics to {stats_file}")
  2949.             return stats_file
  2950.         except Exception as e:
  2951.             self.logger.error(f"Error writing statistics to {stats_file}: {e}")
  2952.             raise FileWriteError(f"Failed to write statistics to {stats_file}: {e}")
  2953.  
  2954.     async def _compress_output(self, output_dir: Path) -> Path:
  2955.         """
  2956.        Compress output directory.
  2957.  
  2958.        Args:
  2959.            output_dir: Directory to compress
  2960.  
  2961.        Returns:
  2962.            Path to compressed file
  2963.        """
  2964.         timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
  2965.         archive_path = output_dir.with_name(f"{output_dir.name}_{timestamp}.zip")
  2966.  
  2967.         self.logger.info(f"Compressing output to {archive_path}")
  2968.  
  2969.         # Create zip archive in executor to avoid blocking
  2970.         try:
  2971.             loop = asyncio.get_event_loop()
  2972.             await loop.run_in_executor(
  2973.                 None,
  2974.                 self._create_zip_archive,
  2975.                 output_dir,
  2976.                 archive_path
  2977.             )
  2978.  
  2979.             self.logger.info(f"Export compressed to {archive_path}")
  2980.             return archive_path
  2981.         except Exception as e:
  2982.             self.logger.error(f"Error compressing output: {e}")
  2983.             raise FileWriteError(f"Failed to compress output: {e}")
  2984.  
  2985.     def _create_zip_archive(self, source_dir: Path, output_path: Path) -> None:
  2986.         """
  2987.        Create a ZIP archive of a directory.
  2988.  
  2989.        Args:
  2990.            source_dir: Directory to compress
  2991.            output_path: Path for output ZIP file
  2992.        """
  2993.         with zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
  2994.             for root, _, files in os.walk(source_dir):
  2995.                 for file in files:
  2996.                     file_path = Path(root) / file
  2997.                     # Make path relative to source_dir
  2998.                     rel_path = file_path.relative_to(source_dir)
  2999.                     zipf.write(file_path, arcname=rel_path)
  3000.  
  3001. # ═════════════════════════════════════════════════════════════════════════════
  3002. # ═══════════════════════════ BASIC MODE HANDLER ═══════════════════════════════
  3003. # ═════════════════════════════════════════════════════════════════════════════
  3004.  
  3005. class BasicModeHandler:
  3006.     """
  3007.    Handles simplified workflow for basic mode operation.
  3008.  
  3009.    This class provides a straightforward procedural flow for users
  3010.    who prefer simplicity over advanced features.
  3011.    """
  3012.  
  3013.     def __init__(self, ctx: AppContext):
  3014.         """
  3015.        Initialize basic mode handler.
  3016.  
  3017.        Args:
  3018.            ctx: Application context
  3019.        """
  3020.         self.ctx = ctx
  3021.         self.logger = get_logger('basic_mode', ctx)
  3022.  
  3023.     async def run(self, file_path: Path) -> int:
  3024.         """
  3025.        Run the basic mode workflow.
  3026.  
  3027.        Args:
  3028.            file_path: Path to Skype export file
  3029.  
  3030.        Returns:
  3031.            Exit code
  3032.        """
  3033.         # Simple welcome message
  3034.         print("\n" + "=" * 60)
  3035.         print("     SkypeExporter - Basic Mode")
  3036.         print("     Simple Skype Chat Exporter")
  3037.         print("=" * 60 + "\n")
  3038.  
  3039.         try:
  3040.             # Get user display name
  3041.             user_display_name = input("\nPlease enter your name as you want it to appear in the logs: ")
  3042.             while not user_display_name.strip():
  3043.                 user_display_name = input("Name cannot be empty. Please enter your name: ")
  3044.  
  3045.             self.ctx.user_display_name = user_display_name
  3046.             print(f"\nWelcome, {user_display_name}!")
  3047.  
  3048.             # Process input file
  3049.             print(f"\nReading Skype export file: {file_path}")
  3050.             file_reader = FileReader.create_reader(file_path)
  3051.             raw_data = await file_reader.read(file_path, self.ctx)
  3052.  
  3053.             # Parse data
  3054.             print("\nParsing Skype conversations...")
  3055.             parser = SkypeExportParser(self.ctx)
  3056.             skype_export = await parser.parse(raw_data)
  3057.  
  3058.             # Show available conversations
  3059.             conversations = list(skype_export.conversations.values())
  3060.             valid_conversations = [c for c in conversations if c.messages]
  3061.  
  3062.             if not valid_conversations:
  3063.                 print("\nNo conversations with messages found in the export.")
  3064.                 return 0
  3065.  
  3066.             print(f"\nFound {len(valid_conversations)} conversations in the export file.")
  3067.  
  3068.             # Choose export format
  3069.             print("\nAvailable export formats:")
  3070.             print("1. Text (.txt)")
  3071.             print("2. HTML (.html)")
  3072.             print("3. Markdown (.md)")
  3073.             print("4. JSON (.json)")
  3074.             print("5. All formats")
  3075.  
  3076.             format_choice = input("\nChoose format (1-5): ").strip()
  3077.             while format_choice not in ["1", "2", "3", "4", "5"]:
  3078.                 format_choice = input("Please enter a number between 1 and 5: ").strip()
  3079.  
  3080.             format_map = {
  3081.                 "1": OutputFormat.TEXT,
  3082.                 "2": OutputFormat.HTML,
  3083.                 "3": OutputFormat.MARKDOWN,
  3084.                 "4": OutputFormat.JSON,
  3085.                 "5": OutputFormat.ALL
  3086.             }
  3087.  
  3088.             self.ctx.options.format = format_map[format_choice]
  3089.  
  3090.             # Choose conversations
  3091.             print("\nDo you want to:")
  3092.             print("1. Export all conversations")
  3093.             print("2. Select specific conversations")
  3094.  
  3095.             selection_choice = input("\nChoose option (1-2): ").strip()
  3096.             while selection_choice not in ["1", "2"]:
  3097.                 selection_choice = input("Please enter either 1 or 2: ").strip()
  3098.  
  3099.             selected_conversations = None
  3100.             if selection_choice == "2":
  3101.                 selected_conversations = await self._select_conversations(valid_conversations)
  3102.  
  3103.                 if not selected_conversations:
  3104.                     print("\nNo conversations selected, nothing to export.")
  3105.                     return 0
  3106.  
  3107.             # Choose output directory
  3108.             default_output_dir = self.ctx.options.output_dir
  3109.             output_dir = input(f"\nOutput directory [default: {default_output_dir}]: ").strip()
  3110.             if not output_dir:
  3111.                 output_dir = default_output_dir
  3112.  
  3113.             self.ctx.options.output_dir = Path(output_dir)
  3114.  
  3115.             # Advanced options
  3116.             include_timestamps = input("\nInclude timestamps? (y/n) [default: y]: ").strip().lower()
  3117.             self.ctx.options.include_timestamps = include_timestamps != "n"
  3118.  
  3119.             local_time = input("Use local time instead of UTC? (y/n) [default: y]: ").strip().lower()
  3120.             self.ctx.options.use_local_time = local_time != "n"
  3121.  
  3122.             compress_output = input("Compress output to zip? (y/n) [default: n]: ").strip().lower()
  3123.             self.ctx.options.compress_output = compress_output == "y"
  3124.  
  3125.             # Export conversations
  3126.             print("\nStarting export process...")
  3127.  
  3128.             export_manager = ExportManager(self.ctx)
  3129.             exported_files = await export_manager.export_conversations(
  3130.                 skype_export, selected_conversations
  3131.             )
  3132.  
  3133.             # Display summary
  3134.             total_conversations = sum(len(files) for files in exported_files.values())
  3135.             output_path = self.ctx.options.output_dir
  3136.  
  3137.             print("\n" + "=" * 60)
  3138.             print("            Export Summary")
  3139.             print("-" * 60)
  3140.             print(f"Total conversations: {len(valid_conversations)}")
  3141.             print(f"Exported conversations: {total_conversations}")
  3142.  
  3143.             for format_name, files in exported_files.items():
  3144.                 if files:
  3145.                     print(f"{format_name} files: {len(files)}")
  3146.  
  3147.             print(f"Output directory: {output_path}")
  3148.  
  3149.             if self.ctx.errors:
  3150.                 print(f"\nErrors: {len(self.ctx.errors)}")
  3151.                 for i, error in enumerate(self.ctx.errors, 1):
  3152.                     print(f"  {i}. {error['type']} - {error['error']}")
  3153.  
  3154.             print("\nExport completed successfully!")
  3155.             print(f"Files saved to: {output_path}")
  3156.  
  3157.             return 0
  3158.  
  3159.         except Exception as e:
  3160.             print(f"\nError: {e}")
  3161.             return 1
  3162.  
  3163.     async def _select_conversations(self, conversations: List[SkypeConversation]) -> List[SkypeConversation]:
  3164.         """
  3165.        Allow user to select conversations in basic mode.
  3166.  
  3167.        Args:
  3168.            conversations: List of valid conversations
  3169.  
  3170.        Returns:
  3171.            List of selected conversations
  3172.        """
  3173.         print("\nAvailable conversations:")
  3174.         for i, conv in enumerate(conversations, 1):
  3175.             message_count = conv.message_count
  3176.             first_date = conv.first_timestamp.strftime("%Y-%m-%d") if conv.first_timestamp else "N/A"
  3177.             print(f"{i:3}. {conv.display_name} ({message_count} messages, since {first_date})")
  3178.  
  3179.         print("\nEnter conversation numbers to export, separated by spaces.")
  3180.         print("For example: '1 3 5' will export the first, third, and fifth conversations.")
  3181.         print("Enter 'all' to export all conversations.")
  3182.  
  3183.         selection = input("\nSelection: ").strip()
  3184.  
  3185.         if selection.lower() == 'all':
  3186.             return conversations
  3187.  
  3188.         try:
  3189.             indices = [int(idx.strip()) for idx in selection.split() if idx.strip()]
  3190.  
  3191.             # Validate indices
  3192.             valid_indices = [idx for idx in indices if 1 <= idx <= len(conversations)]
  3193.  
  3194.             if not valid_indices:
  3195.                 print("No valid selection made. Please try again.")
  3196.                 return await self._select_conversations(conversations)
  3197.  
  3198.             # Get selected conversations
  3199.             selected = [conversations[idx-1] for idx in valid_indices]
  3200.  
  3201.             # Confirm selection
  3202.             print(f"\nYou selected {len(selected)} conversations:")
  3203.             for conv in selected:
  3204.                 print(f"- {conv.display_name}")
  3205.  
  3206.             confirm = input("\nConfirm selection? (y/n) [default: y]: ").strip().lower()
  3207.             if confirm == "n":
  3208.                 return await self._select_conversations(conversations)
  3209.  
  3210.             return selected
  3211.  
  3212.         except ValueError:
  3213.             print("Invalid selection format. Please enter numbers separated by spaces.")
  3214.             return await self._select_conversations(conversations)
  3215.  
  3216. # ═════════════════════════════════════════════════════════════════════════════
  3217. # ═══════════════════════════ USER INTERFACE ═════════════════════════════════
  3218. # ═════════════════════════════════════════════════════════════════════════════
  3219.  
  3220. class ConversationSelector:
  3221.     """Interactive conversation selector with rich UI if available."""
  3222.  
  3223.     def __init__(self, ctx: AppContext):
  3224.         """
  3225.        Initialize the selector.
  3226.  
  3227.        Args:
  3228.            ctx: Application context
  3229.        """
  3230.         self.ctx = ctx
  3231.         self.logger = get_logger('conversation_selector', ctx)
  3232.  
  3233.     async def select_conversations(self, skype_export: SkypeExport) -> List[SkypeConversation]:
  3234.         """
  3235.        Allow user to select conversations to export.
  3236.  
  3237.        Args:
  3238.            skype_export: Complete Skype export data
  3239.  
  3240.        Returns:
  3241.            List of selected conversations
  3242.        """
  3243.         conversations = list(skype_export.conversations.values())
  3244.  
  3245.         # Filter out empty conversations
  3246.         valid_conversations = [c for c in conversations if c.messages]
  3247.  
  3248.         if not valid_conversations:
  3249.             self.logger.warning("No conversations with messages found")
  3250.             return []
  3251.  
  3252.         # Sort by display name
  3253.         valid_conversations.sort(key=lambda c: c.display_name.lower())
  3254.  
  3255.         # Use rich UI if available
  3256.         if RICH_AVAILABLE:
  3257.             return await self._rich_select_conversations(valid_conversations)
  3258.         else:
  3259.             return await self._text_select_conversations(valid_conversations)
  3260.  
  3261.     async def _rich_select_conversations(self, conversations: List[SkypeConversation]) -> List[SkypeConversation]:
  3262.         """
  3263.        Select conversations using rich UI.
  3264.  
  3265.        Args:
  3266.            conversations: Available conversations
  3267.  
  3268.        Returns:
  3269.            List of selected conversations
  3270.        """
  3271.         # Create table of conversations
  3272.         table = Table(title="Available Conversations")
  3273.         table.add_column("#", justify="right")
  3274.         table.add_column("Name", style="cyan")
  3275.         table.add_column("Messages", justify="right")
  3276.         table.add_column("First Message", justify="right")
  3277.         table.add_column("Last Message", justify="right")
  3278.  
  3279.         # Add rows
  3280.         for i, conv in enumerate(conversations, 1):
  3281.             table.add_row(
  3282.                 str(i),
  3283.                 conv.display_name,
  3284.                 str(conv.message_count),
  3285.                 conv.first_timestamp.strftime("%Y-%m-%d") if conv.first_timestamp else "N/A",
  3286.                 conv.last_timestamp.strftime("%Y-%m-%d") if conv.last_timestamp else "N/A"
  3287.             )
  3288.  
  3289.         # Display table
  3290.         self.ctx.console.print(table)
  3291.         self.ctx.console.print("\nEnter the numbers of conversations to export, separated by spaces.")
  3292.         self.ctx.console.print("Enter 'all' to export all conversations.")
  3293.  
  3294.         # Get user selection
  3295.         selection = await self._get_user_input("\nSelection: ")
  3296.  
  3297.         if selection.lower() == 'all':
  3298.             return conversations
  3299.  
  3300.         # Parse selection
  3301.         try:
  3302.             indices = [int(idx.strip()) for idx in selection.split() if idx.strip()]
  3303.  
  3304.             # Validate indices
  3305.             valid_indices = [idx for idx in indices if 1 <= idx <= len(conversations)]
  3306.             if not valid_indices:
  3307.                 self.ctx.console.print("[bold red]No valid selection made[/bold red]")
  3308.                 return []
  3309.  
  3310.             # Get selected conversations
  3311.             selected = [conversations[idx-1] for idx in valid_indices]
  3312.  
  3313.             # Confirm selection
  3314.             self.ctx.console.print(f"\nYou selected [cyan]{len(selected)}[/cyan] conversations:")
  3315.             for conv in selected[:5]:
  3316.                 self.ctx.console.print(f"- {conv.display_name}")
  3317.  
  3318.             if len(selected) > 5:
  3319.                 self.ctx.console.print(f"- ... and {len(selected) - 5} more")
  3320.  
  3321.             confirm = Confirm.ask("Confirm this selection?", default=True)
  3322.             if not confirm:
  3323.                 return await self._rich_select_conversations(conversations)
  3324.  
  3325.             return selected
  3326.  
  3327.         except ValueError:
  3328.             self.ctx.console.print("[bold red]Invalid selection format[/bold red]")
  3329.             return []
  3330.  
  3331.     async def _text_select_conversations(self, conversations: List[SkypeConversation]) -> List[SkypeConversation]:
  3332.         """
  3333.        Select conversations using text UI.
  3334.  
  3335.        Args:
  3336.            conversations: Available conversations
  3337.  
  3338.        Returns:
  3339.            List of selected conversations
  3340.        """
  3341.         print("\nYou have conversations with the following:")
  3342.         print("--------------------------------------------")
  3343.  
  3344.         for i, conv in enumerate(conversations, 1):
  3345.             first_date = "N/A"
  3346.             if conv.first_timestamp:
  3347.                 first_date = conv.first_timestamp.strftime("%Y-%m-%d")
  3348.  
  3349.             print(f"{i:3} -> {conv.display_name} ({conv.message_count} messages, since {first_date})")
  3350.  
  3351.         print("\nEnter the numbers of conversations to export, separated by spaces.")
  3352.         print("Enter 'all' to export all conversations.")
  3353.  
  3354.         # Get user selection
  3355.         selection = await self._get_user_input("\nSelection: ")
  3356.  
  3357.         if selection.lower() == 'all':
  3358.             return conversations
  3359.  
  3360.         # Parse selection
  3361.         try:
  3362.             indices = [int(idx.strip()) for idx in selection.split() if idx.strip()]
  3363.  
  3364.             # Validate indices
  3365.             valid_indices = [idx for idx in indices if 1 <= idx <= len(conversations)]
  3366.             if not valid_indices:
  3367.                 print("No valid selection made")
  3368.                 return []
  3369.  
  3370.             # Get selected conversations
  3371.             selected = [conversations[idx-1] for idx in valid_indices]
  3372.  
  3373.             # Confirm selection
  3374.             print(f"\nYou selected {len(selected)} conversations:")
  3375.             for conv in selected[:5]:
  3376.                 print(f"- {conv.display_name}")
  3377.  
  3378.             if len(selected) > 5:
  3379.                 print(f"- ... and {len(selected) - 5} more")
  3380.  
  3381.             confirm = input("\nConfirm this selection? (y/n) [default: y]: ").strip().lower()
  3382.             if confirm == "n":
  3383.                 return await self._text_select_conversations(conversations)
  3384.  
  3385.             return selected
  3386.  
  3387.         except ValueError:
  3388.             print("Invalid selection format")
  3389.             return []
  3390.  
  3391.     async def _get_user_input(self, prompt: str) -> str:
  3392.         """
  3393.        Get user input asynchronously.
  3394.  
  3395.        Args:
  3396.            prompt: Prompt text
  3397.  
  3398.        Returns:
  3399.            User input string
  3400.        """
  3401.         loop = asyncio.get_event_loop()
  3402.         return await loop.run_in_executor(None, input, prompt)
  3403.  
  3404. class UserInterface:
  3405.     """Main user interface handling interaction and display."""
  3406.  
  3407.     def __init__(self, ctx: AppContext):
  3408.         """
  3409.        Initialize the UI.
  3410.  
  3411.        Args:
  3412.            ctx: Application context
  3413.        """
  3414.         self.ctx = ctx
  3415.         self.logger = get_logger('ui', ctx)
  3416.  
  3417.     async def get_user_display_name(self) -> str:
  3418.         """
  3419.        Get display name from user with enhanced validation.
  3420.  
  3421.        Returns:
  3422.            User display name
  3423.        """
  3424.         # Use rich UI if available
  3425.         if RICH_AVAILABLE:
  3426.             self.ctx.console.print("\n[bold cyan]Please enter your display name for the logs:[/bold cyan]")
  3427.             display_name = await self._get_user_input("")
  3428.         else:
  3429.             display_name = await self._get_user_input("\nIn the logs, your name should be displayed as: ")
  3430.  
  3431.         # Validate input
  3432.         while not display_name.strip():
  3433.             if RICH_AVAILABLE:
  3434.                 self.ctx.console.print("[bold red]Display name cannot be empty![/bold red]")
  3435.                 display_name = await self._get_user_input("Please enter how you want your name to be displayed: ")
  3436.             else:
  3437.                 display_name = await self._get_user_input("\nPlease enter how you want your name to be displayed: ")
  3438.  
  3439.         # Additional validation for unusually long names
  3440.         if len(display_name) > 50:
  3441.             warning = "Your display name is unusually long. Are you sure you want to use this name?"
  3442.  
  3443.             if RICH_AVAILABLE:
  3444.                 self.ctx.console.print(f"[bold yellow]{warning}[/bold yellow]")
  3445.                 confirm = Confirm.ask("Continue with this name?", default=True)
  3446.                 if not confirm:
  3447.                     return await self.get_user_display_name()
  3448.             else:
  3449.                 print(f"\nWarning: {warning}")
  3450.                 confirm = input("Continue with this name? (y/n) [default: y]: ").strip().lower()
  3451.                 if confirm == "n":
  3452.                     return await self.get_user_display_name()
  3453.  
  3454.         return display_name
  3455.  
  3456.     def display_welcome(self) -> None:
  3457.         """Display welcome message with app info."""
  3458.         if RICH_AVAILABLE:
  3459.             # Create fancy header
  3460.             self.ctx.console.print("\n[bold blue]╔═══════════════════════════════════════════════════════════╗[/bold blue]")
  3461.             self.ctx.console.print("[bold blue]║[/bold blue]                [bold cyan]SkypeExporter v2.0.0[/bold cyan]                [bold blue]║[/bold blue]")
  3462.             self.ctx.console.print("[bold blue]║[/bold blue]           [italic]Enterprise-Grade Skype Chat Parser[/italic]           [bold blue]║[/bold blue]")
  3463.             self.ctx.console.print("[bold blue]╚═══════════════════════════════════════════════════════════╝[/bold blue]\n")
  3464.  
  3465.             # Show system info
  3466.             self.ctx.console.print("[bold]System Information:[/bold]")
  3467.             self.ctx.console.print(f"  Python: {platform.python_version()}")
  3468.             self.ctx.console.print(f"  Platform: {platform.system()} {platform.release()}")
  3469.  
  3470.             # Show memory info if available
  3471.             if self.ctx.memory_monitor:
  3472.                 mem_usage = self.ctx.memory_monitor.get_memory_usage_mb()
  3473.                 mem_percent = self.ctx.memory_monitor.get_memory_percent()
  3474.                 sys_memory = self.ctx.memory_monitor.get_system_memory_mb()
  3475.  
  3476.                 self.ctx.console.print(f"  Memory: {mem_usage:.1f} MB / {sys_memory:.1f} MB ({mem_percent:.1f}%)")
  3477.  
  3478.             # Show dependency status
  3479.             self.ctx.console.print("\n[bold]Dependency Status:[/bold]")
  3480.             dep_status = check_dependencies()
  3481.             for pkg, status in dep_status.items():
  3482.                 color = "green" if status else "red"
  3483.                 symbol = "✓" if status else "✗"
  3484.                 self.ctx.console.print(f"  [{color}]{symbol}[/{color}] {pkg}")
  3485.  
  3486.             # Show mode info
  3487.             mode = "[bold cyan]Basic Mode[/bold cyan]" if self.ctx.options.basic_mode else "[bold green]Advanced Mode[/bold green]"
  3488.             self.ctx.console.print(f"\nRunning in {mode}")
  3489.  
  3490.             self.ctx.console.print("\n[italic]Starting export process...[/italic]\n")
  3491.  
  3492.         else:
  3493.             # Simple text header
  3494.             print("\n" + "=" * 60)
  3495.             print("              SkypeExporter v2.0.0")
  3496.             print("      Enterprise-Grade Skype Chat Parser")
  3497.             print("=" * 60 + "\n")
  3498.  
  3499.             # Show system info
  3500.             print(f"Python: {platform.python_version()}")
  3501.             print(f"Platform: {platform.system()} {platform.release()}")
  3502.  
  3503.             # Show memory info if available
  3504.             if self.ctx.memory_monitor:
  3505.                 mem_usage = self.ctx.memory_monitor.get_memory_usage_mb()
  3506.                 mem_percent = self.ctx.memory_monitor.get_memory_percent()
  3507.                 sys_memory = self.ctx.memory_monitor.get_system_memory_mb()
  3508.  
  3509.                 print(f"Memory: {mem_usage:.1f} MB / {sys_memory:.1f} MB ({mem_percent:.1f}%)")
  3510.  
  3511.             # Show dependency status
  3512.             print("\nDependency Status:")
  3513.             dep_status = check_dependencies()
  3514.             for pkg, status in dep_status.items():
  3515.                 symbol = "✓" if status else "✗"
  3516.                 print(f"  {symbol} {pkg}")
  3517.  
  3518.             # Show mode info
  3519.             mode = "Basic Mode" if self.ctx.options.basic_mode else "Advanced Mode"
  3520.             print(f"\nRunning in {mode}")
  3521.  
  3522.             print("\nStarting export process...\n")
  3523.  
  3524.     def display_summary(self, skype_export: SkypeExport, exported_files: Dict[str, List[Path]]) -> None:
  3525.         """
  3526.        Display export summary.
  3527.  
  3528.        Args:
  3529.            skype_export: Complete Skype export data
  3530.            exported_files: Dictionary of exported files by format
  3531.        """
  3532.         total_conversations = sum(len(files) for files in exported_files.values())
  3533.         elapsed_time = time.time() - self.ctx.start_time
  3534.         output_dir = self.ctx.options.output_dir
  3535.  
  3536.         if RICH_AVAILABLE and not self.ctx.options.basic_mode:
  3537.             # Create summary panel
  3538.             summary = Table(title="Export Summary", show_header=False, box=None)
  3539.             summary.add_column("", style="bold cyan")
  3540.             summary.add_column("")
  3541.  
  3542.             summary.add_row("Total conversations:", str(skype_export.total_conversations))
  3543.             summary.add_row("Total messages:", str(skype_export.total_messages))
  3544.             summary.add_row("Exported conversations:", str(total_conversations))
  3545.  
  3546.             # Add export formats
  3547.             for format_name, files in exported_files.items():
  3548.                 if files:
  3549.                     summary.add_row(f"{format_name} files:", str(len(files)))
  3550.  
  3551.             summary.add_row("Output directory:", str(output_dir))
  3552.             summary.add_row("Processing time:", f"{elapsed_time:.2f} seconds")
  3553.  
  3554.             # Add memory usage if available
  3555.             memory_report = self.ctx.get_memory_report()
  3556.             if memory_report:
  3557.                 peak_mb = memory_report.get("peak_usage_mb", 0)
  3558.                 summary.add_row("Peak memory usage:", f"{peak_mb:.2f} MB")
  3559.  
  3560.             if self.ctx.errors:
  3561.                 summary.add_row("Errors:", f"[bold red]{len(self.ctx.errors)}[/bold red]")
  3562.  
  3563.             # Display summary in panel
  3564.             panel = Panel(summary, title="SkypeExporter Completed", border_style="green")
  3565.             self.ctx.console.print(panel)
  3566.  
  3567.             # Show errors if any
  3568.             if self.ctx.errors:
  3569.                 self.ctx.console.print("\n[bold red]Errors encountered:[/bold red]")
  3570.                 for i, error in enumerate(self.ctx.errors, 1):
  3571.                     self.ctx.console.print(f"  {i}. {error['type']} - {error['error']}")
  3572.  
  3573.             self.ctx.console.print("\n[bold green]Export completed successfully![/bold green]")
  3574.             self.ctx.console.print(f"Files saved to: [cyan]{output_dir}[/cyan]")
  3575.  
  3576.         else:
  3577.             # Simple text summary
  3578.             print("\n" + "=" * 60)
  3579.             print("                Export Summary")
  3580.             print("-" * 60)
  3581.             print(f"Total conversations: {skype_export.total_conversations}")
  3582.             print(f"Total messages: {skype_export.total_messages}")
  3583.             print(f"Exported conversations: {total_conversations}")
  3584.  
  3585.             # Add export formats
  3586.             for format_name, files in exported_files.items():
  3587.                 if files:
  3588.                     print(f"{format_name} files: {len(files)}")
  3589.  
  3590.             print(f"Output directory: {output_dir}")
  3591.             print(f"Processing time: {elapsed_time:.2f} seconds")
  3592.  
  3593.             # Add memory usage if available
  3594.             memory_report = self.ctx.get_memory_report()
  3595.             if memory_report:
  3596.                 peak_mb = memory_report.get("peak_usage_mb", 0)
  3597.                 print(f"Peak memory usage: {peak_mb:.2f} MB")
  3598.  
  3599.             if self.ctx.errors:
  3600.                 print(f"Errors: {len(self.ctx.errors)}")
  3601.  
  3602.             print("=" * 60)
  3603.  
  3604.             # Show errors if any
  3605.             if self.ctx.errors:
  3606.                 print("\nErrors encountered:")
  3607.                 for i, error in enumerate(self.ctx.errors, 1):
  3608.                     print(f"  {i}. {error['type']} - {error['error']}")
  3609.  
  3610.             print("\nExport completed successfully!")
  3611.             print(f"Files saved to: {output_dir}")
  3612.  
  3613.     async def _get_user_input(self, prompt: str) -> str:
  3614.         """
  3615.        Get user input asynchronously.
  3616.  
  3617.        Args:
  3618.            prompt: Prompt text
  3619.  
  3620.        Returns:
  3621.            User input string
  3622.        """
  3623.         loop = asyncio.get_event_loop()
  3624.         return await loop.run_in_executor(None, input, prompt)
  3625.  
  3626. # ═════════════════════════════════════════════════════════════════════════════
  3627. # ═══════════════════════════ APPLICATION CORE ═══════════════════════════════
  3628. # ═════════════════════════════════════════════════════════════════════════════
  3629.  
  3630. class SkypeExporterApp:
  3631.     """Main application class orchestrating the export process."""
  3632.  
  3633.     def __init__(self):
  3634.         """Initialize the application."""
  3635.         # Parse command line arguments
  3636.         self.args = self._parse_args()
  3637.  
  3638.         # Create app context
  3639.         self.ctx = AppContext(
  3640.             options=self._create_options(),
  3641.             logger=setup_logging(
  3642.                 LogLevel.DEBUG if self.args.debug else LogLevel.INFO,
  3643.                 log_file=Path(self.args.log_file) if self.args.log_file else None
  3644.             )
  3645.         )
  3646.  
  3647.         # Create UI components
  3648.         self.ui = UserInterface(self.ctx)
  3649.         self.selector = ConversationSelector(self.ctx)
  3650.         self.basic_mode_handler = BasicModeHandler(self.ctx)
  3651.  
  3652.         # Set up signal handlers
  3653.         self._setup_signal_handlers()
  3654.  
  3655.     def _parse_args(self) -> argparse.Namespace:
  3656.         """
  3657.        Parse command line arguments.
  3658.  
  3659.        Returns:
  3660.            Parsed arguments
  3661.        """
  3662.         parser = argparse.ArgumentParser(
  3663.             description="SkypeExporter: Enterprise-Grade Skype Chat Log Exporter",
  3664.             formatter_class=argparse.ArgumentDefaultsHelpFormatter
  3665.         )
  3666.  
  3667.         parser.add_argument('filename',
  3668.                          help='Path to the Skype export file (JSON, TAR, or ZIP)')
  3669.  
  3670.         parser.add_argument('-o', '--output-dir',
  3671.                          help='Directory to save exported files',
  3672.                          default=os.path.join(os.getcwd(), "skype_exports"))
  3673.  
  3674.         parser.add_argument('-f', '--format',
  3675.                          choices=['text', 'html', 'markdown', 'json', 'postgresql', 'all'],
  3676.                          default='text',
  3677.                          help='Output format for exported conversations')
  3678.  
  3679.         parser.add_argument('-c', '--choose',
  3680.                          action='store_true',
  3681.                          help='Choose which conversations to export')
  3682.  
  3683.         parser.add_argument('-p', '--pattern',
  3684.                          help='Filter conversations by name pattern (supports wildcards)')
  3685.  
  3686.         parser.add_argument('--filter',
  3687.                          help='Alternative name for pattern filter')
  3688.  
  3689.         parser.add_argument('-a', '--anonymize',
  3690.                          action='store_true',
  3691.                          help='Anonymize user names in exports')
  3692.  
  3693.         parser.add_argument('-s', '--stats',
  3694.                          action='store_true',
  3695.                          help='Include conversation statistics')
  3696.  
  3697.         parser.add_argument('--no-stats',
  3698.                          action='store_true',
  3699.                          help='Exclude conversation statistics')
  3700.  
  3701.         parser.add_argument('-t', '--timestamps',
  3702.                          action='store_true',
  3703.                          default=True,
  3704.                          help='Include timestamps in exports')
  3705.  
  3706.         parser.add_argument('--no-timestamps',
  3707.                          action='store_true',
  3708.                          help='Exclude timestamps from exports')
  3709.  
  3710.         parser.add_argument('-l', '--local-time',
  3711.                          action='store_true',
  3712.                          help='Use local time instead of UTC')
  3713.  
  3714.         parser.add_argument('--utc',
  3715.                          action='store_true',
  3716.                          help='Use UTC time (default)')
  3717.  
  3718.         parser.add_argument('--no-parallel',
  3719.                          action='store_true',
  3720.                          help='Disable parallel processing')
  3721.  
  3722.         parser.add_argument('--batch-size',
  3723.                          type=int,
  3724.                          help='Batch size for processing messages')
  3725.  
  3726.         parser.add_argument('--max-workers',
  3727.                          type=int,
  3728.                          help='Maximum number of worker threads for parallel processing')
  3729.  
  3730.         parser.add_argument('--compress',
  3731.                          action='store_true',
  3732.                          help='Compress output files into ZIP archive')
  3733.  
  3734.         parser.add_argument('--timezone',
  3735.                          help='Timezone for timestamps (e.g. "America/New_York")')
  3736.  
  3737.         parser.add_argument('--no-pretty',
  3738.                          action='store_true',
  3739.                          help='Disable pretty printing for JSON output')
  3740.  
  3741.         parser.add_argument('--include-metadata',
  3742.                          action='store_true',
  3743.                          help='Include metadata in exports')
  3744.  
  3745.         parser.add_argument('--include-ids',
  3746.                          action='store_true',
  3747.                          help='Include message IDs in exports')
  3748.  
  3749.         parser.add_argument('--include-html',
  3750.                          action='store_true',
  3751.                          help='Include HTML in exports')
  3752.  
  3753.         parser.add_argument('--media-links',
  3754.                          action='store_true',
  3755.                          help='Include media links in exports')
  3756.  
  3757.         parser.add_argument('--date-from',
  3758.                          help='Start date for filtering messages (YYYY-MM-DD format)')
  3759.  
  3760.         parser.add_argument('--date-to',
  3761.                          help='End date for filtering messages (YYYY-MM-DD format)')
  3762.  
  3763.         parser.add_argument('--debug',
  3764.                          action='store_true',
  3765.                          help='Enable debug logging')
  3766.  
  3767.         parser.add_argument('--log-file',
  3768.                          help='Path to log file')
  3769.  
  3770.         parser.add_argument('--basic',
  3771.                          action='store_true',
  3772.                          help='Use basic mode with simplified interaction')
  3773.  
  3774.         parser.add_argument('--memory-profile',
  3775.                          action='store_true',
  3776.                          help='Enable memory profiling')
  3777.  
  3778.         parser.add_argument('--no-memory-optimization',
  3779.                          action='store_true',
  3780.                          help='Disable automatic memory optimization')
  3781.  
  3782.         parser.add_argument('--no-memory-opt',
  3783.                          action='store_true',
  3784.                          help='Alternative name for disabling memory optimization')
  3785.  
  3786.         parser.add_argument('--memory-threshold',
  3787.                          type=int,
  3788.                          help='Memory usage threshold percentage for optimization (1-99)')
  3789.  
  3790.         # PostgreSQL options
  3791.         db_group = parser.add_argument_group('PostgreSQL Database Options')
  3792.         db_group.add_argument('--db-host',
  3793.                             help='Database host (for PostgreSQL export)',
  3794.                             default='localhost')
  3795.         db_group.add_argument('--db-port',
  3796.                             type=int,
  3797.                             help='Database port (for PostgreSQL export)',
  3798.                             default=5432)
  3799.         db_group.add_argument('--db-name',
  3800.                             help='Database name (for PostgreSQL export)',
  3801.                             default='skype_export')
  3802.         db_group.add_argument('--db-user',
  3803.                             help='Database username (for PostgreSQL export)',
  3804.                             default='postgres')
  3805.         db_group.add_argument('--db-password',
  3806.                             help='Database password (for PostgreSQL export)',
  3807.                             default='')
  3808.         db_group.add_argument('--db-engine',
  3809.                             help='Database engine (for PostgreSQL export)',
  3810.                             default='postgresql')
  3811.         db_group.add_argument('--db-schema',
  3812.                             help='Database schema (for PostgreSQL export)',
  3813.                             default='public')
  3814.         db_group.add_argument('--db-echo',
  3815.                             action='store_true',
  3816.                             help='Echo SQL queries (for debugging)')
  3817.  
  3818.         parser.add_argument('--version',
  3819.                          action='version',
  3820.                          version='SkypeExporter 2.0.0')
  3821.  
  3822.         return parser.parse_args()
  3823.  
  3824.     def _create_options(self) -> ExportOptions:
  3825.         """
  3826.        Create export options from command line arguments.
  3827.  
  3828.        Returns:
  3829.            Configured ExportOptions object
  3830.        """
  3831.         args = self.args
  3832.  
  3833.         # Validate numeric inputs
  3834.         try:
  3835.             if args.batch_size is not None:
  3836.                 args.batch_size = int(args.batch_size)
  3837.                 if args.batch_size <= 0:
  3838.                     raise ConfigError("Batch size must be a positive integer")
  3839.  
  3840.             if args.max_workers is not None:
  3841.                 args.max_workers = int(args.max_workers)
  3842.                 if args.max_workers < 1:
  3843.                     raise ConfigError("Max workers must be at least 1")
  3844.  
  3845.             if args.memory_threshold is not None:
  3846.                 args.memory_threshold = int(args.memory_threshold)
  3847.                 if not (1 <= args.memory_threshold <= 99):
  3848.                     raise ConfigError("Memory threshold must be between 1 and 99 percent")
  3849.         except ValueError:
  3850.             raise ConfigError("Numeric parameters must be valid integers")
  3851.  
  3852.         # Create output directory
  3853.         output_dir = Path(args.output_dir if args.output_dir else DEFAULT_OUTPUT_DIR)
  3854.  
  3855.         # Validate output directory
  3856.         if not output_dir.parent.exists():
  3857.             raise ConfigError(f"Parent directory does not exist: {output_dir.parent}")
  3858.  
  3859.         # Create database configuration if needed
  3860.         if args.format == 'postgresql' or args.format == 'all':
  3861.             db_config = DatabaseConfig(
  3862.                 engine=args.db_engine,
  3863.                 host=args.db_host,
  3864.                 port=int(args.db_port),
  3865.                 database=args.db_name,
  3866.                 username=args.db_user,
  3867.                 password=args.db_password,
  3868.                 schema=args.db_schema,
  3869.                 echo_sql=args.db_echo
  3870.             )
  3871.         else:
  3872.             db_config = DatabaseConfig()
  3873.  
  3874.         # Handle date range if specified
  3875.         date_range = None
  3876.         if args.date_from and args.date_to:
  3877.             try:
  3878.                 date_from = datetime.datetime.strptime(args.date_from, '%Y-%m-%d').date()
  3879.                 date_to = datetime.datetime.strptime(args.date_to, '%Y-%m-%d').date()
  3880.                 date_range = (date_from, date_to)
  3881.             except ValueError:
  3882.                 raise ConfigError("Date range must be in YYYY-MM-DD format")
  3883.  
  3884.         # Determine output format
  3885.         format_str = args.format.lower() if args.format else 'text'
  3886.         try:
  3887.             output_format = {
  3888.                 'text': OutputFormat.TEXT,
  3889.                 'html': OutputFormat.HTML,
  3890.                 'markdown': OutputFormat.MARKDOWN,
  3891.                 'json': OutputFormat.JSON,
  3892.                 'postgresql': OutputFormat.POSTGRESQL,
  3893.                 'all': OutputFormat.ALL
  3894.             }[format_str]
  3895.         except KeyError:
  3896.             raise ConfigError(f"Invalid output format: {format_str}")
  3897.  
  3898.         # Build options object
  3899.         options = ExportOptions(
  3900.             output_dir=output_dir,
  3901.             format=output_format,
  3902.             anonymize=args.anonymize,
  3903.             include_timestamps=not args.no_timestamps,
  3904.             use_local_time=not args.utc,
  3905.             include_metadata=args.include_metadata,
  3906.             include_message_ids=args.include_ids,
  3907.             parallel=not args.no_parallel,
  3908.             max_workers=args.max_workers or max(1, os.cpu_count() or 4),
  3909.             batch_size=args.batch_size or 1000,
  3910.             timezone=args.timezone,
  3911.             pretty_print=not args.no_pretty,
  3912.             compress_output=args.compress,
  3913.             filter_pattern=args.filter,
  3914.             date_range=date_range,
  3915.             include_conversation_stats=not args.no_stats,
  3916.             media_links=args.media_links,
  3917.             strip_html=not args.include_html,
  3918.             debug_mode=args.debug,
  3919.             basic_mode=args.basic,
  3920.             enable_memory_optimization=not args.no_memory_opt,
  3921.             memory_profile=args.memory_profile,
  3922.             memory_threshold_percent=args.memory_threshold or 75,
  3923.             database_config=db_config
  3924.         )
  3925.  
  3926.         return options
  3927.  
  3928.     def _setup_signal_handlers(self) -> None:
  3929.         """Set up handlers for system signals."""
  3930.         # Handle SIGINT (Ctrl+C)
  3931.         if hasattr(signal, 'SIGINT'):
  3932.             signal.signal(signal.SIGINT, self._signal_handler)
  3933.  
  3934.         # Handle SIGTERM
  3935.         if hasattr(signal, 'SIGTERM'):
  3936.             signal.signal(signal.SIGTERM, self._signal_handler)
  3937.  
  3938.     def _signal_handler(self, sig, frame) -> None:
  3939.         """
  3940.        Handle system signals to allow graceful shutdown.
  3941.  
  3942.        Args:
  3943.            sig: Signal number
  3944.            frame: Current stack frame
  3945.        """
  3946.         self.ctx.logger.info(f"Received signal {sig}, shutting down gracefully...")
  3947.         self.ctx.cancel_requested = True
  3948.  
  3949.     async def run(self) -> int:
  3950.         """
  3951.        Run the application.
  3952.  
  3953.        Returns:
  3954.            Exit code (0 for success, non-zero for error)
  3955.        """
  3956.         try:
  3957.             # Run in basic mode if requested
  3958.             if self.ctx.options.basic_mode:
  3959.                 input_path = Path(self.args.filename)
  3960.                 if not input_path.exists():
  3961.                     print(f"Error: Input file not found: {input_path}")
  3962.                     return 1
  3963.  
  3964.                 return await self.basic_mode_handler.run(input_path)
  3965.  
  3966.             # Standard advanced mode
  3967.             # Display welcome message
  3968.             self.ui.display_welcome()
  3969.  
  3970.             # Check dependencies
  3971.             dependency_status = check_dependencies()
  3972.             missing_deps = [pkg for pkg, status in dependency_status.items() if not status]
  3973.  
  3974.             if missing_deps:
  3975.                 self.ctx.logger.warning(f"Missing dependencies: {', '.join(missing_deps)}")
  3976.  
  3977.                 # Try to install missing dependencies
  3978.                 if self.ctx.options.format != OutputFormat.TEXT:
  3979.                     # Check if required deps for the selected format are missing
  3980.                     format_deps = {
  3981.                         OutputFormat.HTML: ['jinja2'],
  3982.                         OutputFormat.MARKDOWN: ['markdown'],
  3983.                         OutputFormat.POSTGRESQL: ['sqlalchemy', 'psycopg2-binary']
  3984.                     }
  3985.  
  3986.                     required_for_format = format_deps.get(self.ctx.options.format, [])
  3987.                     missing_required = [d for d in required_for_format if d in missing_deps]
  3988.  
  3989.                     if missing_required:
  3990.                         self.ctx.logger.info("Attempting to install missing dependencies required for "
  3991.                                          f"{self.ctx.options.format.name} format...")
  3992.                         install_dependencies()
  3993.  
  3994.             # Get user display name
  3995.             self.ctx.user_display_name = await self.ui.get_user_display_name()
  3996.  
  3997.             # Process input file
  3998.             input_path = Path(self.args.filename)
  3999.             if not input_path.exists():
  4000.                 self.ctx.logger.error(f"Input file not found: {input_path}")
  4001.                 return 1
  4002.  
  4003.             # Create appropriate reader and read file
  4004.             reader = FileReader.create_reader(input_path)
  4005.             raw_data = await reader.read(input_path, self.ctx)
  4006.  
  4007.             # Parse the export data
  4008.             parser = SkypeExportParser(self.ctx)
  4009.             skype_export = await parser.parse(raw_data)
  4010.  
  4011.             # Select conversations to export
  4012.             selected_conversations = None
  4013.             if self.args.choose:
  4014.                 selected_conversations = await self.selector.select_conversations(skype_export)
  4015.  
  4016.                 if not selected_conversations:
  4017.                     self.ctx.logger.warning("No conversations selected, nothing to export")
  4018.                     return 0
  4019.  
  4020.             # Export selected conversations
  4021.             export_manager = ExportManager(self.ctx)
  4022.             exported_files = await export_manager.export_conversations(
  4023.                 skype_export, selected_conversations
  4024.             )
  4025.  
  4026.             # Display summary
  4027.             self.ui.display_summary(skype_export, exported_files)
  4028.  
  4029.             return 0
  4030.  
  4031.         except Exception as e:
  4032.             if self.ctx.options.basic_mode:
  4033.                 print(f"Error: {e}")
  4034.             else:
  4035.                 self.ctx.logger.error(f"Error: {e}")
  4036.  
  4037.             if self.ctx.options.debug_mode:
  4038.                 if RICH_AVAILABLE:
  4039.                     self.ctx.console.print_exception()
  4040.                 else:
  4041.                     self.ctx.logger.error(traceback.format_exc())
  4042.             return 1
  4043.  
  4044. def main() -> int:
  4045.     """
  4046.    Main entry point for the application.
  4047.  
  4048.    Returns:
  4049.        Exit code
  4050.    """
  4051.     app = SkypeExporterApp()
  4052.  
  4053.     # Get the event loop
  4054.     try:
  4055.         loop = asyncio.get_event_loop()
  4056.     except RuntimeError:
  4057.         # Create new event loop if none exists
  4058.         loop = asyncio.new_event_loop()
  4059.         asyncio.set_event_loop(loop)
  4060.  
  4061.     # Run the application
  4062.     try:
  4063.         return loop.run_until_complete(app.run())
  4064.     except KeyboardInterrupt:
  4065.         print("\nOperation cancelled by user")
  4066.         return 130  # Standard exit code for SIGINT
  4067.     finally:
  4068.         # Clean up
  4069.         loop.close()
  4070.  
  4071. if __name__ == "__main__":
  4072.     sys.exit(main())
Tags: Skype
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement