#!/usr/bin/env python3
"""
Analyze field relationships in Elasticsearch backup file.
Checks if two fields have a 1:1 mapping relationship.
"""

import gzip
import json
import sys
from collections import defaultdict

def _freeze(value):
    """Return a hashable stand-in for a decoded JSON value.

    Lists become tuples and objects become frozensets of ``(key, value)``
    pairs, recursively, so that any JSON value can be used as a dict key or
    set member. Scalars (str, int, float, bool, None) are returned unchanged.
    """
    if isinstance(value, list):
        return tuple(_freeze(item) for item in value)
    if isinstance(value, dict):
        # frozenset keeps object equality independent of key order and cannot
        # collide with the tuple produced for a list.
        return frozenset((key, _freeze(item)) for key, item in value.items())
    return value


def _normalize_field_value(value):
    """Convert a raw field value into a hashable key, or None if absent.

    An empty list is treated the same as a missing field.
    """
    if isinstance(value, list) and not value:
        return None
    return _freeze(value)


def _print_violations(source_field, target_field, violations, limit=5):
    """Print up to *limit* values of source_field that map to several targets."""
    if not violations:
        return
    print(f"\n   {source_field} → {target_field} violations ({len(violations)}):")
    for value, targets in violations[:limit]:
        print(f"      '{value}' → {targets}")
    if len(violations) > limit:
        print(f"      ... and {len(violations) - limit} more")


def analyze_relationship(backup_file: str, field1: str, field2: str):
    """
    Analyze the relationship between two fields.

    Reads a gzip-compressed file containing one JSON record per line, skips
    records whose ``type`` is ``'header'``, and looks up ``field1`` and
    ``field2`` in each record's ``_source`` object. Only documents where both
    values are present (not None and not an empty list) contribute to the
    mapping. A report is printed to stdout.

    Args:
        backup_file: Path to the gzip-compressed JSON-lines backup.
        field1: Name of the first ``_source`` field.
        field2: Name of the second ``_source`` field.

    Returns:
        A dict with:
            'is_1_to_1': True when every field1 value maps to exactly one
                field2 value and vice versa.
            'unique_field1': number of distinct field1 values seen.
            'unique_field2': number of distinct field2 values seen.
            'mapping': dict of field2 value -> set of field1 values.
        List values appear as tuples and object values as frozensets of
        (key, value) pairs so they can be used as keys.

    Raises:
        OSError: if the file cannot be opened or is not valid gzip data.
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    # Track mappings in both directions
    field1_to_field2 = defaultdict(set)  # field1 value -> set of field2 values
    field2_to_field1 = defaultdict(set)  # field2 value -> set of field1 values

    total_docs = 0
    docs_with_both = 0

    print(f"📋 Analyzing relationship: {field1} ↔ {field2}")
    print(f"   File: {backup_file}\n")

    with gzip.open(backup_file, 'rt', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing newline) are not records.
                continue

            record = json.loads(line)

            # Skip header
            if record.get('type') == 'header':
                continue

            total_docs += 1
            # `or {}` also covers an explicit `"_source": null`.
            source = record.get('_source') or {}

            val1 = _normalize_field_value(source.get(field1))
            val2 = _normalize_field_value(source.get(field2))

            if val1 is not None and val2 is not None:
                docs_with_both += 1
                field1_to_field2[val1].add(val2)
                field2_to_field1[val2].add(val1)

            if total_docs % 1000 == 0:
                # '\r' rewrites the same terminal line for a live counter.
                print(f"   Processed {total_docs:,} documents...", end='\r')

    print("\n\n📊 Results:")
    print(f"   Total documents: {total_docs:,}")
    print(f"   Documents with both fields: {docs_with_both:,}")
    print(f"   Unique {field1} values: {len(field1_to_field2)}")
    print(f"   Unique {field2} values: {len(field2_to_field1)}")

    # Check 1:1 relationship: any key with more than one partner breaks it.
    violations_1_to_2 = [
        (val1, val2_set)
        for val1, val2_set in field1_to_field2.items()
        if len(val2_set) > 1
    ]
    violations_2_to_1 = [
        (val2, val1_set)
        for val2, val1_set in field2_to_field1.items()
        if len(val1_set) > 1
    ]
    is_1_to_1 = not violations_1_to_2 and not violations_2_to_1

    print("\n🔍 Relationship Analysis:")

    if is_1_to_1:
        print("   ✅ 1:1 relationship confirmed!")
        print(f"   Each {field1} maps to exactly one {field2} and vice versa.")
    else:
        print("   ❌ NOT a 1:1 relationship")
        _print_violations(field1, field2, violations_1_to_2)
        _print_violations(field2, field1, violations_2_to_1)

    # Print the mapping table
    print(f"\n📋 Complete Mapping Table ({field2} → {field1}):")
    print("-" * 60)

    # Sort by string form so mixed-type keys compare and output is stable.
    for val2 in sorted(field2_to_field1, key=str):
        for val1 in sorted(field2_to_field1[val2], key=str):
            print(f"   {val2}: {val1}")

    return {
        'is_1_to_1': is_1_to_1,
        'unique_field1': len(field1_to_field2),
        'unique_field2': len(field2_to_field1),
        'mapping': dict(field2_to_field1),
    }


if __name__ == '__main__':
    # Script entry point: needs three positional arguments after the
    # program name; otherwise show usage and exit with status 1.
    if len(sys.argv) <= 3:
        print("Usage: python analyze_relationship.py <backup_file> <field1> <field2>")
        print("Example: python analyze_relationship.py backup.json.gz arabic_category_name category_id")
        raise SystemExit(1)

    # Any extra arguments beyond the first three are ignored.
    backup_file, field1, field2 = sys.argv[1:4]

    analyze_relationship(backup_file, field1, field2)
