# datasource_controller.py
import json
import os
from typing import Optional
from urllib.parse import urlparse

import pandas as pd
import pusher
import requests
from dotenv import load_dotenv
from fastapi import Depends, UploadFile, HTTPException
from sqlalchemy import func, case
from sqlalchemy.orm import Session

from src.apps.auth.controller import get_current_user
from src.apps.datasource.models import Datasource
from src.apps.datasource.schemas import ExternalDatasourceCreate, InternalDatasourceCreate
from src.apps.datasource.services import (
    process_csv_data,
    create_datasource,
    save_csv_file,
    create_external_data_source,
)
from src.apps.stores.models import Store, Branch
from src.utils.db import get_db
from src.utils.settings import settings

# Load environment variables
load_dotenv()

# Webhook endpoint the parsed CSV reviews are forwarded to (see process_csv_reviews)
CSV_WEBHOOK_URL = os.getenv("CSV_WEBHOOK_URL")

# Fetch environment variables
app_id = settings.app_id
key = settings.key
secret = settings.secret
cluster = settings.cluster
batch_size = settings.batch_size

# Configure the Pusher client
pusher_client = pusher.Pusher(
    app_id=app_id,
    key=key,
    secret=secret,
    cluster=cluster,
    ssl=True
)
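
# The Pusher client broadcasts "progress-update" events on the "import-update"
# channel so the frontend can follow import progress in real time.
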
# ------ CONTROLLER LOGIC TO HANDLE CSV FILE UPLOAD ------

async def process_csv_reviews(datasource_id: int, file_path: str, db: Session):
    """
    Controller for processing CSV file and sending reviews to the webhook.
    """
    return await process_csv_data(db, datasource_id, file_path, CSV_WEBHOOK_URL)


async def upload_csv_file(file: UploadFile, store_id: int, branch_id: int, db: Session, user):
    """
    Controller for handling CSV file uploads and processing.
    """
    # Validate Store
    store = db.query(Store).filter(Store.store_id == store_id, Store.user_id == user.user_id).first()
    if not store:
        raise HTTPException(
            status_code=403,
            detail={"status": False, "code": 403, "message": "Unauthorized access to this store"}
        )
    
    # Validate Branch
    branch = db.query(Branch).filter(
        Branch.branch_id == branch_id,
        Branch.store_id == store_id,
        Branch.user_id == user.user_id
    ).first()
    if not branch:
        raise HTTPException(
            status_code=403,
            detail={"status": False, "code": 403, "message": "Unauthorized access to this branch"}
        )

    # Step 1: Save the file locally
    file_path = save_csv_file(file)
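
    # Illustrative CSV layout expected by the validation below (values are made up):
    #   name,comment,rating,date
    #   Jane Doe,"Great service",5,2024-01-15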

    # Step 2: Validate CSV format before processing
    try:
        df = pd.read_csv(file_path)
    except pd.errors.EmptyDataError:
        raise HTTPException(status_code=400, detail="CSV file is empty or invalid.")
    except pd.errors.ParserError:
        raise HTTPException(status_code=400, detail="CSV file could not be parsed.")
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error reading CSV: {str(e)}")

    # Check required columns outside the try block so the 400 below is not
    # caught by the generic handler above and re-raised as a 500
    required_columns = {"name", "comment", "rating", "date"}
    if not required_columns.issubset(set(df.columns)):
        raise HTTPException(
            status_code=400,
            detail={
                "status": False,
                "code": 400,
                "message": "CSV format is incorrect. Required columns: name, comment, rating, date"
            }
        )

    # Step 3: Create datasource entry in DB
    datasource_entry = create_datasource(db, store_id, branch_id, file_path)

    # Step 4: Call `process_csv_reviews`
    try:
        result = await process_csv_reviews(
            datasource_id=int(datasource_entry.ds_id),
            file_path=str(file_path),
            db=db  # Pass database session
        )

    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail={"status": False, "code": 500, "message": f"Error processing CSV: {str(e)}"}
        )

    # Notify the frontend via Pusher that the import has started
    channel_name = "import-update"
    pusher_client.trigger(
        channel_name,
        event_name="progress-update",
        data={
            "datasource_id": datasource_entry.ds_id,
            "processed": 0,
            "total": 0,
            "percent": 0,
            "status": "fetching",
            "status_msg": "Fetching records"
        }
    )

    return {
        "status": True,
        "code": 200,
        "message": "CSV uploaded and processing started successfully",
        "data": {
            "datasource_id": datasource_entry.ds_id,
            "snapshot_id": str(datasource_entry.snapshot_id),
            "file_url": file_path,
            "processing_result": result  # Include processing result
        }
    }



# ------ CONTROLLER LOGIC TO HANDLE EXTERNAL DATA SOURCE ADD ------


# Fetch environment variables
BRIGHT_DATA_TOKEN = os.getenv("BRIGHT_DATA_TOKEN")
WEBHOOK_STATIC_TOKEN = os.getenv("WEBHOOK_STATIC_TOKEN")
WEBHOOK_BASE_URL = os.getenv("WEBHOOK_BASE_URL")
BRIGHT_DATASET_ID = os.getenv("BRIGHT_DATASET_ID")
GOOGLE_DATASET_ID = os.getenv("GOOGLE_DATASET_ID")
BRIGHT_DATA_API_URL = os.getenv("BRIGHT_DATA_API_URL")
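# BRIGHT_DATASET_ID is used for Yelp sources and GOOGLE_DATASET_ID for Google
# sources (see the source-type detection in add_external_data_source below).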

def add_external_data_source(
    data: ExternalDatasourceCreate,
    db: Session,
    user
):
    """
    Controller function to handle adding external data sources and triggering Bright Data API.
    """

    # Step 1: Detect the source type, choose the appropriate dataset ID,
    # and normalize Google URLs
    url_str = str(data.url).lower()

    if "yelp" in url_str:
        selected_dataset_id = BRIGHT_DATASET_ID
        source_type = "yelp"

    elif any(keyword in url_str for keyword in ["google", "maps", "g.co"]):
        selected_dataset_id = GOOGLE_DATASET_ID
        source_type = "google"

        # ✅ Normalize Google URL to ensure it uses .com
        parsed_url = urlparse(str(data.url))
        hostname = parsed_url.hostname or ""
        
        if not hostname.endswith("google.com"):
            # Force hostname to google.com and rebuild the URL
            corrected_hostname = "www.google.com"
            corrected_url = parsed_url._replace(netloc=corrected_hostname).geturl()
            print(f"Original Google URL: {data.url}")
            print(f"Corrected Google URL: {corrected_url}")
            data.url = corrected_url  # 🔁 Overwrite with corrected URL

    else:
        raise HTTPException(
            status_code=400,
            detail={"status": False, "code": 400, "message": "Unsupported data source URL"}
        )

    # Step 2: Trigger a Bright Data collection run for the submitted URL
    headers = {
        "Authorization": f"Bearer {BRIGHT_DATA_TOKEN}",
        "Content-Type": "application/json",
    }

    # Construct the webhook URLs dynamically: Bright Data delivers scraped results
    # to `webhook_url` and posts run-status notifications to `notify_url`
    webhook_url = f"{WEBHOOK_BASE_URL}/webhook"
    notify_url = f"{WEBHOOK_BASE_URL}/notify"

    params = {
        "dataset_id": selected_dataset_id,
        "endpoint": webhook_url,
        "auth_header": f"Bearer {WEBHOOK_STATIC_TOKEN}",
        "format": "json",
        "uncompressed_webhook": "true",
        "notify": notify_url,  
        "include_errors": "true"
    }

   
    # The source_url from user input
    data_payload = [{"url": str(data.url)}]

    try:
        response = requests.post(BRIGHT_DATA_API_URL, headers=headers, params=params, json=data_payload)
        response.raise_for_status()

        print("\n===== Bright Data API Response =====")
        print(f"Status Code: {response.status_code}")
        print(json.dumps(response.json(), indent=4))  

        bright_data_response = response.json()

    except requests.exceptions.RequestException as req_err:
        print(f"Error making Bright Data API request: {req_err}")
        raise HTTPException(
            status_code=500, 
            detail={"status": False, "code": 500, "message": f"Bright Data API request failed: {str(req_err)}"}
        )

    except requests.exceptions.JSONDecodeError:
        print("Error: Bright Data API returned invalid JSON")
        raise HTTPException(
            status_code=500, 
            detail={"status": False, "code": 500, "message": "Bright Data API returned invalid JSON"}
        )

    if not isinstance(bright_data_response, dict) or "snapshot_id" not in bright_data_response:
        print("Error: Invalid response from Bright Data API")
        raise HTTPException(
            status_code=500, 
            detail={"status": False, "code": 500, "message": "Invalid response from Bright Data API"}
        )

    snapshot_id = bright_data_response["snapshot_id"]
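    # snapshot_id identifies this Bright Data collection run; it is stored with the
    # datasource entry (presumably so the webhook handler can match delivered
    # results back to this record).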

    # Step 3: Create the external datasource entry in the database with snapshot_id
    datasource_entry = create_external_data_source(data, db, user, snapshot_id)

    # Notify the frontend via Pusher that the import has started
    channel_name = "import-update"
    pusher_client.trigger(
        channel_name,
        event_name="progress-update",
        data={
            "datasource_id": datasource_entry.ds_id,
            "processed": 0,
            "total": 0,
            "percent": 0,
            "status": "fetching",
            "status_msg": "Fetching records"
        }
    )

    return {
        "status": True,
        "code": 200,
        "message": "External data source added successfully",
        "data": {
            "datasource_id": datasource_entry.ds_id,
            "snapshot_id": datasource_entry.snapshot_id,
            "url": datasource_entry.url
        }
    }

  

# ------ GET DATASOURCE BY BRANCH ID ------

def fetch_datasource_by_branch(
    db: Session,
    branch_id: int,
    user_id: int,
    page: Optional[int] = None,
    page_size: Optional[int] = None
):
    """Retrieve datasource records for a given branch_id that belongs to the logged-in user.
    
    - If pagination parameters (`page` & `page_size`) are provided, return paginated results.
    - Otherwise, return the full list.
    - Sorting Order: Status (`completed` > `pending` > `failed`), then `created_at` (latest first).
    """

    # Ensure branch belongs to the current user
    branch = db.query(Branch).filter(Branch.branch_id == branch_id, Branch.user_id == user_id).first()
    
    if not branch:
        raise HTTPException(
            status_code=403,
            detail={"status": False, "code": 403, "message": "Unauthorized access to this branch"}
        )

    branch_name = branch.branch_name  # Fetch branch name

    # Define custom sorting order for status
    status_order = case(
        (Datasource.last_fetch_status == "completed", 1),
        (Datasource.last_fetch_status == "pending", 2),
        (Datasource.last_fetch_status == "failed", 3),
        else_=4  # Default lowest priority if status is something else
    )
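    # case() assigns each status a numeric rank (completed=1, pending=2, failed=3);
    # ordering ascending on this rank yields the documented status priority.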

    # Query datasource records, sorted by status priority first, then by creation date (latest first)
    datasource_query = db.query(Datasource).filter(
        Datasource.branch_id == branch_id
    ).order_by(
        status_order,
        Datasource.created_at.desc()
    )

    # If pagination is not provided, return all records as a normal list
    if page is None or page_size is None:
        datasource_entries = datasource_query.all()
    else:
        # Count total records before applying pagination
        total_datasources = datasource_query.count()

        # Apply pagination
        datasource_entries = datasource_query.offset((page - 1) * page_size).limit(page_size).all()

        # Calculate total pages (ceiling division: e.g. 25 records at page_size 10 -> 3 pages)
        total_pages = (total_datasources + page_size - 1) // page_size

    # If no records were found, return a "not found" response
    if not datasource_entries:
        return {
            "status": False,
            "code": 404,
            "message": "No records found for this branch"
        }

    # Format the response data
    formatted_data = [
        {
            "store_id": entry.store_id,
            "branch_id": entry.branch_id,
            "branch_name": branch_name,
            "url": entry.url,
            "last_fetch_date": entry.last_fetch_date,
            "last_fetch_status": entry.last_fetch_status,
            "ds_id": entry.ds_id,
            "source_type": entry.source_type,
            "snapshot_id": entry.snapshot_id,
            "created_at": entry.created_at
        }
        for entry in datasource_entries
    ]

    response = {
        "status": True,
        "code": 200,
        "message": "Datasource records retrieved successfully",
        "data": formatted_data
    }

    # Add pagination details only if pagination was applied
    if page is not None and page_size is not None:
        response["pagination"] = {
            "total_records": total_datasources,
            "total_pages": total_pages,
            "current_page": page,
            "page_size": page_size
        }

    return response
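

# ---------------------------------------------------------------------------
# Illustrative router wiring (hypothetical sketch; the real routes live in the
# app's router module and may use different paths or parameter names):
#
#   from fastapi import APIRouter, File
#
#   router = APIRouter(prefix="/datasource")
#
#   @router.post("/upload-csv")
#   async def upload_csv(
#       store_id: int,
#       branch_id: int,
#       file: UploadFile = File(...),
#       db: Session = Depends(get_db),
#       user=Depends(get_current_user),
#   ):
#       return await upload_csv_file(file, store_id, branch_id, db, user)
#
#   @router.post("/external")
#   def add_external(
#       data: ExternalDatasourceCreate,
#       db: Session = Depends(get_db),
#       user=Depends(get_current_user),
#   ):
#       return add_external_data_source(data, db, user)
# ---------------------------------------------------------------------------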

