import asyncio
import sys
import os
import requests
from bs4 import BeautifulSoup
from datetime import datetime
from db_connector import upsert_historical_candles, get_db
from nepse import AsyncNepse

# Adjust path to find local packages if necessary
sys.path.append(os.path.dirname(os.path.abspath(__file__)))


def get_company_page(symbol):
    """Retrieve ShareSansar company page details including CSRF token and company ID"""
    url = f"https://www.sharesansar.com/company/{symbol.lower()}"
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Referer': 'https://www.sharesansar.com/'
    }
    session = requests.Session()
    try:
        response = session.get(url, headers=headers)
        response.raise_for_status()
        
        soup = BeautifulSoup(response.text, "lxml")
        company_id_tag = soup.select_one("#companyid")
        company_id = company_id_tag.get_text(strip=True) if company_id_tag else None
        
        csrf_token_tag = soup.select_one("meta[name=_token]")
        csrf_token = csrf_token_tag["content"] if csrf_token_tag else None
        
        return session, company_id, csrf_token
    except Exception as e:
        print(f"Error fetching ShareSansar page for {symbol}: {e}")
        return None, None, None


def scrape_sharesansar_history(symbol):
    """Scrape 5 years of real history (including real Open Prices) from ShareSansar"""
    symbol = symbol.upper()
    session, company_id, csrf_token = get_company_page(symbol)
    
    if not company_id:
        print(f"Skipping {symbol}: Could not resolve company ID from ShareSansar.")
        return []

    payload = {
        "company": company_id,
        "draw": 1,
        "start": 0,
        "length": 1500,  # 5 years of trading days is ~1250, so 1500 is perfect!
        "daterange": "",
        "search[value]": "",
        "search[regex]": "false",
        "order[0][column]": 1,
        "order[0][dir]": "desc",
    }
    
    columns = [
        "DT_Row_Index", "published_date", "open", "high", "low", "close",
        "per_change", "traded_quantity", "traded_amount"
    ]
    
    for idx, col_name in enumerate(columns):
        payload[f"columns[{idx}][data]"] = col_name
        payload[f"columns[{idx}][name]"] = ""
        payload[f"columns[{idx}][searchable]"] = "true" if col_name == "published_date" else "false"
        payload[f"columns[{idx}][orderable]"] = "false"
        payload[f"columns[{idx}][search][value]"] = ""
        payload[f"columns[{idx}][search][regex]"] = "false"

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Referer': f'https://www.sharesansar.com/company/{symbol.lower()}',
        'Origin': 'https://www.sharesansar.com',
        'X-Requested-With': 'XMLHttpRequest',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'
    }
    if csrf_token:
        headers['X-CSRF-Token'] = csrf_token

    try:
        response = session.post('https://www.sharesansar.com/company-price-history', data=payload, headers=headers)
        response.raise_for_status()
        response_json = response.json()
        raw_history = response_json.get('data', [])
        
        candles_payload = []
        for row in raw_history:
            pub_date = row.get('published_date') or row.get('date')
            if not pub_date:
                continue
                
            # Convert ShareSansar values (strings with potential commas) to database types
            def clean_float(val):
                if not val:
                    return 0.0
                try:
                    return float(str(val).replace(',', '').strip())
                except:
                    return 0.0
                    
            def clean_int(val):
                if not val:
                    return 0
                try:
                    return int(float(str(val).replace(',', '').strip()))
                except:
                    return 0

            candles_payload.append({
                "symbol": symbol,
                "business_date": pub_date.strip(),
                "open_price": clean_float(row.get('open')),
                "high_price": clean_float(row.get('high')),
                "low_price": clean_float(row.get('low')),
                "close_price": clean_float(row.get('close')),
                "volume": clean_int(row.get('traded_quantity'))
            })
            
        return candles_payload
    except Exception as e:
        print(f"Error scraping ShareSansar history for {symbol}: {e}")
        return []


async def run_backfill():
    db = get_db()
    if not db:
        print("Backfill Error: Supabase connection is not configured in .env. Exiting.")
        return

    nepse = AsyncNepse()
    nepse.setTLSVerification(False)
    
    print("Fetching active company list from NEPSE...")
    companies = await nepse.getCompanyList()
    print(f"Discovered {len(companies)} symbols to backfill.")
    
    # Run backfill sync sequentially to be highly respectful of ShareSansar limits
    for idx, company in enumerate(companies):
        symbol = company.get("symbol")
        if not symbol:
            continue
            
        name = company.get("securityName", company.get("companyName", ""))
        text_to_check = f"{symbol} {name}".lower()
        import re
        if re.search(r'\d', text_to_check):
            continue
        forbidden_words = ["yojana", "debenture", "mutual fund", "bluechip fund", "balanced", "equity fund"]
        if any(word in text_to_check for word in forbidden_words):
            continue
            
        print(f"[{idx+1}/{len(companies)}] Backfilling {symbol} with REAL Open prices from ShareSansar...")
        candles = scrape_sharesansar_history(symbol)
        
        if candles:
            print(f" -> Found {len(candles)} historical records. Upserting to Supabase...")
            upsert_historical_candles(candles)
            # Short sleep to prevent hitting ShareSansar rate limits
            await asyncio.sleep(2)
        else:
            print(f" -> No history found for {symbol}.")
            
    print("ShareSansar 5-Year Backfill Completed Successfully!")


if __name__ == "__main__":
    asyncio.run(run_backfill())
