Several performance improvements

2026-01-15 08:22:56 -03:00 · 2025-08-30 01:38:54 -03:00
parent 9b2655e787
commit d02db1c8e6
2 changed files with 172 additions and 77 deletions
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -9,6 +9,5 @@ services:
      - "5000:5000"
    environment:
      - FLASK_ENV=production
      - GUNICORN_CMD_ARGS="--workers=3 --timeout=600 --keep-alive=10"
      # - PROXY_URL=https://your-domain.com
    restart: unless-stopped
--- a/run.py
+++ b/run.py
@@ -5,7 +5,9 @@ import logging
 import os
 import re
 import socket
 import time
 import urllib.parse
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from functools import lru_cache
 import dns.resolver
@@ -90,6 +92,19 @@ def setup_custom_dns():
 setup_custom_dns()
 # Create a session with connection pooling for better performance
 session = requests.Session()
 session.mount('http://', requests.adapters.HTTPAdapter(
    pool_connections=10,
    pool_maxsize=20,
    max_retries=3
 ))
 session.mount('https://', requests.adapters.HTTPAdapter(
    pool_connections=10,
    pool_maxsize=20,
    max_retries=3
 ))
 # Common request function with caching for API endpoints
@lru_cache(maxsize=128)
 def fetch_api_data(url, timeout=10):
@@ -97,23 +112,32 @@ def fetch_api_data(url, timeout=10):
    ua = UserAgent()
    headers = {
        "User-Agent": ua.chrome,
-        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
+        "Accept": "application/json,text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.5",
        "Connection": "keep-alive",
        "Accept-Encoding": "gzip, deflate",
    }
    try:
        hostname = urllib.parse.urlparse(url).netloc.split(":")[0]
        logger.info(f"Making request to host: {hostname}")
-        response = requests.get(url, headers=headers, timeout=timeout)
+        # Use session for connection pooling with streaming for large responses
        response = session.get(url, headers=headers, timeout=timeout, stream=True)
        response.raise_for_status()
-        # Try to parse as JSON
+        # For large responses, use streaming JSON parsing
        try:
-            return json.loads(response.text)
+            # Check content length to decide parsing strategy
            content_length = response.headers.get('Content-Length')
            if content_length and int(content_length) > 10_000_000:  # > 10MB
                logger.info(f"Large response detected ({content_length} bytes), using optimized parsing")
            # Stream the JSON content for better memory efficiency
            response.encoding = 'utf-8'  # Ensure proper encoding
            return response.json()
        except json.JSONDecodeError:
-            # Return text if not valid JSON
+            # Fallback to text for non-JSON responses
            return response.text
    except requests.exceptions.SSLError:
@@ -310,106 +334,171 @@ def validate_xtream_credentials(url, username, password):
    return data, None, None
 def fetch_api_endpoint(url_info):
    """Fetch a single API endpoint - used for concurrent requests"""
    url, name, timeout = url_info
    try:
        logger.info(f"🚀 Fetching {name}...")
        start_time = time.time()
        data = fetch_api_data(url, timeout=timeout)
        end_time = time.time()
        if isinstance(data, list):
            logger.info(f"✅ Completed {name} in {end_time-start_time:.1f}s - got {len(data)} items")
        else:
            logger.info(f"✅ Completed {name} in {end_time-start_time:.1f}s")
        return name, data
    except Exception as e:
        logger.warning(f"❌ Failed to fetch {name}: {e}")
        return name, None
 def fetch_categories_and_channels(url, username, password, include_vod=False):
-    """Fetch categories and channels from the Xtream API"""
+    """Fetch categories and channels from the Xtream API using concurrent requests"""
    all_categories = []
    all_streams = []
    try:
-        # Fetch live categories and streams
+        # Prepare all API endpoints to fetch concurrently
-        live_category_url = f"{url}/player_api.php?username={username}&password={password}&action=get_live_categories"
+        api_endpoints = [
-        live_categories = fetch_api_data(live_category_url, timeout=60)
+            (f"{url}/player_api.php?username={username}&password={password}&action=get_live_categories",
             "live_categories", 60),
            (f"{url}/player_api.php?username={username}&password={password}&action=get_live_streams",
             "live_streams", 180),
        ]
        # Add VOD endpoints if requested (WARNING: This will be much slower!)
        if include_vod:
            logger.warning("⚠️  Including VOD content - this will take significantly longer!")
            logger.info("💡 For faster loading, use the API without include_vod=true")
            # Only add the most essential VOD endpoints - skip the massive streams for categories-only requests
            api_endpoints.extend([
                (f"{url}/player_api.php?username={username}&password={password}&action=get_vod_categories",
                 "vod_categories", 60),
                (f"{url}/player_api.php?username={username}&password={password}&action=get_series_categories",
                 "series_categories", 60),
            ])
            # Only fetch the massive stream lists if explicitly needed for M3U generation
            vod_for_m3u = request.endpoint == 'generate_m3u'
            if vod_for_m3u:
                logger.warning("🐌 Fetching massive VOD/Series streams for M3U generation...")
                api_endpoints.extend([
                    (f"{url}/player_api.php?username={username}&password={password}&action=get_vod_streams",
                     "vod_streams", 240),
                    (f"{url}/player_api.php?username={username}&password={password}&action=get_series",
                     "series", 240),
                ])
            else:
                logger.info("⚡ Skipping massive VOD streams for categories-only request")
        # Fetch all endpoints concurrently using ThreadPoolExecutor
        logger.info(f"Starting concurrent fetch of {len(api_endpoints)} API endpoints...")
        results = {}
        with ThreadPoolExecutor(max_workers=10) as executor:  # Increased workers for better concurrency
            # Submit all API calls
            future_to_name = {executor.submit(fetch_api_endpoint, endpoint): endpoint[1]
                             for endpoint in api_endpoints}
            # Collect results as they complete
            for future in as_completed(future_to_name):
                name, data = future.result()
                results[name] = data
        logger.info("All concurrent API calls completed!")
        # Process live categories and streams (required)
        live_categories = results.get("live_categories")
        live_streams = results.get("live_streams")
        if isinstance(live_categories, tuple):  # Error response
            return None, None, live_categories[0], live_categories[1]
        if isinstance(live_streams, tuple):  # Error response
            return None, None, live_streams[0], live_streams[1]
-        live_channel_url = f"{url}/player_api.php?username={username}&password={password}&action=get_live_streams"
+        if not isinstance(live_categories, list) or not isinstance(live_streams, list):
        live_channels = fetch_api_data(live_channel_url, timeout=180)  # Much longer timeout for large channel lists
        if isinstance(live_channels, tuple):  # Error response
            return None, None, live_channels[0], live_channels[1]
        if not isinstance(live_categories, list) or not isinstance(live_channels, list):
            return (
                None,
                None,
                json.dumps(
                    {
                        "error": "Invalid Data Format",
-                        "details": "Live categories or channels data is not in the expected format",
+                        "details": "Live categories or streams data is not in the expected format",
                    }
                ),
                500,
            )
-        # Add content type to live categories and streams
+        # Optimized data processing - batch operations for massive datasets
-        for category in live_categories:
+        logger.info("Processing live content...")
            category["content_type"] = "live"
        for stream in live_channels:
            stream["content_type"] = "live"
-        all_categories.extend(live_categories)
+        # Batch set content_type for live content
-        all_streams.extend(live_channels)
+        if live_categories:
            for category in live_categories:
                category["content_type"] = "live"
            all_categories.extend(live_categories)
-        # If VOD is requested, fetch VOD content with timeout handling
+        if live_streams:
            for stream in live_streams:
                stream["content_type"] = "live"
            all_streams.extend(live_streams)
        logger.info(f"✅ Added {len(live_categories)} live categories and {len(live_streams)} live streams")
        # Process VOD content if requested and available
        if include_vod:
-            logger.info("Fetching VOD content - this may take longer...")
+            logger.info("Processing VOD content...")
-            try:
+            # Process VOD categories
-                # Fetch VOD categories with timeout
+            vod_categories = results.get("vod_categories")
-                vod_category_url = f"{url}/player_api.php?username={username}&password={password}&action=get_vod_categories"
+            if isinstance(vod_categories, list) and vod_categories:
-                vod_categories = fetch_api_data(vod_category_url, timeout=60)
+                for category in vod_categories:
                    category["content_type"] = "vod"
                all_categories.extend(vod_categories)
                logger.info(f"✅ Added {len(vod_categories)} VOD categories")
-                if isinstance(vod_categories, list):
+            # Process series categories first (lightweight)
-                    # Add content type to VOD categories
+            series_categories = results.get("series_categories")
-                    for category in vod_categories:
+            if isinstance(series_categories, list) and series_categories:
-                        category["content_type"] = "vod"
+                for category in series_categories:
-                    all_categories.extend(vod_categories)
+                    category["content_type"] = "series"
-                    logger.info(f"Added {len(vod_categories)} VOD categories")
+                all_categories.extend(series_categories)
                logger.info(f"✅ Added {len(series_categories)} series categories")
-                    # Only fetch VOD streams if categories were successful
+            # Only process massive stream lists if they were actually fetched
-                    vod_streams_url = f"{url}/player_api.php?username={username}&password={password}&action=get_vod_streams"
+            vod_streams = results.get("vod_streams")
-                    vod_streams = fetch_api_data(vod_streams_url, timeout=240)  # Very long timeout for massive VOD libraries
+            if isinstance(vod_streams, list) and vod_streams:
                logger.info(f"🔥 Processing {len(vod_streams)} VOD streams (this is the slow part)...")
-                    if isinstance(vod_streams, list):
+                # Batch process for better performance
-                        # Add content type to VOD streams
+                batch_size = 5000
-                        for stream in vod_streams:
+                for i in range(0, len(vod_streams), batch_size):
-                            stream["content_type"] = "vod"
+                    batch = vod_streams[i:i + batch_size]
-                        all_streams.extend(vod_streams)
+                    for stream in batch:
-                        logger.info(f"Added {len(vod_streams)} VOD streams")
+                        stream["content_type"] = "vod"
                    if i + batch_size < len(vod_streams):
                        logger.info(f"  Processed {i + batch_size}/{len(vod_streams)} VOD streams...")
-            except Exception as e:
+                all_streams.extend(vod_streams)
-                logger.warning(f"Failed to fetch VOD content: {e}")
+                logger.info(f"✅ Added {len(vod_streams)} VOD streams")
                # Continue without VOD content rather than failing completely
-            try:
+            # Process series (this can also be huge!)
-                # Fetch series categories with timeout
+            series = results.get("series")
-                series_category_url = (
+            if isinstance(series, list) and series:
-                    f"{url}/player_api.php?username={username}&password={password}&action=get_series_categories"
+                logger.info(f"🔥 Processing {len(series)} series (this is also slow)...")
                )
                series_categories = fetch_api_data(series_category_url, timeout=60)
-                if isinstance(series_categories, list):
+                # Batch process for better performance
-                    # Add content type to series categories
+                batch_size = 5000
-                    for category in series_categories:
+                for i in range(0, len(series), batch_size):
-                        category["content_type"] = "series"
+                    batch = series[i:i + batch_size]
-                    all_categories.extend(series_categories)
+                    for show in batch:
-                    logger.info(f"Added {len(series_categories)} series categories")
+                        show["content_type"] = "series"
                    if i + batch_size < len(series):
                        logger.info(f"  Processed {i + batch_size}/{len(series)} series...")
-                    # Only fetch series if categories were successful
+                all_streams.extend(series)
-                    series_url = f"{url}/player_api.php?username={username}&password={password}&action=get_series"
+                logger.info(f"✅ Added {len(series)} series")
                    series = fetch_api_data(series_url, timeout=240)  # Very long timeout for massive series libraries
                    if isinstance(series, list):
                        # Add content type to series
                        for show in series:
                            show["content_type"] = "series"
                        all_streams.extend(series)
                        logger.info(f"Added {len(series)} series")
            except Exception as e:
                logger.warning(f"Failed to fetch series content: {e}")
                # Continue without series content rather than failing completely
    except Exception as e:
        logger.error(f"Critical error fetching API data: {e}")
@@ -425,7 +514,7 @@ def fetch_categories_and_channels(url, username, password, include_vod=False):
            500,
        )
-    logger.info(f"Successfully fetched {len(all_categories)} total categories and {len(all_streams)} total streams")
+    logger.info(f"🚀 CONCURRENT FETCH COMPLETE: {len(all_categories)} total categories and {len(all_streams)} total streams")
    return all_categories, all_streams, None, None
@@ -437,8 +526,9 @@ def get_categories():
    if error:
        return error
-    # Check for VOD parameter - default to false to avoid timeouts
+    # Check for VOD parameter - default to false to avoid timeouts (VOD is massive and slow!)
    include_vod = request.args.get("include_vod", "false").lower() == "true"
    logger.info(f"VOD content requested: {include_vod}")
    # Validate credentials
    user_data, error_json, error_code = validate_xtream_credentials(url, username, password)
@@ -511,6 +601,12 @@ def generate_m3u():
    no_stream_proxy = request.args.get("nostreamproxy", "").lower() == "true"
    include_vod = request.args.get("include_vod", "false").lower() == "true"  # Default to false to avoid timeouts
    # For M3U generation, warn about VOD performance impact
    if include_vod:
        logger.warning("⚠️  M3U generation with VOD enabled - expect 2-5 minute generation time!")
    else:
        logger.info("⚡ M3U generation for live content only - should be fast!")
    # Log filter parameters
    logger.info(
        f"Filter parameters - wanted_groups: {wanted_groups}, unwanted_groups: {unwanted_groups}, include_vod: {include_vod}"