video post-processing, moves saved and used by the TS client
.gitignore (vendored): 5 changed lines
@@ -301,4 +301,7 @@ dist
 .pnp.*
 
 # Yolo
 *.pt
+
+# Downloads
+downloaded_videos/

jd-clone/index.json (new file): 18 lines
@@ -0,0 +1,18 @@
+{
+  "songs": {
+    "Butter": {
+      "Name": "Butter",
+      "Artist": "BTS",
+      "Album": "Butter",
+      "Year": 2021,
+      "Genre": "K-Pop",
+      "GameData": {
+        "moves": "https://cdn.ovosimpatico.com/jdlo/maps/Butter/poses.json",
+        "video": "https://cdn.ovosimpatico.com/jdlo/maps/Butter/Butter_ULTRA.webm",
+        "audio": "https://cdn.ovosimpatico.com/jdlo/maps/Butter/Butter.ogg",
+        "cover": "https://cdn.ovosimpatico.com/jdlo/maps/Butter/butter_cover_generic.png",
+        "audio_preview": "https://cdn.ovosimpatico.com/jdlo/maps/Butter/butter_audiopreview.ogg"
+      }
+    }
+  }
+}

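Note: each key under "songs" ("Butter" here) doubles as the song id, and GameData carries the five CDN asset URLs; the client's getSongs() further down builds its Song objects exactly this way. A minimal standalone sketch of consuming the index (the local path is an assumption, not part of the commit):

    import json

    with open("jd-clone/index.json") as f:  # path is an assumption
        index = json.load(f)

    # Each entry's key is the song id; GameData holds the CDN asset URLs.
    for song_id, song in index["songs"].items():
        print(f"{song_id}: {song['Artist']} - {song['Name']} ({song['Year']})")
        for asset, url in song["GameData"].items():
            print(f"  {asset}: {url}")
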
@@ -5,7 +5,7 @@
   "type": "module",
   "scripts": {
     "dev": "vite",
-    "build": "tsc -b && vite build",
+    "build": "tsc -b && vite build && cp ../index.json dist/",
     "lint": "eslint .",
     "preview": "vite preview"
   },

jd-clone/src/components/common/AudioPreview.tsx (new file): 61 lines
@@ -0,0 +1,61 @@
+import { useEffect, useRef, useState } from 'react';
+import useAppStore from '../../store/app-store';
+
+interface AudioPreviewProps {
+  src: string;
+  autoPlay?: boolean;
+}
+
+const AudioPreview = ({ src, autoPlay = false }: AudioPreviewProps) => {
+  const audioRef = useRef<HTMLAudioElement | null>(null);
+  const [isPlaying, setIsPlaying] = useState(false);
+  const masterVolume = useAppStore(state => state.settings.volume.master);
+  const musicVolume = useAppStore(state => state.settings.volume.music);
+
+  useEffect(() => {
+    if (!audioRef.current) return;
+
+    // Set volume based on app settings
+    const volume = masterVolume * musicVolume;
+    audioRef.current.volume = volume;
+
+    // Autoplay if needed
+    if (autoPlay && audioRef.current) {
+      audioRef.current.play().catch(error => {
+        console.warn('Autoplay prevented:', error);
+      });
+    }
+
+    return () => {
+      if (audioRef.current) {
+        audioRef.current.pause();
+        audioRef.current.currentTime = 0;
+      }
+    };
+  }, [src, autoPlay, masterVolume, musicVolume]);
+
+  useEffect(() => {
+    const audio = audioRef.current;
+    if (!audio) return;
+
+    const handlePlay = () => setIsPlaying(true);
+    const handlePause = () => setIsPlaying(false);
+    const handleEnded = () => setIsPlaying(false);
+
+    audio.addEventListener('play', handlePlay);
+    audio.addEventListener('pause', handlePause);
+    audio.addEventListener('ended', handleEnded);
+
+    return () => {
+      audio.removeEventListener('play', handlePlay);
+      audio.removeEventListener('pause', handlePause);
+      audio.removeEventListener('ended', handleEnded);
+    };
+  }, []);
+
+  return (
+    <audio ref={audioRef} src={src} loop={false} preload="auto" />
+  );
+};
+
+export default AudioPreview;

jd-clone/src/components/game/VideoPlayer.scss (new file): 14 lines
@@ -0,0 +1,14 @@
+.video-player {
+  position: relative;
+  overflow: hidden;
+  background-color: black;
+  border-radius: 8px;
+  box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
+
+  video {
+    display: block;
+    object-fit: contain;
+    width: 100%;
+    height: 100%;
+  }
+}

jd-clone/src/components/game/VideoPlayer.tsx (new file): 168 lines
@@ -0,0 +1,168 @@
+import { useEffect, useRef } from 'react';
+import useAppStore from '../../store/app-store';
+import './VideoPlayer.scss';
+
+interface VideoPlayerProps {
+  src: string;
+  audioSrc?: string;
+  playing: boolean;
+  muted?: boolean;
+  width?: number | string;
+  height?: number | string;
+  onEnded?: () => void;
+}
+
+const VideoPlayer = ({
+  src,
+  audioSrc,
+  playing,
+  muted = false,
+  width = '100%',
+  height = '100%',
+  onEnded
+}: VideoPlayerProps) => {
+  const videoRef = useRef<HTMLVideoElement | null>(null);
+  const audioRef = useRef<HTMLAudioElement | null>(null);
+  const masterVolume = useAppStore(state => state.settings.volume.master);
+  const musicVolume = useAppStore(state => state.settings.volume.music);
+
+  // Manage play/pause state for video
+  useEffect(() => {
+    const video = videoRef.current;
+    if (!video) return;
+
+    if (playing) {
+      video.play().catch(error => {
+        console.warn('Video playback prevented:', error);
+      });
+    } else {
+      video.pause();
+    }
+  }, [playing]);
+
+  // Manage play/pause state for audio
+  useEffect(() => {
+    const audio = audioRef.current;
+    if (!audio || !audioSrc) return;
+
+    if (playing) {
+      // Sync with video if needed
+      if (videoRef.current) {
+        audio.currentTime = videoRef.current.currentTime;
+      }
+
+      audio.play().catch(error => {
+        console.warn('Audio playback prevented:', error);
+      });
+    } else {
+      audio.pause();
+    }
+  }, [playing, audioSrc]);
+
+  // Sync audio with video when video seeks or loads
+  useEffect(() => {
+    const video = videoRef.current;
+    const audio = audioRef.current;
+    if (!video || !audio || !audioSrc) return;
+
+    const handleTimeUpdate = () => {
+      // Only sync if the difference is significant (more than 0.1 seconds)
+      if (Math.abs(video.currentTime - audio.currentTime) > 0.1) {
+        audio.currentTime = video.currentTime;
+      }
+    };
+
+    const handlePlay = () => {
+      if (playing) {
+        audio.play().catch(error => {
+          console.warn('Audio playback prevented:', error);
+        });
+      }
+    };
+
+    const handlePause = () => {
+      audio.pause();
+    };
+
+    video.addEventListener('seeked', handleTimeUpdate);
+    video.addEventListener('play', handlePlay);
+    video.addEventListener('pause', handlePause);
+
+    return () => {
+      video.removeEventListener('seeked', handleTimeUpdate);
+      video.removeEventListener('play', handlePlay);
+      video.removeEventListener('pause', handlePause);
+    };
+  }, [playing, audioSrc]);
+
+  // Manage volume for video
+  useEffect(() => {
+    const video = videoRef.current;
+    if (!video) return;
+
+    // If we have a separate audio source, mute the video
+    if (audioSrc) {
+      video.muted = true;
+    } else {
+      // Otherwise use the video's audio
+      if (muted) {
+        video.muted = true;
+      } else {
+        video.muted = false;
+        video.volume = masterVolume * musicVolume;
+      }
+    }
+  }, [masterVolume, musicVolume, muted, audioSrc]);
+
+  // Manage volume for audio
+  useEffect(() => {
+    const audio = audioRef.current;
+    if (!audio || !audioSrc) return;
+
+    if (muted) {
+      audio.muted = true;
+    } else {
+      audio.muted = false;
+      audio.volume = masterVolume * musicVolume;
+    }
+  }, [masterVolume, musicVolume, muted, audioSrc]);
+
+  // Setup event handlers for video
+  useEffect(() => {
+    const video = videoRef.current;
+    if (!video) return;
+
+    const handleEnded = () => {
+      if (onEnded) onEnded();
+    };
+
+    video.addEventListener('ended', handleEnded);
+
+    return () => {
+      video.removeEventListener('ended', handleEnded);
+    };
+  }, [onEnded]);
+
+  return (
+    <div className="video-player" style={{ width, height }}>
+      <video
+        ref={videoRef}
+        src={src}
+        preload="auto"
+        playsInline
+        width="100%"
+        height="100%"
+        muted={!!audioSrc || muted} // Always mute video if using separate audio
+      />
+      {audioSrc && (
+        <audio
+          ref={audioRef}
+          src={audioSrc}
+          preload="auto"
+        />
+      )}
+    </div>
+  );
+};
+
+export default VideoPlayer;

@@ -249,6 +249,47 @@
     align-items: center;
     gap: 1rem;
   }
+
+  // Completion overlay
+  &__completion-overlay {
+    position: fixed;
+    top: 0;
+    left: 0;
+    right: 0;
+    bottom: 0;
+    background-color: rgba(0, 0, 0, 0.7);
+    display: flex;
+    align-items: center;
+    justify-content: center;
+    z-index: 100;
+    animation: fade-in 0.5s ease-out;
+  }
+
+  &__completion-content {
+    background-color: rgba(26, 26, 26, 0.9);
+    border-radius: 12px;
+    padding: 3rem;
+    text-align: center;
+    box-shadow: 0 0 30px rgba(255, 215, 0, 0.3);
+    animation: scale-in 0.5s ease-out;
+
+    h2 {
+      font-size: 3.5rem;
+      margin-top: 0;
+      margin-bottom: 1.5rem;
+      color: #ffca3a;
+    }
+
+    p {
+      font-size: 1.8rem;
+      margin-bottom: 1rem;
+
+      &:last-child {
+        color: #8ac926;
+        font-weight: bold;
+      }
+    }
+  }
 }
 
 @keyframes feedback-pulse {
@@ -263,4 +304,24 @@
     transform: scale(1);
     opacity: 1;
   }
+}
+
+@keyframes fade-in {
+  from {
+    opacity: 0;
+  }
+  to {
+    opacity: 1;
+  }
+}
+
+@keyframes scale-in {
+  from {
+    transform: scale(0.8);
+    opacity: 0;
+  }
+  to {
+    transform: scale(1);
+    opacity: 1;
+  }
 }

@@ -1,6 +1,7 @@
 import { useEffect, useState } from 'react';
 import { useNavigate } from 'react-router-dom';
 import { PoseRenderer } from '../../components/game/PoseRenderer';
+import VideoPlayer from '../../components/game/VideoPlayer';
 import usePoseDetection from '../../hooks/usePoseDetection';
 import useInputDetection from '../../hooks/useInputDetection';
 import useControllerDetection from '../../hooks/useControllerDetection';
@@ -19,6 +20,7 @@ function GameplayPage() {
   const [gameStarted, setGameStarted] = useState(false);
   const [countdown, setCountdown] = useState(3);
   const [error, setError] = useState<string | null>(null);
+  const [gameCompleted, setGameCompleted] = useState(false);
 
   // Get app state
   const selectedSongId = useAppStore(state => state.selectedSongId);
@@ -101,6 +103,7 @@ function GameplayPage() {
 
     // Reset game state
     resetGameState();
+    setGameCompleted(false);
 
     loadGameData();
 
@@ -142,6 +145,16 @@ function GameplayPage() {
     navigate('/results');
   };
+
+  // Handle video ended
+  const handleVideoEnded = () => {
+    setGameCompleted(true);
+    endGame();
+    // Wait a short moment before navigating to results
+    setTimeout(() => {
+      navigate('/results');
+    }, 1500);
+  };
 
   // If we don't have a selected song, redirect to setup
   if (!selectedSongId) {
     useEffect(() => {
@@ -192,6 +205,21 @@ function GameplayPage() {
     );
   };
+
+  // Helper to render game completion overlay
+  const renderCompletionOverlay = () => {
+    if (!gameCompleted) return null;
+
+    return (
+      <div className="gameplay-page__completion-overlay">
+        <div className="gameplay-page__completion-content">
+          <h2>Song Complete!</h2>
+          <p>Great job dancing!</p>
+          <p>Redirecting to results...</p>
+        </div>
+      </div>
+    );
+  };
 
   return (
     <div className="gameplay-page gameplay-page--tv-mode">
       {loading ? (
@@ -271,11 +299,19 @@ function GameplayPage() {
 
           <div className="gameplay-page__content">
             <div className="gameplay-page__video-container">
-              {/* Video will go here */}
-              <div className="gameplay-page__video-placeholder">
-                <p>Dance video will play here</p>
-                <p>Song: {song?.title} by {song?.artist}</p>
-              </div>
+              {song && song.GameData ? (
+                <VideoPlayer
+                  src={song.GameData.video}
+                  audioSrc={song.GameData.audio}
+                  playing={isPlaying && !isPaused}
+                  onEnded={handleVideoEnded}
+                />
+              ) : (
+                <div className="gameplay-page__video-placeholder">
+                  <p>Dance video will play here</p>
+                  <p>Song information not available</p>
+                </div>
+              )}
             </div>
 
             <div className="gameplay-page__camera-container">
@@ -302,6 +338,7 @@ function GameplayPage() {
         </div>
 
         {renderPauseOverlay()}
+        {renderCompletionOverlay()}
       </>
     )}
   </div>

@@ -46,8 +46,8 @@ function ResultsPage() {
       <h1 className="results-page__title">Results</h1>
 
       <div className="results-page__song-info">
-        <h2>{selectedSong?.title || 'Unknown Song'}</h2>
-        <p>{selectedSong?.artist || 'Unknown Artist'}</p>
+        <h2>{selectedSong?.Name || 'Unknown Song'}</h2>
+        <p>{selectedSong?.Artist || 'Unknown Artist'}</p>
       </div>
 
       <div className="results-page__score-container">
@@ -2,6 +2,7 @@ import React, { useEffect, useState } from 'react';
 import { useNavigate } from 'react-router-dom';
 import { Button } from '../../components/common/Button';
 import { PoseRenderer } from '../../components/game/PoseRenderer';
+import AudioPreview from '../../components/common/AudioPreview';
 import usePoseDetection from '../../hooks/usePoseDetection';
 import useAppStore from '../../store/app-store';
 import songService from '../../services/song-service';
@@ -13,6 +14,7 @@ function GameSetupPage() {
   const [songs, setSongs] = useState<Song[]>([]);
   const [loading, setLoading] = useState(true);
   const [selectedSongId, setSelectedSongId] = useState<string | null>(null);
+  const [currentPreviewUrl, setCurrentPreviewUrl] = useState<string | null>(null);
 
   const difficulty = useAppStore(state => state.settings.difficulty);
   const setDifficulty = useAppStore(state => state.setDifficulty);
@@ -31,6 +33,8 @@ function GameSetupPage() {
         // Select the first song by default
         if (songList.length > 0 && !selectedSongId) {
           setSelectedSongId(songList[0].id);
+          // Set the preview URL for the first song
+          setCurrentPreviewUrl(songList[0].GameData.audio_preview);
         }
       } catch (error) {
         console.error('Failed to load songs:', error);
@@ -48,6 +52,12 @@ function GameSetupPage() {
 
   const handleSongSelect = (songId: string) => {
     setSelectedSongId(songId);
+
+    // Update the audio preview URL when selecting a new song
+    const selectedSong = songs.find(song => song.id === songId);
+    if (selectedSong) {
+      setCurrentPreviewUrl(selectedSong.GameData.audio_preview);
+    }
   };
 
   const handleStartGame = () => {
@@ -103,27 +113,32 @@ function GameSetupPage() {
                 className={`setup-page__song-item ${selectedSongId === song.id ? 'setup-page__song-item--selected' : ''}`}
                 onClick={() => handleSongSelect(song.id)}
               >
-                <div className="setup-page__song-cover" style={{ backgroundImage: `url(${song.coverUrl})` }} />
+                <div className="setup-page__song-cover" style={{ backgroundImage: `url(${song.GameData.cover})` }} />
                 <div className="setup-page__song-info">
-                  <h3 className="setup-page__song-title">{song.title}</h3>
-                  <p className="setup-page__song-artist">{song.artist}</p>
-                  <p className="setup-page__song-duration">{Math.floor(song.duration / 60)}:{(song.duration % 60).toString().padStart(2, '0')}</p>
+                  <h3 className="setup-page__song-title">{song.Name}</h3>
+                  <p className="setup-page__song-artist">{song.Artist}</p>
+                  <p className="setup-page__song-genre">{song.Genre || 'Unknown genre'}</p>
                 </div>
               </div>
             ))}
           </div>
         )}
+
+        {/* Audio Preview Player */}
+        {currentPreviewUrl && (
+          <AudioPreview src={currentPreviewUrl} autoPlay={true} />
+        )}
       </div>
 
       <div className="setup-page__difficulty-selection">
         <h2>Select Difficulty</h2>
         <div className="setup-page__difficulty-buttons">
-          {selectedSong?.difficulty.map((diff) => (
+          {(['easy', 'medium', 'hard', 'extreme'] as DifficultyLevel[]).map((diff) => (
             <button
               key={diff}
               className={`setup-page__difficulty-button setup-page__difficulty-button--${diff} ${difficulty === diff ? 'setup-page__difficulty-button--selected' : ''}`}
               onClick={() => handleDifficultyChange(diff)}
-              disabled={!selectedSong.difficulty.includes(diff)}
+              disabled={selectedSong?.difficulty ? !selectedSong.difficulty.includes(diff) : false}
             >
               {diff.toUpperCase()}
             </button>

@@ -1,140 +1,101 @@
 import { Song, Choreography, DifficultyLevel, Move } from '../types';
 
-// Mock song data for development purposes
-const MOCK_SONGS: Song[] = [
-  {
-    id: 'song1',
-    title: 'Dance The Night',
-    artist: 'Dua Lipa',
-    bpm: 120,
-    duration: 176,
-    coverUrl: 'https://example.com/covers/dance-the-night.jpg',
-    audioUrl: 'https://example.com/songs/dance-the-night.mp3',
-    videoUrl: 'https://example.com/videos/dance-the-night.mp4',
-    difficulty: ['easy', 'medium', 'hard'],
-    tags: ['pop', 'upbeat', 'disco']
-  },
-  {
-    id: 'song2',
-    title: 'Levitating',
-    artist: 'Dua Lipa ft. DaBaby',
-    bpm: 103,
-    duration: 203,
-    coverUrl: 'https://example.com/covers/levitating.jpg',
-    audioUrl: 'https://example.com/songs/levitating.mp3',
-    videoUrl: 'https://example.com/videos/levitating.mp4',
-    difficulty: ['easy', 'medium', 'hard', 'extreme'],
-    tags: ['pop', 'upbeat', 'disco']
-  },
-  {
-    id: 'song3',
-    title: 'Physical',
-    artist: 'Dua Lipa',
-    bpm: 124,
-    duration: 183,
-    coverUrl: 'https://example.com/covers/physical.jpg',
-    audioUrl: 'https://example.com/songs/physical.mp3',
-    videoUrl: 'https://example.com/videos/physical.mp4',
-    difficulty: ['medium', 'hard', 'extreme'],
-    tags: ['pop', 'dance', 'workout']
-  }
-];
-
-// Mock choreography data with placeholder moves
-const MOCK_CHOREOGRAPHIES: Record<string, Record<DifficultyLevel, Choreography>> = {
-  song1: {
-    easy: {
-      songId: 'song1',
-      difficulty: 'easy',
-      moves: Array(20).fill(null).map((_, index) => ({
-        id: `song1-easy-move-${index}`,
-        startTime: index * 8000,
-        duration: 4000,
-        keyPosePoints: [], // This would contain actual pose landmarks
-        difficulty: 'easy',
-        score: 100
-      }))
-    },
-    medium: {
-      songId: 'song1',
-      difficulty: 'medium',
-      moves: Array(30).fill(null).map((_, index) => ({
-        id: `song1-medium-move-${index}`,
-        startTime: index * 6000,
-        duration: 3000,
-        keyPosePoints: [],
-        difficulty: 'medium',
-        score: 150
-      }))
-    },
-    hard: {
-      songId: 'song1',
-      difficulty: 'hard',
-      moves: Array(40).fill(null).map((_, index) => ({
-        id: `song1-hard-move-${index}`,
-        startTime: index * 4000,
-        duration: 2000,
-        keyPosePoints: [],
-        difficulty: 'hard',
-        score: 200
-      }))
-    },
-    extreme: {
-      songId: 'song1',
-      difficulty: 'extreme',
-      moves: Array(50).fill(null).map((_, index) => ({
-        id: `song1-extreme-move-${index}`,
-        startTime: index * 3000,
-        duration: 1500,
-        keyPosePoints: [],
-        difficulty: 'extreme',
-        score: 300
-      }))
-    }
-  }
-};
-
 class SongService {
   /**
    * Get all available songs
    */
   async getSongs(): Promise<Song[]> {
-    // In a real app, this would fetch from an API
-    return new Promise((resolve) => {
-      setTimeout(() => {
-        resolve(MOCK_SONGS);
-      }, 500);
-    });
+    try {
+      const response = await fetch('/index.json');
+      if (!response.ok) {
+        throw new Error(`Failed to fetch songs: ${response.statusText}`);
+      }
+
+      const data = await response.json();
+
+      // Transform the data to match our Song interface
+      return Object.entries(data.songs).map(([id, songData]: [string, any]) => ({
+        id,
+        ...songData,
+        // Add default difficulty levels since they're not in the JSON
+        difficulty: ['medium', 'hard']
+      }));
+    } catch (error) {
+      console.error('Error fetching songs:', error);
+      return [];
+    }
   }
 
   /**
    * Get a specific song by ID
    */
   async getSongById(id: string): Promise<Song | null> {
-    return new Promise((resolve) => {
-      setTimeout(() => {
-        const song = MOCK_SONGS.find(s => s.id === id) || null;
-        resolve(song);
-      }, 300);
-    });
+    try {
+      const songs = await this.getSongs();
+      return songs.find(song => song.id === id) || null;
+    } catch (error) {
+      console.error(`Error fetching song ${id}:`, error);
+      return null;
+    }
   }
 
   /**
    * Get choreography for a song at a specific difficulty
    */
   async getChoreography(songId: string, difficulty: DifficultyLevel): Promise<Choreography | null> {
-    return new Promise((resolve) => {
-      setTimeout(() => {
-        const songChoreographies = MOCK_CHOREOGRAPHIES[songId];
-        if (!songChoreographies) {
-          resolve(null);
-          return;
-        }
-
-        const choreography = songChoreographies[difficulty];
-        resolve(choreography || null);
-      }, 500);
-    });
+    try {
+      const song = await this.getSongById(songId);
+      if (!song) {
+        return null;
+      }
+
+      // Fetch the moves data from the URL in song.GameData.moves
+      const response = await fetch(song.GameData.moves);
+      if (!response.ok) {
+        throw new Error(`Failed to fetch choreography: ${response.statusText}`);
+      }
+
+      const choreographyData = await response.json();
+
+      // Process the choreography data based on difficulty
+      // This would need to be adjusted based on the actual format of the moves data
+      return {
+        songId,
+        difficulty,
+        moves: this.processChoreographyData(choreographyData, difficulty)
+      };
+    } catch (error) {
+      console.error(`Error fetching choreography for song ${songId}:`, error);
+      return null;
+    }
+  }
+
+  /**
+   * Process choreography data from the JSON file
+   * Note: This would need to be adjusted based on the actual data structure
+   */
+  private processChoreographyData(data: any, difficulty: DifficultyLevel): Move[] {
+    // This is a placeholder implementation
+    // You'll need to adapt this based on the actual format of your poses.json files
+    const moves: Move[] = [];
+
+    // Example implementation assuming data has a moves array
+    if (Array.isArray(data.moves)) {
+      data.moves.forEach((moveData: any, index: number) => {
+        moves.push({
+          id: `${difficulty}-move-${index}`,
+          startTime: moveData.startTime || index * 3000,
+          duration: moveData.duration || 2000,
+          keyPosePoints: moveData.keyPoints || [],
+          difficulty,
+          score: difficulty === 'easy' ? 100 :
+                 difficulty === 'medium' ? 150 :
+                 difficulty === 'hard' ? 200 : 300
+        });
+      });
+    }
+
+    return moves;
   }
 
   /**
@@ -144,12 +105,9 @@ class SongService {
     // This would use the pose detection API to analyze a video and generate choreography data
     console.log(`Generating choreography for ${videoUrl}, song ${songId}, difficulty ${difficulty}`);
 
-    // For now, just return a mock choreography
-    return new Promise((resolve) => {
-      setTimeout(() => {
-        resolve(MOCK_CHOREOGRAPHIES.song1[difficulty]);
-      }, 2000);
-    });
+    // This is a placeholder - in a real implementation,
+    // this would call an API to process the video and generate choreography
+    return null;
   }
 }
 
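Note: processChoreographyData() expects a top-level moves array with startTime/duration/keyPoints, but the generator below writes compressed frames under data with short keys (f/t/p). Until the two formats converge, a build-time bridge along these lines could adapt one to the other. This is a hypothetical sketch (the function name, fixed 3-second move windows, and first-person-only selection are all assumptions, not part of the commit):

    import json

    def poses_to_moves(poses_path: str, moves_path: str, window_s: float = 3.0) -> None:
        """Convert the generator's compressed poses.json into the flat
        {"moves": [...]} shape that processChoreographyData() can consume."""
        with open(poses_path) as f:
            poses = json.load(f)

        moves, window_start, current_keypoints = [], 0.0, []
        for frame in poses["data"]:          # compressed frames: {'f', 't', 'p'}
            if frame["t"] - window_start >= window_s:
                moves.append({
                    "startTime": int(window_start * 1000),  # the client works in ms
                    "duration": int(window_s * 1000),
                    "keyPoints": current_keypoints,
                })
                window_start, current_keypoints = frame["t"], []
            if frame["p"]:                   # keep the first tracked person's keypoints
                current_keypoints = frame["p"][0]["k"]

        with open(moves_path, "w") as f:
            json.dump({"moves": moves}, f)

The trailing partial window is dropped here; a real converter would also segment moves per difficulty rather than by a fixed window.
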
@@ -21,17 +21,23 @@ export interface PoseData {
 }
 
 // Game content types
+export interface SongGameData {
+  moves: string;
+  video: string;
+  audio: string;
+  cover: string;
+  audio_preview: string;
+}
+
 export interface Song {
   id: string;
-  title: string;
-  artist: string;
-  bpm: number;
-  duration: number;
-  coverUrl: string;
-  audioUrl: string;
-  videoUrl: string;
-  difficulty: DifficultyLevel[];
-  tags: string[];
+  Name: string;
+  Artist: string;
+  Album?: string;
+  Year?: number;
+  Genre?: string;
+  GameData: SongGameData;
+  difficulty?: DifficultyLevel[];
 }
 
 export type DifficultyLevel = 'easy' | 'medium' | 'hard' | 'extreme';

@@ -1,5 +1,6 @@
 import argparse
 import json
+import math
 import os
 import time
 import urllib.request
@@ -77,7 +78,66 @@ def download_video(url: str, output_dir: str = "downloaded_videos") -> str:
     print(f"✅ Video downloaded successfully to {output_path}")
     return output_path
 
-def normalize_landmarks(landmarks: List[Dict], window_size: int = 5, poly_order: int = 4) -> List[Dict]:
+def normalize_landmarks_per_person(people_landmarks: List[Dict], window_size: int = 5, poly_order: int = 4) -> List[Dict]:
+    """Normalize landmarks over time for each person using Savitzky-Golay filter"""
+    if not people_landmarks:
+        return people_landmarks
+
+    # Reorganize by person ID
+    person_data = {}
+    for frame_data in people_landmarks:
+        frame_num = frame_data['frame']
+        timestamp = frame_data['timestamp']
+
+        for person in frame_data['people']:
+            person_id = person['person_id']
+
+            if person_id not in person_data:
+                person_data[person_id] = {
+                    'frames': [],
+                    'timestamps': [],
+                    'landmarks': []
+                }
+
+            person_data[person_id]['frames'].append(frame_num)
+            person_data[person_id]['timestamps'].append(timestamp)
+            person_data[person_id]['landmarks'].append(person['landmarks'])
+
+    # Normalize each person's landmarks
+    for person_id, data in person_data.items():
+        if len(data['landmarks']) >= window_size:
+            data['landmarks'] = normalize_landmarks(
+                data['landmarks'],
+                window_size=window_size,
+                poly_order=poly_order
+            )
+
+    # Reconstruct the frame data structure
+    normalized_data = []
+    for frame_data in people_landmarks:
+        frame_num = frame_data['frame']
+        timestamp = frame_data['timestamp']
+        new_people = []
+
+        for person in frame_data['people']:
+            person_id = person['person_id']
+            idx = person_data[person_id]['frames'].index(frame_num)
+
+            new_people.append({
+                'person_id': person_id,
+                'bbox': person['bbox'],
+                'landmarks': person_data[person_id]['landmarks'][idx]
+            })
+
+        normalized_data.append({
+            'frame': frame_num,
+            'timestamp': timestamp,
+            'people': new_people
+        })
+
+    return normalized_data
+
+def normalize_landmarks(landmarks: List[List[Dict]], window_size: int = 5, poly_order: int = 4) -> List[List[Dict]]:
     """Normalize landmarks over time using Savitzky-Golay filter to smooth motion"""
     if not landmarks or len(landmarks) < window_size:
         return landmarks
@@ -86,6 +146,12 @@ def normalize_landmarks(landmarks: List[Dict], window_size: int = 5, poly_order:
     if window_size % 2 == 0:
         window_size += 1
 
+    # Check if all frames have the same number of landmarks
+    if not all(len(frame) == len(landmarks[0]) for frame in landmarks):
+        # If inconsistent landmark counts, use a simpler approach (frame by frame smoothing)
+        print("⚠️ Warning: Inconsistent landmark counts across frames. Using simplified smoothing.")
+        return landmarks
+
     # Extract x, y values for each landmark
     landmark_count = len(landmarks[0])
     x_values = np.zeros((len(landmarks), landmark_count))
@@ -117,40 +183,226 @@
 
     return normalized_landmarks
 
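Note: normalize_landmarks smooths each coordinate series with a Savitzky-Golay filter (the actual filter call sits between the hunks shown). Assuming it wraps scipy.signal.savgol_filter, its effect on one keypoint coordinate looks like this; the snippet is a standalone illustration, not part of the commit:

    import numpy as np
    from scipy.signal import savgol_filter

    # A jittery 1-D x-coordinate track for one keypoint across 60 frames.
    rng = np.random.default_rng(0)
    t = np.linspace(0, 2 * np.pi, 60)
    noisy_x = np.sin(t) + rng.normal(0, 0.05, t.size)

    # savgol fits a degree-`polyorder` polynomial in each odd-length window
    # (hence the force-odd adjustment above). Note that polyorder=4 over a
    # 5-point window fits the points exactly, so the script's defaults smooth
    # very little; a wider window gives visible smoothing:
    smooth_x = savgol_filter(noisy_x, window_length=11, polyorder=4)
    print(float(np.abs(smooth_x - np.sin(t)).mean()))  # below the 0.05 raw jitter
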
+def calculate_iou(box1, box2):
+    """Calculate IoU (Intersection over Union) between two bounding boxes"""
+    # Extract coordinates
+    x1_1, y1_1, x2_1, y2_1 = box1
+    x1_2, y1_2, x2_2, y2_2 = box2
+
+    # Calculate intersection area
+    x_left = max(x1_1, x1_2)
+    y_top = max(y1_1, y1_2)
+    x_right = min(x2_1, x2_2)
+    y_bottom = min(y2_1, y2_2)
+
+    if x_right < x_left or y_bottom < y_top:
+        return 0.0
+
+    intersection_area = (x_right - x_left) * (y_bottom - y_top)
+
+    # Calculate union area
+    box1_area = (x2_1 - x1_1) * (y2_1 - y1_1)
+    box2_area = (x2_2 - x1_2) * (y2_2 - y1_2)
+    union_area = box1_area + box2_area - intersection_area
+
+    return intersection_area / union_area if union_area > 0 else 0
+
+def calculate_keypoint_distance(landmarks1, landmarks2):
+    """Calculate average distance between corresponding keypoints"""
+    if not landmarks1 or not landmarks2:
+        return float('inf')
+
+    # Create dictionary for fast lookup
+    kps1 = {lm['idx']: (lm['x'], lm['y']) for lm in landmarks1}
+    kps2 = {lm['idx']: (lm['x'], lm['y']) for lm in landmarks2}
+
+    # Find common keypoints
+    common_idx = set(kps1.keys()) & set(kps2.keys())
+    if not common_idx:
+        return float('inf')
+
+    # Calculate distance between corresponding keypoints
+    total_dist = 0
+    for idx in common_idx:
+        x1, y1 = kps1[idx]
+        x2, y2 = kps2[idx]
+        dist = math.sqrt((x1 - x2)**2 + (y1 - y2)**2)
+        total_dist += dist
+
+    return total_dist / len(common_idx)
+
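Note: quick worked values for the two metrics above (standalone check, not part of the commit):

    # Two 10x10 boxes offset by (5, 5): intersection 25, union 175.
    print(calculate_iou((0, 0, 10, 10), (5, 5, 15, 15)))  # 0.142857...

    # One shared keypoint 0.05 apart in normalized coordinates.
    a = [{'idx': 0, 'x': 0.50, 'y': 0.50}]
    b = [{'idx': 0, 'x': 0.53, 'y': 0.54}]
    print(calculate_keypoint_distance(a, b))  # 0.05 (sqrt(0.03**2 + 0.04**2))
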
+def assign_person_ids(current_people, previous_people, iou_threshold=0.3, distance_threshold=0.2):
+    """Assign stable IDs to people across frames based on IOU and keypoint distance"""
+    if not previous_people:
+        # First frame, assign new IDs to everyone
+        next_id = 0
+        for person in current_people:
+            person['person_id'] = next_id
+            next_id += 1
+        return current_people
+
+    # Create copy of current people to modify
+    assigned_people = []
+    unassigned_current = current_people.copy()
+
+    # Try to match current detections with previous ones
+    matched_prev_ids = set()
+
+    # Sort previous people by ID to maintain consistency in matching
+    sorted_prev = sorted(previous_people, key=lambda x: x['person_id'])
+
+    for prev_person in sorted_prev:
+        prev_id = prev_person['person_id']
+        prev_box = prev_person['bbox']
+        prev_landmarks = prev_person['landmarks']
+
+        best_match = None
+        best_score = float('inf')  # Lower is better for distance
+
+        for curr_person in unassigned_current:
+            curr_box = curr_person['bbox']
+            curr_landmarks = curr_person['landmarks']
+
+            # Calculate IoU between bounding boxes
+            iou = calculate_iou(prev_box, curr_box)
+
+            # Calculate keypoint distance
+            kp_dist = calculate_keypoint_distance(prev_landmarks, curr_landmarks)
+
+            # Combined score (lower is better)
+            score = kp_dist * (1.5 - iou)  # Favor high IoU and low distance
+
+            if (iou >= iou_threshold or kp_dist <= distance_threshold) and score < best_score:
+                best_match = curr_person
+                best_score = score
+
+        if best_match:
+            # Assign the previous ID to this person
+            best_match['person_id'] = prev_id
+            matched_prev_ids.add(prev_id)
+            assigned_people.append(best_match)
+            unassigned_current.remove(best_match)
+
+    # Find the next available ID
+    next_id = 0
+    existing_ids = {p['person_id'] for p in previous_people}
+    while next_id in existing_ids:
+        next_id += 1
+
+    # Assign new IDs to unmatched current detections
+    for person in unassigned_current:
+        person['person_id'] = next_id
+        assigned_people.append(person)
+        next_id += 1
+
+    return assigned_people
+
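Note: the matcher is greedy: previous tracks (in id order) each claim the unassigned detection with the lowest kp_dist * (1.5 - iou), gated by either threshold, and leftovers get fresh ids. A hypothetical two-frame run (standalone, not part of the commit):

    # The dancer who moved slightly keeps id 0; the new detection gets id 1.
    frame1 = [{'bbox': [100, 50, 220, 400], 'landmarks': [{'idx': 0, 'x': 0.25, 'y': 0.20}]}]
    frame1 = assign_person_ids(frame1, [])        # -> person_id 0

    frame2 = [
        {'bbox': [110, 55, 230, 405], 'landmarks': [{'idx': 0, 'x': 0.26, 'y': 0.21}]},  # same dancer
        {'bbox': [400, 60, 520, 410], 'landmarks': [{'idx': 0, 'x': 0.75, 'y': 0.20}]},  # new dancer
    ]
    frame2 = assign_person_ids(frame2, frame1)    # -> person_ids 0 and 1
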
+def compress_pose_data(all_frame_data, frame_sampling=1, precision=3):
+    """Compress pose data to reduce JSON file size by reducing precision and sampling frames"""
+    compressed_data = []
+
+    # Process only every nth frame based on sampling rate
+    for i, frame_data in enumerate(all_frame_data):
+        if i % frame_sampling != 0:
+            continue
+
+        # Compress frame data
+        compressed_frame = {
+            'f': frame_data['frame'],  # Short key name
+            't': round(frame_data['timestamp'], 2),  # Reduce timestamp precision
+            'p': []  # Short key for people
+        }
+
+        # Process each person
+        for person in frame_data['people']:
+            # Only keep essential bbox info (we only need width/height for visualization)
+            x1, y1, x2, y2 = person['bbox']
+            width = x2 - x1
+            height = y2 - y1
+
+            compressed_person = {
+                'id': person['person_id'],  # Keep ID as is
+                'b': [round(x1, 1), round(y1, 1), round(width, 1), round(height, 1)],  # Simplified bbox with less precision
+                'k': []  # Short key for keypoints/landmarks
+            }
+
+            # Process each landmark with reduced precision
+            for lm in person['landmarks']:
+                compressed_person['k'].append([
+                    lm['idx'],  # Keep index as is (small integer)
+                    round(lm['x'], precision),  # Reduce coordinate precision
+                    round(lm['y'], precision),  # Reduce coordinate precision
+                    round(lm['confidence'], 2)  # Reduce confidence precision
+                ])
+
+            compressed_frame['p'].append(compressed_person)
+
+        compressed_data.append(compressed_frame)
+
+    return compressed_data
+
 def process_frame(frame: np.ndarray, model, detection_threshold: float = 0.5, show_preview: bool = False):
-    """Process a single frame with YOLOv11-pose"""
+    """Process a single frame with YOLOv11-pose, handling multiple people"""
     # Process with YOLO
     try:
         results = model.predict(frame, verbose=False, conf=detection_threshold)
 
         # Extract keypoints if available
-        landmarks_data = None
         processed_frame = None
+        people_data = []
 
         # Get frame dimensions
         h, w = frame.shape[:2]
 
         if results and len(results[0].keypoints.data) > 0:
-            # Get keypoints from the first detection
-            keypoints = results[0].keypoints.data[0]  # [17, 3] - (x, y, confidence)
-
-            # Extract keypoints to landmarks_data
-            landmarks_data = []
-            for idx, kp in enumerate(keypoints):
-                x, y, conf = kp.tolist()
-                if conf >= detection_threshold:
-                    landmarks_data.append({
-                        'idx': idx,
-                        'x': x / w,  # Normalize to 0-1 range
-                        'y': y / h,  # Normalize to 0-1 range
-                        'confidence': conf
-                    })
+            # Get all keypoints and bounding boxes
+            keypoints = results[0].keypoints.data  # [num_people, 17, 3] - (x, y, confidence)
+            boxes = results[0].boxes.xyxy.cpu()  # [num_people, 4] - (x1, y1, x2, y2)
+
+            for i, (kps, box) in enumerate(zip(keypoints, boxes)):
+                # Extract keypoints to landmarks_data
+                landmarks_data = []
+                for idx, kp in enumerate(kps):
+                    x, y, conf = kp.tolist()
+                    if conf >= detection_threshold:
+                        landmarks_data.append({
+                            'idx': idx,
+                            'x': round(x / w, 4),  # Normalize to 0-1 range with 4 decimal precision
+                            'y': round(y / h, 4),  # Normalize to 0-1 range with 4 decimal precision
+                            'confidence': round(conf, 2)  # Reduce confidence to 2 decimal places
+                        })
+
+                if landmarks_data:  # Only add if we have valid landmarks
+                    # Add bounding box and landmarks for this person
+                    people_data.append({
+                        'bbox': box.tolist(),  # Store unnormalized for IoU calculation
+                        'landmarks': landmarks_data  # Store normalized for consistency
+                    })
 
         # Create visualization if preview is enabled
         if show_preview:
             processed_frame = results[0].plot()
 
-        return processed_frame, landmarks_data
+            # Add person IDs to the visualization if they're already assigned
+            for person in people_data:
+                if 'person_id' in person:
+                    # Get center of bounding box
+                    x1, y1, x2, y2 = person['bbox']
+                    center_x = int((x1 + x2) / 2)
+                    center_y = int(y1)  # Top of the bbox
+
+                    # Draw ID text
+                    cv2.putText(
+                        processed_frame,
+                        f"ID: {person['person_id']}",
+                        (center_x, center_y - 10),
+                        cv2.FONT_HERSHEY_SIMPLEX,
+                        0.8,
+                        (0, 255, 255),
+                        2
+                    )
+
+        return processed_frame, people_data
 
     except RuntimeError as e:
         # Check if this is an NMS backend error
@@ -170,7 +422,9 @@ def run_pose_detection(
     model_size='n',
     device='auto',
     show_preview=True,
-    batch_size=1
+    batch_size=1,
+    frame_sampling=1,  # New parameter to control frame sampling rate
+    precision=3  # New parameter to control coordinate precision
 ):
     """YOLOv11 pose detection with CUDA acceleration, properly handling NMS issues"""
     start_time = time.time()
@@ -240,12 +494,13 @@
         window_name = "YOLOv11 Pose"
         cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
 
-    # Initialize variables for batch processing
-    all_landmarks = []
+    # Initialize variables for tracking
+    all_frame_data = []
     processed_frames = 0
-    frames_buffer = []
+    last_people_data = []
     last_fps_update = time.time()
     current_fps = 0
+    total_people_detected = 0
 
     # Main processing loop
     print("⏳ Processing frames...")
@@ -256,18 +511,23 @@
 
         try:
             # Process the frame
-            processed_frame, landmarks_data = process_frame(
+            processed_frame, people_data = process_frame(
                 frame, model, detection_threshold, show_preview
             )
 
-            # Store landmark data with timestamp
-            if landmarks_data:
+            # Assign stable person IDs
+            if people_data:
+                people_data = assign_person_ids(people_data, last_people_data)
+                last_people_data = people_data.copy()
+
+                # Store frame data with people
                 frame_data = {
                     'frame': processed_frames,
                     'timestamp': processed_frames / fps if fps > 0 else time.time() - start_time,
-                    'landmarks': landmarks_data
+                    'people': people_data
                 }
-                all_landmarks.append(frame_data)
+                all_frame_data.append(frame_data)
+                total_people_detected += len(people_data)
 
         except RuntimeError as e:
             if str(e) == "CUDA NMS Error":
@@ -295,7 +555,7 @@
                 # Show CUDA status
                 cv2.putText(
                     processed_frame,
-                    f"Device: {model.device} (Full GPU processing)",
+                    f"Device: {model.device} | People: {len(people_data) if people_data else 0}",
                     (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2
                 )
 
@@ -320,50 +580,62 @@
 
     print(f"⏱️ Processed {processed_frames} frames in {elapsed_time:.2f}s ({effective_fps:.2f} fps)")
 
-    if all_landmarks:
-        print(f"🧮 Detected poses in {len(all_landmarks)} frames ({(len(all_landmarks)/max(1, processed_frames))*100:.1f}%)")
+    if all_frame_data:
+        unique_people = set()
+        for frame in all_frame_data:
+            for person in frame['people']:
+                unique_people.add(person['person_id'])
+
+        print(f"🧮 Detected {len(all_frame_data)} frames with poses ({len(all_frame_data)/max(1, processed_frames)*100:.1f}%)")
+        print(f"👥 Detected {len(unique_people)} unique people with {total_people_detected} total detections")
     else:
         print(f"⚠️ No poses detected. Try adjusting detection threshold or check the video content.")
 
     # Save results if output file is specified
-    if output_file and all_landmarks:
+    if output_file and all_frame_data:
        output_dir = os.path.dirname(output_file)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
 
        # Apply normalization if requested
-       if normalize and len(all_landmarks) > filter_window_size:
-           print(f"🔄 Normalizing data...")
-           landmarks_only = [frame_data['landmarks'] for frame_data in all_landmarks]
-           normalized_landmarks = normalize_landmarks(
-               landmarks_only,
+       if normalize and len(all_frame_data) > filter_window_size:
+           print(f"🔄 Normalizing data for each person...")
+           all_frame_data = normalize_landmarks_per_person(
+               all_frame_data,
                window_size=filter_window_size,
                poly_order=filter_poly_order
            )
 
-           # Put normalized landmarks back
-           for i, frame_data in enumerate(all_landmarks):
-               if i < len(normalized_landmarks):
-                   all_landmarks[i]['landmarks'] = normalized_landmarks[i]
+       # Compress data to reduce file size
+       print(f"🗜️ Compressing data (frame sampling: {frame_sampling}, precision: {precision})...")
+       compressed_frames = compress_pose_data(all_frame_data, frame_sampling, precision)
+       actual_frames_saved = len(compressed_frames)
 
-       # Create output in compatible format
+       # Calculate compression ratio
+       original_frame_count = len(all_frame_data)
+       compression_ratio = (original_frame_count - actual_frames_saved) / original_frame_count * 100
+       print(f"📊 Compression: {original_frame_count} frames reduced to {actual_frames_saved} ({compression_ratio:.1f}% reduction)")
 
+       # Create output in compatible format with compressed frames
        json_data = {
-           'source': source_name,
-           'frame_width': frame_width,
-           'frame_height': frame_height,
+           'src': source_name,  # Shortened key
+           'w': frame_width,  # Shortened key
+           'h': frame_height,  # Shortened key
            'fps': fps,
-           'total_frames': processed_frames,
-           'keypoint_names': KEYPOINT_NAMES,
-           'connections': [{'start': c[0], 'end': c[1]} for c in POSE_CONNECTIONS],
-           'frames': all_landmarks,
-           'metadata': {
+           'frames': processed_frames,
+           'keypoints': KEYPOINT_NAMES,  # More descriptive key
+           'connections': [{'s': c[0], 'e': c[1]} for c in POSE_CONNECTIONS],  # Shortened keys
+           'data': compressed_frames,  # Use compressed data
+           'meta': {  # Shortened key
                'model': f"YOLOv11-{model_size}-pose",
                'device': str(model.device),
                'normalized': normalize,
-               'detection_threshold': detection_threshold,
-               'filter_window_size': filter_window_size if normalize else None,
-               'filter_poly_order': filter_poly_order if normalize else None,
-               'created_at': time.strftime('%Y-%m-%d %H:%M:%S')
+               'threshold': detection_threshold,
+               'filter_size': filter_window_size if normalize else None,
+               'filter_order': filter_poly_order if normalize else None,
+               'frame_sampling': frame_sampling,
+               'precision': precision,
+               'created': time.strftime('%Y-%m-%d %H:%M:%S')
            }
        }
 
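Note: a minimal reader for the reshaped output (standalone sketch; "poses.json" stands in for whatever path was passed as the output file):

    import json

    with open("poses.json") as f:  # path is an assumption
        doc = json.load(f)

    print(doc["src"], f'{doc["w"]}x{doc["h"]}', doc["fps"], "fps", doc["meta"]["model"])
    for frame in doc["data"][:3]:             # 'f' = frame index, 't' = timestamp in s
        for person in frame["p"]:             # 'id' = track id, 'b' = [x, y, w, h] bbox
            for idx, x, y, conf in person["k"]:   # keypoint index, normalized x/y, confidence
                print(frame["f"], person["id"], doc["keypoints"][idx], x, y, conf)
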
@@ -371,7 +643,8 @@
         with open(output_file, 'w') as f:
             json.dump(json_data, f)
 
-        print(f"💾 Saved tracking data to {output_file}")
+        file_size_mb = os.path.getsize(output_file) / (1024 * 1024)
+        print(f"💾 Saved tracking data to {output_file} ({file_size_mb:.2f} MB)")
     elif output_file:
         print(f"⚠️ No pose data to save. Output file was not created.")
 
@@ -383,7 +656,7 @@
     # Restore original NMS function
     torchvision.ops.nms = original_nms
 
-    return all_landmarks
+    return all_frame_data
 
 def main():
     # Set up simple argument parser
@@ -411,8 +684,12 @@
                         help='Window size for smoothing filter (must be odd, larger = smoother)')
     parser.add_argument('--filter-order', type=int, default=4,
                         help='Polynomial order for smoothing filter (1-4)')
-    parser.add_argument('--batch-size', type=int, default=1,
+    parser.add_argument('--batch-size', type=int, default=4,
                         help='Batch size for processing (higher uses more VRAM but can be faster)')
+    parser.add_argument('--frame-sampling', type=int, default=2,
+                        help='Save only every Nth frame (1=all frames, 2=half, 4=quarter, etc.)')
+    parser.add_argument('--precision', type=int, default=3, choices=[2, 3, 4],
+                        help='Decimal precision for coordinates (2-4, lower=smaller file)')
 
     args = parser.parse_args()
 
@@ -430,6 +707,8 @@
     print(f"• Device: {args.device}")
     print(f"• Preview: {'Disabled' if args.no_preview else 'Enabled'}")
     print(f"• Normalization: {'Disabled' if args.no_normalize else 'Enabled'}")
+    print(f"• Frame sampling: Every {args.frame_sampling} frame(s)")
+    print(f"• Coordinate precision: {args.precision} decimal places")
     print("="*50 + "\n")
 
     # Run pose detection
@@ -444,7 +723,9 @@ def main():
             model_size=args.model,
             device=args.device,
             show_preview=not args.no_preview,
-            batch_size=args.batch_size
+            batch_size=args.batch_size,
+            frame_sampling=args.frame_sampling,
+            precision=args.precision
         )
     except KeyboardInterrupt:
         print("\n⏹️ Process interrupted by user")
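The compression pass that consumes frame_sampling and precision is outside this excerpt; the sketch below shows the intended effect under that assumption, using the per-person record shape ('p', 'k') that pose_viewer.py reads back. compress_frames is a hypothetical name, not the commit's actual helper, and it mutates records in place for brevity.

    # Hypothetical sketch of what frame sampling + coordinate rounding do.
    def compress_frames(all_frame_data, frame_sampling=2, precision=3):
        compressed = []
        for i, frame in enumerate(all_frame_data):
            if i % frame_sampling != 0:  # keep only every Nth frame
                continue
            for person in frame['p']:
                # Round normalized x, y and the confidence to the requested precision
                person['k'] = [[k[0],
                                round(k[1], precision),
                                round(k[2], precision),
                                round(k[3], precision)] for k in person['k']]
            compressed.append(frame)
        return compressed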
436
pose_viewer.py
Normal file
@@ -0,0 +1,436 @@
import argparse
import json
import os
import sys
import time
import urllib.request
from pathlib import Path

import cv2
import numpy as np
import pygame
from pygame.locals import *

# Define colors
BLACK = (0, 0, 0)
WHITE = (255, 255, 255)
RED = (255, 0, 0)
GREEN = (0, 255, 0)
BLUE = (0, 0, 255)
YELLOW = (255, 255, 0)
CYAN = (0, 255, 255)
MAGENTA = (255, 0, 255)

# Define keypoint colors (custom palette)
KEYPOINT_COLORS = [
    (255, 0, 0),    # nose (red)
    (255, 85, 0),   # left_eye (orange-red)
    (255, 170, 0),  # right_eye (orange)
    (255, 255, 0),  # left_ear (yellow)
    (170, 255, 0),  # right_ear (yellow-green)
    (85, 255, 0),   # left_shoulder (green-yellow)
    (0, 255, 0),    # right_shoulder (green)
    (0, 255, 85),   # left_elbow (green-cyan)
    (0, 255, 170),  # right_elbow (cyan-green)
    (0, 255, 255),  # left_wrist (cyan)
    (0, 170, 255),  # right_wrist (cyan-blue)
    (0, 85, 255),   # left_hip (blue-cyan)
    (0, 0, 255),    # right_hip (blue)
    (85, 0, 255),   # left_knee (blue-purple)
    (170, 0, 255),  # right_knee (purple-blue)
    (255, 0, 255),  # left_ankle (magenta)
    (255, 0, 170)   # right_ankle (magenta-pink)
]

# Person ID colors
PERSON_COLORS = [
    (255, 0, 0),      # red
    (0, 255, 0),      # green
    (0, 0, 255),      # blue
    (255, 255, 0),    # yellow
    (255, 0, 255),    # magenta
    (0, 255, 255),    # cyan
    (255, 128, 0),    # orange
    (128, 0, 255),    # purple
    (0, 255, 128),    # mint
    (255, 255, 255)   # white
]

def download_video(url, output_dir="downloaded_videos"):
    """Download a video from a URL and return the local file path"""
    os.makedirs(output_dir, exist_ok=True)
    video_name = os.path.basename(url).split("?")[0]
    if not video_name or "." not in video_name:
        video_name = f"video_{int(time.time())}.mp4"

    output_path = os.path.join(output_dir, video_name)
    if os.path.exists(output_path):
        print(f"✅ Video already downloaded: {output_path}")
        return output_path

    print(f"⬇️ Downloading video from {url} to {output_path}...")
    urllib.request.urlretrieve(url, output_path)
    print(f"✅ Video downloaded successfully to {output_path}")
    return output_path

def load_pose_data(json_file):
    """Load pose data from a JSON file"""
    print(f"📂 Loading pose data from {json_file}...")
    with open(json_file, 'r') as f:
        data = json.load(f)

    # Extract metadata
    width = data.get('w', 1280)
    height = data.get('h', 720)
    fps = data.get('fps', 30)
    total_frames = data.get('frames', 0)

    # Get frame sampling and precision from metadata if available
    metadata = data.get('meta', {})
    frame_sampling = metadata.get('frame_sampling', 1)
    precision = metadata.get('precision', 3)

    # Extract connections
    connections = []
    for conn in data.get('connections', []):
        start = conn.get('s', 0)
        end = conn.get('e', 0)
        connections.append((start, end))

    # Extract keypoint names
    keypoint_names = data.get('keypoints', [])

    # Extract frame data
    frames = data.get('data', [])

    print(f"✅ Loaded {len(frames)} frames of pose data")
    print(f"📊 Video: {width}x{height}@{fps}fps, {total_frames} total frames")
    print(f"🔍 Frame sampling: {frame_sampling}, Precision: {precision}")

    return {
        'width': width,
        'height': height,
        'fps': fps,
        'total_frames': total_frames,
        'frame_sampling': frame_sampling,
        'precision': precision,
        'connections': connections,
        'keypoint_names': keypoint_names,
        'frames': frames
    }

def create_pygame_window(width, height, title="Pose Viewer"):
    """Create a PyGame window"""
    pygame.init()
    window = pygame.display.set_mode((width, height))
    pygame.display.set_caption(title)
    return window

def draw_pose(frame, pose_data, frame_idx, original_width, original_height, prev_frame_idx=None):
    """Draw pose data on a given frame"""
    # Clone the frame to avoid modifying the original
    pose_frame = frame.copy()

    # Find the closest pose frame to the current video frame (should be first and only in temp_pose_data)
    if pose_data['frames']:
        closest_frame = pose_data['frames'][0]

        connections = pose_data['connections']

        # Draw each person
        for person_idx, person in enumerate(closest_frame['p']):
            person_id = person['id']
            person_color = PERSON_COLORS[person_id % len(PERSON_COLORS)]

            # Get keypoints
            keypoints = person['k']

            # Create a dictionary to store keypoints by index
            kp_dict = {}
            for kp in keypoints:
                kp_dict[kp[0]] = (
                    int(kp[1] * original_width),
                    int(kp[2] * original_height),
                    kp[3]
                )

            # Draw connections
            for conn in connections:
                if conn[0] in kp_dict and conn[1] in kp_dict:
                    start_point = kp_dict[conn[0]][:2]
                    end_point = kp_dict[conn[1]][:2]

                    # Use average confidence to determine line thickness
                    avg_conf = (kp_dict[conn[0]][2] + kp_dict[conn[1]][2]) / 2
                    thickness = int(avg_conf * 3) + 1

                    cv2.line(pose_frame, start_point, end_point, person_color, thickness)

            # Draw keypoints
            for kp_idx, (x, y, conf) in kp_dict.items():
                # Circle size based on confidence
                radius = int(conf * 5) + 2
                cv2.circle(pose_frame, (x, y), radius, KEYPOINT_COLORS[kp_idx % len(KEYPOINT_COLORS)], -1)

            # Draw person ID
            bbox = person['b']
            x, y = int(bbox[0]), int(bbox[1])
            cv2.putText(
                pose_frame,
                f"ID: {person_id}",
                (x, y - 10),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.7,
                person_color,
                2
            )

    return pose_frame

def draw_ui_controls(surface, width, height, playing, current_frame, total_frames):
    """Draw UI controls on the PyGame surface"""
    # Background for controls
    control_height = 50
    control_surface = pygame.Surface((width, control_height))
    control_surface.fill(BLACK)

    # Draw play/pause button
    button_width = 80
    button_height = 30
    button_x = 20
    button_y = (control_height - button_height) // 2

    pygame.draw.rect(control_surface, BLUE, (button_x, button_y, button_width, button_height))

    font = pygame.font.SysFont(None, 24)
    text = font.render("Pause" if playing else "Play", True, WHITE)
    text_rect = text.get_rect(center=(button_x + button_width//2, button_y + button_height//2))
    control_surface.blit(text, text_rect)

    # Draw stop button
    stop_button_x = button_x + button_width + 20
    pygame.draw.rect(control_surface, RED, (stop_button_x, button_y, button_width, button_height))

    stop_text = font.render("Stop", True, WHITE)
    stop_text_rect = stop_text.get_rect(center=(stop_button_x + button_width//2, button_y + button_height//2))
    control_surface.blit(stop_text, stop_text_rect)

    # Draw seek bar
    seekbar_x = stop_button_x + button_width + 40
    seekbar_y = button_y + button_height // 2
    seekbar_width = width - seekbar_x - 40
    seekbar_height = 10

    # Background bar
    pygame.draw.rect(control_surface, (100, 100, 100),
                     (seekbar_x, seekbar_y - seekbar_height//2, seekbar_width, seekbar_height))

    # Progress bar
    progress = current_frame / total_frames if total_frames > 0 else 0
    progress_width = int(seekbar_width * progress)
    pygame.draw.rect(control_surface, GREEN,
                     (seekbar_x, seekbar_y - seekbar_height//2, progress_width, seekbar_height))

    # Display current time / total time
    time_text = font.render(f"Frame: {current_frame} / {total_frames}", True, WHITE)
    time_rect = time_text.get_rect(center=(seekbar_x + seekbar_width//2, seekbar_y - 20))
    control_surface.blit(time_text, time_rect)

    # Blit the control surface to the main surface
    surface.blit(control_surface, (0, height - control_height))

    # Return button regions for click handling
    play_button_rect = pygame.Rect(button_x, height - control_height + button_y, button_width, button_height)
    stop_button_rect = pygame.Rect(stop_button_x, height - control_height + button_y, button_width, button_height)
    seekbar_rect = pygame.Rect(seekbar_x, height - control_height + seekbar_y - seekbar_height//2,
                               seekbar_width, seekbar_height)

    return play_button_rect, stop_button_rect, seekbar_rect

def run_viewer(video_path, json_path):
    """Main function to run the pose viewer"""
    # Load pose data
    pose_data = load_pose_data(json_path)

    # Open video
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"❌ Error: Could not open video {video_path}")
        return

    # Get video properties
    video_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    video_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    # Create window - now only showing single visualization
    window_width = video_width
    window_height = video_height + 50  # Additional space for controls
    window = create_pygame_window(window_width, window_height, f"Pose Viewer - {os.path.basename(video_path)}")

    # Setup clock
    clock = pygame.time.Clock()

    # State variables
    playing = False
    current_frame = 0
    prev_frame_idx = None
    last_valid_pose_frame = None

    # Create a frame lookup for fast access - maps video frame number to pose data frame
    frame_lookup = {}
    max_pose_frame = 0
    for pose_frame in pose_data['frames']:
        frame_num = pose_frame['f']
        frame_lookup[frame_num] = pose_frame
        max_pose_frame = max(max_pose_frame, frame_num)

    # Initial render
    ret, frame = cap.read()
    if not ret:
        print("❌ Error: Could not read the first frame")
        return

    # Main loop
    while True:
        # Handle events
        for event in pygame.event.get():
            if event.type == QUIT:
                pygame.quit()
                sys.exit()
            elif event.type == KEYDOWN:
                if event.key == K_ESCAPE:
                    pygame.quit()
                    sys.exit()
                elif event.key == K_SPACE:
                    playing = not playing
            elif event.type == MOUSEBUTTONDOWN:
                # Check if any buttons were clicked
                mouse_pos = pygame.mouse.get_pos()
                play_button_rect, stop_button_rect, seekbar_rect = draw_ui_controls(
                    window, window_width, window_height, playing, current_frame, total_frames
                )

                if play_button_rect.collidepoint(mouse_pos):
                    playing = not playing
                elif stop_button_rect.collidepoint(mouse_pos):
                    playing = False
                    current_frame = 0
                    cap.set(cv2.CAP_PROP_POS_FRAMES, current_frame)
                    ret, frame = cap.read()
                    prev_frame_idx = None
                    last_valid_pose_frame = None
                elif seekbar_rect.collidepoint(mouse_pos):
                    # Calculate position ratio
                    x_offset = mouse_pos[0] - seekbar_rect.x
                    ratio = x_offset / seekbar_rect.width

                    # Set frame position
                    current_frame = int(ratio * total_frames)
                    cap.set(cv2.CAP_PROP_POS_FRAMES, current_frame)
                    ret, frame = cap.read()
                    prev_frame_idx = None  # Reset previous frame index after seeking

        # Handle playback
        if playing:
            ret, frame = cap.read()
            if not ret:
                # End of video, loop back to start
                playing = False
                current_frame = 0
                cap.set(cv2.CAP_PROP_POS_FRAMES, current_frame)
                ret, frame = cap.read()
                prev_frame_idx = None
                last_valid_pose_frame = None
                if not ret:
                    break
            current_frame += 1

        # Find the appropriate pose frame for the current video frame
        frame_sampling = pose_data['frame_sampling']

        # Try to find the exact frame in lookup
        pose_frame = frame_lookup.get(current_frame)

        # If not found, find the closest previous frame based on sampling
        if not pose_frame:
            # Calculate what the nearest pose frame should be
            # This searches for the most recent pose frame
            nearest_frame = current_frame
            while nearest_frame > 0 and nearest_frame not in frame_lookup:
                nearest_frame -= 1

            if nearest_frame in frame_lookup:
                pose_frame = frame_lookup[nearest_frame]

        # Update the last valid pose frame if we found one
        if pose_frame:
            last_valid_pose_frame = pose_frame

        # Draw pose on frame - use the most recent valid pose frame
        if last_valid_pose_frame:
            # Create a special frame dict with only the current pose for draw_pose
            temp_pose_data = pose_data.copy()
            temp_pose_data['frames'] = [last_valid_pose_frame]

            pose_frame = draw_pose(frame, temp_pose_data, current_frame, video_width, video_height)
        else:
            # If no pose data found yet, just show the original frame
            pose_frame = frame.copy()

        prev_frame_idx = current_frame

        # Convert frame from BGR to RGB for PyGame
        rgb_pose_frame = cv2.cvtColor(pose_frame, cv2.COLOR_BGR2RGB)
        pygame_pose_frame = pygame.surfarray.make_surface(rgb_pose_frame.swapaxes(0, 1))

        # Draw frame
        window.blit(pygame_pose_frame, (0, 0))

        # Draw UI controls
        play_button_rect, stop_button_rect, seekbar_rect = draw_ui_controls(
            window, window_width, window_height, playing, current_frame, total_frames
        )

        # Draw metadata
        font = pygame.font.SysFont(None, 20)
        metadata_text = f"Frame Sampling: {pose_data['frame_sampling']}, Precision: {pose_data['precision']}"
        metadata_surface = font.render(metadata_text, True, WHITE)
        window.blit(metadata_surface, (10, 10))

        # Update display
        pygame.display.flip()

        # Cap framerate
        clock.tick(fps)

    # Clean up
    cap.release()
    pygame.quit()

def main():
    parser = argparse.ArgumentParser(description='Pose Viewer for JSON pose data with video')
    parser.add_argument('--video', '-v', required=True, help='Video file path or URL')
    parser.add_argument('--json', '-j', required=True, help='JSON pose data file path')

    args = parser.parse_args()

    # Handle URL input for video
    video_path = args.video
    if video_path.startswith('http://') or video_path.startswith('https://'):
        video_path = download_video(video_path)

    if not os.path.exists(video_path):
        print(f"❌ Error: Video file not found: {video_path}")
        return

    if not os.path.exists(args.json):
        print(f"❌ Error: JSON file not found: {args.json}")
        return

    # Run the viewer
    run_viewer(video_path, args.json)

if __name__ == "__main__":
    main()
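A typical run against the data produced above (the URL and output file name are illustrative):

    python pose_viewer.py --video https://example.com/Butter_ULTRA.webm --json poses.json

URLs are fetched once into downloaded_videos/ and reused on later runs. Besides the on-screen Play/Pause, Stop, and seek controls, Space toggles playback and Esc quits. When playback lands on a frame that --frame-sampling skipped, the viewer falls back to the most recent stored pose frame, so the skeleton stays on screen between samples.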
@@ -6,4 +6,5 @@ flask-cors>=3.0.10
 numpy>=1.19.0
 scipy>=1.7.0
 pillow>=9.0.0
 mediapipe>=0.8.9
+pygame>=2.0.0
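pygame>=2.0.0 is the only new runtime dependency; a plain pip install -r requirements.txt should pull it in (pygame assumes a working SDL install on the host).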