"""Interactive viewer that overlays JSON pose data on a video (OpenCV + PyGame)."""

import argparse
import bisect
import json
import os
import sys
import time
import urllib.request
from pathlib import Path

import cv2
import numpy as np
import pygame
from pygame.locals import *

# Define colors (RGB order, as expected by PyGame drawing calls)
BLACK = (0, 0, 0)
WHITE = (255, 255, 255)
RED = (255, 0, 0)
GREEN = (0, 255, 0)
BLUE = (0, 0, 255)
YELLOW = (255, 255, 0)
CYAN = (0, 255, 255)
MAGENTA = (255, 0, 255)

# Define keypoint colors (custom palette, RGB order)
KEYPOINT_COLORS = [
    (255, 0, 0),    # nose (red)
    (255, 85, 0),   # left_eye (orange-red)
    (255, 170, 0),  # right_eye (orange)
    (255, 255, 0),  # left_ear (yellow)
    (170, 255, 0),  # right_ear (yellow-green)
    (85, 255, 0),   # left_shoulder (green-yellow)
    (0, 255, 0),    # right_shoulder (green)
    (0, 255, 85),   # left_elbow (green-cyan)
    (0, 255, 170),  # right_elbow (cyan-green)
    (0, 255, 255),  # left_wrist (cyan)
    (0, 170, 255),  # right_wrist (cyan-blue)
    (0, 85, 255),   # left_hip (blue-cyan)
    (0, 0, 255),    # right_hip (blue)
    (85, 0, 255),   # left_knee (blue-purple)
    (170, 0, 255),  # right_knee (purple-blue)
    (255, 0, 255),  # left_ankle (magenta)
    (255, 0, 170)   # right_ankle (magenta-pink)
]

# Person ID colors (RGB order)
PERSON_COLORS = [
    (255, 0, 0),      # red
    (0, 255, 0),      # green
    (0, 0, 255),      # blue
    (255, 255, 0),    # yellow
    (255, 0, 255),    # magenta
    (0, 255, 255),    # cyan
    (255, 128, 0),    # orange
    (128, 0, 255),    # purple
    (0, 255, 128),    # mint
    (255, 255, 255)   # white
]


def download_video(url, output_dir="downloaded_videos"):
    """Download a video from a URL and return the local file path.

    The file name is taken from the last path component of ``url`` (query
    string stripped); if that is empty or has no extension, a timestamped
    ``video_<epoch>.mp4`` name is used. An already existing target file is
    reused without downloading again.

    Args:
        url: HTTP(S) URL of the video.
        output_dir: Directory the video is stored in (created if missing).

    Returns:
        Path of the local video file.

    Raises:
        Whatever ``urllib.request.urlretrieve`` raises on a failed transfer.
    """
    os.makedirs(output_dir, exist_ok=True)

    video_name = os.path.basename(url).split("?")[0]
    if not video_name or "." not in video_name:
        video_name = f"video_{int(time.time())}.mp4"

    output_path = os.path.join(output_dir, video_name)

    if os.path.exists(output_path):
        print(f"✅ Video already downloaded: {output_path}")
        return output_path

    print(f"⬇️ Downloading video from {url} to {output_path}...")
    # Download to a side file and rename on success, so an interrupted
    # transfer is never mistaken for a finished download on the next run.
    partial_path = output_path + ".part"
    try:
        urllib.request.urlretrieve(url, partial_path)
        os.replace(partial_path, output_path)
    finally:
        # Only still present when the download or the rename failed.
        if os.path.exists(partial_path):
            os.remove(partial_path)

    print(f"✅ Video downloaded successfully to {output_path}")
    return output_path


def load_pose_data(json_file):
    """Load pose data from a JSON file.

    Reads the keys ``w``, ``h``, ``fps``, ``frames``, ``meta``
    (``frame_sampling``, ``precision``), ``connections`` (items with ``s`` and
    ``e``), ``keypoints`` and ``data``; missing keys fall back to defaults.

    Args:
        json_file: Path of the JSON pose file.

    Returns:
        A dict with the keys ``width``, ``height``, ``fps``, ``total_frames``,
        ``frame_sampling``, ``precision``, ``connections`` (list of
        ``(start, end)`` tuples), ``keypoint_names`` and ``frames``.
    """
    print(f"📂 Loading pose data from {json_file}...")

    with open(json_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Extract metadata
    width = data.get('w', 1280)
    height = data.get('h', 720)
    fps = data.get('fps', 30)
    total_frames = data.get('frames', 0)

    # Get frame sampling and precision from metadata if available
    metadata = data.get('meta', {})
    frame_sampling = metadata.get('frame_sampling', 1)
    precision = metadata.get('precision', 3)

    # Extract connections as (start keypoint index, end keypoint index) pairs
    connections = [
        (conn.get('s', 0), conn.get('e', 0))
        for conn in data.get('connections', [])
    ]

    # Extract keypoint names
    keypoint_names = data.get('keypoints', [])

    # Extract frame data
    frames = data.get('data', [])

    print(f"✅ Loaded {len(frames)} frames of pose data")
    print(f"📊 Video: {width}x{height}@{fps}fps, {total_frames} total frames")
    print(f"🔍 Frame sampling: {frame_sampling}, Precision: {precision}")

    return {
        'width': width,
        'height': height,
        'fps': fps,
        'total_frames': total_frames,
        'frame_sampling': frame_sampling,
        'precision': precision,
        'connections': connections,
        'keypoint_names': keypoint_names,
        'frames': frames
    }


def create_pygame_window(width, height, title="Pose Viewer"):
    """Initialise PyGame and return a display surface of the given size."""
    pygame.init()
    window = pygame.display.set_mode((width, height))
    pygame.display.set_caption(title)
    return window


def _rgb_to_bgr(color):
    """Return an RGB colour tuple reordered to OpenCV's BGR channel order."""
    return color[::-1]


def draw_pose(frame, pose_data, frame_idx, original_width, original_height, prev_frame_idx=None):
    """Draw pose data on a copy of the given frame.

    Only the first entry of ``pose_data['frames']`` is drawn; the caller is
    expected to pass a pose dict narrowed to the frame it wants rendered.

    Args:
        frame: BGR image as returned by ``cv2.VideoCapture.read``.
        pose_data: Dict with ``frames`` and ``connections``. Each frame holds
            people under ``p``; each person has ``id``, ``k`` (keypoints as
            ``[index, x, y, confidence]``) and ``b`` (bounding box).
        frame_idx: Unused; kept for interface compatibility.
        original_width: Width the keypoint x values are multiplied by.
        original_height: Height the keypoint y values are multiplied by.
        prev_frame_idx: Unused; kept for interface compatibility.

    Returns:
        A new BGR image with the skeleton, keypoints and person IDs drawn.
    """
    # Clone the frame to avoid modifying the original
    pose_frame = frame.copy()

    if not pose_data['frames']:
        return pose_frame

    closest_frame = pose_data['frames'][0]
    connections = pose_data['connections']

    # Draw each person
    for person in closest_frame['p']:
        person_id = person['id']
        # The palettes are declared in RGB, but the frame is BGR and is only
        # converted to RGB after drawing, so colours are flipped to BGR here
        # to appear on screen as labelled.
        person_color = _rgb_to_bgr(PERSON_COLORS[person_id % len(PERSON_COLORS)])

        # Keypoints by index: (pixel x, pixel y, confidence)
        kp_dict = {
            kp[0]: (int(kp[1] * original_width), int(kp[2] * original_height), kp[3])
            for kp in person['k']
        }

        # Draw connections between keypoints that are both present
        for start_idx, end_idx in connections:
            if start_idx in kp_dict and end_idx in kp_dict:
                start_x, start_y, start_conf = kp_dict[start_idx]
                end_x, end_y, end_conf = kp_dict[end_idx]

                # Use average confidence to determine line thickness
                avg_conf = (start_conf + end_conf) / 2
                thickness = int(avg_conf * 3) + 1

                cv2.line(pose_frame, (start_x, start_y), (end_x, end_y), person_color, thickness)

        # Draw keypoints
        for kp_idx, (x, y, conf) in kp_dict.items():
            # Circle size based on confidence
            radius = int(conf * 5) + 2
            kp_color = _rgb_to_bgr(KEYPOINT_COLORS[kp_idx % len(KEYPOINT_COLORS)])
            cv2.circle(pose_frame, (x, y), radius, kp_color, -1)

        # Draw person ID just above the bounding box origin
        bbox = person['b']
        x, y = int(bbox[0]), int(bbox[1])
        cv2.putText(
            pose_frame,
            f"ID: {person_id}",
            (x, y - 10),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.7,
            person_color,
            2
        )

    return pose_frame


def draw_ui_controls(surface, width, height, playing, current_frame, total_frames):
    """Draw the playback controls along the bottom of the PyGame surface.

    Args:
        surface: Target surface (the window).
        width: Width of the target surface in pixels.
        height: Height of the target surface in pixels.
        playing: Whether playback is running (selects the "Pause"/"Play" label).
        current_frame: Index of the frame currently shown.
        total_frames: Total number of video frames (0 disables the progress fill).

    Returns:
        ``(play_button_rect, stop_button_rect, seekbar_rect)`` as
        ``pygame.Rect`` objects in ``surface`` coordinates, for click handling.
    """
    # Background for controls
    control_height = 50
    control_surface = pygame.Surface((width, control_height))
    control_surface.fill(BLACK)

    # Draw play/pause button
    button_width = 80
    button_height = 30
    button_x = 20
    button_y = (control_height - button_height) // 2

    pygame.draw.rect(control_surface, BLUE, (button_x, button_y, button_width, button_height))

    font = pygame.font.SysFont(None, 24)
    text = font.render("Pause" if playing else "Play", True, WHITE)
    text_rect = text.get_rect(center=(button_x + button_width//2, button_y + button_height//2))
    control_surface.blit(text, text_rect)

    # Draw stop button
    stop_button_x = button_x + button_width + 20
    pygame.draw.rect(control_surface, RED, (stop_button_x, button_y, button_width, button_height))

    stop_text = font.render("Stop", True, WHITE)
    stop_text_rect = stop_text.get_rect(center=(stop_button_x + button_width//2, button_y + button_height//2))
    control_surface.blit(stop_text, stop_text_rect)

    # Draw seek bar
    seekbar_x = stop_button_x + button_width + 40
    seekbar_y = button_y + button_height // 2
    seekbar_width = width - seekbar_x - 40
    seekbar_height = 10

    # Background bar
    pygame.draw.rect(control_surface, (100, 100, 100),
                     (seekbar_x, seekbar_y - seekbar_height//2, seekbar_width, seekbar_height))

    # Progress bar; clamped so an out-of-range frame index cannot draw
    # outside the bar
    progress = current_frame / total_frames if total_frames > 0 else 0
    progress = max(0.0, min(1.0, progress))
    progress_width = int(seekbar_width * progress)
    pygame.draw.rect(control_surface, GREEN,
                     (seekbar_x, seekbar_y - seekbar_height//2, progress_width, seekbar_height))

    # Display current frame / total frames
    time_text = font.render(f"Frame: {current_frame} / {total_frames}", True, WHITE)
    time_rect = time_text.get_rect(center=(seekbar_x + seekbar_width//2, seekbar_y - 20))
    control_surface.blit(time_text, time_rect)

    # Blit the control surface to the main surface
    controls_top = height - control_height
    surface.blit(control_surface, (0, controls_top))

    # Return button regions (in window coordinates) for click handling
    play_button_rect = pygame.Rect(button_x, controls_top + button_y, button_width, button_height)
    stop_button_rect = pygame.Rect(stop_button_x, controls_top + button_y, button_width, button_height)
    seekbar_rect = pygame.Rect(seekbar_x, controls_top + seekbar_y - seekbar_height//2,
                               seekbar_width, seekbar_height)

    return play_button_rect, stop_button_rect, seekbar_rect


def _build_frame_index(frames):
    """Return ``(lookup, sorted_frame_numbers)`` keyed by each pose frame's ``f``."""
    lookup = {pose_frame['f']: pose_frame for pose_frame in frames}
    return lookup, sorted(lookup)


def _find_pose_frame(lookup, sorted_frame_numbers, frame_number):
    """Return the pose frame at or most recently before ``frame_number``, else None."""
    pos = bisect.bisect_right(sorted_frame_numbers, frame_number)
    if pos == 0:
        return None
    return lookup[sorted_frame_numbers[pos - 1]]


def _read_frame_at(cap, frame_number):
    """Seek ``cap`` to ``frame_number`` and return that frame, or None if unreadable."""
    cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
    ret, frame = cap.read()
    return frame if ret else None


def run_viewer(video_path, json_path):
    """Run the interactive pose viewer until the window is closed.

    Controls: SPACE or the Play/Pause button toggles playback, the Stop button
    rewinds to the first frame, a click on the seek bar jumps to that position,
    ESC or closing the window exits the process via ``sys.exit()``.

    Args:
        video_path: Path of the video file to play.
        json_path: Path of the JSON pose file (see ``load_pose_data``).
    """
    # Load pose data
    pose_data = load_pose_data(json_path)

    # Open video
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            print(f"❌ Error: Could not open video {video_path}")
            return

        # Get video properties
        video_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        video_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        # Some containers report no frame rate (0 or NaN); fall back to the
        # rate stored with the pose data, then to 30, so playback stays capped.
        if not fps > 0:
            fallback_fps = pose_data['fps']
            if isinstance(fallback_fps, (int, float)) and fallback_fps > 0:
                fps = fallback_fps
            else:
                fps = 30

        # Create window - video on top, 50 px of controls below
        window_width = video_width
        window_height = video_height + 50
        window = create_pygame_window(window_width, window_height,
                                      f"Pose Viewer - {os.path.basename(video_path)}")

        # Setup clock
        clock = pygame.time.Clock()

        # State variables
        playing = False
        current_frame = 0

        # Index pose frames by video frame number for fast lookup
        frame_lookup, sorted_frame_numbers = _build_frame_index(pose_data['frames'])

        # Initial render
        ret, frame = cap.read()
        if not ret:
            print("❌ Error: Could not read the first frame")
            return

        # Control hit-boxes; their geometry depends only on the window size,
        # so the rectangles from the latest draw are valid for click tests.
        play_button_rect, stop_button_rect, seekbar_rect = draw_ui_controls(
            window, window_width, window_height, playing, current_frame, total_frames
        )

        # Main loop
        while True:
            # Handle events
            for event in pygame.event.get():
                if event.type == QUIT:
                    # Cleanup happens in the finally clause below
                    sys.exit()
                elif event.type == KEYDOWN:
                    if event.key == K_ESCAPE:
                        sys.exit()
                    elif event.key == K_SPACE:
                        playing = not playing
                elif event.type == MOUSEBUTTONDOWN:
                    click_pos = event.pos

                    if play_button_rect.collidepoint(click_pos):
                        playing = not playing
                    elif stop_button_rect.collidepoint(click_pos):
                        playing = False
                        current_frame = 0
                        rewound = _read_frame_at(cap, current_frame)
                        if rewound is not None:
                            frame = rewound
                    elif seekbar_rect.collidepoint(click_pos):
                        # Calculate position ratio and clamp the target to a
                        # readable frame index
                        ratio = (click_pos[0] - seekbar_rect.x) / seekbar_rect.width
                        target = max(0, min(total_frames - 1, int(ratio * total_frames)))
                        sought = _read_frame_at(cap, target)
                        if sought is not None:
                            current_frame = target
                            frame = sought
                        else:
                            # Seek failed: keep showing the current frame and
                            # put the reader back right after it.
                            cap.set(cv2.CAP_PROP_POS_FRAMES, current_frame + 1)

            # Handle playback
            if playing:
                ret, next_frame = cap.read()
                if ret:
                    frame = next_frame
                    current_frame += 1
                else:
                    # End of video: stop and rewind to the first frame
                    playing = False
                    current_frame = 0
                    rewound = _read_frame_at(cap, current_frame)
                    if rewound is None:
                        break
                    frame = rewound

            # Use the pose recorded at, or most recently before, this frame
            active_pose = _find_pose_frame(frame_lookup, sorted_frame_numbers, current_frame)

            if active_pose is not None:
                # draw_pose renders the first entry of 'frames', so hand it a
                # shallow copy narrowed to the active pose frame
                temp_pose_data = pose_data.copy()
                temp_pose_data['frames'] = [active_pose]
                pose_frame = draw_pose(frame, temp_pose_data, current_frame, video_width, video_height)
            else:
                # If no pose data found yet, just show the original frame
                pose_frame = frame.copy()

            # Convert frame from BGR to RGB for PyGame; surfarray expects
            # (width, height, channels), hence the axis swap
            rgb_pose_frame = cv2.cvtColor(pose_frame, cv2.COLOR_BGR2RGB)
            pygame_pose_frame = pygame.surfarray.make_surface(rgb_pose_frame.swapaxes(0, 1))

            # Draw frame
            window.blit(pygame_pose_frame, (0, 0))

            # Draw UI controls
            play_button_rect, stop_button_rect, seekbar_rect = draw_ui_controls(
                window, window_width, window_height, playing, current_frame, total_frames
            )

            # Draw metadata
            font = pygame.font.SysFont(None, 20)
            metadata_text = f"Frame Sampling: {pose_data['frame_sampling']}, Precision: {pose_data['precision']}"
            metadata_surface = font.render(metadata_text, True, WHITE)
            window.blit(metadata_surface, (10, 10))

            # Update display
            pygame.display.flip()

            # Cap framerate
            clock.tick(fps)
    finally:
        # Clean up on every exit path, including sys.exit() from the event loop
        cap.release()
        pygame.quit()


def main():
    """Parse the command line and start the viewer."""
    parser = argparse.ArgumentParser(description='Pose Viewer for JSON pose data with video')
    parser.add_argument('--video', '-v', required=True, help='Video file path or URL')
    parser.add_argument('--json', '-j', required=True, help='JSON pose data file path')
    args = parser.parse_args()

    # Handle URL input for video
    video_path = args.video
    if video_path.startswith(('http://', 'https://')):
        video_path = download_video(video_path)

    if not os.path.exists(video_path):
        print(f"❌ Error: Video file not found: {video_path}")
        return

    if not os.path.exists(args.json):
        print(f"❌ Error: JSON file not found: {args.json}")
        return

    # Run the viewer
    run_viewer(video_path, args.json)


if __name__ == "__main__":
    main()