Spaces:

microsoft
/

VITRA

Running on Zero

File size: 6,534 Bytes

aae3ba1

# Utils for video processing, frame manipulation, and visualization.
import cv2
import numpy as np
from typing import List
import imageio

def rotate_frame(frame: np.ndarray) -> np.ndarray:
    """
    Turn a frame a quarter turn clockwise.

    Args:
        frame (np.ndarray): Input image.

    Returns:
        np.ndarray: The result of cv2.rotate with ROTATE_90_CLOCKWISE.
    """
    rotated = cv2.rotate(frame, cv2.ROTATE_90_CLOCKWISE)
    return rotated

def center_crop_image(frame: np.ndarray, crop_percent: float = 1.0) -> np.ndarray:
    """
    Center crop an image to a specified fraction of its original size.

    Args:
        frame (np.ndarray): Input image; its first two axes are read as (height, width).
        crop_percent (float): Fraction of each side to keep (0 < crop_percent <= 1).

    Returns:
        np.ndarray: The input itself when crop_percent is 1.0, otherwise a
        centered slice of the input.

    Raises:
        ValueError: If crop_percent is not in the interval (0, 1].
    """
    # Out-of-range values would otherwise yield negative slice starts
    # (crop_percent > 1) or empty/garbage slices (crop_percent <= 0).
    if not 0 < crop_percent <= 1:
        raise ValueError(
            f"crop_percent must be in the interval (0, 1], got {crop_percent!r}"
        )

    if crop_percent == 1.0:
        return frame

    # Get original dimensions
    original_height, original_width = frame.shape[:2]

    # Calculate new dimensions (truncated towards zero)
    new_width = int(original_width * crop_percent)
    new_height = int(original_height * crop_percent)

    # Calculate top-left corner of the cropping box
    start_x = (original_width - new_width) // 2
    start_y = (original_height - new_height) // 2

    # Perform the crop; any trailing axes (e.g. channels) are kept whole
    return frame[start_y:start_y + new_height, start_x:start_x + new_width]

def read_video_frames(
    cap,
    start_frame: int = None,
    end_frame: int = None,
    interval: int = 1,
    rotate: bool = False,
    crop_percent: float = 1.0
    ) -> List[np.ndarray]:
    """
    Collect frames from a video capture object, optionally rotated and center cropped.

    Frames are read sequentially; a frame is kept when its absolute index
    (counted from start_frame, or 0 when no start is given) is a multiple of
    ``interval``. Reading stops at ``end_frame`` (exclusive) or as soon as the
    capture fails to deliver a frame.

    Args:
        cap: OpenCV VideoCapture object.
        start_frame (int): Index to seek to before reading; no seek when None.
        end_frame (int): Index at which to stop (exclusive); when None, the
            frame count reported by the capture is used.
        interval (int): Keep only frames whose index is divisible by this value.
        rotate (bool): Whether to rotate kept frames 90 degrees clockwise.
        crop_percent (float): Center crop percentage applied to kept frames.

    Returns:
        List[np.ndarray]: The kept frames, in reading order.
    """
    collected = []
    position = 0

    # Seek first so that the running index matches the capture position
    if start_frame is not None:
        cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)
        position = position + start_frame

    # Without an explicit end, fall back to the frame count the capture reports
    stop = end_frame
    if stop is None:
        stop = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    while position < stop:
        ok, image = cap.read()
        if not ok:
            # The capture ran out of frames (or failed) before reaching stop
            break

        if position % interval == 0:
            # Rotation happens before cropping
            if rotate:
                image = rotate_frame(image)
            collected.append(center_crop_image(image, crop_percent=crop_percent))

        position += 1

    return collected

def save_to_video(frames: List[np.ndarray], output_path: str, fps: int = 30):
    """
    Write a sequence of frames to a video file via imageio.

    Args:
        frames (List[np.ndarray]): Frames to write, in playback order.
        output_path (str): Destination path of the video file.
        fps (int): Frames per second passed to the writer.
    """
    # Encoder settings forwarded to imageio.mimsave
    writer_options = {"fps": fps, "codec": "libx264"}
    imageio.mimsave(output_path, frames, **writer_options)

def resize_frames_to_long_side(frames: List[np.ndarray], target_long_side: int) -> List[np.ndarray]:
    """
    Resize frames so the longer side matches the target size, preserving aspect ratio.

    The longer side is set to exactly ``target_long_side``; the shorter side is
    scaled by the same factor, truncated to an integer, and kept at a minimum
    of 1 pixel so that very elongated frames still produce a valid size.

    Args:
        frames (List[np.ndarray]): List of frames.
        target_long_side (int): Desired length of the longer side. When None,
            the input list is returned unchanged.

    Returns:
        List[np.ndarray]: Resized frames (or the original list when
        target_long_side is None).
    """

    # If target_long_side is None, return original frames without resizing
    if target_long_side is None:
        return frames

    long_side_px = int(target_long_side)

    resized_frames = []
    for frame in frames:
        height, width = frame.shape[:2]

        # Scale factor that maps the longer side onto the target
        long_side = max(width, height)
        scale_factor = target_long_side / long_side

        # Assign the long side directly: int(side * (target / side)) can
        # truncate to target - 1 because of floating-point round-off.
        # Clamp the short side to 1 so cv2.resize never gets a zero dimension.
        if width == long_side:
            new_width = long_side_px
        else:
            new_width = max(1, int(width * scale_factor))
        if height == long_side:
            new_height = long_side_px
        else:
            new_height = max(1, int(height * scale_factor))

        # cv2.resize takes the target size as (width, height)
        resized_frame = cv2.resize(frame, (new_width, new_height), interpolation=cv2.INTER_AREA)
        resized_frames.append(resized_frame)

    return resized_frames

def sample_frames_evenly(video_frames: List[np.ndarray], num_frames: int) -> List[np.ndarray]:
    """
    Pick evenly spaced frames from a video sequence.

    Args:
        video_frames (List[np.ndarray]): List of frames.
        num_frames (int): Number of frames to pick.

    Returns:
        List[np.ndarray]: A shallow copy of the input when it has no more than
        ``num_frames`` frames; otherwise ``num_frames`` frames taken at
        linearly spaced positions from the first to the last frame.
    """
    frame_total = len(video_frames)

    if frame_total > num_frames:
        # Linearly spaced positions between the first and last index, cast to int
        picks = np.linspace(0, frame_total - 1, num=num_frames, dtype=int)
        return [video_frames[idx] for idx in picks]

    # Nothing to drop: hand back a new list holding the same frames
    return video_frames.copy()

def wrap_text(text: str, max_width: int, font, font_scale: float) -> List[str]:
    """
    Wraps text to fit within a given width.

    Words are split on single spaces and added greedily to the current line;
    a new line is started when adding the next word would make the rendered
    line wider than ``max_width``. A single word that is wider than
    ``max_width`` is placed on a line of its own (it is not broken up).

    Args:
        text (str): The text to wrap.
        max_width (int): The maximum width in pixels.
        font: The OpenCV font face passed to cv2.getTextSize.
        font_scale (float): The scale of the font.

    Returns:
        List of lines of wrapped text.
    """
    words = text.split(' ')
    lines = []
    current_line = ''

    for word in words:
        test_line = current_line + word + ' '
        # Measure the rendered width of the candidate line (thickness 1)
        size = cv2.getTextSize(test_line, font, font_scale, 1)[0]
        if size[0] > max_width:
            # Only flush a line that has content; otherwise an over-wide word
            # at the start of a line would emit a spurious blank line.
            finished = current_line.strip()
            if finished:
                lines.append(finished)
            current_line = word + ' '
        else:
            current_line = test_line

    if current_line:
        lines.append(current_line.strip())

    return lines

def add_overlay_text(frame: np.ndarray, caption: str) -> np.ndarray:
    """
    Add overlay text to a frame.

    The caption is wrapped to the frame width (minus a horizontal margin) and
    drawn line by line from the top-left corner. Drawing is done with
    cv2.putText directly on ``frame``, and that same array is returned.

    Args:
        frame (np.ndarray): Input image; drawn on directly.
        caption (str): Text to overlay.

    Returns:
        np.ndarray: Frame with text (the same object as the input).
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.5
    margin_x = 10  # Horizontal offset of the text from the left edge
    y0, dy = 30, 20  # Starting Y position and line height

    # Measure with the same font scale used for drawing, and leave the margin
    # free on both sides; measuring at a larger scale wraps lines far too early.
    max_text_width = max(1, frame.shape[1] - 2 * margin_x)

    for i, line in enumerate(wrap_text(caption, max_text_width, font, font_scale)):
        y = y0 + i * dy
        cv2.putText(frame, line, (margin_x, y), font, font_scale, (0, 255, 255), 2)
    return frame