Skip to content

Commit

Permalink
Added Tutorial Hvass-Labs#9
Browse files Browse the repository at this point in the history
  • Loading branch information
Hvass-Labs committed Oct 7, 2016
1 parent 21d8b23 commit aa6b31e
Show file tree
Hide file tree
Showing 7 changed files with 4,513 additions and 0 deletions.
2,232 changes: 2,232 additions & 0 deletions 09_Video_Data.ipynb

Large diffs are not rendered by default.

156 changes: 156 additions & 0 deletions cache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
########################################################################
#
# Cache-wrapper for a function or class.
#
# Save the result of calling a function or creating an object-instance
# to harddisk. This is used to persist the data so it can be reloaded
# very quickly and easily.
#
# Implemented in Python 3.5
#
########################################################################
#
# This file is part of the TensorFlow Tutorials available at:
#
# https://github.com/Hvass-Labs/TensorFlow-Tutorials
#
# Published under the MIT License. See the file LICENSE for details.
#
# Copyright 2016 by Magnus Erik Hvass Pedersen
#
########################################################################

import os
import pickle
import numpy as np

########################################################################


def cache(cache_path, fn, *args, **kwargs):
    """
    Cache-wrapper for a function or class.

    If the cache-file exists then its contents are un-pickled and returned
    without calling fn. Otherwise fn is called with the supplied arguments
    and the result is pickled to the cache-file before being returned.
    The fn-argument can also be a class, in which case an object-instance
    is created and saved to the cache-file.

    The cache-file is written to a temporary file first and then moved
    into place, so a failed or interrupted save never leaves a broken
    cache-file behind that would make later calls fail while loading.

    :param cache_path:
        File-path for the cache-file.
    :param fn:
        Function or class to be called.
    :param args:
        Arguments to the function or class-init.
    :param kwargs:
        Keyword arguments to the function or class-init.
    :return:
        The result of calling the function or creating the object-instance.
    :raises:
        Whatever fn raises, and whatever pickle / the file-system raise
        while loading or saving the cache-file.
    """

    if os.path.exists(cache_path):
        # The cache-file exists so reload the data from it.
        # NOTE: Un-pickling can execute arbitrary code, so only use
        # cache-files that come from a source you trust.
        with open(cache_path, mode='rb') as file:
            obj = pickle.load(file)

        print("- Data loaded from cache-file: " + cache_path)
    else:
        # The cache-file does not exist.

        # Call the function / class-init with the supplied arguments.
        obj = fn(*args, **kwargs)

        # Write to a temporary file next to the cache-file, so the final
        # rename stays on the same file-system.
        tmp_path = cache_path + ".tmp"

        try:
            with open(tmp_path, mode='wb') as file:
                pickle.dump(obj, file)

            # Only now does the cache-file come into existence.
            os.replace(tmp_path, cache_path)
        finally:
            # If saving failed then the temporary file is still there.
            # After a successful os.replace() there is nothing to remove.
            if os.path.exists(tmp_path):
                os.remove(tmp_path)

        print("- Data saved to cache-file: " + cache_path)

    return obj


########################################################################


def convert_numpy2pickle(in_path, out_path):
    """
    Re-save the contents of a numpy-file as a pickle-file.

    The first version of the cache-function used numpy for saving the
    data. Use this function to convert such an old cache-file instead of
    re-calculating all the data.

    :param in_path:
        Input file in numpy-format written using numpy.save().
    :param out_path:
        Output file written as a pickle-file.
    :return:
        Nothing.
    """

    # Read the input first, so that no output-file is created
    # if the input cannot be loaded.
    loaded = np.load(in_path)

    # Pickle whatever numpy loaded to the output-file.
    with open(out_path, mode='wb') as handle:
        pickle.Pickler(handle).dump(loaded)


########################################################################

if __name__ == '__main__':
    # Short demonstration of the cache-wrapper, first with a function
    # and then with a class.

    # Stand-in for a function that would normally take a long time to
    # compute. It only gets called if its result is not already saved
    # in the cache-file.
    def expensive_function(a, b):
        return a * b

    # Stand-in for a class whose object-instance is expensive to create,
    # or which needs to be persisted for some other reason.
    class ExpensiveClass:
        def __init__(self, c, d):
            self.c = c
            self.d = d
            self.result = c * d

        def print_result(self):
            # Print the two inputs and their product, one per line.
            rows = (('c =', self.c),
                    ('d =', self.d),
                    ('result = c * d =', self.result))

            for label, value in rows:
                print(label, value)

    # First example: cache the result of a function-call.
    print('Computing expensive_function() ...')

    # Reload the result if the cache-file already exists, otherwise
    # compute expensive_function(a=123, b=456) and save it for next time.
    function_cache_path = 'cache_expensive_function.pkl'
    result = cache(cache_path=function_cache_path,
                   fn=expensive_function, a=123, b=456)

    print('result =', result)

    # Newline.
    print()

    # Second example: cache an object-instance.
    print('Creating object from ExpensiveClass() ...')

    # Reload the object if the cache-file already exists, otherwise
    # create ExpensiveClass(c=123, d=456) and save it for next time.
    class_cache_path = 'cache_ExpensiveClass.pkl'
    obj = cache(cache_path=class_cache_path,
                fn=ExpensiveClass, c=123, d=456)

    obj.print_result()

########################################################################
197 changes: 197 additions & 0 deletions convert.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
#!/usr/bin/python

########################################################################
#
# Function and script for converting videos to images.
#
# This can be run as a script in a Linux shell by typing:
#
# python convert.py
#
# Or by running:
#
# chmod +x convert.py
# ./convert.py
#
# Requires the program avconv to be installed.
# Tested with avconv v. 9.18-6 on Linux Mint.
#
# Implemented in Python 3.5 (seems to work in Python 2.7 as well)
#
########################################################################
#
# This file is part of the TensorFlow Tutorials available at:
#
# https://github.com/Hvass-Labs/TensorFlow-Tutorials
#
# Published under the MIT License. See the file LICENSE for details.
#
# Copyright 2016 by Magnus Erik Hvass Pedersen
#
########################################################################

import os
import subprocess
import argparse

########################################################################


def video2images(in_dir, out_dir, crop_size, out_size, framerate, video_exts):
    """
    Convert videos to images. The videos are located in the directory in_dir
    and all its sub-directories which are processed recursively. The directory
    structure is replicated to out_dir where the jpeg-images are saved.

    The conversion is done by running the external program avconv once for
    each video. The program is run directly without a shell, so file-paths
    containing spaces or other special characters are passed on unchanged.
    A video only counts as converted if avconv exits with code 0, and
    failures are printed instead of being silently ignored.

    :param in_dir:
        Input directory for the videos e.g. "/home/magnus/video/"
        All sub-directories are processed recursively.
    :param out_dir:
        Output directory for the images e.g. "/home/magnus/video-images/"
    :param crop_size:
        Integer. First the videos are cropped to this width and height.
    :param out_size:
        Integer. After cropping, the videos are resized to this width and height.
    :param framerate:
        Integer. Number of frames to grab per second.
    :param video_exts:
        Tuple of strings. Extensions for video-files e.g. ('.mts', '.mp4')
        Not case-sensitive.
    :return:
        Nothing.
    """

    # Convert all video extensions to lower-case.
    video_exts = tuple(ext.lower() for ext in video_exts)

    # Crop and scale must be given as ONE filter-chain. If they are passed
    # as two separate -vf options then the last one overrides the first,
    # so the cropping would not be applied.
    video_filter = "crop={0}:{0},scale={1}:{1}".format(crop_size, out_size)

    # Number of videos successfully converted.
    video_count = 0

    # Process all the sub-dirs recursively.
    for current_dir, dir_names, file_names in os.walk(in_dir):
        # The current dir relative to the input directory.
        relative_path = os.path.relpath(current_dir, in_dir)

        # Name of the new directory for the output images.
        new_dir = os.path.join(out_dir, relative_path)

        # If the output-directory does not exist, then create it.
        if not os.path.exists(new_dir):
            os.makedirs(new_dir)

        # For all the files in the current directory.
        for file_name in file_names:
            # Skip files without a valid video-extension. Compare lower-cases.
            if not file_name.lower().endswith(video_exts):
                continue

            # File-path for the input video.
            in_file = os.path.join(current_dir, file_name)

            # File-name without its extension.
            file_root, _ = os.path.splitext(file_name)

            # Template file-path for the output images incl. all sub-dirs.
            # The "%4d" is filled in with the image-number by avconv.
            # Normalizing the path removes e.g. "/./"
            new_file_path = os.path.normpath(
                os.path.join(new_dir, file_root + "-%4d.jpg"))

            # Print status.
            print("Converting video to images:")
            print("- Input video: {0}".format(in_file))
            print("- Output images: {0}".format(new_file_path))

            # Arguments for the video-conversion tool. Using a list instead
            # of a single string means each file-path is one argument.
            cmd = ["avconv",
                   "-i", in_file,
                   "-r", str(framerate),
                   "-vf", video_filter,
                   "-qscale", "2",
                   new_file_path]

            try:
                # Run the program directly, without a shell.
                return_code = subprocess.call(cmd)
            except OSError as error:
                # The program could not be started e.g. it is not installed.
                print("- Could not run avconv: {0}".format(error))
                return_code = None

            if return_code == 0:
                # Increase the number of videos converted.
                video_count += 1
            elif return_code is not None:
                print("- avconv failed with exit-code: {0}".format(return_code))

            # Print newline.
            print()

    print("Number of videos converted: {0}".format(video_count))


########################################################################
# This script allows you to run the video-conversion from the command-line.

if __name__ == "__main__":
    # Command-line interface: parse the arguments, print them,
    # and then run the video-conversion.

    # Create the argument parser with a description of the script.
    parser = argparse.ArgumentParser(description=(
        "Convert videos to images. "
        "Recursively processes all sub-dirs of INDIR "
        "and replicates the dir-structure to OUTDIR. "
        "The video is first cropped to CROP:CROP pixels, "
        "then resized to SIZE:SIZE pixels and written as a jpeg-file. "))

    # Mandatory arguments.
    parser.add_argument(
        "--indir", required=True,
        help="input directory where videos are located")
    parser.add_argument(
        "--outdir", required=True,
        help="output directory where images will be saved")
    parser.add_argument(
        "--crop", required=True, type=int,
        help="the input videos are first cropped to CROP:CROP pixels")
    parser.add_argument(
        "--size", required=True, type=int,
        help="the input videos are then resized to SIZE:SIZE pixels")

    # Optional arguments.
    parser.add_argument(
        "--rate", type=int, default=5,
        help="the number of frames to convert per second")
    parser.add_argument(
        "--exts", nargs="+",
        help="list of extensions for video-files e.g. .mts .mp4")

    # Parse the command-line arguments.
    options = parser.parse_args()

    # The extensions arrive as a list of strings but a tuple is needed.
    # Fall back to the default extensions if none were given.
    if options.exts is not None:
        extensions = tuple(options.exts)
    else:
        extensions = (".MTS", ".mp4")

    # Gather the keyword-arguments for the conversion-function.
    settings = dict(in_dir=options.indir, out_dir=options.outdir,
                    crop_size=options.crop, out_size=options.size,
                    framerate=options.rate, video_exts=extensions)

    # Print the arguments.
    print("Convert videos to images.")
    print("- Input dir: " + settings["in_dir"])
    print("- Output dir: " + settings["out_dir"])
    print("- Crop width and height: {0}".format(settings["crop_size"]))
    print("- Resize width and height: {0}".format(settings["out_size"]))
    print("- Frame-rate: {0}".format(settings["framerate"]))
    print("- Video extensions: {0}".format(settings["video_exts"]))
    print()

    # Perform the conversions.
    video2images(**settings)

########################################################################
Loading

0 comments on commit aa6b31e

Please sign in to comment.