Expand source code
# -*- coding: utf-8 -*-
# Copyright 2018-2019 Jacob M. Graving <>
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import h5py
import os
import pandas as pd
from import DataGenerator
__all__ = ["initialize_dataset", "initialize_skeleton", "merge_new_images"]
def initialize_skeleton(skeleton):
    """Build the skeleton table that describes the keypoints.

    Loads the skeleton from a .csv or .xlsx file (or takes an already
    loaded DataFrame) and fills in the derived columns that are missing.

    Parameters
    ----------
    skeleton : str or pandas.DataFrame
        Filepath of the .csv or .xlsx file, or a DataFrame with the same
        content. It must contain a `name` column (name of the keypoint,
        e.g. head) and a `parent` column (name of the keypoint it
        connects to). An optional `swap` column names the keypoint each
        part trades places with when the image is reflected.

    Returns
    -------
    skeleton : pandas.DataFrame
        A copy of the input with these columns added when absent:
        `x` and `y` (filled with -1), `tree` (row position of the parent
        keypoint, -1 when no row has that name) and `swap_index` (row
        position of the row whose `swap` entry names this keypoint,
        -1 when there is none).

    Raises
    ------
    ValueError
        If `skeleton` is a path that does not end in .csv or .xlsx.
    TypeError
        If `skeleton` is neither a `str` nor a pandas.DataFrame.
    KeyError
        If the `name` or `parent` column is missing.
    """
    if isinstance(skeleton, str):
        if skeleton.endswith(".csv"):
            skeleton = pd.read_csv(skeleton)
        elif skeleton.endswith(".xlsx"):
            skeleton = pd.read_excel(skeleton)
        else:
            raise ValueError("skeleton must be .csv or .xlsx file")
    elif isinstance(skeleton, pd.DataFrame):
        # Work on a copy so the caller's DataFrame is left untouched.
        skeleton = skeleton.copy()
    else:
        raise TypeError("skeleton must be type `str` or pandas.DataFrame")

    if "name" not in skeleton.columns:
        raise KeyError("skeleton file must contain a `name` column")
    elif "parent" not in skeleton.columns:
        raise KeyError("skeleton file must contain a `parent` column")

    if "x" not in skeleton.columns:
        skeleton["x"] = -1
    if "y" not in skeleton.columns:
        skeleton["y"] = -1

    names = skeleton["name"].values
    n_rows = len(names)

    # The derived columns are built as positional arrays and assigned as
    # whole columns, so the result does not depend on the DataFrame index
    # (label-based `.loc[position]` would append rows for a custom index).
    if "tree" not in skeleton.columns:
        tree = np.full(n_rows, -1, dtype=np.int64)
        for idx, parent in enumerate(skeleton["parent"].values):
            if pd.isnull(parent):
                # A missing parent never matches any keypoint name.
                continue
            matches = np.flatnonzero(names == parent)
            if matches.size > 0:
                tree[idx] = matches[0]
        skeleton["tree"] = tree

    if "swap_index" not in skeleton.columns:
        swap_index = np.full(n_rows, -1, dtype=np.int64)
        # `swap` is optional: without it no keypoint has a swap partner.
        if "swap" in skeleton.columns:
            swaps = skeleton["swap"].values
            for idx, name in enumerate(names):
                if pd.isnull(name):
                    continue
                matches = np.flatnonzero(swaps == name)
                if matches.size > 0:
                    # Keep the last matching row when several rows name
                    # the same swap partner.
                    swap_index[idx] = matches[-1]
        skeleton["swap_index"] = swap_index

    return skeleton
def initialize_dataset(
    datapath, images, skeleton, keypoints=None, dataset="images", overwrite=False
):
    """Initialize an image dataset for annotation as an h5 file.

    The file is created with five datasets: the images (under the name
    given by `dataset`), "annotations", "annotated", "skeleton" and
    "skeleton_names".

    Parameters
    ----------
    datapath : str
        The path to the annotations file. Must be .h5
        e.g. '/path/to/file.h5'
    images : ndarray, shape (n_images, height, width, channels)
        A numpy array containing image data.
        `images.dtype` should be np.uint8
    skeleton : str or pandas.DataFrame
        Filepath of the .csv or .xlsx file that has indexed information
        on name of the keypoint (part, e.g. head), parent (the direct
        connecting part, e.g. neck connects to head, parent is head),
        and swap (swapping positions with a part when reflected).
        See example files for more information.
    keypoints : None or ndarray, shape (n_images, n_keypoints, 2)
        Optionally pass keypoints for initializing annotations for the
        new images. When the last axis has length 3, only the first two
        values of each keypoint are kept. When None, every annotation
        is initialized to -1.
    dataset : str, default = "images"
        The name of the dataset within the h5 file to save the images.
    overwrite : bool, default = False
        Whether to overwrite an existing .h5 file with the same name.

    Raises
    ------
    OSError
        If `datapath` exists and `overwrite` is False.
    TypeError
        If `images` or `keypoints` has the wrong type, number of
        dimensions or (for `images`) dtype.
    IndexError
        If `keypoints` does not match `images` along axis 0 or does not
        match the number of keypoints in the skeleton.
    """
    if os.path.exists(datapath) and overwrite is False:
        raise OSError(
            "Annotation set {} already exists. Delete the file or set `overwrite=True`.".format(
                datapath
            )
        )

    if not isinstance(images, np.ndarray):
        raise TypeError(
            "images must be ndarray with shape (n_images, height, width, channels)"
        )
    elif images.ndim != 4:
        raise TypeError(
            "images must be ndarray with shape (n_images, height, width, channels)"
        )
    elif images.dtype != np.uint8:
        raise TypeError("`images` must be ndarray with dtype np.uint8")

    if keypoints is not None:
        if not isinstance(keypoints, np.ndarray):
            raise TypeError(
                "keypoints must be None or ndarray with shape (n_images, n_keypoints, 2)"
            )
        elif keypoints.ndim != 3:
            raise TypeError(
                "keypoints must be ndarray with shape (n_images, n_keypoints, 2)"
            )
        elif keypoints.shape[0] != images.shape[0]:
            raise IndexError(
                "shape for `images` and `keypoints` must match along axis 0."
            )
        # Same handling as merge_new_images: drop a third value per keypoint.
        if keypoints.shape[-1] == 3:
            keypoints = keypoints[:, :, :2]

    n_images = images.shape[0]

    skeleton = initialize_skeleton(skeleton)
    skeleton_names = skeleton["name"].values
    skeleton = skeleton[["tree", "swap_index"]].values
    n_keypoints = skeleton.shape[0]

    # Validate before the file is opened with mode="w", so a bad argument
    # cannot truncate an existing annotation set.
    if keypoints is not None and keypoints.shape[1:] != (n_keypoints, 2):
        raise IndexError(
            "`keypoints` shape {} does not match skeleton shape {}".format(
                keypoints.shape[1:], (n_keypoints, 2)
            )
        )

    # Names are stored as fixed-width byte strings; widen past the default
    # of 10 bytes when needed so long names are not silently truncated.
    name_width = max([10] + [len(str(name)) for name in skeleton_names])
    name_dtype = "S{}".format(name_width)

    with h5py.File(datapath, mode="w") as h5file:
        h5file.create_dataset(
            dataset,
            shape=images.shape,
            dtype=np.uint8,
            data=images,
            maxshape=(None,) + images.shape[1:],
        )

        data = keypoints if keypoints is not None else -np.ones((n_images, n_keypoints, 2))
        h5file.create_dataset(
            "annotations",
            (n_images, n_keypoints, 2),
            dtype=np.float64,
            data=data,
            maxshape=(None,) + data.shape[1:],
        )

        data = np.zeros((n_images, n_keypoints), dtype=bool)
        h5file.create_dataset(
            "annotated",
            (n_images, n_keypoints),
            dtype=bool,
            data=data,
            maxshape=(None,) + data.shape[1:],
        )

        h5file.create_dataset("skeleton", skeleton.shape, dtype=np.int32, data=skeleton)
        h5file.create_dataset(
            "skeleton_names",
            (skeleton.shape[0],),
            dtype=name_dtype,
            data=skeleton_names.astype(name_dtype),
        )
def merge_new_images(
    datapath,
    merged_datapath,
    images,
    keypoints=None,
    dataset="images",
    overwrite=False,
    mode="full",
):
    """Merge new images with an annotation set.

    Writes a new h5 file holding the existing samples followed by the
    new images. The source file is opened read-only.

    Parameters
    ----------
    datapath : str
        The path to the annotations file. Must be .h5
        e.g. '/path/to/file.h5'
    merged_datapath : str
        The path to save the merged annotations file. Must be .h5
        e.g. '/path/to/merged_file.h5'
    images : ndarray, shape (n_images, height, width, channels)
        A numpy array containing image data.
        `images.dtype` should be np.uint8
    keypoints : None or ndarray, shape (n_images, n_keypoints, 2)
        Optionally pass keypoints for initializing annotations for the
        new images. When the last axis has length 3, only the first two
        values of each keypoint are kept. When None, the annotations of
        the new images are filled with zeros.
    dataset : str, default = "images"
        The dataset within the h5 file to save the images.
    overwrite : bool, default = False
        Whether to overwrite an existing .h5 file with the same name.
    mode : str
        Accepted for interface compatibility. It is not read by this
        function: the existing data is always loaded with mode="full".

    Raises
    ------
    OSError
        If `merged_datapath` exists and `overwrite` is False.
    TypeError
        If `images` or `keypoints` has the wrong type, number of
        dimensions or (for `images`) dtype.
    IndexError
        If `images` or `keypoints` does not match the shapes of the
        existing dataset, or they do not match each other along axis 0.
    """
    if os.path.exists(merged_datapath) and overwrite is False:
        raise OSError(
            "Annotation set {} already exists. Delete the file or set `overwrite=True`.".format(
                merged_datapath
            )
        )

    if not isinstance(images, np.ndarray):
        raise TypeError(
            "images must be ndarray with shape (n_images, height, width, channels)"
        )
    elif images.ndim != 4:
        raise TypeError(
            "images must be ndarray with shape (n_images, height, width, channels)"
        )
    elif images.dtype != np.uint8:
        raise TypeError("`images` must be ndarray with dtype np.uint8")

    if keypoints is not None:
        if not isinstance(keypoints, np.ndarray):
            raise TypeError(
                "keypoints must be None or ndarray with shape (n_images, n_keypoints, 2)"
            )
        elif keypoints.ndim != 3:
            raise TypeError(
                "keypoints must be ndarray with shape (n_images, n_keypoints, 2)"
            )
        elif keypoints.shape[0] != images.shape[0]:
            raise IndexError(
                "shape for `images` and `keypoints` must match along axis 0."
            )

    data_generator = DataGenerator(datapath, dataset=dataset, mode="full")
    image_shape = data_generator.image_shape
    keypoints_shape = data_generator.keypoints_shape

    if images.shape[1:] != image_shape:
        raise IndexError(
            "`images` shape {} does not match existing dataset {}".format(
                images.shape[1:], image_shape
            )
        )
    if keypoints is not None:
        if keypoints.shape[-1] == 3:
            keypoints = keypoints[:, :, :2]
        if keypoints.shape[1:] != keypoints_shape:
            raise IndexError(
                "`keypoints` shape {} does not match existing dataset {}".format(
                    keypoints.shape[1:], keypoints_shape
                )
            )

    n_new = images.shape[0]

    # Context managers close both files even when a write fails part-way.
    with h5py.File(datapath, mode="r") as h5file:
        source_images = h5file[dataset]
        source_annotations = h5file["annotations"]
        source_annotated = h5file["annotated"]
        n_existing = source_images.shape[0]
        n_samples_merged = n_existing + n_new

        with h5py.File(merged_datapath, "w") as merged_h5file:
            merged_images = merged_h5file.create_dataset(
                dataset,
                shape=(n_samples_merged,) + image_shape,
                dtype=np.uint8,
                maxshape=(None,) + image_shape,
            )
            merged_annotations = merged_h5file.create_dataset(
                "annotations",
                shape=(n_samples_merged,) + keypoints_shape,
                dtype=np.float64,
                maxshape=(None,) + keypoints_shape,
            )
            merged_annotated = merged_h5file.create_dataset(
                "annotated",
                (n_samples_merged, keypoints_shape[0]),
                dtype=bool,
                maxshape=(None, keypoints_shape[0]),
            )
            merged_h5file.create_dataset(
                "skeleton",
                h5file["skeleton"].shape,
                dtype=np.int32,
                data=h5file["skeleton"][:],
            )
            # Carry the keypoint names over as well, so the merged file has
            # the same layout as one written by initialize_dataset.
            if "skeleton_names" in h5file:
                names = h5file["skeleton_names"][:]
                merged_h5file.create_dataset(
                    "skeleton_names", names.shape, dtype=names.dtype, data=names
                )

            # Existing samples are copied one at a time so the whole source
            # dataset never has to be held in memory at once.
            for idx in range(n_existing):
                merged_images[idx] = source_images[idx]
                merged_annotations[idx] = source_annotations[idx]
                merged_annotated[idx] = source_annotated[idx]

            # NOTE(review): new samples without keypoints get zeros here,
            # whereas initialize_dataset fills with -1 -- confirm intent.
            blank_annotation = np.zeros(keypoints_shape)
            not_annotated = np.zeros(keypoints_shape[0], dtype=bool)
            for offset in range(n_new):
                idx = n_existing + offset
                merged_images[idx] = images[offset]
                if keypoints is not None:
                    merged_annotations[idx] = keypoints[offset]
                else:
                    merged_annotations[idx] = blank_annotation
                merged_annotated[idx] = not_annotated
def initialize_dataset(datapath, images, skeleton, keypoints=None, dataset='images', overwrite=False)
Initialize an image dataset for annotation as an h5 file.
datapath : str - The path to the annotations file. Must be .h5, e.g. '/path/to/file.h5'
images : ndarray, shape (n_images, height, width, channels) - A numpy array containing image data. images.dtype should be np.uint8
skeleton : str or pandas.DataFrame - Filepath of the .csv or .xlsx file that has indexed information on name of the keypoint (part, e.g. head), parent (the direct connecting part, e.g. neck connects to head, parent is head), and swap (swapping positions with a part when reflected). See example files for more information.
keypoints : None or ndarray, shape (n_images, n_keypoints, 2) - Optionally pass keypoints for initializing annotations for the new images.
dataset : str, default = "images" - The name of the dataset within the h5 file to save the images.
overwrite : bool, default = False - Whether to overwrite an existing .h5 file with the same name.
Expand source code
def initialize_dataset(
    datapath, images, skeleton, keypoints=None, dataset="images", overwrite=False
):
    """Initialize an image dataset for annotation as an h5 file.

    The file is created with five datasets: the images (under the name
    given by `dataset`), "annotations", "annotated", "skeleton" and
    "skeleton_names".

    Parameters
    ----------
    datapath : str
        The path to the annotations file. Must be .h5
        e.g. '/path/to/file.h5'
    images : ndarray, shape (n_images, height, width, channels)
        A numpy array containing image data.
        `images.dtype` should be np.uint8
    skeleton : str or pandas.DataFrame
        Filepath of the .csv or .xlsx file that has indexed information
        on name of the keypoint (part, e.g. head), parent (the direct
        connecting part, e.g. neck connects to head, parent is head),
        and swap (swapping positions with a part when reflected).
        See example files for more information.
    keypoints : None or ndarray, shape (n_images, n_keypoints, 2)
        Optionally pass keypoints for initializing annotations for the
        new images. When the last axis has length 3, only the first two
        values of each keypoint are kept. When None, every annotation
        is initialized to -1.
    dataset : str, default = "images"
        The name of the dataset within the h5 file to save the images.
    overwrite : bool, default = False
        Whether to overwrite an existing .h5 file with the same name.

    Raises
    ------
    OSError
        If `datapath` exists and `overwrite` is False.
    TypeError
        If `images` or `keypoints` has the wrong type, number of
        dimensions or (for `images`) dtype.
    IndexError
        If `keypoints` does not match `images` along axis 0 or does not
        match the number of keypoints in the skeleton.
    """
    if os.path.exists(datapath) and overwrite is False:
        raise OSError(
            "Annotation set {} already exists. Delete the file or set `overwrite=True`.".format(
                datapath
            )
        )

    if not isinstance(images, np.ndarray):
        raise TypeError(
            "images must be ndarray with shape (n_images, height, width, channels)"
        )
    elif images.ndim != 4:
        raise TypeError(
            "images must be ndarray with shape (n_images, height, width, channels)"
        )
    elif images.dtype != np.uint8:
        raise TypeError("`images` must be ndarray with dtype np.uint8")

    if keypoints is not None:
        if not isinstance(keypoints, np.ndarray):
            raise TypeError(
                "keypoints must be None or ndarray with shape (n_images, n_keypoints, 2)"
            )
        elif keypoints.ndim != 3:
            raise TypeError(
                "keypoints must be ndarray with shape (n_images, n_keypoints, 2)"
            )
        elif keypoints.shape[0] != images.shape[0]:
            raise IndexError(
                "shape for `images` and `keypoints` must match along axis 0."
            )
        # Same handling as merge_new_images: drop a third value per keypoint.
        if keypoints.shape[-1] == 3:
            keypoints = keypoints[:, :, :2]

    n_images = images.shape[0]

    skeleton = initialize_skeleton(skeleton)
    skeleton_names = skeleton["name"].values
    skeleton = skeleton[["tree", "swap_index"]].values
    n_keypoints = skeleton.shape[0]

    # Validate before the file is opened with mode="w", so a bad argument
    # cannot truncate an existing annotation set.
    if keypoints is not None and keypoints.shape[1:] != (n_keypoints, 2):
        raise IndexError(
            "`keypoints` shape {} does not match skeleton shape {}".format(
                keypoints.shape[1:], (n_keypoints, 2)
            )
        )

    # Names are stored as fixed-width byte strings; widen past the default
    # of 10 bytes when needed so long names are not silently truncated.
    name_width = max([10] + [len(str(name)) for name in skeleton_names])
    name_dtype = "S{}".format(name_width)

    with h5py.File(datapath, mode="w") as h5file:
        h5file.create_dataset(
            dataset,
            shape=images.shape,
            dtype=np.uint8,
            data=images,
            maxshape=(None,) + images.shape[1:],
        )

        data = keypoints if keypoints is not None else -np.ones((n_images, n_keypoints, 2))
        h5file.create_dataset(
            "annotations",
            (n_images, n_keypoints, 2),
            dtype=np.float64,
            data=data,
            maxshape=(None,) + data.shape[1:],
        )

        data = np.zeros((n_images, n_keypoints), dtype=bool)
        h5file.create_dataset(
            "annotated",
            (n_images, n_keypoints),
            dtype=bool,
            data=data,
            maxshape=(None,) + data.shape[1:],
        )

        h5file.create_dataset("skeleton", skeleton.shape, dtype=np.int32, data=skeleton)
        h5file.create_dataset(
            "skeleton_names",
            (skeleton.shape[0],),
            dtype=name_dtype,
            data=skeleton_names.astype(name_dtype),
        )
def initialize_skeleton(skeleton)
Initialize the skeleton from input data.
Takes in either a .csv or .xlsx file and makes a DataFrame.
skeleton : str or pandas.DataFrame - Filepath of the .csv or .xlsx file (or an already loaded DataFrame) that has indexed information on name of the keypoint (part, e.g. head), parent (the direct connecting part, e.g. neck connects to head, parent is head), and swap (swapping positions with a part when reflected over X).
Expand source code
def initialize_skeleton(skeleton):
    """Build the skeleton table that describes the keypoints.

    Loads the skeleton from a .csv or .xlsx file (or takes an already
    loaded DataFrame) and fills in the derived columns that are missing.

    Parameters
    ----------
    skeleton : str or pandas.DataFrame
        Filepath of the .csv or .xlsx file, or a DataFrame with the same
        content. It must contain a `name` column (name of the keypoint,
        e.g. head) and a `parent` column (name of the keypoint it
        connects to). An optional `swap` column names the keypoint each
        part trades places with when the image is reflected.

    Returns
    -------
    skeleton : pandas.DataFrame
        A copy of the input with these columns added when absent:
        `x` and `y` (filled with -1), `tree` (row position of the parent
        keypoint, -1 when no row has that name) and `swap_index` (row
        position of the row whose `swap` entry names this keypoint,
        -1 when there is none).

    Raises
    ------
    ValueError
        If `skeleton` is a path that does not end in .csv or .xlsx.
    TypeError
        If `skeleton` is neither a `str` nor a pandas.DataFrame.
    KeyError
        If the `name` or `parent` column is missing.
    """
    if isinstance(skeleton, str):
        if skeleton.endswith(".csv"):
            skeleton = pd.read_csv(skeleton)
        elif skeleton.endswith(".xlsx"):
            skeleton = pd.read_excel(skeleton)
        else:
            raise ValueError("skeleton must be .csv or .xlsx file")
    elif isinstance(skeleton, pd.DataFrame):
        # Work on a copy so the caller's DataFrame is left untouched.
        skeleton = skeleton.copy()
    else:
        raise TypeError("skeleton must be type `str` or pandas.DataFrame")

    if "name" not in skeleton.columns:
        raise KeyError("skeleton file must contain a `name` column")
    elif "parent" not in skeleton.columns:
        raise KeyError("skeleton file must contain a `parent` column")

    if "x" not in skeleton.columns:
        skeleton["x"] = -1
    if "y" not in skeleton.columns:
        skeleton["y"] = -1

    names = skeleton["name"].values
    n_rows = len(names)

    # The derived columns are built as positional arrays and assigned as
    # whole columns, so the result does not depend on the DataFrame index
    # (label-based `.loc[position]` would append rows for a custom index).
    if "tree" not in skeleton.columns:
        tree = np.full(n_rows, -1, dtype=np.int64)
        for idx, parent in enumerate(skeleton["parent"].values):
            if pd.isnull(parent):
                # A missing parent never matches any keypoint name.
                continue
            matches = np.flatnonzero(names == parent)
            if matches.size > 0:
                tree[idx] = matches[0]
        skeleton["tree"] = tree

    if "swap_index" not in skeleton.columns:
        swap_index = np.full(n_rows, -1, dtype=np.int64)
        # `swap` is optional: without it no keypoint has a swap partner.
        if "swap" in skeleton.columns:
            swaps = skeleton["swap"].values
            for idx, name in enumerate(names):
                if pd.isnull(name):
                    continue
                matches = np.flatnonzero(swaps == name)
                if matches.size > 0:
                    # Keep the last matching row when several rows name
                    # the same swap partner.
                    swap_index[idx] = matches[-1]
        skeleton["swap_index"] = swap_index

    return skeleton
def merge_new_images(datapath, merged_datapath, images, keypoints=None, dataset='images', overwrite=False, mode='full')
Merge new images with an annotation set
datapath : str - The path to the annotations file. Must be .h5, e.g. '/path/to/file.h5'
merged_datapath : str - The path to save the merged annotations file. Must be .h5, e.g. '/path/to/merged_file.h5'
images : ndarray, shape (n_images, height, width, channels) - A numpy array containing image data. images.dtype should be np.uint8
keypoints : None or ndarray, shape (n_images, n_keypoints, 2) - Optionally pass keypoints for initializing annotations for the new images.
dataset : str, default = "images" - The dataset within the h5 file to save the images.
overwrite : bool, default = False - Whether to overwrite an existing .h5 file with the same name.
mode : str - The mode for loading the existing data. Must be "annotated", or "full" (the full dataset)
Expand source code
def merge_new_images(
    datapath,
    merged_datapath,
    images,
    keypoints=None,
    dataset="images",
    overwrite=False,
    mode="full",
):
    """Merge new images with an annotation set.

    Writes a new h5 file holding the existing samples followed by the
    new images. The source file is opened read-only.

    Parameters
    ----------
    datapath : str
        The path to the annotations file. Must be .h5
        e.g. '/path/to/file.h5'
    merged_datapath : str
        The path to save the merged annotations file. Must be .h5
        e.g. '/path/to/merged_file.h5'
    images : ndarray, shape (n_images, height, width, channels)
        A numpy array containing image data.
        `images.dtype` should be np.uint8
    keypoints : None or ndarray, shape (n_images, n_keypoints, 2)
        Optionally pass keypoints for initializing annotations for the
        new images. When the last axis has length 3, only the first two
        values of each keypoint are kept. When None, the annotations of
        the new images are filled with zeros.
    dataset : str, default = "images"
        The dataset within the h5 file to save the images.
    overwrite : bool, default = False
        Whether to overwrite an existing .h5 file with the same name.
    mode : str
        Accepted for interface compatibility. It is not read by this
        function: the existing data is always loaded with mode="full".

    Raises
    ------
    OSError
        If `merged_datapath` exists and `overwrite` is False.
    TypeError
        If `images` or `keypoints` has the wrong type, number of
        dimensions or (for `images`) dtype.
    IndexError
        If `images` or `keypoints` does not match the shapes of the
        existing dataset, or they do not match each other along axis 0.
    """
    if os.path.exists(merged_datapath) and overwrite is False:
        raise OSError(
            "Annotation set {} already exists. Delete the file or set `overwrite=True`.".format(
                merged_datapath
            )
        )

    if not isinstance(images, np.ndarray):
        raise TypeError(
            "images must be ndarray with shape (n_images, height, width, channels)"
        )
    elif images.ndim != 4:
        raise TypeError(
            "images must be ndarray with shape (n_images, height, width, channels)"
        )
    elif images.dtype != np.uint8:
        raise TypeError("`images` must be ndarray with dtype np.uint8")

    if keypoints is not None:
        if not isinstance(keypoints, np.ndarray):
            raise TypeError(
                "keypoints must be None or ndarray with shape (n_images, n_keypoints, 2)"
            )
        elif keypoints.ndim != 3:
            raise TypeError(
                "keypoints must be ndarray with shape (n_images, n_keypoints, 2)"
            )
        elif keypoints.shape[0] != images.shape[0]:
            raise IndexError(
                "shape for `images` and `keypoints` must match along axis 0."
            )

    data_generator = DataGenerator(datapath, dataset=dataset, mode="full")
    image_shape = data_generator.image_shape
    keypoints_shape = data_generator.keypoints_shape

    if images.shape[1:] != image_shape:
        raise IndexError(
            "`images` shape {} does not match existing dataset {}".format(
                images.shape[1:], image_shape
            )
        )
    if keypoints is not None:
        if keypoints.shape[-1] == 3:
            keypoints = keypoints[:, :, :2]
        if keypoints.shape[1:] != keypoints_shape:
            raise IndexError(
                "`keypoints` shape {} does not match existing dataset {}".format(
                    keypoints.shape[1:], keypoints_shape
                )
            )

    n_new = images.shape[0]

    # Context managers close both files even when a write fails part-way.
    with h5py.File(datapath, mode="r") as h5file:
        source_images = h5file[dataset]
        source_annotations = h5file["annotations"]
        source_annotated = h5file["annotated"]
        n_existing = source_images.shape[0]
        n_samples_merged = n_existing + n_new

        with h5py.File(merged_datapath, "w") as merged_h5file:
            merged_images = merged_h5file.create_dataset(
                dataset,
                shape=(n_samples_merged,) + image_shape,
                dtype=np.uint8,
                maxshape=(None,) + image_shape,
            )
            merged_annotations = merged_h5file.create_dataset(
                "annotations",
                shape=(n_samples_merged,) + keypoints_shape,
                dtype=np.float64,
                maxshape=(None,) + keypoints_shape,
            )
            merged_annotated = merged_h5file.create_dataset(
                "annotated",
                (n_samples_merged, keypoints_shape[0]),
                dtype=bool,
                maxshape=(None, keypoints_shape[0]),
            )
            merged_h5file.create_dataset(
                "skeleton",
                h5file["skeleton"].shape,
                dtype=np.int32,
                data=h5file["skeleton"][:],
            )
            # Carry the keypoint names over as well, so the merged file has
            # the same layout as one written by initialize_dataset.
            if "skeleton_names" in h5file:
                names = h5file["skeleton_names"][:]
                merged_h5file.create_dataset(
                    "skeleton_names", names.shape, dtype=names.dtype, data=names
                )

            # Existing samples are copied one at a time so the whole source
            # dataset never has to be held in memory at once.
            for idx in range(n_existing):
                merged_images[idx] = source_images[idx]
                merged_annotations[idx] = source_annotations[idx]
                merged_annotated[idx] = source_annotated[idx]

            # NOTE(review): new samples without keypoints get zeros here,
            # whereas initialize_dataset fills with -1 -- confirm intent.
            blank_annotation = np.zeros(keypoints_shape)
            not_annotated = np.zeros(keypoints_shape[0], dtype=bool)
            for offset in range(n_new):
                idx = n_existing + offset
                merged_images[idx] = images[offset]
                if keypoints is not None:
                    merged_annotations[idx] = keypoints[offset]
                else:
                    merged_annotations[idx] = blank_annotation
                merged_annotated[idx] = not_annotated