"""A set of function to post process the produced images."""
from typing import Union, List, Optional, Dict, Any
import numpy as np
import bpy
import mathutils
import cv2
from scipy import stats
from blenderproc.python.camera import CameraUtility
from blenderproc.python.utility.BlenderUtility import get_all_blender_mesh_objects
def dist2depth(dist: Union[List[np.ndarray], np.ndarray]) -> Union[List[np.ndarray], np.ndarray]:
"""
    Maps a distance image to a depth image. Also works with a list of images.

    :param dist: The distance data.
    :return: The depth data.
"""
dist = trim_redundant_channels(dist)
    if isinstance(dist, list) or (hasattr(dist, "shape") and len(dist.shape) > 2):
return [dist2depth(img) for img in dist]
    K = CameraUtility.get_intrinsics_as_K_matrix()
    # focal length and principal point (assumes square pixels, i.e. f_x == f_y)
    f, cx, cy = K[0, 0], K[0, 2], K[1, 2]
xs, ys = np.meshgrid(np.arange(dist.shape[1]), np.arange(dist.shape[0]))
# coordinate distances to principal point
x_opt = np.abs(xs - cx)
y_opt = np.abs(ys - cy)
    # Solve 3 equations in Wolfram Alpha:
    # Solve[{X == (x-c0)/f0*Z, Y == (y-c1)/f0*Z, X*X + Y*Y + Z*Z == d*d}, {X,Y,Z}]
depth = dist * f / np.sqrt(x_opt ** 2 + y_opt ** 2 + f ** 2)
return depth
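# A hedged usage sketch (not part of the original module): converting the rendered
# distance pass into a depth image. It assumes valid camera intrinsics are set and
# that the distance output was enabled before rendering, so that `data` contains a
# "distance" key.
#
#   import blenderproc as bproc
#   bproc.renderer.enable_distance_output(activate_antialiasing=True)
#   data = bproc.renderer.render()
#   data["depth"] = dist2depth(data["distance"])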
def depth2dist(depth: Union[List[np.ndarray], np.ndarray]) -> Union[List[np.ndarray], np.ndarray]:
"""
    Maps a depth image to a distance image. Also works with a list of images.

    :param depth: The depth data.
    :return: The distance data.
"""
depth = trim_redundant_channels(depth)
    if isinstance(depth, list) or (hasattr(depth, "shape") and len(depth.shape) > 2):
return [depth2dist(img) for img in depth]
    K = CameraUtility.get_intrinsics_as_K_matrix()
    # focal length and principal point (assumes square pixels, i.e. f_x == f_y)
    f, cx, cy = K[0, 0], K[0, 2], K[1, 2]
xs, ys = np.meshgrid(np.arange(depth.shape[1]), np.arange(depth.shape[0]))
# coordinate distances to principal point
x_opt = np.abs(xs - cx)
y_opt = np.abs(ys - cy)
    # Solve 3 equations in Wolfram Alpha:
    # Solve[{X == (x-c0)/f0*Z, Y == (y-c1)/f0*Z, X*X + Y*Y + Z*Z == d*d}, {X,Y,Z}]
dist = depth * np.sqrt(x_opt ** 2 + y_opt ** 2 + f ** 2) / f
return dist
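# A hedged sanity check (assumes a camera is configured in the scene, so the
# intrinsics can be queried): depth2dist inverts dist2depth, so a round trip
# should reproduce the input up to floating point error.
#
#   dist = np.random.uniform(0.5, 5.0, size=(480, 640))
#   np.testing.assert_allclose(depth2dist(dist2depth(dist)), dist, rtol=1e-6)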
def remove_segmap_noise(image: Union[list, np.ndarray]) -> Union[list, np.ndarray]:
"""
    A function that takes a segmentation map and removes noise pixels, i.e. values that are not real labels
    but deviations from the real labels, generated as a result of Blender doing some interpolation, smoothing
    or other numerical operations. Assumes that a noise pixel value won't occur more than 100 times.

    :param image: ndarray of the .exr segmap
    :return: The denoised segmap image
"""
    if isinstance(image, list) or (hasattr(image, "shape") and len(image.shape) > 3):
return [remove_segmap_noise(img) for img in image]
noise_indices = _PostProcessingUtility.determine_noisy_pixels(image)
    for index in noise_indices:
        # Extract the indices of the surrounding 3x3 neighbors
        neighbors = _PostProcessingUtility.get_pixel_neighbors(image, index[0], index[1])
        # Current value of the noisy pixel
        curr_val = image[index[0]][index[1]][0]
        # Get the values of the neighbors
        neighbor_vals = [image[neighbor[0]][neighbor[1]] for neighbor in neighbors]
        # Keep the unique values only
        neighbor_vals = np.unique(np.array([np.array(val) for val in neighbor_vals]))
        min_val = float("inf")
        min_idx = 0
        # Iterate through the unique neighbor values and find the one closest to the current noisy value
        for idx, n in enumerate(neighbor_vals):
            # Is this closer than the current closest value?
            if abs(n - curr_val) <= min_val:
                # If so, update
                min_val = abs(n - curr_val)
                min_idx = idx
        # Now that we have found the closest value, assign it to the noisy pixel
        new_val = neighbor_vals[min_idx]
        image[index[0]][index[1]] = np.array([new_val, new_val, new_val])
return image
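# A hedged usage sketch: denoising a segmentation map loaded from a rendered .exr
# file. The file name is hypothetical, and reading via imageio is an assumption;
# any .exr reader that yields a numpy array works as well.
#
#   import imageio
#   segmap = imageio.imread("segmap_0001.exr")
#   segmap = remove_segmap_noise(segmap)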
def oil_paint_filter(image: Union[list, np.ndarray], filter_size: int = 5, edges_only: bool = True,
rgb: bool = False) -> Union[list, np.ndarray]:
""" Applies the oil paint filter on a single channel image (or more than one channel, where each channel is a
replica of the other). This could be desired for corrupting rendered depth maps to appear more realistic.
    Also trims the redundant channels if they exist.

:param image: Input image or list of images
:param filter_size: Filter size, should be an odd number.
:param edges_only: If true, applies the filter on the edges only.
:param rgb: Apply the filter on an RGB image (if the image has 3 channels, they're assumed to not be \
replicated).
:return: filtered image
"""
if rgb:
        if isinstance(image, list) or (hasattr(image, "shape") and len(image.shape) > 3):
return [oil_paint_filter(img, filter_size, edges_only, rgb) for img in image]
intensity_img = np.sum(image, axis=2) / 3.0
neighbors = np.array(
_PostProcessingUtility.get_pixel_neighbors_stacked(image, filter_size, return_list=True))
neighbors_intensity = _PostProcessingUtility.get_pixel_neighbors_stacked(intensity_img, filter_size)
mode_intensity = stats.mode(neighbors_intensity, axis=2)[0].reshape(image.shape[0], image.shape[1])
# keys here would match all instances of the mode value
mode_keys = np.argwhere(neighbors_intensity == np.expand_dims(mode_intensity, axis=3))
# Remove the duplicate keys, since they point to the same value, and to be able to use them for indexing
_, unique_indices = np.unique(mode_keys[:, 0:2], axis=0, return_index=True)
unique_keys = mode_keys[unique_indices]
filtered_img = neighbors[unique_keys[:, 2], unique_keys[:, 0], unique_keys[:, 1], :] \
.reshape(image.shape[0], image.shape[1], image.shape[2])
if edges_only:
            edges = cv2.Canny(image, 0, np.max(image))  # assuming "image" is a uint8 array
image[edges > 0] = filtered_img[edges > 0]
filtered_img = image
else:
image = trim_redundant_channels(image)
        if isinstance(image, list) or (hasattr(image, "shape") and len(image.shape) > 2):
return [oil_paint_filter(img, filter_size, edges_only, rgb) for img in image]
if len(image.shape) == 3 and image.shape[2] > 1:
image = image[:, :, 0]
filtered_img = stats.mode(_PostProcessingUtility.get_pixel_neighbors_stacked(image, filter_size), axis=2)[0]
filtered_img = filtered_img.reshape(filtered_img.shape[0], filtered_img.shape[1])
if edges_only:
# Handle inf and map input to the range: 0-255
_image = np.copy(image)
_max = np.max(_image) if np.max(_image) != np.inf else np.unique(_image)[-2]
_image[_image > _max] = _max
_image = (_image / _max) * 255.0
__img = np.uint8(_image)
edges = cv2.Canny(__img, 0, np.max(__img))
image[edges > 0] = filtered_img[edges > 0]
filtered_img = image
return filtered_img
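# A hedged usage sketch: corrupting a rendered depth map so it looks more like a
# real sensor output, mode-filtering along depth edges only. Assumes the depth
# output was enabled before rendering.
#
#   data = bproc.renderer.render()
#   noisy_depth = oil_paint_filter(data["depth"], filter_size=5, edges_only=True)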
def add_kinect_azure_noise(depth: Union[list, np.ndarray], color: Optional[Union[list, np.ndarray]] = None,
missing_depth_darkness_thres: int = 15) -> Union[list, np.ndarray]:
"""
    Adds noise and holes to depth maps and smooths them according to the noise characteristics of the
    Kinect Azure sensor (https://www.mdpi.com/1424-8220/21/2/413).

    For further realism, consider using the projection from depth to color image in the Azure Kinect SDK:
    https://docs.microsoft.com/de-de/azure/kinect-dk/use-image-transformation

:param depth: Input depth image(s) in meters
:param color: Optional color image(s) to add missing depth at close to black surfaces
:param missing_depth_darkness_thres: uint8 gray value threshold at which depth becomes invalid, i.e. 0
:return: Noisy depth image(s)
"""
    if isinstance(depth, list) or (hasattr(depth, "shape") and len(depth.shape) > 2):
if color is None:
color = len(depth) * [None]
assert len(color) == len(depth), "Enter same number of depth and color images"
        return [add_kinect_azure_noise(d, c, missing_depth_darkness_thres) for d, c in zip(depth, color)]
# smoothing at borders
depth = add_gaussian_shifts(depth, 0.25)
    # 0.5mm base noise, 1mm std noise @ 1m, 3.6mm std noise @ 3m
    depth += (5 / 10000 + np.maximum((depth - 0.5) * 1 / 1000, 0)) * np.random.normal(size=depth.shape)
    # Create a rectangular 3x3 kernel
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    # Apply minimum and maximum filters with the 3x3 kernel
    min_depth = cv2.erode(depth, kernel)
    max_depth = cv2.dilate(depth, kernel)
    # Create missing depth where the local min/max depth difference exceeds 0.8m
    depth[abs(min_depth - max_depth) > 0.8] = 0
# create missing depth at dark surfaces
if color is not None:
gray = cv2.cvtColor(color, cv2.COLOR_RGB2GRAY)
        depth[gray < missing_depth_darkness_thres] = 0
return depth
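# A hedged usage sketch: simulating Kinect Azure noise on rendered depth maps,
# using the color pass to invalidate depth on very dark surfaces. Assumes depth
# in meters and uint8 RGB color images, as produced by the renderer.
#
#   data = bproc.renderer.render()
#   data["depth_kinect"] = add_kinect_azure_noise(data["depth"], data["colors"])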
def add_gaussian_shifts(image: Union[list, np.ndarray], std: float = 0.5) -> Union[list, np.ndarray]:
"""
    Randomly shifts the pixels of the input depth image in the x and y directions.

    :param image: Input depth image(s)
    :param std: Standard deviation of the pixel shifts, defaults to 0.5
    :return: Augmented images
"""
    if isinstance(image, list) or (hasattr(image, "shape") and len(image.shape) > 2):
return [add_gaussian_shifts(img, std=std) for img in image]
rows, cols = image.shape
gaussian_shifts = np.random.normal(0, std, size=(rows, cols, 2))
gaussian_shifts = gaussian_shifts.astype(np.float32)
# creating evenly spaced coordinates
xx = np.linspace(0, cols-1, cols)
yy = np.linspace(0, rows-1, rows)
# get xpixels and ypixels
xp, yp = np.meshgrid(xx, yy)
xp = xp.astype(np.float32)
yp = yp.astype(np.float32)
    # clamp the shifted coordinates to the valid image range
    xp_interp = np.minimum(np.maximum(xp + gaussian_shifts[:, :, 0], 0.0), cols - 1.0)
    yp_interp = np.minimum(np.maximum(yp + gaussian_shifts[:, :, 1], 0.0), rows - 1.0)
depth_interp = cv2.remap(image, xp_interp, yp_interp, cv2.INTER_LINEAR)
return depth_interp
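# A hedged usage sketch: jittering the pixel positions of a single 2D depth image
# (`depth_img` is a hypothetical array) with a standard deviation of half a pixel.
# cv2.remap builds the interpolation from float32 coordinate maps internally.
#
#   jittered = add_gaussian_shifts(depth_img.astype(np.float32), std=0.5)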
def trim_redundant_channels(image: Union[list, np.ndarray]) -> Union[list, np.ndarray]:
"""
    Removes redundant channels. This is useful for removing two of the three channels created for a
    depth or distance image. Also works on a list of images. Be aware that no check is performed
    to ensure that all channels are really equal.

:param image: Input image or list of images
:return: The trimmed image data with preserved input type
"""
if isinstance(image, list):
return [trim_redundant_channels(ele) for ele in image]
if hasattr(image, "shape") and len(image.shape) > 3:
return np.array([trim_redundant_channels(ele) for ele in image])
if hasattr(image, "shape") and len(image.shape) == 3 and image.shape[2] == 3:
image = image[:, :, 0] # All channels have the same value, so just extract any single channel
return image
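# A hedged illustration: a (H, W, 3) depth rendering with three identical channels
# collapses to a single (H, W) image:
#
#   assert trim_redundant_channels(np.zeros((4, 4, 3))).shape == (4, 4)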
def segmentation_mapping(image: Union[List[np.ndarray], np.ndarray],
map_by: Union[str, List[str]],
default_values: Optional[Dict[str, int]]) \
-> Dict[str, Union[np.ndarray, List[np.ndarray], List[Dict[str, Any]]]]:
""" Maps an image or a list of images to the desired segmentation images plus segmentation dictionary for keys,
which can not be stored in an image (e.g. `name`).
:param image: A list or single image of a scene, must contain the pass indices defined in
`enable_segmentation_output`.
:param map_by: The keys which will be extracted from the objects, either a single key or a list of keys.
:param default_values: If an object does not provide a key a default key must be provided.
:return: A dict mapping each key in map_by to an output list of images or a dictionary containing the information
"""
return_dict: Dict[str, Union[np.ndarray, List[np.ndarray], List[Dict[str, Any]]]] = {}
is_stereo_case = bpy.context.scene.render.use_multiview
# convert a single image to a list of stereo images
if isinstance(image, list):
if len(image) == 0:
raise RuntimeError("The given image list is empty")
if hasattr(image[0], "shape") and len(image[0].shape) == 2 and not is_stereo_case:
# convert list of images to np.ndarray
image = np.array(image)[:, np.newaxis, :, :] # reshape for the stereo case
elif hasattr(image, "shape") and len(image.shape) == 3:
if not is_stereo_case:
image = np.array(image)[:, np.newaxis, :, :] # reshape for stereo case
else:
# this is a single image in stereo mode -> make a list out of it
image = np.array(image)[np.newaxis, :, :, :]
elif hasattr(image, "shape") and len(image.shape) == 2:
if is_stereo_case:
raise RuntimeError("The amount of dimensions for an image must be higher than two in stereo mode!")
# add stereo case and make a list out of it
image = np.array(image)[np.newaxis, np.newaxis, :, :]
# convert to int, to avoid rounding errors
image = np.array(image).astype(np.int64)
    # convert map_by to a list
if not isinstance(map_by, list):
map_by = [map_by]
for frame_image in image:
non_image_attributes: Dict[int, Dict[str, Any]] = {}
mapped_results_stereo_dict: Dict[str, List[np.ndarray]] = {}
for stereo_image in frame_image:
# map object ids in the image to the used objects
object_ids = np.unique(stereo_image).astype(int)
object_ids_to_object = {}
for obj in get_all_blender_mesh_objects():
if obj.pass_index in object_ids:
object_ids_to_object[obj.pass_index] = obj
object_ids_to_object[0] = bpy.context.scene.world
for map_by_attribute in map_by:
# create result map
resulting_map = np.zeros((stereo_image.shape[0], stereo_image.shape[1]), dtype=np.float64)
# save the type of the stored variable in the resulting map
found_dtype = None
map_by_attribute = map_by_attribute.lower()
current_attribute = map_by_attribute
if map_by_attribute in ["class", "category_id"]:
# class mode
current_attribute = "category_id"
if map_by_attribute == "instance":
mapped_results_stereo_dict.setdefault(f"{map_by_attribute}_segmaps", []).append(stereo_image)
else:
# check if a default value was specified
default_value_set = False
default_value = None
if default_values and current_attribute in default_values:
default_value_set = True
default_value = default_values[current_attribute]
                    elif default_values and map_by_attribute in default_values:
                        default_value_set = True
                        default_value = default_values[map_by_attribute]
for object_id in object_ids:
# get current object
current_obj = object_ids_to_object[object_id]
# if the current obj has an attribute with that name -> get it
if hasattr(current_obj, current_attribute):
value = getattr(current_obj, current_attribute)
# if the current object has a custom property with that name -> get it
elif current_attribute in current_obj:
value = current_obj[current_attribute]
elif current_attribute.startswith("cf_"):
if current_attribute == "cf_basename":
value = current_obj.name
if "." in value:
value = value[:value.rfind(".")]
else:
raise ValueError(f"The given attribute is a custom function: \"cf_\", but it is not "
f"defined here: {current_attribute}")
elif default_value_set:
# if none of the above applies use the default value
value = default_value
else:
                            # if the requested current_attribute is neither an attribute nor a custom
                            # property, and no default value was given, an exception is thrown
d_error = {current_attribute: None}
raise RuntimeError(f"The object \"{current_obj.name}\" does not have the "
f"attribute: \"{current_attribute}\". Either set the attribute for "
f"every object or pass a default value to "
f"bproc.renderer.enable_segmentation_output(default_values={d_error}).")
# save everything which is not instance also in the .csv
if isinstance(value, (int, float, np.integer, np.floating)):
resulting_map[stereo_image == object_id] = value
found_dtype = type(value)
if isinstance(value, (mathutils.Vector, mathutils.Matrix)):
value = np.array(value)
if object_id in non_image_attributes:
non_image_attributes[object_id][current_attribute] = value
else:
non_image_attributes[object_id] = {current_attribute: value}
# if a value was found the resulting map should be stored
if found_dtype is not None:
resulting_map = resulting_map.astype(found_dtype)
mapped_results_stereo_dict.setdefault(f"{map_by_attribute}_segmaps", []).append(resulting_map)
elif "instance" not in map_by:
raise ValueError(f"The map_by key \"{map_by_attribute}\" requires that the instance map is "
f"stored as well in the output. Change it to: {map_by + ['instance']}")
# combine stereo image and add to output
for key, list_of_stereo_images in mapped_results_stereo_dict.items():
if len(list_of_stereo_images) == 1:
return_dict.setdefault(key, []).append(list_of_stereo_images[0])
else:
stereo_image = np.stack(list_of_stereo_images, axis=0)
return_dict.setdefault(key, []).append(stereo_image)
# combine non image attributes
mappings = []
for object_id, attribute_dict in non_image_attributes.items():
            # converting to int to be able to save it in a hdf5 container
mappings.append({"idx": int(object_id), **attribute_dict})
return_dict.setdefault("instance_attribute_maps", []).append(mappings)
# check if only one image was provided as input
if image.shape[0] == 1:
# remove the list in the return dict, as there was only a single input image
# this still works with stereo image as they are fused together in here
return {key: value[0] for key, value in return_dict.items()}
return return_dict
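# A hedged usage sketch: in typical scripts this function is not called directly;
# bproc.renderer.enable_segmentation_output() registers it so that the mapped
# segmentation outputs are part of the render result. A manual call on a rendered
# instance image could look like this (mapping by "class" requires each object to
# carry a "category_id" custom property, or a default value as shown):
#
#   segmaps = segmentation_mapping(instance_img, map_by=["instance", "class"],
#                                  default_values={"category_id": 0})
#   class_map = segmaps["class_segmaps"]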
class _PostProcessingUtility:
@staticmethod
def get_pixel_neighbors(data: np.ndarray, i: int, j: int) -> np.ndarray:
""" Returns the valid neighbor pixel indices of the given pixel.
:param data: The whole image data.
:param i: The row index of the pixel
:param j: The col index of the pixel.
:return: A list of neighbor point indices.
"""
neighbors = []
for p in range(max(0, i - 1), min(data.shape[0], i + 2)):
for q in range(max(0, j - 1), min(data.shape[1], j + 2)):
if not (p == i and q == j): # We don't want the current pixel, just the neighbors
neighbors.append([p, q])
return np.array(neighbors)
@staticmethod
def get_pixel_neighbors_stacked(img: np.ndarray, filter_size: int = 3,
return_list: bool = False) -> Union[list, np.ndarray]:
"""
        Stacks the neighbors of each pixel according to a square filter around each given pixel in the depth
        dimension. The neighbors are represented by shifting the input image in all directions required to
        simulate the filter.

        :param img: Input image as a numpy array.
        :param filter_size: Filter size. Type: int. Default: 3.
:param return_list: Instead of stacking in the output array, just return a list of the "neighbor" \
images along with the input image.
:return: Either a tensor with the "neighbor" images stacked in a separate additional dimension, or a list of \
images of the same shape as the input image, containing the shifted images (simulating the neighbors) \
and the input image.
"""
_min = -int(filter_size / 2)
_max = _min + filter_size
rows, cols = img.shape[0], img.shape[1]
channels = [img]
for p in range(_min, _max):
for q in range(_min, _max):
if p == 0 and q == 0:
continue
shifted = np.zeros_like(img)
shifted[max(p, 0):min(rows, rows + p), max(q, 0):min(cols, cols + q)] = img[
max(-p, 0):min(rows - p, rows),
max(-q, 0):min(cols - q, cols)]
channels.append(shifted)
if return_list:
return channels
return np.dstack(tuple(channels))
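    # A hedged illustration: for a 2D image and filter_size=3, the result stacks the
    # image itself plus its eight shifted copies along a new third axis, so a (H, W)
    # input yields a (H, W, 9) tensor:
    #
    #   stacked = _PostProcessingUtility.get_pixel_neighbors_stacked(np.ones((4, 4)), 3)
    #   assert stacked.shape == (4, 4, 9)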
@staticmethod
def is_in(element, test_elements, assume_unique=False, invert=False):
""" As np.isin is only available after v1.13 and blender is using 1.10.1 we have to implement it manually. """
element = np.asarray(element)
return np.in1d(element, test_elements, assume_unique=assume_unique, invert=invert).reshape(element.shape)
@staticmethod
def determine_noisy_pixels(image: np.ndarray) -> np.ndarray:
"""
:param image: The image data.
        :return: A list of 2D indices that correspond to the noisy pixels. One criterion for finding \
            these pixels is to use a histogram and find the pixels with frequencies lower than \
            a threshold, e.g. 100.
"""
        # The map was scaled to range over the entire 16-bit color depth; this scaling-down operation
        # should remove some noise or deviations
        image = (image * 37) / 65536  # assuming 16-bit color depth
image = image.astype(np.int32)
b, counts = np.unique(image.flatten(), return_counts=True)
        # Remove further noise where there are some stray pixel values with very small counts, by assigning
        # them to their closest (numerically, since this deviation is a result of some numerical operation)
        # neighbor.
hist = sorted((np.asarray((b, counts)).T), key=lambda x: x[1])
# Assuming the stray pixels wouldn't have a count of more than 100
noise_vals = [h[0] for h in hist if h[1] <= 100]
noise_indices = np.argwhere(_PostProcessingUtility.is_in(image, noise_vals))
return noise_indices