main/source/content_analysis.cc

/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
#include "webrtc/modules/video_processing/main/source/content_analysis.h"

#include <math.h>
#include <stdlib.h>

#include "webrtc/system_wrappers/interface/cpu_features_wrapper.h"
#include "webrtc/system_wrappers/interface/tick_util.h"

namespace webrtc {

VPMContentAnalysis::VPMContentAnalysis(bool runtime_cpu_detection)
    : orig_frame_(NULL),
      prev_frame_(NULL),
      width_(0),
      height_(0),
      skip_num_(1),
      border_(8),
      motion_magnitude_(0.0f),
      spatial_pred_err_(0.0f),
      spatial_pred_err_h_(0.0f),
      spatial_pred_err_v_(0.0f),
      first_frame_(true),
      ca_Init_(false),
      content_metrics_(NULL) {
  ComputeSpatialMetrics = &VPMContentAnalysis::ComputeSpatialMetrics_C;
  TemporalDiffMetric = &VPMContentAnalysis::TemporalDiffMetric_C;

  if (runtime_cpu_detection) {
#if defined(WEBRTC_ARCH_X86_FAMILY)
    if (WebRtc_GetCPUInfo(kSSE2)) {
      ComputeSpatialMetrics = &VPMContentAnalysis::ComputeSpatialMetrics_SSE2;
      TemporalDiffMetric = &VPMContentAnalysis::TemporalDiffMetric_SSE2;
    }
#endif
  }
  Release();
}

VPMContentAnalysis::~VPMContentAnalysis() {
  Release();
}


VideoContentMetrics* VPMContentAnalysis::ComputeContentMetrics(
  const I420VideoFrame& inputFrame) {
  if (inputFrame.IsZeroSize())
    return NULL;

  // Init if needed (native dimension change).
  if (width_ != inputFrame.width() || height_ != inputFrame.height()) {
    if (VPM_OK != Initialize(inputFrame.width(), inputFrame.height()))
      return NULL;
  }
  // Only interested in the Y plane.
  orig_frame_ = inputFrame.buffer(kYPlane);

  // Compute spatial metrics: 3 spatial prediction errors.
  (this->*ComputeSpatialMetrics)();

  // Compute motion metrics
  if (first_frame_ == false)
    ComputeMotionMetrics();

  // Saving current frame as previous one: Y only.
  memcpy(prev_frame_, orig_frame_, width_ * height_);

  first_frame_ =  false;
  ca_Init_ = true;

  return ContentMetrics();
}

int32_t VPMContentAnalysis::Release() {
  if (content_metrics_ != NULL) {
    delete content_metrics_;
    content_metrics_ = NULL;
  }

  if (prev_frame_ != NULL) {
    delete [] prev_frame_;
    prev_frame_ = NULL;
  }

  width_ = 0;
  height_ = 0;
  first_frame_ = true;

  return VPM_OK;
}

int32_t VPMContentAnalysis::Initialize(int width, int height) {
  width_ = width;
  height_ = height;
  first_frame_ = true;

  // skip parameter: # of skipped rows: for complexity reduction
  //  temporal also currently uses it for column reduction.
  skip_num_ = 1;

  // use skipNum = 2 for 4CIF, WHD
  if ( (height_ >=  576) && (width_ >= 704) ) {
    skip_num_ = 2;
  }
  // use skipNum = 4 for FULLL_HD images
  if ( (height_ >=  1080) && (width_ >= 1920) ) {
    skip_num_ = 4;
  }

  if (content_metrics_ != NULL) {
    delete content_metrics_;
  }

  if (prev_frame_ != NULL) {
    delete [] prev_frame_;
  }

  // Spatial Metrics don't work on a border of 8. Minimum processing
  // block size is 16 pixels.  So make sure the width and height support this.
  if (width_ <= 32 || height_ <= 32) {
    ca_Init_ = false;
    return VPM_PARAMETER_ERROR;
  }

  content_metrics_ = new VideoContentMetrics();
  if (content_metrics_ == NULL) {
    return VPM_MEMORY;
  }

  prev_frame_ = new uint8_t[width_ * height_];  // Y only.
  if (prev_frame_ == NULL) return VPM_MEMORY;

  return VPM_OK;
}


// Compute motion metrics: magnitude over non-zero motion vectors,
//  and size of zero cluster
int32_t VPMContentAnalysis::ComputeMotionMetrics() {
  // Motion metrics: only one is derived from normalized
  //  (MAD) temporal difference
  (this->*TemporalDiffMetric)();
  return VPM_OK;
}

// Normalized temporal difference (MAD): used as a motion level metric
// Normalize MAD by spatial contrast: images with more contrast
//  (pixel variance) likely have larger temporal difference
// To reduce complexity, we compute the metric for a reduced set of points.
int32_t VPMContentAnalysis::TemporalDiffMetric_C() {
  // size of original frame
  int sizei = height_;
  int sizej = width_;
  uint32_t tempDiffSum = 0;
  uint32_t pixelSum = 0;
  uint64_t pixelSqSum = 0;

  uint32_t num_pixels = 0;  // Counter for # of pixels.
  const int width_end = ((width_ - 2*border_) & -16) + border_;

  for (int i = border_; i < sizei - border_; i += skip_num_) {
    for (int j = border_; j < width_end; j++) {
      num_pixels += 1;
      int ssn =  i * sizej + j;

      uint8_t currPixel  = orig_frame_[ssn];
      uint8_t prevPixel  = prev_frame_[ssn];

      tempDiffSum += (uint32_t)abs((int16_t)(currPixel - prevPixel));
      pixelSum += (uint32_t) currPixel;
      pixelSqSum += (uint64_t) (currPixel * currPixel);
    }
  }

  // Default.
  motion_magnitude_ = 0.0f;

  if (tempDiffSum == 0) return VPM_OK;

  // Normalize over all pixels.
  float const tempDiffAvg = (float)tempDiffSum / (float)(num_pixels);
  float const pixelSumAvg = (float)pixelSum / (float)(num_pixels);
  float const pixelSqSumAvg = (float)pixelSqSum / (float)(num_pixels);
  float contrast = pixelSqSumAvg - (pixelSumAvg * pixelSumAvg);

  if (contrast > 0.0) {
    contrast = sqrt(contrast);
    motion_magnitude_ = tempDiffAvg/contrast;
  }
  return VPM_OK;
}

// Compute spatial metrics:
// To reduce complexity, we compute the metric for a reduced set of points.
// The spatial metrics are rough estimates of the prediction error cost for
//  each QM spatial mode: 2x2,1x2,2x1
// The metrics are a simple estimate of the up-sampling prediction error,
// estimated assuming sub-sampling for decimation (no filtering),
// and up-sampling back up with simple bilinear interpolation.
int32_t VPMContentAnalysis::ComputeSpatialMetrics_C() {
  const int sizei = height_;
  const int sizej = width_;

  // Pixel mean square average: used to normalize the spatial metrics.
  uint32_t pixelMSA = 0;

  uint32_t spatialErrSum = 0;
  uint32_t spatialErrVSum = 0;
  uint32_t spatialErrHSum = 0;

  // make sure work section is a multiple of 16
  const int width_end = ((sizej - 2*border_) & -16) + border_;

  for (int i = border_; i < sizei - border_; i += skip_num_) {
    for (int j = border_; j < width_end; j++) {
      int ssn1=  i * sizej + j;
      int ssn2 = (i + 1) * sizej + j; // bottom
      int ssn3 = (i - 1) * sizej + j; // top
      int ssn4 = i * sizej + j + 1;   // right
      int ssn5 = i * sizej + j - 1;   // left

      uint16_t refPixel1  = orig_frame_[ssn1] << 1;
      uint16_t refPixel2  = orig_frame_[ssn1] << 2;

      uint8_t bottPixel = orig_frame_[ssn2];
      uint8_t topPixel = orig_frame_[ssn3];
      uint8_t rightPixel = orig_frame_[ssn4];
      uint8_t leftPixel = orig_frame_[ssn5];

      spatialErrSum  += (uint32_t) abs((int16_t)(refPixel2
          - (uint16_t)(bottPixel + topPixel + leftPixel + rightPixel)));
      spatialErrVSum += (uint32_t) abs((int16_t)(refPixel1
          - (uint16_t)(bottPixel + topPixel)));
      spatialErrHSum += (uint32_t) abs((int16_t)(refPixel1
          - (uint16_t)(leftPixel + rightPixel)));
      pixelMSA += orig_frame_[ssn1];
    }
  }

  // Normalize over all pixels.
  const float spatialErr = (float)(spatialErrSum >> 2);
  const float spatialErrH = (float)(spatialErrHSum >> 1);
  const float spatialErrV = (float)(spatialErrVSum >> 1);
  const float norm = (float)pixelMSA;

  // 2X2:
  spatial_pred_err_ = spatialErr / norm;
  // 1X2:
  spatial_pred_err_h_ = spatialErrH / norm;
  // 2X1:
  spatial_pred_err_v_ = spatialErrV / norm;
  return VPM_OK;
}

VideoContentMetrics* VPMContentAnalysis::ContentMetrics() {
  if (ca_Init_ == false) return NULL;

  content_metrics_->spatial_pred_err = spatial_pred_err_;
  content_metrics_->spatial_pred_err_h = spatial_pred_err_h_;
  content_metrics_->spatial_pred_err_v = spatial_pred_err_v_;
  // Motion metric: normalized temporal difference (MAD).
  content_metrics_->motion_magnitude = motion_magnitude_;

  return content_metrics_;
}

}  // namespace webrtc