cocolib / light field suite Code

CUDA library for continuous optimization and light field analysis

Status: Beta
Brought to you by: goldluecke
[r7]: / trunk / cocolib / tv / tv_l2.cu Maximize Restore History
197 lines (171 with data), 5.8 kB

/* -*-c++-*- */
/** \file tv_l2.cu
   Algorithms to solve the TV-L2 model:

   Workspace handling and access code.

   Copyright (C) 2010 Bastian Goldluecke,
                      <first name>AT<last name>.net

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.
   
   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

#include <iostream>
#include "tv_l2.h"
#include "tv_l2.cuh"

#include "../defs.h"
#include "../cuda/cuda_helper.h"

// Alloc PDE data with sensible defaults
coco::tv_l2_data* coco::tv_l2_data_alloc( gsl_matrix* f, gsl_matrix *g )
{
  tv_l2_data *data = new tv_l2_data;

  // Texture sizes
  data->_W = f->size2;
  data->_H = f->size1;
  data->_N = data->_W * data->_H;
  // Smoothness parameter
  data->_lambda = 1.0f;
  // Maximum step size with proven convergence
  data->_tau = 0.125;
  // Relaxation variable
  data->_alpha = 1.0;
  // Workspace
  data->_workspace = new tv_l2_workspace;
  memset( data->_workspace, 0, sizeof( tv_l2_workspace ));

  // Size of image matrices in bytes
  data->_nfbytes = data->_N * sizeof(float);

  // Alloc fields
  tv_l2_workspace *w = data->_workspace;

  // Primal variable components
  CUDA_SAFE_CALL( cudaMalloc( &w->_u, data->_nfbytes ));
  // Dual variable XI and relaxation states
  CUDA_SAFE_CALL( cudaMalloc( &w->_x1, data->_nfbytes ));
  CUDA_SAFE_CALL( cudaMalloc( &w->_x2, data->_nfbytes ));
  CUDA_SAFE_CALL( cudaMalloc( &w->_x1e, data->_nfbytes ));
  CUDA_SAFE_CALL( cudaMalloc( &w->_x2e, data->_nfbytes ));
  CUDA_SAFE_CALL( cudaMalloc( &w->_y1, data->_nfbytes ));
  CUDA_SAFE_CALL( cudaMalloc( &w->_y2, data->_nfbytes ));
  CUDA_SAFE_CALL( cudaMalloc( &w->_f, data->_nfbytes ));
  w->_g = NULL;
  // Copy f to GPU
  data->_f = gsl_matrix_alloc( data->_H, data->_W );
  memcpy( data->_f->data, f->data, data->_nfbytes );
  assert( f->size2 == data->_W );
  assert( f->size1 == data->_H );
  cuda_memcpy( w->_f, f );
  // Copy TV weight to GPU
  data->_g = NULL;
  if ( g != NULL ) {
    data->_g = gsl_matrix_alloc( data->_H, data->_W );
    memcpy( data->_g->data, g->data, data->_nfbytes );
    CUDA_SAFE_CALL( cudaMalloc( &w->_g, data->_nfbytes ));
    assert( g->size2 == data->_W );
    assert( g->size1 == data->_H );
    cuda_memcpy( w->_g, g );
  }

  return data;
}



// Free up PDE data
bool coco::tv_l2_data_free( tv_l2_data *data )
{
  // Free GPU fields
  tv_l2_workspace *w = data->_workspace;
  CUDA_SAFE_CALL( cudaFree( w->_u ));
  CUDA_SAFE_CALL( cudaFree( w->_f ));
  CUDA_SAFE_CALL( cudaFree( w->_x1 ));
  CUDA_SAFE_CALL( cudaFree( w->_x2 ));
  CUDA_SAFE_CALL( cudaFree( w->_x1e ));
  CUDA_SAFE_CALL( cudaFree( w->_x2e ));
  CUDA_SAFE_CALL( cudaFree( w->_y1 ));
  CUDA_SAFE_CALL( cudaFree( w->_y2 ));
  if ( w->_g != NULL ) {
    CUDA_SAFE_CALL( cudaFree( w->_g ));
  }

  gsl_matrix_free( data->_f );
  if ( data->_g != NULL ) {
    gsl_matrix_free( data->_g );
  }
  delete data->_workspace;
  delete data;
  return true;
}



// Initialize workspace with current solution
bool coco::tv_l2_initialize( tv_l2_data *data,
			     gsl_matrix* u )
{
  data->_alpha = 1.0;
  tv_l2_workspace *w = data->_workspace;
  if ( u != NULL ) {
    cuda_memcpy( w->_u, u );
  }
  CUDA_SAFE_CALL( cudaMemset( w->_x1, 0, data->_nfbytes ));
  CUDA_SAFE_CALL( cudaMemset( w->_x2, 0, data->_nfbytes ));
  CUDA_SAFE_CALL( cudaMemset( w->_x1e, 0, data->_nfbytes ));
  CUDA_SAFE_CALL( cudaMemset( w->_x2e, 0, data->_nfbytes ));
  CUDA_SAFE_CALL( cudaMemset( w->_y1, 0, data->_nfbytes ));
  CUDA_SAFE_CALL( cudaMemset( w->_y2, 0, data->_nfbytes ));
  CUDA_SAFE_CALL( cudaThreadSynchronize() );
  return true;
}

// Get current solution
bool coco::tv_l2_get_solution( tv_l2_data *data,
				  gsl_matrix* u )
{
  tv_l2_workspace *w = data->_workspace;
  assert( u->size2 == data->_W );
  assert( u->size1 == data->_H );
  cuda_memcpy( u, w->_u );
  CUDA_SAFE_CALL( cudaThreadSynchronize() );
  return true;
}


// Get dual variable XI (vector of dimension 2)
bool coco::tv_l2_get_dual_xi( tv_l2_data *data,
			      vector<gsl_matrix*> &XI )
{
  tv_l2_workspace *w = data->_workspace;
  assert( XI.size() == 2 );
  for ( size_t i=0; i<2; i++ ) {
    gsl_matrix *xi = XI[i];
    assert( xi->size2 == data->_W );
    assert( xi->size1 == data->_H );
  }
  cuda_memcpy( XI[0], w->_x1 );
  cuda_memcpy( XI[1], w->_x2 );
  CUDA_SAFE_CALL( cudaThreadSynchronize() );
  return true;
}


double coco::tv_l2_energy( tv_l2_data *data )
{
  // Slow: works on CPU
  // Copy back fields
  tv_l2_workspace *w = data->_workspace;
  size_t W = data->_W;
  size_t H = data->_H;
  float *f = new float[W*H];
  float *u = new float[W*H];
  CUDA_SAFE_CALL( cudaMemcpy( f, w->_f, W*H*sizeof(float), cudaMemcpyDeviceToHost ));
  CUDA_SAFE_CALL( cudaMemcpy( u, w->_u, W*H*sizeof(float), cudaMemcpyDeviceToHost ));
  CUDA_SAFE_CALL( cudaThreadSynchronize() );
  // Compute energy
  double energy = 0.0;
  double ef = 1.0 / (2.0 * data->_lambda);
  size_t n = 0;
  for ( size_t y=0; y<H-1; y++ ) {
    for ( size_t x=0; x<W-1; x++ ) {
      energy += hypotf( u[n+1]-u[n], u[n+W]-u[n] );
      energy += ef * powf( f[n] - u[n], 2.0f );
      n++;
    }
    // Skip one.
    n++;
  }
  // Cleanup and return
  delete[] u;
  delete[] f;
  return energy / double(W*H);
}