/* -*-c++-*- */
/** \file tc_linear.cu
Algorithms to solve the TV model with linear data term.
Workspace handling and access code.
Copyright (C) 2010 Bastian Goldluecke,
<first name>AT<last name>.net
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <iostream>
#include <vector>
#include <cassert>
#include <cstring>
#include "tc_linear.h"
#include "tc.cuh"
#include "tc_arrays.cuh"
#include "tc_energies.cuh"
#include "../defs.h"
#include "../cuda/cuda_helper.h"
#include "../common/gsl_image.h"
#include "../common/gsl_matrix_derivatives.h"
#include "../common/menger_curvature.h"
#include "../common/profiler.h"
using namespace std;
using namespace coco;
// Alloc PDE data with sensible defaults
coco::tc_data* coco::tc_data_alloc( size_t N, gsl_matrix *a )
{
tc_data *data = new tc_data;
size_t W = a->size2; // gsl_matrix: size2 = columns
size_t H = a->size1; // gsl_matrix: size1 = rows
data->_W = W;
data->_H = H;
data->_N = N;
data->_iteration = 0;
// Params
data->_alpha = 0.1;
data->_p = 2.0;
data->_lambda = 0.01;
data->_inner_iterations = 15;
// Workspace
data->_workspace = new tc_workspace;
// Zero all POD fields; relies on the non-POD members (the vectors in _p)
// still being empty here, since memset over them is technically undefined
memset( data->_workspace, 0, sizeof( tc_workspace ));
tc_workspace *w = data->_workspace;
// Neighborhood radius: N must be odd, offsets range over [-_N2, _N2]
w->_N2 = (N-1)/2;
assert( w->_N2 * 2 + 1 == (int)data->_N );
w->_N = data->_N;
w->_H = data->_H;
w->_W = data->_W;
// CUDA arrays
w->_Nf = W*H*sizeof(stcflt);
w->_Nv = N*N*N*N*w->_Nf;
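// Worked example (hypothetical sizes): for W = H = 256, N = 5 and
// stcflt = float, _Nf = 256*256*4 bytes = 256 KiB per scalar field,
// and _Nv = 5^4 * _Nf = 625 * 256 KiB, roughly 156 MiB per dual field.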
w->_num_dual = NUM_DUAL;
// Compute total required memory (before allocating)
TRACE( "Allocating tc data " << W << " x " << H << " x " << N << " ..." << endl );
size_t bpM = 1048576; // bytes per megabyte
// u, uq, u_star, D, a, f
size_t nprimal = ( w->_Nf * 6 ) / bpM;
TRACE( " " << nprimal << " Mb for primal and auxiliary variables." << endl );
// px, py
size_t ndual = ( 2 * w->_Nf ) / bpM;
// one N^4-component field per dual variable
ndual += ( w->_num_dual * w->_Nv + w->_Nf ) / bpM;
TRACE( " " << ndual << " Mb for dual variables." << endl );
size_t ntotal_gpu = nprimal + ndual;
TRACE( "Total memory required (gpu): " << ntotal_gpu << " Mb." << endl );
// Primal and auxiliary variables
CUDA_SAFE_CALL( cudaMalloc( &w->_u, w->_Nf ));
CUDA_SAFE_CALL( cudaMalloc( &w->_uq, w->_Nf ));
CUDA_SAFE_CALL( cudaMalloc( &w->_u_star, w->_Nf ));
CUDA_SAFE_CALL( cudaMalloc( &w->_D, w->_Nf ));
// Data term
CUDA_SAFE_CALL( cudaMalloc( &w->_a, w->_Nf ));
CUDA_SAFE_CALL( cudaMalloc( &w->_f, w->_Nf ));
cuda_memcpy( w->_a, a );
cuda_memcpy( w->_f, a );
// Epigraph variables (currently disabled)
// cvl_alloc_array( w, w->_v );
// cvl_alloc_array( w, w->_vq );
// cvl_alloc_array( w, w->_vf );
// Dual variables
TRACE( "Alloc dual variables." << endl );
CUDA_SAFE_CALL( cudaMalloc( &w->_px, w->_Nf ));
CUDA_SAFE_CALL( cudaMalloc( &w->_py, w->_Nf ));
for ( size_t i=0; i<w->_num_dual; i++ ) {
vector<stcflt*> &xiv = w->_p[i];
cvl_alloc_array( w, xiv );
}
TRACE( "Alloc complete." << endl );
// Precomputed curvature weights
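// One weight is stored per 4-tuple of offsets (y0o, y1o, z0o, z1o),
// each component in [-N2, N2]; AIND linearizes the shifted tuple
// into a flat index in [0, N^4).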
int N2 = w->_N2;
int N4 = N*N*N*N;
w->_cp_cpu = new stcflt[ N4 ];
for ( int y0o=-N2; y0o<=N2; y0o++ ) {
for ( int y1o=-N2; y1o<=N2; y1o++ ) {
for ( int z0o=-N2; z0o<=N2; z0o++ ) {
for ( int z1o=-N2; z1o<=N2; z1o++ ) {
int idx = AIND( y0o+N2, y1o+N2, z0o+N2, z1o+N2 );
assert( idx >= 0 && idx < N4 );
w->_cp_cpu[ idx ] = menger_curvature_weight( y0o, y1o, z0o, z1o );
}
}
}
}
CUDA_SAFE_CALL( cudaMalloc( &w->_cp, N4*sizeof( stcflt ) ));
CUDA_SAFE_CALL( cudaMemcpy( w->_cp, w->_cp_cpu, N4*sizeof( stcflt ), cudaMemcpyHostToDevice ));
CUDA_SAFE_CALL( cudaThreadSynchronize() );
// Finalize
TRACE( "Finalizing init." << endl );
w->_sigma = 1.0;
w->_lambda = data->_lambda;
w->_rof_lambda = data->_lambda;
w->_L = 1.0;
w->_t = 1.0;
// Optional model components, set later via tc_set_inpainting_mask / tc_set_kernel
w->_mask = NULL;
w->_b = NULL;
w->_bq = NULL;
//
w->_energy_scale_u = 1.0 / double(W*H);         // normalize energy per pixel
w->_energy_scale_v = 1.0 / double(W*H*N*N*N*N); // normalize per pixel and offset tuple
// Block sizes: image dimensions must be multiples of CUDA_BLOCK_SIZE
assert( (W%CUDA_BLOCK_SIZE)==0 );
assert( (H%CUDA_BLOCK_SIZE)==0 );
w->_dimBlock = dim3(CUDA_BLOCK_SIZE, CUDA_BLOCK_SIZE);
w->_dimGrid = dim3(W / w->_dimBlock.x, H / w->_dimBlock.y);
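// With CUDA_BLOCK_SIZE = 16 and W = H = 256 (hypothetical values),
// this yields a 16 x 16 grid of 16 x 16 threads.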
CUDA_SAFE_CALL( cudaThreadSynchronize() );
return data;
}
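// Usage sketch (hypothetical driver; the solver iteration itself is
// implemented elsewhere and only indicated here):
//
//   gsl_matrix *a = ...;                             // H x W input image
//   coco::tc_data *tc = coco::tc_data_alloc( 5, a );
//   coco::tc_initialize( tc, a );
//   // ... run solver iterations ...
//   coco::tc_get_solution( tc, a );
//   coco::tc_data_free( tc );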
// Set mask for inpainting model
bool coco::tc_set_inpainting_mask( tc_data *data, gsl_matrix *mask )
{
tc_workspace *w = data->_workspace;
CUDA_SAFE_CALL( cudaMalloc( &w->_mask, w->_Nf ));
cuda_memcpy( w->_mask, mask );
return true;
}
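// Example (hypothetical; the exact mask semantics are defined by the
// solver kernels): a binary H x W matrix, e.g. 1 where the input is
// known and 0 where it should be inpainted,
//
//   gsl_matrix *mask = gsl_matrix_alloc( H, W );
//   /* ... fill mask ... */
//   coco::tc_set_inpainting_mask( data, mask );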
// Free up PDE data
bool coco::tc_data_free( tc_data *data )
{
// Free CUDA arrays
tc_workspace *w = data->_workspace;
CUDA_SAFE_CALL( cudaFree( w->_u ));
CUDA_SAFE_CALL( cudaFree( w->_uq ));
CUDA_SAFE_CALL( cudaFree( w->_u_star ));
CUDA_SAFE_CALL( cudaFree( w->_D ));
CUDA_SAFE_CALL( cudaFree( w->_a ));
CUDA_SAFE_CALL( cudaFree( w->_f ));
CUDA_SAFE_CALL( cudaFree( w->_cp ));
// Inpainting mask (allocated only if tc_set_inpainting_mask was called)
if ( w->_mask != NULL ) {
CUDA_SAFE_CALL( cudaFree( w->_mask ));
}
// Dual vars
CUDA_SAFE_CALL( cudaFree( w->_px ));
CUDA_SAFE_CALL( cudaFree( w->_py ));
for ( size_t i=0; i<w->_num_dual; i++ ) {
vector<stcflt*> &xi = w->_p[i];
cvl_free_array( w, xi );
}
// Kernels
#ifdef CUDA_DOUBLE
if ( w->_b != NULL ) {
// _bq may alias _b (separable case); free it only if distinct
if ( w->_bq != NULL && w->_bq != w->_b ) {
cuda_kernel_dbl_free( w->_bq );
}
cuda_kernel_dbl_free( w->_b );
}
#else
if ( w->_b != NULL ) {
// _bq may alias _b (separable case); free it only if distinct
if ( w->_bq != NULL && w->_bq != w->_b ) {
cuda_kernel_free( w->_bq );
}
cuda_kernel_free( w->_b );
}
#endif
// Free up workspace
delete[] w->_cp_cpu;
delete data->_workspace;
delete data;
return true;
}
// Get current solution
bool coco::tc_get_solution( tc_data *data,
gsl_matrix* u )
{
tc_workspace *w = data->_workspace;
cuda_memcpy( u, w->_u );
// Wait for GPU and return
CUDA_SAFE_CALL( cudaThreadSynchronize() );
return true;
}
// Initialize workspace with current solution
bool coco::tc_initialize( tc_data *data,
gsl_matrix* u )
{
// Copy u
tc_workspace *w = data->_workspace;
// TEST: Neutral init
//gsl_matrix_set_all( u, 0.0 );
// Copy current solution to GPU
data->_iteration = 0;
cuda_memcpy( w->_u, u );
cuda_memcpy( w->_uq, u );
// Init v
/*
tc_init_product_field( data, w->_vq, w->_u );
*/
// Clear dual variables
CUDA_SAFE_CALL( cudaMemset( w->_px, 0, w->_Nf ));
CUDA_SAFE_CALL( cudaMemset( w->_py, 0, w->_Nf ));
// Init dual for v
//reset_dual_variables( data, w->_v );
for ( size_t i=0; i<w->_num_dual; i++ ) {
cvl_clear_array( w, w->_p[i] );
}
// Wait for GPU and return
CUDA_SAFE_CALL( cudaThreadSynchronize() );
return true;
}
// Init separable convolution kernel
bool coco::tc_set_separable_kernel( tc_data *data, gsl_vector *kernel )
{
tc_workspace *w = data->_workspace;
assert( w->_b == NULL );
assert( w->_bq == NULL );
#ifdef CUDA_DOUBLE
w->_b = coco::cuda_kernel_dbl_alloc_separable( kernel, kernel );
w->_bq = w->_b;
#else
w->_b = coco::cuda_kernel_alloc_separable( kernel, kernel );
w->_bq = w->_b;
#endif
// TODO: _bq should hold the adjoint (flipped) kernel; here we assume
// the kernel is symmetric, so _bq can alias _b
cout << "WARNING: ASSUMING SYMMETRIC KERNEL IN DEBLURRING." << endl;
return true;
}
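// Example (hypothetical kernel values): a normalized 5-tap binomial
// blur, applied separably in x and y,
//
//   gsl_vector *k = gsl_vector_alloc( 5 );
//   double b[5] = { 1.0, 4.0, 6.0, 4.0, 1.0 };
//   for ( size_t i=0; i<5; i++ ) gsl_vector_set( k, i, b[i] / 16.0 );
//   coco::tc_set_separable_kernel( data, k );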
// Init general (non-separable) convolution kernel
bool coco::tc_set_kernel( tc_data *data, gsl_matrix *kernel )
{
tc_workspace *w = data->_workspace;
assert( w->_b == NULL );
assert( w->_bq == NULL );
gsl_matrix *kq = gsl_matrix_alloc( kernel->size2, kernel->size1 );
gsl_matrix_transpose_memcpy( kq, kernel );
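// kq is the transposed kernel, presumably used for the adjoint of the
// blur operator in the dual updates (assumption, not verified here).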
#ifdef CUDA_DOUBLE
w->_b = coco::cuda_kernel_dbl_alloc( kernel );
w->_bq = coco::cuda_kernel_dbl_alloc( kq );
#else
w->_b = coco::cuda_kernel_alloc( kernel );
w->_bq = coco::cuda_kernel_alloc( kq );
#endif
return true;
}