// This file is part of fluid3d - a software for image registration
//
// Copyright (c) Madrid 2013 Gert Wollny
//
// fluid3d is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with fluid3d; if not, see <http://www.gnu.org/licenses/>.
// evaluate the gradient and the per pixel squared difference of the
// image intensities
// the latter will need post-processing to evaluate the actual SSD
__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
__constant int4 dx = {1,0,0,0};
__constant int4 dy = {0,1,0,0};
__constant int4 dz = {0,0,1,0};
__kernel void ssd_grad(__read_only image3d_t a, __read_only image3d_t b, __global float4 *grad,
int line_stride, int slice_stride, __global float *sum)
{
int4 pos= {get_global_id(0), get_global_id(1), get_global_id(2), 0};
int lpos = pos.x + pos.y * line_stride + pos.z * slice_stride;
float4 aa = read_imagef(a, pos);
float4 bb = read_imagef(b, pos);
float delta = aa.x - bb.x;
float3 ap, am;
float4 px = read_imagef(a, pos + dx);
float4 mx = read_imagef(a, pos - dx);
float4 py = read_imagef(a, pos + dy);
float4 my = read_imagef(a, pos - dy);
float4 pz = read_imagef(a, pos + dz);
float4 mz = read_imagef(a, pos - dz);
ap.x = px.x;
am.x = mx.x;
ap.y = py.y;
am.y = my.y;
ap.z = pz.z;
am.z = mz.z;
float dh = delta * 0.5f;
float4 g;
g.xyz = dh * (ap - am);
g.w = length(g.xyz);
grad[lpos] = g;
sum[lpos] = delta * delta;
}
__kernel void ssd_value(__read_only image3d_t a, __read_only image3d_t b,
int line_stride, int slice_stride, __global float *diff)
{
int4 pos= {get_global_id(0), get_global_id(1), get_global_id(2), 0};
int lpos = pos.x + pos.y * line_stride + pos.z * slice_stride;
float4 delta = read_imagef(a, pos) - read_imagef(b, pos);
diff[lpos] = delta.x * delta.x;
}
__kernel void ssd_grad_from_buf(__global float *a, __global float *b, __global float4 *grad,
int line_stride, int slice_stride, int4 dim, __global float *sum)
{
int4 pos4 = {get_global_id(0), get_global_id(1), get_global_id(2), 0};
int pos = pos4.x + pos4.y * line_stride + pos4.z * slice_stride;
float aa = a[pos];
float bb = b[pos];
float delta = aa - bb;
float3 ap = {aa, aa, aa};
float3 am = ap;
if (pos4.x > 0)
am.x = a[pos - 1];
if (pos4.x < dim.x - 1)
ap.x = a[pos + 1];
if (pos4.y > 0)
am.y = a[pos - line_stride];
if (pos4.y < dim.y - 1)
ap.y = a[pos + line_stride];
if (pos4.z > 0)
am.z = a[pos - slice_stride];
if (pos4.z < dim.z - 1)
ap.z = a[pos + slice_stride];
float dh = delta * 0.5f;
float4 g;
g.xyz = dh * (ap - am);
g.w = length(g.xyz);
grad[pos] = g;
sum[pos] = delta * delta;
}
__kernel void ssd_value_from_buf(__global float4 *a, __global float4 *b, __global float4 *diff)
{
size_t i = get_global_id(0);
float4 delta = a[i] - b[i];
diff[i] = delta * delta;
}
__kernel void ssd_grad_from_mixed(__read_only image3d_t a, __global float *b, __global float4 *grad,
int line_stride, int slice_stride, __global float *sum)
{
int4 pos4 = {get_global_id(0), get_global_id(1), get_global_id(2), 0};
int pos = pos4.z + pos4.y * line_stride + pos4.x * slice_stride;
float4 aa = read_imagef(a, pos4 + dx);
float bb = b[pos];
float delta = aa.x - bb;
float3 ap, am;
float4 px = read_imagef(a, pos4 + dx);
float4 mx = read_imagef(a, pos4 - dx);
float4 py = read_imagef(a, pos4 + dy);
float4 my = read_imagef(a, pos4 - dy);
float4 pz = read_imagef(a, pos4 + dz);
float4 mz = read_imagef(a, pos4 - dz);
ap.x = px.x;
am.x = mx.x;
ap.y = py.x;
am.y = my.x;
ap.z = pz.x;
am.z = mz.x;
float dh = delta * 0.5f;
float4 g;
g.xyz = dh * (ap - am);
g.w = length(g.xyz);
grad[pos] = g;
sum[pos] = delta * delta;
}
__kernel void ssd_value_from_mixed(__read_only image3d_t a, __global float4 *b, int line_stride, int slice_stride, __global float4 *diff)
{
int4 pos = {get_global_id(0), get_global_id(1), get_global_id(2), 0};
int i = pos.x + pos.y * line_stride + pos.z * slice_stride;
float4 delta = read_imagef(a, pos) - b[i];
diff[i] = delta * delta;
}