[go: up one dir, main page]

Menu

[2694c9]: / src / ssd.py  Maximize  Restore  History

Download this file

82 lines (59 with data), 3.5 kB

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import pyopencl as cl
import pyopencl.array as cl_array
import numpy as np
from OpenCLHelpers import read_cl_program
from reduce4 import CReduce4
class CostSSD:
    """Sum-of-squared-differences cost evaluated by the kernels in ``ssd.cl``.

    Every ``cost*`` method launches one kernel that fills a scratch buffer
    of partial sums and then hands that buffer to ``CReduce4.run``; the
    value returned by the reduction is returned unchanged.  When a gradient
    buffer is supplied, the ``ssd_grad*`` kernel variant is launched instead
    of the ``ssd_value*`` one and receives that buffer as an extra argument.
    """

    def __init__(self, device):
        """Build the ``ssd.cl`` program and the reduction helper.

        :param device: project device wrapper; this class uses its
            ``context``, ``queue`` and ``get_rw_buffer`` attributes.
        """
        self.cldevice = device
        self.prog = read_cl_program(self.cldevice.context, "ssd.cl").build()
        self.reduce = CReduce4(self.cldevice)

    @staticmethod
    def _layout(shape):
        """Return ``(line_stride, slice_stride, num_values)`` for *shape*.

        The strides are ``np.int32`` because they are passed as scalar
        kernel arguments.  The element count is computed with plain Python
        integers so the product cannot wrap around in 32-bit arithmetic for
        large volumes.
        """
        width, height, depth = int(shape[0]), int(shape[1]), int(shape[2])
        num_values = width * height * depth
        # The kernels/reduction work on groups of four values.
        assert num_values & 3 == 0
        return np.int32(width), np.int32(width * height), num_values

    def _partial_sums(self, num_values):
        """Allocate the scratch buffer the kernels write partial sums to.

        The size requested is ``16 * num_values`` (presumably bytes, i.e.
        room for one float4 per value -- TODO confirm against ``ssd.cl``).
        """
        return self.cldevice.get_rw_buffer(16 * num_values)

    def cost(self, a_dev, b_dev, out_grad_dev):
        """Evaluate the cost for two device objects exposing ``.shape``.

        :param a_dev: first input; must expose a 3-element ``shape``
            (presumably a 3D OpenCL image -- TODO confirm).
        :param b_dev: second input with the same ``shape`` as *a_dev*.
        :param out_grad_dev: buffer receiving the gradient, or ``None`` to
            compute the value only.  Its size must be 16 times the number
            of elements described by ``a_dev.shape``.
        :returns: the result of ``self.reduce.run`` on the partial sums.
        :raises AssertionError: on mismatching shapes/sizes or when the
            element count is not a multiple of 4.
        """
        assert a_dev.shape == b_dev.shape
        line_stride, slice_stride, num_values = self._layout(a_dev.shape)
        part_sum_dev = self._partial_sums(num_values)

        if out_grad_dev is None:
            self.prog.ssd_value(self.cldevice.queue, a_dev.shape, None,
                                a_dev, b_dev,
                                line_stride, slice_stride, part_sum_dev)
        else:
            grad_size = out_grad_dev.get_info(cl.mem_info.SIZE)
            assert num_values * 16 == grad_size
            self.prog.ssd_grad(self.cldevice.queue, a_dev.shape, None,
                               a_dev, b_dev, out_grad_dev,
                               line_stride, slice_stride, part_sum_dev)

        return self.reduce.run(part_sum_dev, num_values)

    def cost_from_buf(self, shape, a_dev, b_dev, out_grad_dev):
        """Evaluate the cost for two plain buffers of equal size.

        :param shape: 3-element volume shape describing both buffers.
        :param a_dev: first input buffer.
        :param b_dev: second input buffer; same size as *a_dev*.
        :param out_grad_dev: buffer receiving the gradient, or ``None`` to
            compute the value only.  Its size must be four times the size
            of *a_dev*.
        :returns: the result of ``self.reduce.run`` on the partial sums.
        :raises AssertionError: on mismatching sizes or when the element
            count is not a multiple of 4.
        """
        a_size = a_dev.get_info(cl.mem_info.SIZE)
        assert a_size == b_dev.get_info(cl.mem_info.SIZE)
        line_stride, slice_stride, num_values = self._layout(shape)
        part_sum_dev = self._partial_sums(num_values)

        if out_grad_dev is None:
            # One work item per group of four values.  Floor division keeps
            # the global size an integer on Python 3; it is exact because
            # num_values was asserted to be a multiple of 4.
            self.prog.ssd_value_from_buf(self.cldevice.queue,
                                         (num_values // 4,), None,
                                         a_dev, b_dev, part_sum_dev)
        else:
            assert a_size * 4 == out_grad_dev.get_info(cl.mem_info.SIZE)
            # Only the gradient kernel takes the shape as an int4 vector;
            # the fourth component is padding.
            sshape = list(shape)
            sshape.append(0)
            dim = np.array(tuple(sshape), dtype=cl_array.vec.int4)
            self.prog.ssd_grad_from_buf(self.cldevice.queue, shape, None,
                                        a_dev, b_dev, out_grad_dev,
                                        line_stride, slice_stride, dim,
                                        part_sum_dev)

        return self.reduce.run(part_sum_dev, num_values)

    def cost_from_mixed(self, a_dev, b_dev, out_grad_dev):
        """Evaluate the cost using the ``*_from_mixed`` kernels.

        :param a_dev: first input; must expose both a 3-element ``shape``
            and ``get_info`` (presumably an image -- TODO confirm).
        :param b_dev: second input (presumably a plain buffer -- TODO
            confirm); its size must equal the size of *a_dev*.
        :param out_grad_dev: buffer receiving the gradient, or ``None`` to
            compute the value only.  Its size must be four times the size
            of *b_dev*.
        :returns: the result of ``self.reduce.run`` on the partial sums.
        :raises AssertionError: on mismatching sizes or when the element
            count is not a multiple of 4.
        """
        b_size = b_dev.get_info(cl.mem_info.SIZE)
        assert a_dev.get_info(cl.mem_info.SIZE) == b_size
        line_stride, slice_stride, num_values = self._layout(a_dev.shape)
        part_sum_dev = self._partial_sums(num_values)

        if out_grad_dev is None:
            # The value-only kernel takes no stride arguments.
            self.prog.ssd_value_from_mixed(self.cldevice.queue, a_dev.shape,
                                           None, a_dev, b_dev, part_sum_dev)
        else:
            assert b_size * 4 == out_grad_dev.get_info(cl.mem_info.SIZE)
            self.prog.ssd_grad_from_mixed(self.cldevice.queue, a_dev.shape,
                                          None, a_dev, b_dev, out_grad_dev,
                                          line_stride, slice_stride,
                                          part_sum_dev)

        return self.reduce.run(part_sum_dev, num_values)