-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathparameter.h
172 lines (146 loc) · 3.91 KB
/
parameter.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
#pragma once
#ifndef GPGPU_PARAMETER_LIB
#define GPGPU_PARAMETER_LIB
#include "gpgpu_init.hpp"
#include "context.h"
#include <memory>
#include <algorithm>
// Forward declarations of the GPGPU_LIB types that GPGPU::HostParameter names
// in its friend declarations below (friends are granted access to its private
// members). Only the names are introduced here.
namespace GPGPU_LIB
{
struct Parameter;
struct CommandQueue;
}
namespace GPGPU
{
// Forward declarations of the GPGPU types that HostParameter lists as friends.
struct Computer;
struct Worker;
// Per-program allocated host memory.
//
// Describes a named buffer of `n` elements, each `elementSize` bytes wide, and
// offers typed views over the raw byte storage reachable through `quickPtr`.
// Copies are shallow: the shared_ptr and the raw pointers are copied, so a
// copy refers to the same storage as its source instead of duplicating it.
struct HostParameter
{
    friend struct GPGPU_LIB::Parameter;
    friend struct Worker;
    friend struct GPGPU_LIB::CommandQueue;
    friend struct Computer;
private:
    std::string name;
    // number of elements in the buffer
    size_t n;
    // number of bytes per element
    size_t elementSize;
    // presumably the per-thread element count passed to the constructor as
    // elementsPerThread -- TODO confirm in the constructor definition
    size_t elementsPerThr;
    // presumably owns the allocation that quickPtr points into -- TODO confirm
    std::shared_ptr<int8_t> ptr;
    std::vector<std::string> prmList;
    // points to 4096-aligned region, has a size of multiple of 4096 bytes (for zero-copy access from CPU, iGPU)
    // todo: also make the copies multiple of 4096 bytes (if that is the last chunk of parameter to copy [i.e. last device to run it] )
    int8_t* quickPtr;
    int8_t* quickPtrVal;
    // NOTE(review): the flags below presumably mirror the constructor's
    // read / write / readAll / writeAll / isScalar arguments -- confirm in the
    // constructor definition, which is not part of this header.
    bool readOp;
    bool writeOp;
    bool readAllOp;
    bool writeAllOp;
    bool scalar;
public:
    // Declared here, defined in another translation unit. Every argument has a
    // default, so this also serves as the default constructor.
    HostParameter(
        std::string parameterName = "",
        size_t nElements = 1,
        size_t sizeElement = 1,
        size_t elementsPerThread = 1,
        bool read = false,
        bool write = false,
        bool readAll = false,
        bool writeAll = false,
        bool isScalar = false
    );

    // Memberwise (shallow) copy; spelled out so that the copy constructor and
    // the copy assignment operator are declared as a pair.
    HostParameter(const HostParameter&) = default;

    // Returns the value of the scalar flag.
    bool isScalar() const { return scalar; }

    // Returns a reference to the element at `index`, viewing the byte buffer as T.
    // The byte offset is index * elementSize (not index * sizeof(T)).
    // No bounds checking is performed.
    template<typename T>
    T& access(size_t index)
    {
        return *reinterpret_cast<T*>(quickPtr + (index * elementSize));
    }

    // Same addressing as access(), but returns a pointer instead of a reference.
    template<typename T>
    T* accessPtr(size_t index)
    {
        return reinterpret_cast<T*>(quickPtr + (index * elementSize));
    }

    HostParameter next(HostParameter prm);

    // read buffer and write to region starting at ptrPrm
    // numElements=0 means all elements are copied (elementOffset is then ignored)
    // The source range spans numElements * elementSize bytes and is walked in
    // steps of sizeof(T), so elementSize must be a multiple of sizeof(T).
    template<typename T>
    void copyDataToPtr(T * ptrPrm, size_t numElements=0, size_t elementOffset=0)
    {
        elementOffset = (numElements == 0 ? 0 : elementOffset);
        numElements = (numElements == 0 ? n : numElements);
        std::copy(
            reinterpret_cast<T*>(quickPtr + (elementOffset * elementSize)),
            reinterpret_cast<T*>(quickPtr + ((elementOffset + numElements) * elementSize)),
            ptrPrm);
    }

    // read region starting from ptrPrm and write to buffer
    // numElements=0 means all elements are copied (elementOffset is then ignored)
    // NOTE(review): exactly numElements objects of type T are copied here,
    // whereas copyDataToPtr copies numElements * elementSize bytes; the two
    // only agree when sizeof(T) == elementSize -- confirm this is intended.
    template<typename T>
    void copyDataFromPtr(T* ptrPrm, size_t numElements=0, size_t elementOffset=0)
    {
        elementOffset = (numElements == 0 ? 0 : elementOffset);
        numElements = (numElements == 0 ? n : numElements);
        std::copy(
            ptrPrm,
            ptrPrm+numElements,
            reinterpret_cast<T*>(quickPtr + (elementOffset * elementSize))
        );
    }

    std::string getName();

    // number of bytes per element
    size_t getElementSize() const
    {
        return elementSize;
    }

    // sets all elements to the newValue value
    // The n * elementSize bytes of storage are filled as a sequence of T, so
    // that byte count must be a multiple of sizeof(T).
    // Returns *this, following the usual assignment-operator convention.
    template<typename T>
    HostParameter& operator = (const T& newValue)
    {
        std::fill(
            reinterpret_cast<T*>(quickPtr),
            reinterpret_cast<T*>(quickPtr + (n * elementSize)),
            newValue
        );
        return *this;
    }

    // Memberwise (shallow) copy assignment: every field is copied, including
    // the shared_ptr and the raw buffer pointers, so both objects refer to the
    // same storage afterwards. Returns *this so assignments can be chained.
    HostParameter& operator = (const HostParameter& hPrm) = default;
};
}
namespace GPGPU_LIB
{
// Per-device allocated memory.
//
// Bundles a cl::Buffer with a HostParameter (held by value) and a set of
// size/flag fields. All members are public.
struct Parameter
{
    std::string name;
    size_t n;
    size_t elementSize;
    size_t elementsPerThread;
    cl::Buffer buffer;
    GPGPU::HostParameter hostPrm;
    // NOTE(review): these flags presumably mirror the ones stored in hostPrm --
    // confirm in the constructor definition, which is not part of this header.
    bool readOp;
    bool writeOp;
    bool readAll;
    bool writeAll;
    bool scalar;

    // Declared here, defined in another translation unit. Both arguments are
    // defaulted, so this also serves as the default constructor.
    Parameter(Context con = Context(), GPGPU::HostParameter hostParameter = GPGPU::HostParameter());

    // Returns the value of the scalar flag.
    bool isScalar() const { return scalar; }
};
}
#endif // !GPGPU_PARAMETER_LIB