diff --git a/main.cpp b/main.cpp
index e035676c6c2905132c89b4e0c28de105f0cd0408..a36d5e6eec167c3431b2a17203e0ac60835da336 100644
--- a/main.cpp
+++ b/main.cpp
@@ -106,7 +106,17 @@ int main(int argc, char** argv) {
     std::cout << "Using Device: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;
 
     cl::Context context(device);
-    cl::CommandQueue queue(context, device, CL_QUEUE_PROFILING_ENABLE); // Keep profiling enabled
+    cl_int err;
+    cl_command_queue cq = clCreateCommandQueue(context(), device(), CL_QUEUE_PROFILING_ENABLE, &err);
+    if (err != CL_SUCCESS) {
+        std::cerr << "Failed to create command queue: " << err << std::endl;
+        exit(1);
+    }
+    // clCreateCommandQueue already handed us one reference. Adopt it without an
+    // extra retain (retainOwnership = false) so the wrapper's destructor performs
+    // the only release; passing true would leave one reference that is never released.
+    cl::CommandQueue queue(cq, false);
+
     std::vector<cl::Device> devices_to_init = {device};
 
     try {
diff --git a/matrix_opencl.cpp b/matrix_opencl.cpp
index 6786c1c829d8c36002fa23d8a597fb59c53d56b2..bd052dc75ac7a1ff3d7837e80c5879db12d49416 100644
--- a/matrix_opencl.cpp
+++ b/matrix_opencl.cpp
@@ -77,7 +77,7 @@ const std::string kernel_source_transpose = R"(
     }
 )";
 // NAIVE
-/*const std::string kernel_source_matrix_mul = R"(
+const std::string kernel_source_matrix_mul = R"(
     __kernel void matrix_mul(__global const float* A, __global const float* B, __global float* C, int A_rows, int A_cols, int B_cols) {
         int row = get_global_id(0);
         int col = get_global_id(1);
@@ -85,10 +85,10 @@ const std::string kernel_source_transpose = R"(
             C[row * B_cols + col] += A[row * A_cols + k] * B[k * B_cols + col];
         }
     }
-)";*/
+)";
 
 // FASTER
-const std::string kernel_source_matrix_mul = R"(
+/*const std::string kernel_source_matrix_mul = R"(
 __kernel void matrix_mul(__global const float* A,
                          __global const float* B,
                          __global float* C,
@@ -120,7 +120,7 @@ const std::string kernel_source_matrix_mul = R"(
 
         barrier(CLK_LOCAL_MEM_FENCE);
     }
-})";
+})";*/
 const std::string kernel_source_sigmoid = R"(
     __kernel void sigmoid(__global const float* input, __global float* output, int rows, int cols) {
         int idx = get_global_id(0);
@@ -348,7 +348,7 @@ MatrixCL MatrixCL::operator+(const MatrixCL& other) const {
 }
 
 // NAIVE VERSION
-/*MatrixCL MatrixCL::operator*(const MatrixCL& other) const {
+MatrixCL MatrixCL::operator*(const MatrixCL& other) const {
     if (cols_ != other.rows_)
         throw std::runtime_error("Matrix dimension error.");
 
@@ -368,10 +368,10 @@ MatrixCL MatrixCL::operator+(const MatrixCL& other) const {
     }
 
     return result;
-}*/
+}
 
 // FASTER VERSION
-MatrixCL MatrixCL::operator*(const MatrixCL& other) const {
+/*MatrixCL MatrixCL::operator*(const MatrixCL& other) const {
     if (cols_ != other.rows_)
         throw std::runtime_error("Matrix dimension error.");
 
@@ -404,7 +404,7 @@ MatrixCL MatrixCL::operator*(const MatrixCL& other) const {
     }
 
     return result;
-}
+}*/
 
 MatrixCL MatrixCL::transpose() const {