// Patch summary for matrix_opencl.cpp (five hunks, all visible below):
//   1. Adds a "// NAIVE" marker above the commented-out definition of
//      kernel_source_matrix_mul.
//   2. Adds a blank line and a "// FASTER" marker above the active definition
//      of kernel_source_matrix_mul.
//   3. Inside the active matrix_mul kernel, changes the declaration
//      `float Awrk[10000];` to `float Awrk[4096];`.
//   4. Adds a "// NAIVE VERSION" marker above the commented-out
//      MatrixCL::operator*.
//   5. Adds a "// FASTER VERSION" marker above the active MatrixCL::operator*.
//
// NOTE(review): the context of hunk 3 shows Awrk being filled by
// `for (k = 0; k < K; k++) Awrk[k] = A[i * K + k];`. No check of K against the
// new array size of 4096 is visible in this hunk -- confirm that the host side
// (or the part of the kernel not shown here) guarantees K <= 4096, otherwise
// the loop writes past the end of Awrk.
// NOTE(review): hunks 1, 2, 4 and 5 only add comments; the only change to
// executable text in this patch is the Awrk array size in hunk 3.
diff --git a/matrix_opencl.cpp b/matrix_opencl.cpp index b1608486c19d12a911956d74b7bda50723572577..6786c1c829d8c36002fa23d8a597fb59c53d56b2 100644 --- a/matrix_opencl.cpp +++ b/matrix_opencl.cpp @@ -76,6 +76,7 @@ const std::string kernel_source_transpose = R"( B[output_idx] = A[input_idx]; } )"; +// NAIVE /*const std::string kernel_source_matrix_mul = R"( __kernel void matrix_mul(__global const float* A, __global const float* B, __global float* C, int A_rows, int A_cols, int B_cols) { int row = get_global_id(0); @@ -85,6 +86,8 @@ const std::string kernel_source_transpose = R"( } } )";*/ + +// FASTER const std::string kernel_source_matrix_mul = R"( __kernel void matrix_mul(__global const float* A, __global const float* B, @@ -98,7 +101,7 @@ const std::string kernel_source_matrix_mul = R"( int nloc = get_local_size(0); float tmp; - float Awrk[10000]; + float Awrk[4096]; for (k = 0; k < K; k++) Awrk[k] = A[i * K + k]; @@ -344,6 +347,7 @@ MatrixCL MatrixCL::operator+(const MatrixCL& other) const { return result; } +// NAIVE VERSION /*MatrixCL MatrixCL::operator*(const MatrixCL& other) const { if (cols_ != other.rows_) throw std::runtime_error("Matrix dimension error."); @@ -366,6 +370,7 @@ MatrixCL MatrixCL::operator+(const MatrixCL& other) const { return result; }*/ +// FASTER VERSION MatrixCL MatrixCL::operator*(const MatrixCL& other) const { if (cols_ != other.rows_) throw std::runtime_error("Matrix dimension error.");