// main.cpp
// Written by JordanHanotiaux
#include "matrix_opencl.hpp"
// Replace this with a proper header if possible
// #include "mlp_sgd.cpp" <-- Avoid this (do not include a .cpp file directly)
#include <cassert>
#include <chrono>
#include <cmath>
#include <cstddef>
#include <cstdlib>
#include <iostream>
#include <limits>
#include <stdexcept>
#include <string>
#include <vector>
/// Prints a matrix to stdout, one bracketed row per line.
///
/// A header line with the label and the dimensions is written first. The
/// matrix contents are then fetched with copyToHost() and read in row-major
/// order. A std::runtime_error raised while fetching or printing is reported
/// on stderr instead of being propagated.
///
/// @param label  Text printed in front of the dimensions.
/// @param mat    Matrix whose contents are displayed.
void printMatrix(const std::string& label, const MatrixCL& mat) {
    const auto row_count = mat.numRows();
    const auto col_count = mat.numCols();
    std::cout << label << " (" << row_count << "x" << col_count << "):\n";

    try {
        const std::vector<float> values = mat.copyToHost();
        // Rows are stored back to back, so a single running index visits the
        // elements in exactly the order row * col_count + col would.
        std::size_t next = 0;
        for (int row = 0; row < row_count; ++row) {
            std::cout << " [";
            for (int col = 0; col < col_count; ++col) {
                std::cout << " " << values[next];
                ++next;
            }
            std::cout << " ]\n";
        }
        std::cout << std::endl;
    } catch (const std::runtime_error& error) {
        std::cerr << "Error printing matrix: " << error.what() << std::endl;
    }
}
/// Reports whether two floats are equal within an absolute tolerance.
///
/// Values that compare exactly equal are always accepted. This covers
/// same-signed infinities (whose difference is NaN and would otherwise fail
/// the tolerance test) and callers passing a zero tolerance. NaN never
/// matches anything, including another NaN.
///
/// @param a        First value.
/// @param b        Second value.
/// @param epsilon  Maximum absolute difference (exclusive) still considered equal.
/// @return true when a == b or |a - b| < epsilon.
bool approxEqual(float a, float b, float epsilon = 1e-5f) {
    if (a == b) {
        return true;
    }
    return std::abs(a - b) < epsilon;
}
/// Checks the contents of a matrix against expected host-side values.
///
/// Progress and the final verdict are written to stdout; the reason for a
/// failure is written to stderr. The comparison stops at the first element
/// that differs by more than the tolerance.
///
/// @param label     Name used in the diagnostic messages.
/// @param mat       Matrix to check; its data is fetched with copyToHost().
/// @param expected  Expected values, one per matrix element, in the same
///                  order copyToHost() returns them.
/// @param epsilon   Absolute tolerance forwarded to approxEqual().
/// @return true when the element count matches and every element is within
///         tolerance; false on any mismatch or when a std::runtime_error is
///         raised while fetching the data.
bool verifyMatrix(const std::string& label, const MatrixCL& mat, const std::vector<float>& expected, float epsilon = 1e-5f) {
    std::cout << "Verifying " << label << "..." << std::endl;

    // Widen each dimension before multiplying so the element count cannot
    // overflow the (narrower) type the dimensions are reported in.
    const std::size_t element_count =
        static_cast<std::size_t>(mat.numRows()) * static_cast<std::size_t>(mat.numCols());
    if (element_count != expected.size()) {
        std::cerr << "Verification failed: Dimension mismatch for " << label << ". Got "
                  << mat.numRows() << "x" << mat.numCols() << ", expected " << expected.size() << " elements." << std::endl;
        return false;
    }

    try {
        const std::vector<float> actual = mat.copyToHost();

        // Guard the element-wise loop: both vectors are indexed with the same
        // counter, so their lengths must agree.
        if (actual.size() != expected.size()) {
            std::cerr << "Verification failed: Host copy of " << label << " holds "
                      << actual.size() << " elements, expected " << expected.size() << "." << std::endl;
            return false;
        }

        for (std::size_t i = 0; i < actual.size(); ++i) {
            if (!approxEqual(actual[i], expected[i], epsilon)) {
                std::cerr << "Verification failed for " << label << " at index " << i
                          << ". Got " << actual[i] << ", expected " << expected[i] << std::endl;
                // Only the first mismatch is reported.
                std::cout << label << " verification failed." << std::endl;
                return false;
            }
        }

        std::cout << label << " verified successfully." << std::endl;
        return true;
    } catch (const std::runtime_error& e) {
        std::cerr << "Error verifying matrix " << label << ": " << e.what() << std::endl;
        return false;
    }
}
/// Returns the difference between an event's CL_PROFILING_COMMAND_END and
/// CL_PROFILING_COMMAND_START profiling counters.
///
/// @param event  Event to query for profiling information.
/// @return End counter minus start counter, in nanoseconds.
// NOTE(review): profiling counters are presumably only available for events
// from a queue created with CL_QUEUE_PROFILING_ENABLE - confirm at call sites.
cl_ulong getElapsedTime(const cl::Event& event) {
    const cl_ulong began_at = event.getProfilingInfo<CL_PROFILING_COMMAND_START>();
    const cl_ulong ended_at = event.getProfilingInfo<CL_PROFILING_COMMAND_END>();
    const cl_ulong elapsed = ended_at - began_at;
    return elapsed;
}
/// Builds a buffer of rows * cols pseudo-random floats in the range [0, 1].
///
/// Values are drawn from std::rand(); this function does not seed the
/// generator.
///
/// @param rows  Number of matrix rows.
/// @param cols  Number of matrix columns.
/// @return Vector holding rows * cols values.
std::vector<float> fill_random(int rows, int cols) {
    // Widen before multiplying: the product of two ints can overflow int
    // for large matrices even though the element count fits in size_t.
    const std::size_t count = static_cast<std::size_t>(rows) * static_cast<std::size_t>(cols);
    std::vector<float> data(count);
    for (float& val : data) {
        val = static_cast<float>(std::rand()) / RAND_MAX;
    }
    return data;
}
/// Benchmark driver for MatrixCL multiplication.
///
/// Steps:
///   1. Pick the first OpenCL platform and its first GPU (falling back to CPU).
///   2. Create a context and a profiling-enabled command queue.
///   3. Build the MatrixCL kernels.
///   4. For every size in `sizes`, multiply two random square matrices `runs`
///      times and print "<size>\t<average wall-clock milliseconds>".
///
/// @return 0 on success, 1 when OpenCL setup, kernel initialization or the
///         benchmark itself fails.
int main(int argc, char** argv) {
    // Command-line arguments are not used.
    static_cast<void>(argc);
    static_cast<void>(argv);

    std::cout << "--- OpenCL Setup ---" << std::endl;
    std::vector<cl::Platform> platforms;
    cl::Platform::get(&platforms);
    if (platforms.empty()) {
        std::cerr << "No OpenCL platforms found." << std::endl;
        return 1;
    }
    cl::Platform platform = platforms.front();
    std::cout << "Using Platform: " << platform.getInfo<CL_PLATFORM_NAME>() << std::endl;

    std::vector<cl::Device> devices;
    platform.getDevices(CL_DEVICE_TYPE_GPU, &devices);
    if (devices.empty()) {
        std::cout << "No GPU found, trying CPU..." << std::endl;
        platform.getDevices(CL_DEVICE_TYPE_CPU, &devices);
        if (devices.empty()) {
            std::cerr << "No OpenCL devices found." << std::endl;
            return 1;
        }
    }
    cl::Device device = devices.front();
    std::cout << "Using Device: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;

    cl::Context context(device);
    cl_int err = CL_SUCCESS;
    cl_command_queue cq = clCreateCommandQueue(context(), device(), CL_QUEUE_PROFILING_ENABLE, &err);
    if (err != CL_SUCCESS) {
        std::cerr << "Failed to create command queue: " << err << std::endl;
        // Return instead of exit() so local objects are destroyed normally.
        return 1;
    }
    // retainObject = false: the wrapper adopts the reference obtained from
    // clCreateCommandQueue. Retaining here would add a second reference that
    // nothing ever releases.
    cl::CommandQueue queue(cq, false);

    std::vector<cl::Device> devices_to_init = {device};
    try {
        MatrixCL::initializeKernels(context, devices_to_init);
    } catch (const std::exception& e) {
        // Catching std::exception here because initializeKernels wraps cl::Error
        std::cerr << "FATAL ERROR during kernel initialization: " << e.what() << std::endl;
        // If the error was a BuildError, the log should have been printed
        // by the loadAndBuildProgram function within initializeKernels.
        return 1;
    }

    const std::vector<int> sizes = {4096};
    const int runs = 1;
    try {
        for (int size : sizes) {
            std::chrono::duration<double, std::milli> total_time(0);
            for (int i = 0; i < runs; ++i) {
                std::vector<float> dataA = fill_random(size, size);
                std::vector<float> dataB = fill_random(size, size);
                MatrixCL A(size, size, context, queue, &dataA);
                MatrixCL B(size, size, context, queue, &dataB);

                // Drain the queue first so that any work still pending from
                // constructing A and B is not counted in the measurement.
                queue.finish();

                auto start = std::chrono::high_resolution_clock::now();
                MatrixCL C = A * B;
                // Wait for the multiplication to complete before stopping the clock.
                queue.finish();
                auto end = std::chrono::high_resolution_clock::now();
                total_time += end - start;
            }
            double average_time = total_time.count() / runs;
            std::cout << size << "\t" << average_time << std::endl;
        }
    } catch (const std::exception& e) {
        // Report benchmark failures instead of terminating on an uncaught exception.
        std::cerr << "FATAL ERROR during benchmark: " << e.what() << std::endl;
        return 1;
    }
    return 0;
}