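// main.cpp (5.53 KiB)
// NOTE: this file was exported from a web snippet page; the page-navigation text
// that originally preceded this line ("Skip to content", "Snippets / Groups /
// Projects") is not part of the program.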
#include "matrix_opencl.hpp"
// Replace this with a proper header if possible
// #include "mlp_sgd.cpp"  <-- Avoid this
#include <cassert>
#include <chrono>
#include <cmath>
#include <cstddef>
#include <cstdlib>
#include <iostream>
#include <limits>
#include <stdexcept>
#include <string>
#include <vector>

/// Prints a labelled matrix to stdout, one bracketed row per line.
///
/// The contents are first fetched into a host-side vector through
/// `copyToHost()`; the element at (row, col) is then read from that vector at
/// offset `row * numCols() + col`. A std::runtime_error raised while fetching
/// or printing is reported on stderr and swallowed, so a failed print does not
/// propagate to the caller.
///
/// @param label  Text printed in front of the "(rows x cols)" header.
/// @param mat    Matrix to display.
void printMatrix(const std::string& label, const MatrixCL& mat) {
    std::cout << label << " (" << mat.numRows() << "x" << mat.numCols() << "):\n";
    try {
        const std::vector<float> values = mat.copyToHost();
        const auto row_count = mat.numRows();
        const auto col_count = mat.numCols();

        for (int row = 0; row < row_count; ++row) {
            std::cout << "  [";
            for (int col = 0; col < col_count; ++col) {
                std::cout << " " << values[row * col_count + col];
            }
            std::cout << " ]\n";
        }
        // Blank separator line after the matrix; std::endl also flushes.
        std::cout << std::endl;
    } catch (const std::runtime_error& failure) {
        std::cerr << "Error printing matrix: " << failure.what() << std::endl;
    }
}

/// Approximate float comparison using an absolute tolerance.
///
/// @param a        First value.
/// @param b        Second value.
/// @param epsilon  Maximum absolute difference (exclusive) for two distinct
///                 values to be considered equal.
/// @return true when `a` and `b` are exactly equal, or when |a - b| < epsilon.
///         NaN never compares equal to anything, including another NaN.
bool approxEqual(float a, float b, float epsilon = 1e-5f) {
    // Exact match first: `inf - inf` is NaN, so without this shortcut two equal
    // infinities would be reported as different. It also makes identical values
    // compare equal when epsilon is 0 (the strict `<` below would reject them).
    if (a == b) {
        return true;
    }
    return std::abs(a - b) < epsilon;
}

/// Checks a matrix against a flat list of expected values.
///
/// The matrix is fetched to the host with `copyToHost()` and compared element
/// by element against `expected` using `approxEqual`. Progress and the final
/// verdict are written to stdout; details of a failure go to stderr.
/// Comparison stops at the first mismatching element.
///
/// @param label     Name used in all diagnostic messages.
/// @param mat       Matrix to verify.
/// @param expected  Expected values, one per matrix element, in the same order
///                  as the vector returned by `copyToHost()`.
/// @param epsilon   Absolute tolerance forwarded to `approxEqual`.
/// @return true when the element count matches and every element is within
///         tolerance; false on a size mismatch, a value mismatch, or when a
///         std::runtime_error is raised while fetching the data.
bool verifyMatrix(const std::string& label, const MatrixCL& mat, const std::vector<float>& expected, float epsilon = 1e-5f) {
    std::cout << "Verifying " << label << "..." << std::endl;

    // Widen each dimension BEFORE multiplying: multiplying first (in the
    // getters' own, presumably int, type) can overflow for large matrices.
    const size_t element_count =
        static_cast<size_t>(mat.numRows()) * static_cast<size_t>(mat.numCols());
    if (element_count != expected.size()) {
        std::cerr << "Verification failed: Dimension mismatch for " << label << ". Got "
                  << mat.numRows() << "x" << mat.numCols() << ", expected " << expected.size() << " elements." << std::endl;
        return false;
    }

    try {
        const std::vector<float> actual = mat.copyToHost();

        // Guard the indexed comparison below: `expected` must never be read
        // past its end if the host copy has an unexpected length.
        if (actual.size() != expected.size()) {
            std::cerr << "Verification failed: host copy of " << label << " has "
                      << actual.size() << " elements, expected " << expected.size() << "." << std::endl;
            return false;
        }

        for (size_t i = 0; i < actual.size(); ++i) {
            if (!approxEqual(actual[i], expected[i], epsilon)) {
                // Only the first mismatch is reported; stop here.
                std::cerr << "Verification failed for " << label << " at index " << i
                          << ". Got " << actual[i] << ", expected " << expected[i] << std::endl;
                std::cout << label << " verification failed." << std::endl;
                return false;
            }
        }

        std::cout << label << " verified successfully." << std::endl;
        return true;
    } catch (const std::runtime_error& e) {
        std::cerr << "Error verifying matrix " << label << ": " << e.what() << std::endl;
        return false;
    }
}


/// Returns the difference between an event's CL_PROFILING_COMMAND_END and
/// CL_PROFILING_COMMAND_START profiling counters (nanoseconds).
///
/// @param event  Event whose profiling counters are queried.
/// @return END minus START.
cl_ulong getElapsedTime(const cl::Event& event) {
    const cl_ulong started_ns = event.getProfilingInfo<CL_PROFILING_COMMAND_START>();
    const cl_ulong finished_ns = event.getProfilingInfo<CL_PROFILING_COMMAND_END>();
    const cl_ulong elapsed_ns = finished_ns - started_ns;
    return elapsed_ns;
}

/// Builds a flat buffer of `rows * cols` pseudo-random floats in [0, 1].
///
/// Values come from `std::rand()`, so the sequence is governed by the current
/// `std::srand` seed and is reproducible for a fixed seed.
///
/// @param rows  Number of rows; must be non-negative.
/// @param cols  Number of columns; must be non-negative.
/// @return Vector holding `rows * cols` values.
/// @throws std::invalid_argument if either dimension is negative.
std::vector<float> fill_random(int rows, int cols) {
    // Reject negative sizes explicitly: two negative dimensions would otherwise
    // multiply to a positive element count and be silently accepted.
    if (rows < 0 || cols < 0) {
        throw std::invalid_argument("fill_random: matrix dimensions must be non-negative");
    }

    // Widen before multiplying so large dimensions cannot overflow `int`.
    const std::size_t element_count =
        static_cast<std::size_t>(rows) * static_cast<std::size_t>(cols);

    std::vector<float> data(element_count);
    for (float& value : data) {
        value = static_cast<float>(std::rand()) / static_cast<float>(RAND_MAX);
    }
    return data;
}


/// Benchmark driver for MatrixCL multiplication.
///
/// Steps:
///   1. Pick the first OpenCL platform and its first GPU device, falling back
///      to the first CPU device when the platform reports no GPU.
///   2. Create a context and a profiling-enabled command queue.
///   3. Build the MatrixCL kernels for the chosen device.
///   4. For every entry in `sizes`, multiply two random square matrices `runs`
///      times and print "<size>\t<average wall-clock milliseconds>".
///
/// @return 0 on success, 1 when setup, kernel initialization, or the benchmark
///         itself fails.
int main(int argc, char** argv) {
    // Command-line arguments are currently unused.
    (void)argc;
    (void)argv;

    std::cout << "--- OpenCL Setup ---" << std::endl;
    std::vector<cl::Platform> platforms;
    cl::Platform::get(&platforms);
    if (platforms.empty()) {
        std::cerr << "No OpenCL platforms found." << std::endl;
        return 1;
    }
    cl::Platform platform = platforms.front();
    std::cout << "Using Platform: " << platform.getInfo<CL_PLATFORM_NAME>() << std::endl;

    std::vector<cl::Device> devices;
    platform.getDevices(CL_DEVICE_TYPE_GPU, &devices);
    if (devices.empty()) {
        std::cout << "No GPU found, trying CPU..." << std::endl;
        platform.getDevices(CL_DEVICE_TYPE_CPU, &devices);
        if (devices.empty()) {
            std::cerr << "No OpenCL devices found." << std::endl;
            return 1;
        }
    }
    cl::Device device = devices.front();
    std::cout << "Using Device: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;

    cl::Context context(device);
    cl_int err = CL_SUCCESS;
    cl_command_queue cq = clCreateCommandQueue(context(), device(), CL_QUEUE_PROFILING_ENABLE, &err);
    if (err != CL_SUCCESS) {
        std::cerr << "Failed to create command queue: " << err << std::endl;
        // Return instead of exit(): exit() would skip the destructors of the
        // local OpenCL wrapper objects above.
        return 1;
    }
    // Adopt the freshly created handle WITHOUT an extra retain. Passing `true`
    // here makes the wrapper retain the handle a second time (per the OpenCL
    // C++ bindings), and since `cq` is never released manually that extra
    // reference would never be dropped.
    cl::CommandQueue queue(cq, false);

    std::vector<cl::Device> devices_to_init = {device};
    try {
        MatrixCL::initializeKernels(context, devices_to_init);
    } catch (const std::exception& e) {
        // Catching std::exception here because initializeKernels wraps cl::Error
        std::cerr << "FATAL ERROR during kernel initialization: " << e.what() << std::endl;
        // If the error was a BuildError, the log should have been printed
        // by the loadAndBuildProgram function within initializeKernels.
        return 1;
    }

    const std::vector<int> sizes = {4096};
    const int runs = 1;

    try {
        for (int size : sizes) {
            std::chrono::duration<double, std::milli> total_time(0);

            for (int i = 0; i < runs; ++i) {
                std::vector<float> dataA = fill_random(size, size);
                std::vector<float> dataB = fill_random(size, size);

                MatrixCL A(size, size, context, queue, &dataA);
                MatrixCL B(size, size, context, queue, &dataB);

                // Drain the queue before starting the clock so that any work
                // still pending from matrix construction (the constructor is
                // not visible here; it may enqueue non-blocking uploads) is
                // not attributed to the multiplication.
                queue.finish();

                auto start = std::chrono::high_resolution_clock::now();
                MatrixCL C = A * B;
                // Wait for the device so the measurement covers the whole
                // multiplication, not just the time to enqueue it.
                queue.finish();
                auto end = std::chrono::high_resolution_clock::now();

                total_time += end - start;
            }

            double average_time = total_time.count() / runs;
            std::cout << size << "\t" << average_time << std::endl;
        }
    } catch (const std::exception& e) {
        // Report benchmark failures the same way as initialization failures
        // instead of letting the exception terminate the process.
        std::cerr << "FATAL ERROR during benchmark: " << e.what() << std::endl;
        return 1;
    }

    return 0;
}