diff --git a/carbon.py b/carbon.py
new file mode 100644
index 0000000000000000000000000000000000000000..c3c93f6ca3341679e929beccc41f3249df2cb31f
--- /dev/null
+++ b/carbon.py
@@ -0,0 +1,4 @@
+import pandas as pd
+
+emissions = pd.read_csv("emissions_naive.csv")  # CodeCarbon run log; relative path, resolved against the working directory
+print(emissions.loc[:, ["duration", "emissions", "emissions_rate", "cpu_power", "gpu_power"]])  # show only the duration, emissions and CPU/GPU power columns
\ No newline at end of file
diff --git a/emissions_naive.csv b/emissions_naive.csv
new file mode 100644
index 0000000000000000000000000000000000000000..58e82658af71a1adc9178d6acc832a28a8e08527
--- /dev/null
+++ b/emissions_naive.csv
@@ -0,0 +1,2 @@
+timestamp,project_name,run_id,experiment_id,duration,emissions,emissions_rate,cpu_power,gpu_power,ram_power,cpu_energy,gpu_energy,ram_energy,energy_consumed,country_name,country_iso_code,region,cloud_provider,cloud_region,os,python_version,codecarbon_version,cpu_count,cpu_model,gpu_count,gpu_model,longitude,latitude,ram_total_size,tracking_mode,on_cloud,pue
+2025-05-20T10:14:21,codecarbon,71f66f8a-476a-4c36-856b-d8f1ae3342b1,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,0.01018060278147459,5.3464753845869e-08,5.25162949517661e-06,216.36224151265606,0.0,10.0,3.7039807410446657e-07,0.0,1.672751580675443e-08,3.87125589911221e-07,Belgium,BEL,brussels capital,,,Linux-5.4.286-1.el8.elrepo.x86_64-x86_64-with-glibc2.28,3.12.8,3.0.1,2,Intel(R) Xeon(R) Gold 6346 CPU @ 3.10GHz,1,1 x NVIDIA A10,4.4383,50.842,2.0,machine,N,1.0
diff --git a/matrix_opencl.cpp b/matrix_opencl.cpp
index bd052dc75ac7a1ff3d7837e80c5879db12d49416..6786c1c829d8c36002fa23d8a597fb59c53d56b2 100644
--- a/matrix_opencl.cpp
+++ b/matrix_opencl.cpp
@@ -77,7 +77,7 @@ const std::string kernel_source_transpose = R"(
     }
 )";
 // NAIVE
-const std::string kernel_source_matrix_mul = R"(
+/*const std::string kernel_source_matrix_mul = R"(
     __kernel void matrix_mul(__global const float* A, __global const float* B, __global float* C, int A_rows, int A_cols, int B_cols) {
         int row = get_global_id(0);
         int col = get_global_id(1);
@@ -85,10 +85,10 @@ const std::string kernel_source_matrix_mul = R"(
             C[row * B_cols + col] += A[row * A_cols + k] * B[k * B_cols + col];
         }
     }
-)";
+)";*/
 
 // FASTER
-/*const std::string kernel_source_matrix_mul = R"(
+const std::string kernel_source_matrix_mul = R"(
     __kernel void matrix_mul(__global const float* A,
                          __global const float* B,
                          __global float* C,
@@ -120,7 +120,7 @@ const std::string kernel_source_matrix_mul = R"(
 
         barrier(CLK_LOCAL_MEM_FENCE);
     }
-})";*/
+})";
 const std::string kernel_source_sigmoid = R"(
     __kernel void sigmoid(__global const float* input, __global float* output, int rows, int cols) {
         int idx = get_global_id(0);
@@ -348,7 +348,7 @@ MatrixCL MatrixCL::operator+(const MatrixCL& other) const {
 }
 
 // NAIVE VERSION
-MatrixCL MatrixCL::operator*(const MatrixCL& other) const {
+/*MatrixCL MatrixCL::operator*(const MatrixCL& other) const {
     if (cols_ != other.rows_)
         throw std::runtime_error("Matrix dimension error.");
 
@@ -368,10 +368,10 @@ MatrixCL MatrixCL::operator*(const MatrixCL& other) const {
     }
 
     return result;
-}
+}*/
 
 // FASTER VERSION
-/*MatrixCL MatrixCL::operator*(const MatrixCL& other) const {
+MatrixCL MatrixCL::operator*(const MatrixCL& other) const {
     if (cols_ != other.rows_)
         throw std::runtime_error("Matrix dimension error.");
 
@@ -404,7 +404,7 @@ MatrixCL MatrixCL::operator*(const MatrixCL& other) const {
     }
 
     return result;
-}*/
+}
 
 
 MatrixCL MatrixCL::transpose() const {
diff --git a/tau_profile_fast.txt b/tau_profile_fast.txt
index 24730fba9f83d71f530231207ee5ef831860a6f7..7f483bf700325e5f06522d2ff9ab52ab0a9e5675 100644
--- a/tau_profile_fast.txt
+++ b/tau_profile_fast.txt
@@ -11,63 +11,4 @@ FUNCTION SUMMARY (total):
  24.5          902          902           1           0     902003 cl_int clFinish(cl_command_queue) C
   2.9          107          107           1           0     107413 cl_context clCreateContext(const cl_context_properties *, cl_uint, const cl_device_id *, void (*)(const char *, const void *, size_t, void *), void *, cl_int *) C
   1.8           66           66           2           0      33116 cl_int clGetPlatformIDs(cl_uint, cl_platform_id *, cl_uint *) C
-  0.4           14           14           9           0       1624 cl_int clBuildProgram(cl_program, cl_uint, const cl_device_id *, const char *, void (*)(cl_program, void *), void *) C
-  0.3           11           11           2           0       5759 cl_int clEnqueueWriteBuffer(cl_command_queue, cl_mem, cl_bool, size_t, size_t, const void *, cl_uint, const cl_event *, cl_event *) C
-  0.3           10           10           2           0       5494 WriteBuffer
-  0.1            2            2           2           0       1276 cl_int clEnqueueNDRangeKernel(cl_command_queue, cl_kernel, cl_uint, const size_t *, const size_t *, const size_t *, cl_uint, const cl_event *, cl_event *) C
-  0.0        0.143        0.143           1           0        143 fill
-  0.0        0.118        0.118           9           0         13 cl_kernel clCreateKernel(cl_program, const char *, cl_int *) C
-  0.0        0.031        0.031           4           0          8 cl_int clGetDeviceInfo(cl_device_id, cl_device_info, size_t, void *, size_t *) C
-  0.0        0.021        0.021           7           0          3 cl_int clReleaseCommandQueue(cl_command_queue) C
-  0.0        0.012        0.012          18           0          1 cl_int clGetProgramInfo(cl_program, cl_program_info, size_t, void *, size_t *) C
-  0.0         0.01         0.01           7           0          1 cl_int clRetainCommandQueue(cl_command_queue) C
-  0.0         0.01         0.01           3           0          3 cl_mem clCreateBuffer(cl_context, cl_mem_flags, size_t, void *, cl_int *) C
-  0.0        0.008        0.008           9           0          1 cl_program clCreateProgramWithSource(cl_context, cl_uint, const char **, const size_t *, cl_int *) C
-  0.0        0.007        0.007           1           0          7 cl_command_queue clCreateCommandQueue(cl_context, cl_device_id, cl_command_queue_properties, cl_int *) C
-  0.0        0.007        0.007          18           0          0 cl_int clGetProgramBuildInfo(cl_program, cl_device_id, cl_program_build_info, size_t, void *, size_t *) C
-  0.0        0.007        0.007          17           0          0 cl_int clRetainContext(cl_context) C
-  0.0        0.005        0.005          18           0          0 cl_int clReleaseContext(cl_context) C
-  0.0        0.005        0.005           9           0          1 cl_int clReleaseProgram(cl_program) C
-  0.0        0.002        0.002           2           0          1 cl_int clGetKernelInfo(cl_kernel, cl_kernel_info, size_t, void *, size_t *) C
-  0.0        0.002        0.002           2           0          1 cl_int clGetPlatformInfo(cl_platform_id, cl_platform_info, size_t, void *, size_t *) C
-  0.0        0.002        0.002          11           0          0 cl_int clSetKernelArg(cl_kernel, cl_uint, size_t, const void *) C
-  0.0        0.001        0.001           2           0          0 cl_int clGetCommandQueueInfo(cl_command_queue, cl_command_queue_info, size_t, void *, size_t *) C
-  0.0        0.001        0.001           2           0          0 cl_int clGetDeviceIDs(cl_platform_id, cl_device_type, cl_uint, cl_device_id *, cl_uint *) C
-  0.0        0.001        0.001           1           0          1 cl_int clReleaseKernel(cl_kernel) C
-  0.0        0.001        0.001           1           0          1 cl_int clRetainKernel(cl_kernel) C
-
-FUNCTION SUMMARY (mean):
----------------------------------------------------------------------------------------
-%Time    Exclusive    Inclusive       #Call      #Subrs  Inclusive Name
-              msec   total msec                          usec/call 
----------------------------------------------------------------------------------------
-100.0          468        1,841           1         2.5    1841522 .TAU application
- 49.7          363          915         0.5          79    1830958 taupreload_main
- 24.6          452          452         0.5           0     904273 matrix_mul
- 24.5          451          451         0.5           0     902003 cl_int clFinish(cl_command_queue) C
-  2.9           53           53         0.5           0     107413 cl_context clCreateContext(const cl_context_properties *, cl_uint, const cl_device_id *, void (*)(const char *, const void *, size_t, void *), void *, cl_int *) C
-  1.8           33           33           1           0      33116 cl_int clGetPlatformIDs(cl_uint, cl_platform_id *, cl_uint *) C
-  0.4            7            7         4.5           0       1624 cl_int clBuildProgram(cl_program, cl_uint, const cl_device_id *, const char *, void (*)(cl_program, void *), void *) C
-  0.3            5            5           1           0       5759 cl_int clEnqueueWriteBuffer(cl_command_queue, cl_mem, cl_bool, size_t, size_t, const void *, cl_uint, const cl_event *, cl_event *) C
-  0.3            5            5           1           0       5494 WriteBuffer
-  0.1            1            1           1           0       1276 cl_int clEnqueueNDRangeKernel(cl_command_queue, cl_kernel, cl_uint, const size_t *, const size_t *, const size_t *, cl_uint, const cl_event *, cl_event *) C
-  0.0       0.0715       0.0715         0.5           0        143 fill
-  0.0        0.059        0.059         4.5           0         13 cl_kernel clCreateKernel(cl_program, const char *, cl_int *) C
-  0.0       0.0155       0.0155           2           0          8 cl_int clGetDeviceInfo(cl_device_id, cl_device_info, size_t, void *, size_t *) C
-  0.0       0.0105       0.0105         3.5           0          3 cl_int clReleaseCommandQueue(cl_command_queue) C
-  0.0        0.006        0.006           9           0          1 cl_int clGetProgramInfo(cl_program, cl_program_info, size_t, void *, size_t *) C
-  0.0        0.005        0.005         3.5           0          1 cl_int clRetainCommandQueue(cl_command_queue) C
-  0.0        0.005        0.005         1.5           0          3 cl_mem clCreateBuffer(cl_context, cl_mem_flags, size_t, void *, cl_int *) C
-  0.0        0.004        0.004         4.5           0          1 cl_program clCreateProgramWithSource(cl_context, cl_uint, const char **, const size_t *, cl_int *) C
-  0.0       0.0035       0.0035         0.5           0          7 cl_command_queue clCreateCommandQueue(cl_context, cl_device_id, cl_command_queue_properties, cl_int *) C
-  0.0       0.0035       0.0035           9           0          0 cl_int clGetProgramBuildInfo(cl_program, cl_device_id, cl_program_build_info, size_t, void *, size_t *) C
-  0.0       0.0035       0.0035         8.5           0          0 cl_int clRetainContext(cl_context) C
-  0.0       0.0025       0.0025           9           0          0 cl_int clReleaseContext(cl_context) C
-  0.0       0.0025       0.0025         4.5           0          1 cl_int clReleaseProgram(cl_program) C
-  0.0        0.001        0.001           1           0          1 cl_int clGetKernelInfo(cl_kernel, cl_kernel_info, size_t, void *, size_t *) C
-  0.0        0.001        0.001           1           0          1 cl_int clGetPlatformInfo(cl_platform_id, cl_platform_info, size_t, void *, size_t *) C
-  0.0        0.001        0.001         5.5           0          0 cl_int clSetKernelArg(cl_kernel, cl_uint, size_t, const void *) C
-  0.0       0.0005       0.0005           1           0          0 cl_int clGetCommandQueueInfo(cl_command_queue, cl_command_queue_info, size_t, void *, size_t *) C
-  0.0       0.0005       0.0005           1           0          0 cl_int clGetDeviceIDs(cl_platform_id, cl_device_type, cl_uint, cl_device_id *, cl_uint *) C
-  0.0       0.0005       0.0005         0.5           0          1 cl_int clReleaseKernel(cl_kernel) C
-  0.0       0.0005       0.0005         0.5           0          1 cl_int clRetainKernel(cl_kernel) C
+  0.4           14           14           9           0       1624 cl_int clBuildProgram(cl_program, cl_uint, const cl_device_id *, const char *, void (*)(cl_program, void *), void *) C
\ No newline at end of file
diff --git a/tau_profile_naive.txt b/tau_profile_naive.txt
index 9da6b28a87745e385321ff11e01e53a6dd0fc099..4b94b99bb75f40bc701ad09c7c1a313de83743d0 100644
--- a/tau_profile_naive.txt
+++ b/tau_profile_naive.txt
@@ -11,63 +11,4 @@ FUNCTION SUMMARY (total):
  45.8       11,061       11,061           1           0   11061306 matrix_mul
   0.5          126          126           2           0      63131 cl_int clGetPlatformIDs(cl_uint, cl_platform_id *, cl_uint *) C
   0.5          115          115           1           0     115365 cl_context clCreateContext(const cl_context_properties *, cl_uint, const cl_device_id *, void (*)(const char *, const void *, size_t, void *), void *, cl_int *) C
-  0.1           15           15           9           0       1723 cl_int clBuildProgram(cl_program, cl_uint, const cl_device_id *, const char *, void (*)(cl_program, void *), void *) C
-  0.0           11           11           2           0       5755 cl_int clEnqueueWriteBuffer(cl_command_queue, cl_mem, cl_bool, size_t, size_t, const void *, cl_uint, const cl_event *, cl_event *) C
-  0.0           11           11           2           0       5503 WriteBuffer
-  0.0         0.19         0.19           2           0         95 cl_int clEnqueueNDRangeKernel(cl_command_queue, cl_kernel, cl_uint, const size_t *, const size_t *, const size_t *, cl_uint, const cl_event *, cl_event *) C
-  0.0        0.143        0.143           1           0        143 fill
-  0.0        0.123        0.123           9           0         14 cl_kernel clCreateKernel(cl_program, const char *, cl_int *) C
-  0.0        0.028        0.028           4           0          7 cl_int clGetDeviceInfo(cl_device_id, cl_device_info, size_t, void *, size_t *) C
-  0.0        0.024        0.024           7           0          3 cl_int clReleaseCommandQueue(cl_command_queue) C
-  0.0         0.01         0.01          18           0          1 cl_int clGetProgramInfo(cl_program, cl_program_info, size_t, void *, size_t *) C
-  0.0        0.009        0.009           3           0          3 cl_mem clCreateBuffer(cl_context, cl_mem_flags, size_t, void *, cl_int *) C
-  0.0        0.008        0.008          17           0          0 cl_int clRetainContext(cl_context) C
-  0.0        0.006        0.006           1           0          6 cl_command_queue clCreateCommandQueue(cl_context, cl_device_id, cl_command_queue_properties, cl_int *) C
-  0.0        0.006        0.006          18           0          0 cl_int clReleaseContext(cl_context) C
-  0.0        0.006        0.006           9           0          1 cl_int clReleaseProgram(cl_program) C
-  0.0        0.006        0.006           7           0          1 cl_int clRetainCommandQueue(cl_command_queue) C
-  0.0        0.006        0.006           9           0          1 cl_program clCreateProgramWithSource(cl_context, cl_uint, const char **, const size_t *, cl_int *) C
-  0.0        0.003        0.003          10           0          0 cl_int clSetKernelArg(cl_kernel, cl_uint, size_t, const void *) C
-  0.0        0.002        0.002           2           0          1 cl_int clGetCommandQueueInfo(cl_command_queue, cl_command_queue_info, size_t, void *, size_t *) C
-  0.0        0.001        0.001           2           0          0 cl_int clGetDeviceIDs(cl_platform_id, cl_device_type, cl_uint, cl_device_id *, cl_uint *) C
-  0.0        0.001        0.001           2           0          0 cl_int clGetKernelInfo(cl_kernel, cl_kernel_info, size_t, void *, size_t *) C
-  0.0        0.001        0.001           2           0          0 cl_int clGetPlatformInfo(cl_platform_id, cl_platform_info, size_t, void *, size_t *) C
-  0.0        0.001        0.001          18           0          0 cl_int clGetProgramBuildInfo(cl_program, cl_device_id, cl_program_build_info, size_t, void *, size_t *) C
-  0.0        0.001        0.001           1           0          1 cl_int clReleaseKernel(cl_kernel) C
-  0.0        0.001        0.001           1           0          1 cl_int clRetainKernel(cl_kernel) C
-
-FUNCTION SUMMARY (mean):
----------------------------------------------------------------------------------------
-%Time    Exclusive    Inclusive       #Call      #Subrs  Inclusive Name
-              msec   total msec                          usec/call 
----------------------------------------------------------------------------------------
-100.0          502       12,066           1         2.5   12066881 .TAU application
- 50.0          362        6,027         0.5        78.5   12055839 taupreload_main
- 45.8        5,530        5,530         0.5           0   11061448 cl_int clFinish(cl_command_queue) C
- 45.8        5,530        5,530         0.5           0   11061306 matrix_mul
-  0.5           63           63           1           0      63131 cl_int clGetPlatformIDs(cl_uint, cl_platform_id *, cl_uint *) C
-  0.5           57           57         0.5           0     115365 cl_context clCreateContext(const cl_context_properties *, cl_uint, const cl_device_id *, void (*)(const char *, const void *, size_t, void *), void *, cl_int *) C
-  0.1            7            7         4.5           0       1723 cl_int clBuildProgram(cl_program, cl_uint, const cl_device_id *, const char *, void (*)(cl_program, void *), void *) C
-  0.0            5            5           1           0       5755 cl_int clEnqueueWriteBuffer(cl_command_queue, cl_mem, cl_bool, size_t, size_t, const void *, cl_uint, const cl_event *, cl_event *) C
-  0.0            5            5           1           0       5503 WriteBuffer
-  0.0        0.095        0.095           1           0         95 cl_int clEnqueueNDRangeKernel(cl_command_queue, cl_kernel, cl_uint, const size_t *, const size_t *, const size_t *, cl_uint, const cl_event *, cl_event *) C
-  0.0       0.0716       0.0716         0.5           0        143 fill
-  0.0       0.0615       0.0615         4.5           0         14 cl_kernel clCreateKernel(cl_program, const char *, cl_int *) C
-  0.0        0.014        0.014           2           0          7 cl_int clGetDeviceInfo(cl_device_id, cl_device_info, size_t, void *, size_t *) C
-  0.0        0.012        0.012         3.5           0          3 cl_int clReleaseCommandQueue(cl_command_queue) C
-  0.0        0.005        0.005           9           0          1 cl_int clGetProgramInfo(cl_program, cl_program_info, size_t, void *, size_t *) C
-  0.0       0.0045       0.0045         1.5           0          3 cl_mem clCreateBuffer(cl_context, cl_mem_flags, size_t, void *, cl_int *) C
-  0.0        0.004        0.004         8.5           0          0 cl_int clRetainContext(cl_context) C
-  0.0        0.003        0.003         0.5           0          6 cl_command_queue clCreateCommandQueue(cl_context, cl_device_id, cl_command_queue_properties, cl_int *) C
-  0.0        0.003        0.003           9           0          0 cl_int clReleaseContext(cl_context) C
-  0.0        0.003        0.003         4.5           0          1 cl_int clReleaseProgram(cl_program) C
-  0.0        0.003        0.003         3.5           0          1 cl_int clRetainCommandQueue(cl_command_queue) C
-  0.0        0.003        0.003         4.5           0          1 cl_program clCreateProgramWithSource(cl_context, cl_uint, const char **, const size_t *, cl_int *) C
-  0.0       0.0015       0.0015           5           0          0 cl_int clSetKernelArg(cl_kernel, cl_uint, size_t, const void *) C
-  0.0        0.001        0.001           1           0          1 cl_int clGetCommandQueueInfo(cl_command_queue, cl_command_queue_info, size_t, void *, size_t *) C
-  0.0       0.0005       0.0005           1           0          0 cl_int clGetDeviceIDs(cl_platform_id, cl_device_type, cl_uint, cl_device_id *, cl_uint *) C
-  0.0       0.0005       0.0005           1           0          0 cl_int clGetKernelInfo(cl_kernel, cl_kernel_info, size_t, void *, size_t *) C
-  0.0       0.0005       0.0005           1           0          0 cl_int clGetPlatformInfo(cl_platform_id, cl_platform_info, size_t, void *, size_t *) C
-  0.0       0.0005       0.0005           9           0          0 cl_int clGetProgramBuildInfo(cl_program, cl_device_id, cl_program_build_info, size_t, void *, size_t *) C
-  0.0       0.0005       0.0005         0.5           0          1 cl_int clReleaseKernel(cl_kernel) C
-  0.0       0.0005       0.0005         0.5           0          1 cl_int clRetainKernel(cl_kernel) C
+  0.1           15           15           9           0       1723 cl_int clBuildProgram(cl_program, cl_uint, const cl_device_id *, const char *, void (*)(cl_program, void *), void *) C
\ No newline at end of file