diff --git a/carbon.py b/carbon.py new file mode 100644 index 0000000000000000000000000000000000000000..c3c93f6ca3341679e929beccc41f3249df2cb31f --- /dev/null +++ b/carbon.py @@ -0,0 +1,4 @@ +import pandas as pd + +df = pd.read_csv("emissions_naive.csv") +print(df[["duration", "emissions", "emissions_rate", "cpu_power", "gpu_power"]]) \ No newline at end of file diff --git a/emissions_naive.csv b/emissions_naive.csv new file mode 100644 index 0000000000000000000000000000000000000000..58e82658af71a1adc9178d6acc832a28a8e08527 --- /dev/null +++ b/emissions_naive.csv @@ -0,0 +1,2 @@ +timestamp,project_name,run_id,experiment_id,duration,emissions,emissions_rate,cpu_power,gpu_power,ram_power,cpu_energy,gpu_energy,ram_energy,energy_consumed,country_name,country_iso_code,region,cloud_provider,cloud_region,os,python_version,codecarbon_version,cpu_count,cpu_model,gpu_count,gpu_model,longitude,latitude,ram_total_size,tracking_mode,on_cloud,pue +2025-05-20T10:14:21,codecarbon,71f66f8a-476a-4c36-856b-d8f1ae3342b1,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,0.01018060278147459,5.3464753845869e-08,5.25162949517661e-06,216.36224151265606,0.0,10.0,3.7039807410446657e-07,0.0,1.672751580675443e-08,3.87125589911221e-07,Belgium,BEL,brussels capital,,,Linux-5.4.286-1.el8.elrepo.x86_64-x86_64-with-glibc2.28,3.12.8,3.0.1,2,Intel(R) Xeon(R) Gold 6346 CPU @ 3.10GHz,1,1 x NVIDIA A10,4.4383,50.842,2.0,machine,N,1.0 diff --git a/matrix_opencl.cpp b/matrix_opencl.cpp index bd052dc75ac7a1ff3d7837e80c5879db12d49416..6786c1c829d8c36002fa23d8a597fb59c53d56b2 100644 --- a/matrix_opencl.cpp +++ b/matrix_opencl.cpp @@ -77,7 +77,7 @@ const std::string kernel_source_transpose = R"( } )"; // NAIVE -const std::string kernel_source_matrix_mul = R"( +/*const std::string kernel_source_matrix_mul = R"( __kernel void matrix_mul(__global const float* A, __global const float* B, __global float* C, int A_rows, int A_cols, int B_cols) { int row = get_global_id(0); int col = get_global_id(1); @@ -85,10 +85,10 @@ const std::string kernel_source_matrix_mul = R"( C[row * B_cols + col] += A[row * A_cols + k] * B[k * B_cols + col]; } } -)"; +)";*/ // FASTER -/*const std::string kernel_source_matrix_mul = R"( +const std::string kernel_source_matrix_mul = R"( __kernel void matrix_mul(__global const float* A, __global const float* B, __global float* C, @@ -120,7 +120,7 @@ const std::string kernel_source_matrix_mul = R"( barrier(CLK_LOCAL_MEM_FENCE); } -})";*/ +})"; const std::string kernel_source_sigmoid = R"( __kernel void sigmoid(__global const float* input, __global float* output, int rows, int cols) { int idx = get_global_id(0); @@ -348,7 +348,7 @@ MatrixCL MatrixCL::operator+(const MatrixCL& other) const { } // NAIVE VERSION -MatrixCL MatrixCL::operator*(const MatrixCL& other) const { +/*MatrixCL MatrixCL::operator*(const MatrixCL& other) const { if (cols_ != other.rows_) throw std::runtime_error("Matrix dimension error."); @@ -368,10 +368,10 @@ MatrixCL MatrixCL::operator*(const MatrixCL& other) const { } return result; -} +}*/ // FASTER VERSION -/*MatrixCL MatrixCL::operator*(const MatrixCL& other) const { +MatrixCL MatrixCL::operator*(const MatrixCL& other) const { if (cols_ != other.rows_) throw std::runtime_error("Matrix dimension error."); @@ -404,7 +404,7 @@ MatrixCL MatrixCL::operator*(const MatrixCL& other) const { } return result; -}*/ +} MatrixCL MatrixCL::transpose() const { diff --git a/tau_profile_fast.txt b/tau_profile_fast.txt index 24730fba9f83d71f530231207ee5ef831860a6f7..7f483bf700325e5f06522d2ff9ab52ab0a9e5675 100644 --- a/tau_profile_fast.txt +++ b/tau_profile_fast.txt @@ -11,63 +11,4 @@ FUNCTION SUMMARY (total): 24.5 902 902 1 0 902003 cl_int clFinish(cl_command_queue) C 2.9 107 107 1 0 107413 cl_context clCreateContext(const cl_context_properties *, cl_uint, const cl_device_id *, void (*)(const char *, const void *, size_t, void *), void *, cl_int *) C 1.8 66 66 2 0 33116 cl_int clGetPlatformIDs(cl_uint, cl_platform_id *, cl_uint *) C - 0.4 14 14 9 0 1624 cl_int clBuildProgram(cl_program, cl_uint, const cl_device_id *, const char *, void (*)(cl_program, void *), void *) C - 0.3 11 11 2 0 5759 cl_int clEnqueueWriteBuffer(cl_command_queue, cl_mem, cl_bool, size_t, size_t, const void *, cl_uint, const cl_event *, cl_event *) C - 0.3 10 10 2 0 5494 WriteBuffer - 0.1 2 2 2 0 1276 cl_int clEnqueueNDRangeKernel(cl_command_queue, cl_kernel, cl_uint, const size_t *, const size_t *, const size_t *, cl_uint, const cl_event *, cl_event *) C - 0.0 0.143 0.143 1 0 143 fill - 0.0 0.118 0.118 9 0 13 cl_kernel clCreateKernel(cl_program, const char *, cl_int *) C - 0.0 0.031 0.031 4 0 8 cl_int clGetDeviceInfo(cl_device_id, cl_device_info, size_t, void *, size_t *) C - 0.0 0.021 0.021 7 0 3 cl_int clReleaseCommandQueue(cl_command_queue) C - 0.0 0.012 0.012 18 0 1 cl_int clGetProgramInfo(cl_program, cl_program_info, size_t, void *, size_t *) C - 0.0 0.01 0.01 7 0 1 cl_int clRetainCommandQueue(cl_command_queue) C - 0.0 0.01 0.01 3 0 3 cl_mem clCreateBuffer(cl_context, cl_mem_flags, size_t, void *, cl_int *) C - 0.0 0.008 0.008 9 0 1 cl_program clCreateProgramWithSource(cl_context, cl_uint, const char **, const size_t *, cl_int *) C - 0.0 0.007 0.007 1 0 7 cl_command_queue clCreateCommandQueue(cl_context, cl_device_id, cl_command_queue_properties, cl_int *) C - 0.0 0.007 0.007 18 0 0 cl_int clGetProgramBuildInfo(cl_program, cl_device_id, cl_program_build_info, size_t, void *, size_t *) C - 0.0 0.007 0.007 17 0 0 cl_int clRetainContext(cl_context) C - 0.0 0.005 0.005 18 0 0 cl_int clReleaseContext(cl_context) C - 0.0 0.005 0.005 9 0 1 cl_int clReleaseProgram(cl_program) C - 0.0 0.002 0.002 2 0 1 cl_int clGetKernelInfo(cl_kernel, cl_kernel_info, size_t, void *, size_t *) C - 0.0 0.002 0.002 2 0 1 cl_int clGetPlatformInfo(cl_platform_id, cl_platform_info, size_t, void *, size_t *) C - 0.0 0.002 0.002 11 0 0 cl_int clSetKernelArg(cl_kernel, cl_uint, size_t, const void *) C - 0.0 0.001 0.001 2 0 0 cl_int clGetCommandQueueInfo(cl_command_queue, cl_command_queue_info, size_t, void *, size_t *) C - 0.0 0.001 0.001 2 0 0 cl_int clGetDeviceIDs(cl_platform_id, cl_device_type, cl_uint, cl_device_id *, cl_uint *) C - 0.0 0.001 0.001 1 0 1 cl_int clReleaseKernel(cl_kernel) C - 0.0 0.001 0.001 1 0 1 cl_int clRetainKernel(cl_kernel) C - -FUNCTION SUMMARY (mean): ---------------------------------------------------------------------------------------- -%Time Exclusive Inclusive #Call #Subrs Inclusive Name - msec total msec usec/call ---------------------------------------------------------------------------------------- -100.0 468 1,841 1 2.5 1841522 .TAU application - 49.7 363 915 0.5 79 1830958 taupreload_main - 24.6 452 452 0.5 0 904273 matrix_mul - 24.5 451 451 0.5 0 902003 cl_int clFinish(cl_command_queue) C - 2.9 53 53 0.5 0 107413 cl_context clCreateContext(const cl_context_properties *, cl_uint, const cl_device_id *, void (*)(const char *, const void *, size_t, void *), void *, cl_int *) C - 1.8 33 33 1 0 33116 cl_int clGetPlatformIDs(cl_uint, cl_platform_id *, cl_uint *) C - 0.4 7 7 4.5 0 1624 cl_int clBuildProgram(cl_program, cl_uint, const cl_device_id *, const char *, void (*)(cl_program, void *), void *) C - 0.3 5 5 1 0 5759 cl_int clEnqueueWriteBuffer(cl_command_queue, cl_mem, cl_bool, size_t, size_t, const void *, cl_uint, const cl_event *, cl_event *) C - 0.3 5 5 1 0 5494 WriteBuffer - 0.1 1 1 1 0 1276 cl_int clEnqueueNDRangeKernel(cl_command_queue, cl_kernel, cl_uint, const size_t *, const size_t *, const size_t *, cl_uint, const cl_event *, cl_event *) C - 0.0 0.0715 0.0715 0.5 0 143 fill - 0.0 0.059 0.059 4.5 0 13 cl_kernel clCreateKernel(cl_program, const char *, cl_int *) C - 0.0 0.0155 0.0155 2 0 8 cl_int clGetDeviceInfo(cl_device_id, cl_device_info, size_t, void *, size_t *) C - 0.0 0.0105 0.0105 3.5 0 3 cl_int clReleaseCommandQueue(cl_command_queue) C - 0.0 0.006 0.006 9 0 1 cl_int clGetProgramInfo(cl_program, cl_program_info, size_t, void *, size_t *) C - 0.0 0.005 0.005 3.5 0 1 cl_int clRetainCommandQueue(cl_command_queue) C - 0.0 0.005 0.005 1.5 0 3 cl_mem clCreateBuffer(cl_context, cl_mem_flags, size_t, void *, cl_int *) C - 0.0 0.004 0.004 4.5 0 1 cl_program clCreateProgramWithSource(cl_context, cl_uint, const char **, const size_t *, cl_int *) C - 0.0 0.0035 0.0035 0.5 0 7 cl_command_queue clCreateCommandQueue(cl_context, cl_device_id, cl_command_queue_properties, cl_int *) C - 0.0 0.0035 0.0035 9 0 0 cl_int clGetProgramBuildInfo(cl_program, cl_device_id, cl_program_build_info, size_t, void *, size_t *) C - 0.0 0.0035 0.0035 8.5 0 0 cl_int clRetainContext(cl_context) C - 0.0 0.0025 0.0025 9 0 0 cl_int clReleaseContext(cl_context) C - 0.0 0.0025 0.0025 4.5 0 1 cl_int clReleaseProgram(cl_program) C - 0.0 0.001 0.001 1 0 1 cl_int clGetKernelInfo(cl_kernel, cl_kernel_info, size_t, void *, size_t *) C - 0.0 0.001 0.001 1 0 1 cl_int clGetPlatformInfo(cl_platform_id, cl_platform_info, size_t, void *, size_t *) C - 0.0 0.001 0.001 5.5 0 0 cl_int clSetKernelArg(cl_kernel, cl_uint, size_t, const void *) C - 0.0 0.0005 0.0005 1 0 0 cl_int clGetCommandQueueInfo(cl_command_queue, cl_command_queue_info, size_t, void *, size_t *) C - 0.0 0.0005 0.0005 1 0 0 cl_int clGetDeviceIDs(cl_platform_id, cl_device_type, cl_uint, cl_device_id *, cl_uint *) C - 0.0 0.0005 0.0005 0.5 0 1 cl_int clReleaseKernel(cl_kernel) C - 0.0 0.0005 0.0005 0.5 0 1 cl_int clRetainKernel(cl_kernel) C + 0.4 14 14 9 0 1624 cl_int clBuildProgram(cl_program, cl_uint, const cl_device_id *, const char *, void (*)(cl_program, void *), void *) C \ No newline at end of file diff --git a/tau_profile_naive.txt b/tau_profile_naive.txt index 9da6b28a87745e385321ff11e01e53a6dd0fc099..4b94b99bb75f40bc701ad09c7c1a313de83743d0 100644 --- a/tau_profile_naive.txt +++ b/tau_profile_naive.txt @@ -11,63 +11,4 @@ FUNCTION SUMMARY (total): 45.8 11,061 11,061 1 0 11061306 matrix_mul 0.5 126 126 2 0 63131 cl_int clGetPlatformIDs(cl_uint, cl_platform_id *, cl_uint *) C 0.5 115 115 1 0 115365 cl_context clCreateContext(const cl_context_properties *, cl_uint, const cl_device_id *, void (*)(const char *, const void *, size_t, void *), void *, cl_int *) C - 0.1 15 15 9 0 1723 cl_int clBuildProgram(cl_program, cl_uint, const cl_device_id *, const char *, void (*)(cl_program, void *), void *) C - 0.0 11 11 2 0 5755 cl_int clEnqueueWriteBuffer(cl_command_queue, cl_mem, cl_bool, size_t, size_t, const void *, cl_uint, const cl_event *, cl_event *) C - 0.0 11 11 2 0 5503 WriteBuffer - 0.0 0.19 0.19 2 0 95 cl_int clEnqueueNDRangeKernel(cl_command_queue, cl_kernel, cl_uint, const size_t *, const size_t *, const size_t *, cl_uint, const cl_event *, cl_event *) C - 0.0 0.143 0.143 1 0 143 fill - 0.0 0.123 0.123 9 0 14 cl_kernel clCreateKernel(cl_program, const char *, cl_int *) C - 0.0 0.028 0.028 4 0 7 cl_int clGetDeviceInfo(cl_device_id, cl_device_info, size_t, void *, size_t *) C - 0.0 0.024 0.024 7 0 3 cl_int clReleaseCommandQueue(cl_command_queue) C - 0.0 0.01 0.01 18 0 1 cl_int clGetProgramInfo(cl_program, cl_program_info, size_t, void *, size_t *) C - 0.0 0.009 0.009 3 0 3 cl_mem clCreateBuffer(cl_context, cl_mem_flags, size_t, void *, cl_int *) C - 0.0 0.008 0.008 17 0 0 cl_int clRetainContext(cl_context) C - 0.0 0.006 0.006 1 0 6 cl_command_queue clCreateCommandQueue(cl_context, cl_device_id, cl_command_queue_properties, cl_int *) C - 0.0 0.006 0.006 18 0 0 cl_int clReleaseContext(cl_context) C - 0.0 0.006 0.006 9 0 1 cl_int clReleaseProgram(cl_program) C - 0.0 0.006 0.006 7 0 1 cl_int clRetainCommandQueue(cl_command_queue) C - 0.0 0.006 0.006 9 0 1 cl_program clCreateProgramWithSource(cl_context, cl_uint, const char **, const size_t *, cl_int *) C - 0.0 0.003 0.003 10 0 0 cl_int clSetKernelArg(cl_kernel, cl_uint, size_t, const void *) C - 0.0 0.002 0.002 2 0 1 cl_int clGetCommandQueueInfo(cl_command_queue, cl_command_queue_info, size_t, void *, size_t *) C - 0.0 0.001 0.001 2 0 0 cl_int clGetDeviceIDs(cl_platform_id, cl_device_type, cl_uint, cl_device_id *, cl_uint *) C - 0.0 0.001 0.001 2 0 0 cl_int clGetKernelInfo(cl_kernel, cl_kernel_info, size_t, void *, size_t *) C - 0.0 0.001 0.001 2 0 0 cl_int clGetPlatformInfo(cl_platform_id, cl_platform_info, size_t, void *, size_t *) C - 0.0 0.001 0.001 18 0 0 cl_int clGetProgramBuildInfo(cl_program, cl_device_id, cl_program_build_info, size_t, void *, size_t *) C - 0.0 0.001 0.001 1 0 1 cl_int clReleaseKernel(cl_kernel) C - 0.0 0.001 0.001 1 0 1 cl_int clRetainKernel(cl_kernel) C - -FUNCTION SUMMARY (mean): ---------------------------------------------------------------------------------------- -%Time Exclusive Inclusive #Call #Subrs Inclusive Name - msec total msec usec/call ---------------------------------------------------------------------------------------- -100.0 502 12,066 1 2.5 12066881 .TAU application - 50.0 362 6,027 0.5 78.5 12055839 taupreload_main - 45.8 5,530 5,530 0.5 0 11061448 cl_int clFinish(cl_command_queue) C - 45.8 5,530 5,530 0.5 0 11061306 matrix_mul - 0.5 63 63 1 0 63131 cl_int clGetPlatformIDs(cl_uint, cl_platform_id *, cl_uint *) C - 0.5 57 57 0.5 0 115365 cl_context clCreateContext(const cl_context_properties *, cl_uint, const cl_device_id *, void (*)(const char *, const void *, size_t, void *), void *, cl_int *) C - 0.1 7 7 4.5 0 1723 cl_int clBuildProgram(cl_program, cl_uint, const cl_device_id *, const char *, void (*)(cl_program, void *), void *) C - 0.0 5 5 1 0 5755 cl_int clEnqueueWriteBuffer(cl_command_queue, cl_mem, cl_bool, size_t, size_t, const void *, cl_uint, const cl_event *, cl_event *) C - 0.0 5 5 1 0 5503 WriteBuffer - 0.0 0.095 0.095 1 0 95 cl_int clEnqueueNDRangeKernel(cl_command_queue, cl_kernel, cl_uint, const size_t *, const size_t *, const size_t *, cl_uint, const cl_event *, cl_event *) C - 0.0 0.0716 0.0716 0.5 0 143 fill - 0.0 0.0615 0.0615 4.5 0 14 cl_kernel clCreateKernel(cl_program, const char *, cl_int *) C - 0.0 0.014 0.014 2 0 7 cl_int clGetDeviceInfo(cl_device_id, cl_device_info, size_t, void *, size_t *) C - 0.0 0.012 0.012 3.5 0 3 cl_int clReleaseCommandQueue(cl_command_queue) C - 0.0 0.005 0.005 9 0 1 cl_int clGetProgramInfo(cl_program, cl_program_info, size_t, void *, size_t *) C - 0.0 0.0045 0.0045 1.5 0 3 cl_mem clCreateBuffer(cl_context, cl_mem_flags, size_t, void *, cl_int *) C - 0.0 0.004 0.004 8.5 0 0 cl_int clRetainContext(cl_context) C - 0.0 0.003 0.003 0.5 0 6 cl_command_queue clCreateCommandQueue(cl_context, cl_device_id, cl_command_queue_properties, cl_int *) C - 0.0 0.003 0.003 9 0 0 cl_int clReleaseContext(cl_context) C - 0.0 0.003 0.003 4.5 0 1 cl_int clReleaseProgram(cl_program) C - 0.0 0.003 0.003 3.5 0 1 cl_int clRetainCommandQueue(cl_command_queue) C - 0.0 0.003 0.003 4.5 0 1 cl_program clCreateProgramWithSource(cl_context, cl_uint, const char **, const size_t *, cl_int *) C - 0.0 0.0015 0.0015 5 0 0 cl_int clSetKernelArg(cl_kernel, cl_uint, size_t, const void *) C - 0.0 0.001 0.001 1 0 1 cl_int clGetCommandQueueInfo(cl_command_queue, cl_command_queue_info, size_t, void *, size_t *) C - 0.0 0.0005 0.0005 1 0 0 cl_int clGetDeviceIDs(cl_platform_id, cl_device_type, cl_uint, cl_device_id *, cl_uint *) C - 0.0 0.0005 0.0005 1 0 0 cl_int clGetKernelInfo(cl_kernel, cl_kernel_info, size_t, void *, size_t *) C - 0.0 0.0005 0.0005 1 0 0 cl_int clGetPlatformInfo(cl_platform_id, cl_platform_info, size_t, void *, size_t *) C - 0.0 0.0005 0.0005 9 0 0 cl_int clGetProgramBuildInfo(cl_program, cl_device_id, cl_program_build_info, size_t, void *, size_t *) C - 0.0 0.0005 0.0005 0.5 0 1 cl_int clReleaseKernel(cl_kernel) C - 0.0 0.0005 0.0005 0.5 0 1 cl_int clRetainKernel(cl_kernel) C + 0.1 15 15 9 0 1723 cl_int clBuildProgram(cl_program, cl_uint, const cl_device_id *, const char *, void (*)(cl_program, void *), void *) C \ No newline at end of file