diff --git a/benchmark b/benchmark new file mode 100755 index 0000000000000000000000000000000000000000..a73799e6c19b7ab5e290194f5fb891ad881e6801 Binary files /dev/null and b/benchmark differ diff --git a/events.0.edf b/events.0.edf new file mode 100644 index 0000000000000000000000000000000000000000..5a3bdf42f2cf8b4d262b067818c3be5738cb2b4d --- /dev/null +++ b/events.0.edf @@ -0,0 +1,135 @@ +134 dynamic_trace_events +# FunctionId Group Tag "Name Type" Parameters +0 TAUEVENT 0 ".TAU <unknown event>" TriggerValue +1 TAU_DEFAULT 0 ".TAU application " EntryExit +12 TAU_DEFAULT 0 "taupreload_main " EntryExit +13 TAU_USER 0 "cl_int clGetPlatformIDs(cl_uint, cl_platform_id *, cl_uint *) C " EntryExit +14 TAU_USER 0 "cl_int clGetPlatformInfo(cl_platform_id, cl_platform_info, size_t, void *, size_t *) C " EntryExit +15 TAU_USER 0 "cl_int clGetDeviceIDs(cl_platform_id, cl_device_type, cl_uint, cl_device_id *, cl_uint *) C " EntryExit +16 TAU_USER 0 "cl_int clGetDeviceInfo(cl_device_id, cl_device_info, size_t, void *, size_t *) C " EntryExit +17 TAU_USER 0 "cl_context clCreateContext(const cl_context_properties *, cl_uint, const cl_device_id *, void (*)(const char *, const void *, size_t, void *), void *, cl_int *) C " EntryExit +18 TAU_USER 0 "cl_command_queue clCreateCommandQueue(cl_context, cl_device_id, cl_command_queue_properties, cl_int *) C " EntryExit +19 TAU_USER 0 "cl_int clRetainCommandQueue(cl_command_queue) C " EntryExit +20 TAU_USER 0 "cl_int clRetainContext(cl_context) C " EntryExit +21 TAU_USER 0 "cl_program clCreateProgramWithSource(cl_context, cl_uint, const char **, const size_t *, cl_int *) C " EntryExit +22 TAU_USER 0 "cl_int clBuildProgram(cl_program, cl_uint, const cl_device_id *, const char *, void (*)(cl_program, void *), void *) C " EntryExit +23 TAU_USER 0 "cl_int clGetProgramInfo(cl_program, cl_program_info, size_t, void *, size_t *) C " EntryExit +24 TAU_USER 0 "cl_int clGetProgramBuildInfo(cl_program, cl_device_id, cl_program_build_info, size_t, void *, size_t *) C " EntryExit +25 TAU_USER 0 "cl_int clReleaseContext(cl_context) C " EntryExit +26 TAU_USER 0 "cl_kernel clCreateKernel(cl_program, const char *, cl_int *) C " EntryExit +27 TAU_USER 0 "cl_int clReleaseProgram(cl_program) C " EntryExit +28 TAU_USER 0 "cl_mem clCreateBuffer(cl_context, cl_mem_flags, size_t, void *, cl_int *) C " EntryExit +29 TAU_USER 0 "cl_int clEnqueueWriteBuffer(cl_command_queue, cl_mem, cl_bool, size_t, size_t, const void *, cl_uint, const cl_event *, cl_event *) C " EntryExit +30 TAU_USER 0 "cl_int clGetCommandQueueInfo(cl_command_queue, cl_command_queue_info, size_t, void *, size_t *) C " EntryExit +33 TAU_USER 0 "WriteBuffer " EntryExit +35 TAU_USER 0 "cl_int clReleaseCommandQueue(cl_command_queue) C " EntryExit +36 TAU_USER 0 "cl_int clRetainKernel(cl_kernel) C " EntryExit +37 TAU_USER 0 "cl_int clSetKernelArg(cl_kernel, cl_uint, size_t, const void *) C " EntryExit +38 TAU_USER 0 "cl_int clEnqueueNDRangeKernel(cl_command_queue, cl_kernel, cl_uint, const size_t *, const size_t *, const size_t *, cl_uint, const cl_event *, cl_event *) C " EntryExit +39 TAU_USER 0 "cl_int clGetKernelInfo(cl_kernel, cl_kernel_info, size_t, void *, size_t *) C " EntryExit +40 TAU_USER 0 "cl_int clReleaseKernel(cl_kernel) C " EntryExit +41 TAU_USER 0 "cl_int clFinish(cl_command_queue) C " EntryExit +42 TAU_USER 0 "fill " EntryExit +43 TAU_USER 0 "matrix_mul " EntryExit +2 TAUEVENT 0 "Bytes copied from Host to Device" TriggerValue +3 TAUEVENT 0 "Bytes copied from Device to Host" TriggerValue +4 TAUEVENT 0 "Bytes copied from Device to Device" TriggerValue +5 TAUEVENT 0 "Correlation ID" TriggerValue +6 TAUEVENT 0 "Unified Memory Bytes copied from Host to Device" TriggerValue +7 TAUEVENT 0 "Unified Memory Bytes copied from Device to Host" TriggerValue +8 TAUEVENT 0 "Unified Memory CPU Page Faults" TriggerValue +9 TAUEVENT 0 "Floating Point Operations" TriggerValue +10 TAUEVENT 0 "Memory Operations" TriggerValue +11 TAUEVENT 0 "Control Operations" TriggerValue +31 TAUEVENT 0 "Time in Queue (us)" TriggerValue +32 TAUEVENT 0 "Time in Submitted (us)" TriggerValue +34 TAUEVENT 0 "Correlation ID : WriteBuffer" TriggerValue +44 TAUEVENT 0 "CPU Cores | 16" TriggerValue +45 TAUEVENT 0 "CPU MHz | 3599.999" TriggerValue +46 TAUEVENT 0 "CPU Type | Intel(R) Xeon(R) Gold 6346 CPU @ 3.10GHz" TriggerValue +47 TAUEVENT 0 "CPU Vendor | GenuineIntel" TriggerValue +48 TAUEVENT 0 "CPUs Allowed | 00000001,00000001" TriggerValue +49 TAUEVENT 0 "CPUs Allowed List | 0,32" TriggerValue +50 TAUEVENT 0 "CWD | /auto/home/users/j/h/jhano/P3" TriggerValue +51 TAUEVENT 0 "Cache Size | 36864 KB" TriggerValue +52 TAUEVENT 0 "Command Line | ./main" TriggerValue +53 TAUEVENT 0 "Executable | /auto/home/users/j/h/jhano/P3/main" TriggerValue +54 TAUEVENT 0 "Hostname | mb-icg102.cism.ucl.ac.be" TriggerValue +55 TAUEVENT 0 "Local Time | 2025-05-20T09:39:54+02:00" TriggerValue +56 TAUEVENT 0 "Memories Allowed | 00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000003" TriggerValue +57 TAUEVENT 0 "Memories Allowed List | 0-1" TriggerValue +58 TAUEVENT 0 "Memory Size | 263747860 kB" TriggerValue +59 TAUEVENT 0 "Node Name | mb-icg102.cism.ucl.ac.be" TriggerValue +60 TAUEVENT 0 "OS Machine | x86_64" TriggerValue +61 TAUEVENT 0 "OS Name | Linux" TriggerValue +62 TAUEVENT 0 "OS Release | 5.4.286-1.el8.elrepo.x86_64" TriggerValue +63 TAUEVENT 0 "OS Version | #1 SMP Sun Nov 17 11:28:26 EST 2024" TriggerValue +64 TAUEVENT 0 "Starting Timestamp | 1747726794996778" TriggerValue +65 TAUEVENT 0 "TAU Architecture | default" TriggerValue +66 TAUEVENT 0 "TAU Config | -opencl=/opt/sw/arch/easybuild/2023b/software/CUDA/12.1.1/targets/x86_64-linux/" TriggerValue +67 TAUEVENT 0 "TAU Makefile | /opt/sw/noarch/manual/2023b/softwares/TAU2/tau2/x86_64/lib/Makefile.tau" TriggerValue +68 TAUEVENT 0 "TAU Version | 2.34-git" TriggerValue +69 TAUEVENT 0 "TAU_BFD_LOOKUP | on" TriggerValue +70 TAUEVENT 0 "TAU_CALLPATH_DEPTH | 2" TriggerValue +71 TAUEVENT 0 "TAU_CALLSITE_DEPTH | 1" TriggerValue +72 TAUEVENT 0 "TAU_CUDA_BINARY_EXE | " TriggerValue +73 TAUEVENT 0 "TAU_CUPTI_API | runtime" TriggerValue +74 TAUEVENT 0 "TAU_CUPTI_PC | off" TriggerValue +75 TAUEVENT 0 "TAU_CURRENT_TIMER_EXIT_PARAMS | off" TriggerValue +76 TAUEVENT 0 "TAU_EBS_KEEP_UNRESOLVED_ADDR | off" TriggerValue +77 TAUEVENT 0 "TAU_IBM_BG_HWP_COUNTERS | off" TriggerValue +78 TAUEVENT 0 "TAU_MEASURE_TAU | off" TriggerValue +79 TAUEVENT 0 "TAU_MEMDBG_PROTECT_ABOVE | off" TriggerValue +80 TAUEVENT 0 "TAU_MEMDBG_PROTECT_BELOW | off" TriggerValue +81 TAUEVENT 0 "TAU_MEMDBG_PROTECT_FREE | off" TriggerValue +82 TAUEVENT 0 "TAU_MEMMGR_MAX_BLOCKS | 64" TriggerValue +83 TAUEVENT 0 "TAU_OPENMP_RUNTIME | on" TriggerValue +84 TAUEVENT 0 "TAU_OPENMP_RUNTIME_EVENTS | on" TriggerValue +85 TAUEVENT 0 "TAU_OPENMP_RUNTIME_STATES | off" TriggerValue +86 TAUEVENT 0 "TAU_OUTPUT_CUDA_CSV | off" TriggerValue +87 TAUEVENT 0 "TAU_PAPI_MULTIPLEXING | off" TriggerValue +88 TAUEVENT 0 "TAU_PROFILE | off" TriggerValue +89 TAUEVENT 0 "TAU_PROFILE_FORMAT | profile" TriggerValue +90 TAUEVENT 0 "TAU_RECYCLE_THREADS | off" TriggerValue +91 TAUEVENT 0 "TAU_REGION_ADDRESSES | off" TriggerValue +92 TAUEVENT 0 "TAU_SAMPLING | off" TriggerValue +93 TAUEVENT 0 "TAU_SHOW_MEMORY_FUNCTIONS | off" TriggerValue +94 TAUEVENT 0 "TAU_SIGNALS_GDB | off" TriggerValue +95 TAUEVENT 0 "TAU_SYNCHRONIZE_CLOCKS | off" TriggerValue +96 TAUEVENT 0 "TAU_THROTTLE | on" TriggerValue +97 TAUEVENT 0 "TAU_THROTTLE_NUMCALLS | 100000" TriggerValue +98 TAUEVENT 0 "TAU_THROTTLE_PERCALL | 10" TriggerValue +99 TAUEVENT 0 "TAU_TRACE | on" TriggerValue +100 TAUEVENT 0 "TAU_TRACE_FORMAT | tau" TriggerValue +101 TAUEVENT 0 "TAU_TRACK_CUDA_CDP | off" TriggerValue +102 TAUEVENT 0 "TAU_TRACK_CUDA_ENV | off" TriggerValue +103 TAUEVENT 0 "TAU_TRACK_CUDA_INSTRUCTIONS | " TriggerValue +104 TAUEVENT 0 "TAU_TRACK_CUDA_SASS | off" TriggerValue +105 TAUEVENT 0 "TAU_TRACK_HEADROOM | off" TriggerValue +106 TAUEVENT 0 "TAU_TRACK_HEAP | off" TriggerValue +107 TAUEVENT 0 "TAU_TRACK_IO_PARAMS | off" TriggerValue +108 TAUEVENT 0 "TAU_TRACK_MEMORY_FOOTPRINT | off" TriggerValue +109 TAUEVENT 0 "TAU_TRACK_MEMORY_LEAKS | off" TriggerValue +110 TAUEVENT 0 "TAU_TRACK_SIGNALS | off" TriggerValue +111 TAUEVENT 0 "TAU_TRACK_UNIFIED_MEMORY | off" TriggerValue +112 TAUEVENT 0 "TAU_VERBOSE_RANK | -1" TriggerValue +113 TAUEVENT 0 "Timestamp | 1747726794996859" TriggerValue +114 TAUEVENT 0 "UTC Time | 2025-05-20T07:39:54Z" TriggerValue +115 TAUEVENT 0 "pid | 2856919" TriggerValue +116 TAUEVENT 0 "tid | 2856919" TriggerValue +117 TAUEVENT 0 "username | jhano" TriggerValue +60000 TRACER 0 "EV_INIT" none +60001 TRACER 0 "FLUSH" EntryExit +60003 TRACER 0 "FLUSH_CLOSE" none +60004 TRACER 0 "FLUSH_INITM" none +60005 TRACER 0 "WALL_CLOCK" none +60006 TRACER 0 "CONT_EVENT" none +60007 TAU_MESSAGE -7 "MESSAGE_SEND" par +60008 TAU_MESSAGE -8 "MESSAGE_RECV" par +70000 TAUEVENT 0 "ONESIDED_MESSAGE_SEND" TriggerValue +70001 TAUEVENT 0 "ONESIDED_MESSAGE_RECV" TriggerValue +70005 TAUEVENT 0 "ONESIDED_MESSAGE_RECIPROCAL_SEND" TriggerValue +70006 TAUEVENT 0 "ONESIDED_MESSAGE_RECIPROCAL_RECV" TriggerValue +70004 TAUEVENT 0 "ONESIDED_MESSAGE" TriggerValue +70002 TAUEVENT 0 "ONESIDED_MESSAGE_ID_TriggerValueT1" TriggerValue +70003 TAUEVENT 0 "ONESIDED_MESSAGE_ID_TriggerValueT2" TriggerValue diff --git a/main b/main old mode 100644 new mode 100755 index 841b3a3a9ba9d36d0501888218945734f692e8b7..cebc82616920ce1b2b3071bba03e9d01829c6ac9 Binary files a/main and b/main differ diff --git a/profile.0.0.0 b/profile.0.0.0 new file mode 100644 index 0000000000000000000000000000000000000000..d2a5e35e580edf498781ae1f4718016c3102f26f --- /dev/null +++ b/profile.0.0.0 @@ -0,0 +1,33 @@ +27 templated_functions_MULTI_TAUGPU_TIME +# Name Calls Subrs Excl Incl ProfileCalls # <metadata><attribute><name>Metric Name</name><value>TAUGPU_TIME</value></attribute><attribute><name>CPU Cores</name><value>16</value></attribute><attribute><name>CPU MHz</name><value>3578.336</value></attribute><attribute><name>CPU Type</name><value>Intel(R) Xeon(R) Gold 6346 CPU @ 3.10GHz</value></attribute><attribute><name>CPU Vendor</name><value>GenuineIntel</value></attribute><attribute><name>CPUs Allowed</name><value>00020000,00020000</value></attribute><attribute><name>CPUs Allowed List</name><value>17,49</value></attribute><attribute><name>CWD</name><value>/auto/home/users/j/h/jhano/P3</value></attribute><attribute><name>Cache Size</name><value>36864 KB</value></attribute><attribute><name>Command Line</name><value>./main</value></attribute><attribute><name>Ending Timestamp</name><value>1747646983158546</value></attribute><attribute><name>Executable</name><value>/auto/home/users/j/h/jhano/P3/main</value></attribute><attribute><name>Hostname</name><value>mb-icg102.cism.ucl.ac.be</value></attribute><attribute><name>Local Time</name><value>2025-05-19T11:29:41+02:00</value></attribute><attribute><name>Memories Allowed</name><value>00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000003</value></attribute><attribute><name>Memories Allowed List</name><value>0-1</value></attribute><attribute><name>Memory Size</name><value>263747860 kB</value></attribute><attribute><name>Node Name</name><value>mb-icg102.cism.ucl.ac.be</value></attribute><attribute><name>OS Machine</name><value>x86_64</value></attribute><attribute><name>OS Name</name><value>Linux</value></attribute><attribute><name>OS Release</name><value>5.4.286-1.el8.elrepo.x86_64</value></attribute><attribute><name>OS Version</name><value>#1 SMP Sun Nov 17 11:28:26 EST 2024</value></attribute><attribute><name>Starting Timestamp</name><value>1747646981315961</value></attribute><attribute><name>TAU Architecture</name><value>default</value></attribute><attribute><name>TAU Config</name><value> -opencl=/opt/sw/arch/easybuild/2023b/software/CUDA/12.1.1/targets/x86_64-linux/</value></attribute><attribute><name>TAU Makefile</name><value>/opt/sw/noarch/manual/2023b/softwares/TAU2/tau2/x86_64/lib/Makefile.tau</value></attribute><attribute><name>TAU Version</name><value>2.34-git</value></attribute><attribute><name>TAU_BFD_LOOKUP</name><value>on</value></attribute><attribute><name>TAU_CALLPATH</name><value>off</value></attribute><attribute><name>TAU_CALLPATH_DEPTH</name><value>2</value></attribute><attribute><name>TAU_CALLSITE_DEPTH</name><value>1</value></attribute><attribute><name>TAU_COMPENSATE</name><value>off</value></attribute><attribute><name>TAU_CUDA_BINARY_EXE</name><value></value></attribute><attribute><name>TAU_CUPTI_API</name><value>runtime</value></attribute><attribute><name>TAU_CUPTI_PC</name><value>off</value></attribute><attribute><name>TAU_CURRENT_TIMER_EXIT_PARAMS</name><value>on</value></attribute><attribute><name>TAU_EBS_KEEP_UNRESOLVED_ADDR</name><value>off</value></attribute><attribute><name>TAU_ENABLE_THREAD_CONTEXT</name><value>off</value></attribute><attribute><name>TAU_IBM_BG_HWP_COUNTERS</name><value>off</value></attribute><attribute><name>TAU_MEASURE_TAU</name><value>off</value></attribute><attribute><name>TAU_MEMDBG_PROTECT_ABOVE</name><value>off</value></attribute><attribute><name>TAU_MEMDBG_PROTECT_BELOW</name><value>off</value></attribute><attribute><name>TAU_MEMDBG_PROTECT_FREE</name><value>off</value></attribute><attribute><name>TAU_MEMMGR_MAX_BLOCKS</name><value>64</value></attribute><attribute><name>TAU_OPENMP_RUNTIME</name><value>on</value></attribute><attribute><name>TAU_OPENMP_RUNTIME_EVENTS</name><value>on</value></attribute><attribute><name>TAU_OPENMP_RUNTIME_STATES</name><value>off</value></attribute><attribute><name>TAU_OUTPUT_CUDA_CSV</name><value>off</value></attribute><attribute><name>TAU_PAPI_MULTIPLEXING</name><value>off</value></attribute><attribute><name>TAU_PROFILE</name><value>on</value></attribute><attribute><name>TAU_PROFILE_FORMAT</name><value>profile</value></attribute><attribute><name>TAU_RECYCLE_THREADS</name><value>off</value></attribute><attribute><name>TAU_REGION_ADDRESSES</name><value>off</value></attribute><attribute><name>TAU_SAMPLING</name><value>off</value></attribute><attribute><name>TAU_SHOW_MEMORY_FUNCTIONS</name><value>off</value></attribute><attribute><name>TAU_SIGNALS_GDB</name><value>off</value></attribute><attribute><name>TAU_THROTTLE</name><value>on</value></attribute><attribute><name>TAU_THROTTLE_NUMCALLS</name><value>100000</value></attribute><attribute><name>TAU_THROTTLE_PERCALL</name><value>10</value></attribute><attribute><name>TAU_TRACE</name><value>off</value></attribute><attribute><name>TAU_TRACE_FORMAT</name><value>tau</value></attribute><attribute><name>TAU_TRACK_CUDA_CDP</name><value>off</value></attribute><attribute><name>TAU_TRACK_CUDA_ENV</name><value>off</value></attribute><attribute><name>TAU_TRACK_CUDA_INSTRUCTIONS</name><value></value></attribute><attribute><name>TAU_TRACK_CUDA_SASS</name><value>off</value></attribute><attribute><name>TAU_TRACK_HEADROOM</name><value>off</value></attribute><attribute><name>TAU_TRACK_HEAP</name><value>off</value></attribute><attribute><name>TAU_TRACK_IO_PARAMS</name><value>off</value></attribute><attribute><name>TAU_TRACK_MEMORY_FOOTPRINT</name><value>off</value></attribute><attribute><name>TAU_TRACK_MEMORY_LEAKS</name><value>off</value></attribute><attribute><name>TAU_TRACK_SIGNALS</name><value>off</value></attribute><attribute><name>TAU_TRACK_UNIFIED_MEMORY</name><value>off</value></attribute><attribute><name>TAU_VERBOSE_RANK</name><value>-1</value></attribute><attribute><name>Timestamp</name><value>1747646981316029</value></attribute><attribute><name>UTC Time</name><value>2025-05-19T09:29:41Z</value></attribute><attribute><name>pid</name><value>2612340</value></attribute><attribute><name>tid</name><value>2612340</value></attribute><attribute><name>username</name><value>jhano</value></attribute><attribute><name>Ending Timestamp</name><value>1747646983158546</value></attribute></metadata> +".TAU application" 1 1 12295 1843253 0 GROUP="TAU_DEFAULT" +"taupreload_main" 1 158 726369 1830958 0 GROUP="TAU_DEFAULT" +"cl_int clGetPlatformIDs(cl_uint, cl_platform_id *, cl_uint *) C" 2 0 66232 66232 0 GROUP="TAU_USER" +"cl_int clGetPlatformInfo(cl_platform_id, cl_platform_info, size_t, void *, size_t *) C" 2 0 2 2 0 GROUP="TAU_USER" +"cl_int clGetDeviceIDs(cl_platform_id, cl_device_type, cl_uint, cl_device_id *, cl_uint *) C" 2 0 1 1 0 GROUP="TAU_USER" +"cl_int clGetDeviceInfo(cl_device_id, cl_device_info, size_t, void *, size_t *) C" 4 0 31 31 0 GROUP="TAU_USER" +"cl_context clCreateContext(const cl_context_properties *, cl_uint, const cl_device_id *, void (*)(const char *, const void *, size_t, void *), void *, cl_int *) C" 1 0 107413 107413 0 GROUP="TAU_USER" +"cl_command_queue clCreateCommandQueue(cl_context, cl_device_id, cl_command_queue_properties, cl_int *) C" 1 0 7 7 0 GROUP="TAU_USER" +"cl_int clRetainCommandQueue(cl_command_queue) C" 7 0 10 10 0 GROUP="TAU_USER" +"cl_int clRetainContext(cl_context) C" 17 0 7 7 0 GROUP="TAU_USER" +"cl_program clCreateProgramWithSource(cl_context, cl_uint, const char **, const size_t *, cl_int *) C" 9 0 8 8 0 GROUP="TAU_USER" +"cl_int clBuildProgram(cl_program, cl_uint, const cl_device_id *, const char *, void (*)(cl_program, void *), void *) C" 9 0 14619 14619 0 GROUP="TAU_USER" +"cl_int clGetProgramInfo(cl_program, cl_program_info, size_t, void *, size_t *) C" 18 0 12 12 0 GROUP="TAU_USER" +"cl_int clGetProgramBuildInfo(cl_program, cl_device_id, cl_program_build_info, size_t, void *, size_t *) C" 18 0 7 7 0 GROUP="TAU_USER" +"cl_int clReleaseContext(cl_context) C" 18 0 5 5 0 GROUP="TAU_USER" +"cl_kernel clCreateKernel(cl_program, const char *, cl_int *) C" 9 0 118 118 0 GROUP="TAU_USER" +"cl_int clReleaseProgram(cl_program) C" 9 0 5 5 0 GROUP="TAU_USER" +"cl_mem clCreateBuffer(cl_context, cl_mem_flags, size_t, void *, cl_int *) C" 3 0 10 10 0 GROUP="TAU_USER" +"cl_int clEnqueueWriteBuffer(cl_command_queue, cl_mem, cl_bool, size_t, size_t, const void *, cl_uint, const cl_event *, cl_event *) C" 2 0 11518 11518 0 GROUP="TAU_USER" +"cl_int clGetCommandQueueInfo(cl_command_queue, cl_command_queue_info, size_t, void *, size_t *) C" 2 0 1 1 0 GROUP="TAU_USER" +"cl_int clReleaseCommandQueue(cl_command_queue) C" 7 0 21 21 0 GROUP="TAU_USER" +"cl_int clRetainKernel(cl_kernel) C" 1 0 1 1 0 GROUP="TAU_USER" +"cl_int clSetKernelArg(cl_kernel, cl_uint, size_t, const void *) C" 11 0 2 2 0 GROUP="TAU_USER" +"cl_int clEnqueueNDRangeKernel(cl_command_queue, cl_kernel, cl_uint, const size_t *, const size_t *, const size_t *, cl_uint, const cl_event *, cl_event *) C" 2 0 2553 2553 0 GROUP="TAU_USER" +"cl_int clGetKernelInfo(cl_kernel, cl_kernel_info, size_t, void *, size_t *) C" 2 0 2 2 0 GROUP="TAU_USER" +"cl_int clReleaseKernel(cl_kernel) C" 1 0 1 1 0 GROUP="TAU_USER" +"cl_int clFinish(cl_command_queue) C" 1 0 902003 902003 0 GROUP="TAU_USER" +0 aggregates +1 userevents +# eventname numevents max min mean sumsqr +"Bytes copied from Host to Device" 4 67108864 67108864 67108864 1.801439850948198E+16 diff --git a/profile.0.0.1 b/profile.0.0.1 new file mode 100644 index 0000000000000000000000000000000000000000..3b9aada052f976a61f6f340fd97e9dca099f73ed --- /dev/null +++ b/profile.0.0.1 @@ -0,0 +1,7 @@ +4 templated_functions_MULTI_TAUGPU_TIME +# Name Calls Subrs Excl Incl ProfileCalls # <metadata><attribute><name>Metric Name</name><value>TAUGPU_TIME</value></attribute><attribute><name>CPU Cores</name><value>16</value></attribute><attribute><name>CPU MHz</name><value>3578.336</value></attribute><attribute><name>CPU Type</name><value>Intel(R) Xeon(R) Gold 6346 CPU @ 3.10GHz</value></attribute><attribute><name>CPU Vendor</name><value>GenuineIntel</value></attribute><attribute><name>CPUs Allowed</name><value>00020000,00020000</value></attribute><attribute><name>CPUs Allowed List</name><value>17,49</value></attribute><attribute><name>CWD</name><value>/auto/home/users/j/h/jhano/P3</value></attribute><attribute><name>Cache Size</name><value>36864 KB</value></attribute><attribute><name>Command Line</name><value>./main</value></attribute><attribute><name>Executable</name><value>/auto/home/users/j/h/jhano/P3/main</value></attribute><attribute><name>Hostname</name><value>mb-icg102.cism.ucl.ac.be</value></attribute><attribute><name>Local Time</name><value>2025-05-19T11:29:41+02:00</value></attribute><attribute><name>Memories Allowed</name><value>00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000003</value></attribute><attribute><name>Memories Allowed List</name><value>0-1</value></attribute><attribute><name>Memory Size</name><value>263747860 kB</value></attribute><attribute><name>Node Name</name><value>mb-icg102.cism.ucl.ac.be</value></attribute><attribute><name>OS Machine</name><value>x86_64</value></attribute><attribute><name>OS Name</name><value>Linux</value></attribute><attribute><name>OS Release</name><value>5.4.286-1.el8.elrepo.x86_64</value></attribute><attribute><name>OS Version</name><value>#1 SMP Sun Nov 17 11:28:26 EST 2024</value></attribute><attribute><name>OpenCL Command Queue</name><value>00</value></attribute><attribute><name>OpenCL Device</name><value>0</value></attribute><attribute><name>Starting Timestamp</name><value>1747646981315961</value></attribute><attribute><name>TAU Architecture</name><value>default</value></attribute><attribute><name>TAU Config</name><value> -opencl=/opt/sw/arch/easybuild/2023b/software/CUDA/12.1.1/targets/x86_64-linux/</value></attribute><attribute><name>TAU Makefile</name><value>/opt/sw/noarch/manual/2023b/softwares/TAU2/tau2/x86_64/lib/Makefile.tau</value></attribute><attribute><name>TAU Version</name><value>2.34-git</value></attribute><attribute><name>TAU_BFD_LOOKUP</name><value>on</value></attribute><attribute><name>TAU_CALLPATH</name><value>off</value></attribute><attribute><name>TAU_CALLPATH_DEPTH</name><value>2</value></attribute><attribute><name>TAU_CALLSITE_DEPTH</name><value>1</value></attribute><attribute><name>TAU_COMPENSATE</name><value>off</value></attribute><attribute><name>TAU_CUDA_BINARY_EXE</name><value></value></attribute><attribute><name>TAU_CUPTI_API</name><value>runtime</value></attribute><attribute><name>TAU_CUPTI_PC</name><value>off</value></attribute><attribute><name>TAU_CURRENT_TIMER_EXIT_PARAMS</name><value>on</value></attribute><attribute><name>TAU_EBS_KEEP_UNRESOLVED_ADDR</name><value>off</value></attribute><attribute><name>TAU_ENABLE_THREAD_CONTEXT</name><value>off</value></attribute><attribute><name>TAU_IBM_BG_HWP_COUNTERS</name><value>off</value></attribute><attribute><name>TAU_MEASURE_TAU</name><value>off</value></attribute><attribute><name>TAU_MEMDBG_PROTECT_ABOVE</name><value>off</value></attribute><attribute><name>TAU_MEMDBG_PROTECT_BELOW</name><value>off</value></attribute><attribute><name>TAU_MEMDBG_PROTECT_FREE</name><value>off</value></attribute><attribute><name>TAU_MEMMGR_MAX_BLOCKS</name><value>64</value></attribute><attribute><name>TAU_OPENMP_RUNTIME</name><value>on</value></attribute><attribute><name>TAU_OPENMP_RUNTIME_EVENTS</name><value>on</value></attribute><attribute><name>TAU_OPENMP_RUNTIME_STATES</name><value>off</value></attribute><attribute><name>TAU_OUTPUT_CUDA_CSV</name><value>off</value></attribute><attribute><name>TAU_PAPI_MULTIPLEXING</name><value>off</value></attribute><attribute><name>TAU_PROFILE</name><value>on</value></attribute><attribute><name>TAU_PROFILE_FORMAT</name><value>profile</value></attribute><attribute><name>TAU_RECYCLE_THREADS</name><value>off</value></attribute><attribute><name>TAU_REGION_ADDRESSES</name><value>off</value></attribute><attribute><name>TAU_SAMPLING</name><value>off</value></attribute><attribute><name>TAU_SHOW_MEMORY_FUNCTIONS</name><value>off</value></attribute><attribute><name>TAU_SIGNALS_GDB</name><value>off</value></attribute><attribute><name>TAU_THROTTLE</name><value>on</value></attribute><attribute><name>TAU_THROTTLE_NUMCALLS</name><value>100000</value></attribute><attribute><name>TAU_THROTTLE_PERCALL</name><value>10</value></attribute><attribute><name>TAU_TRACE</name><value>off</value></attribute><attribute><name>TAU_TRACE_FORMAT</name><value>tau</value></attribute><attribute><name>TAU_TRACK_CUDA_CDP</name><value>off</value></attribute><attribute><name>TAU_TRACK_CUDA_ENV</name><value>off</value></attribute><attribute><name>TAU_TRACK_CUDA_INSTRUCTIONS</name><value></value></attribute><attribute><name>TAU_TRACK_CUDA_SASS</name><value>off</value></attribute><attribute><name>TAU_TRACK_HEADROOM</name><value>off</value></attribute><attribute><name>TAU_TRACK_HEAP</name><value>off</value></attribute><attribute><name>TAU_TRACK_IO_PARAMS</name><value>off</value></attribute><attribute><name>TAU_TRACK_MEMORY_FOOTPRINT</name><value>off</value></attribute><attribute><name>TAU_TRACK_MEMORY_LEAKS</name><value>off</value></attribute><attribute><name>TAU_TRACK_SIGNALS</name><value>off</value></attribute><attribute><name>TAU_TRACK_UNIFIED_MEMORY</name><value>off</value></attribute><attribute><name>TAU_VERBOSE_RANK</name><value>-1</value></attribute><attribute><name>Timestamp</name><value>1747646981316029</value></attribute><attribute><name>UTC Time</name><value>2025-05-19T09:29:41Z</value></attribute><attribute><name>pid</name><value>2612340</value></attribute><attribute><name>tid</name><value>2612340</value></attribute><attribute><name>username</name><value>jhano</value></attribute><attribute><name>Ending Timestamp</name><value>1747646983158546</value></attribute></metadata> +".TAU application" 1 4 924387 1839791 0 GROUP="TAU_DEFAULT" +"WriteBuffer" 2 0 10988 10988 0 GROUP="TAU_USER" +"fill" 1 0 143 143 0 GROUP="TAU_USER" +"matrix_mul" 1 0 904273 904273 0 GROUP="TAU_USER" +0 aggregates diff --git a/tau.edf b/tau.edf new file mode 100644 index 0000000000000000000000000000000000000000..2b2076373156425c8945e9793524f13a8e4ffadd --- /dev/null +++ b/tau.edf @@ -0,0 +1,135 @@ +133 dynamic_trace_events +# FunctionId Group Tag "Name Type" Parameters +1 TAUEVENT 0 ".TAU <unknown event>" TriggerValue +2 TAU_DEFAULT 0 ".TAU application " EntryExit +34 TAUEVENT 0 "Bytes copied from Device to Device" TriggerValue +33 TAUEVENT 0 "Bytes copied from Device to Host" TriggerValue +32 TAUEVENT 0 "Bytes copied from Host to Device" TriggerValue +124 TRACER 0 "CONT_EVENT" none +45 TAUEVENT 0 "CPU Cores | 16" TriggerValue +46 TAUEVENT 0 "CPU MHz | 3599.999" TriggerValue +47 TAUEVENT 0 "CPU Type | Intel(R) Xeon(R) Gold 6346 CPU @ 3.10GHz" TriggerValue +48 TAUEVENT 0 "CPU Vendor | GenuineIntel" TriggerValue +50 TAUEVENT 0 "CPUs Allowed List | 0,32" TriggerValue +49 TAUEVENT 0 "CPUs Allowed | 00000001,00000001" TriggerValue +51 TAUEVENT 0 "CWD | /auto/home/users/j/h/jhano/P3" TriggerValue +52 TAUEVENT 0 "Cache Size | 36864 KB" TriggerValue +53 TAUEVENT 0 "Command Line | ./main" TriggerValue +41 TAUEVENT 0 "Control Operations" TriggerValue +44 TAUEVENT 0 "Correlation ID : WriteBuffer" TriggerValue +35 TAUEVENT 0 "Correlation ID" TriggerValue +119 TRACER 0 "EV_INIT" none +54 TAUEVENT 0 "Executable | /auto/home/users/j/h/jhano/P3/main" TriggerValue +120 TRACER 0 "FLUSH" EntryExit +121 TRACER 0 "FLUSH_CLOSE" none +122 TRACER 0 "FLUSH_INITM" none +39 TAUEVENT 0 "Floating Point Operations" TriggerValue +55 TAUEVENT 0 "Hostname | mb-icg102.cism.ucl.ac.be" TriggerValue +56 TAUEVENT 0 "Local Time | 2025-05-20T09:39:54+02:00" TriggerValue +126 TAU_MESSAGE -8 "MESSAGE_RECV" par +125 TAU_MESSAGE -7 "MESSAGE_SEND" par +58 TAUEVENT 0 "Memories Allowed List | 0-1" TriggerValue +57 TAUEVENT 0 "Memories Allowed | 00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000003" TriggerValue +40 TAUEVENT 0 "Memory Operations" TriggerValue +59 TAUEVENT 0 "Memory Size | 263747860 kB" TriggerValue +60 TAUEVENT 0 "Node Name | mb-icg102.cism.ucl.ac.be" TriggerValue +131 TAUEVENT 0 "ONESIDED_MESSAGE" TriggerValue +132 TAUEVENT 0 "ONESIDED_MESSAGE_ID_TriggerValueT1" TriggerValue +133 TAUEVENT 0 "ONESIDED_MESSAGE_ID_TriggerValueT2" TriggerValue +130 TAUEVENT 0 "ONESIDED_MESSAGE_RECIPROCAL_RECV" TriggerValue +129 TAUEVENT 0 "ONESIDED_MESSAGE_RECIPROCAL_SEND" TriggerValue +128 TAUEVENT 0 "ONESIDED_MESSAGE_RECV" TriggerValue +127 TAUEVENT 0 "ONESIDED_MESSAGE_SEND" TriggerValue +61 TAUEVENT 0 "OS Machine | x86_64" TriggerValue +62 TAUEVENT 0 "OS Name | Linux" TriggerValue +63 TAUEVENT 0 "OS Release | 5.4.286-1.el8.elrepo.x86_64" TriggerValue +64 TAUEVENT 0 "OS Version | #1 SMP Sun Nov 17 11:28:26 EST 2024" TriggerValue +65 TAUEVENT 0 "Starting Timestamp | 1747726794996778" TriggerValue +66 TAUEVENT 0 "TAU Architecture | default" TriggerValue +67 TAUEVENT 0 "TAU Config | -opencl=/opt/sw/arch/easybuild/2023b/software/CUDA/12.1.1/targets/x86_64-linux/" TriggerValue +68 TAUEVENT 0 "TAU Makefile | /opt/sw/noarch/manual/2023b/softwares/TAU2/tau2/x86_64/lib/Makefile.tau" TriggerValue +69 TAUEVENT 0 "TAU Version | 2.34-git" TriggerValue +70 TAUEVENT 0 "TAU_BFD_LOOKUP | on" TriggerValue +71 TAUEVENT 0 "TAU_CALLPATH_DEPTH | 2" TriggerValue +72 TAUEVENT 0 "TAU_CALLSITE_DEPTH | 1" TriggerValue +73 TAUEVENT 0 "TAU_CUDA_BINARY_EXE | " TriggerValue +74 TAUEVENT 0 "TAU_CUPTI_API | runtime" TriggerValue +75 TAUEVENT 0 "TAU_CUPTI_PC | off" TriggerValue +76 TAUEVENT 0 "TAU_CURRENT_TIMER_EXIT_PARAMS | off" TriggerValue +77 TAUEVENT 0 "TAU_EBS_KEEP_UNRESOLVED_ADDR | off" TriggerValue +78 TAUEVENT 0 "TAU_IBM_BG_HWP_COUNTERS | off" TriggerValue +79 TAUEVENT 0 "TAU_MEASURE_TAU | off" TriggerValue +80 TAUEVENT 0 "TAU_MEMDBG_PROTECT_ABOVE | off" TriggerValue +81 TAUEVENT 0 "TAU_MEMDBG_PROTECT_BELOW | off" TriggerValue +82 TAUEVENT 0 "TAU_MEMDBG_PROTECT_FREE | off" TriggerValue +83 TAUEVENT 0 "TAU_MEMMGR_MAX_BLOCKS | 64" TriggerValue +84 TAUEVENT 0 "TAU_OPENMP_RUNTIME | on" TriggerValue +85 TAUEVENT 0 "TAU_OPENMP_RUNTIME_EVENTS | on" TriggerValue +86 TAUEVENT 0 "TAU_OPENMP_RUNTIME_STATES | off" TriggerValue +87 TAUEVENT 0 "TAU_OUTPUT_CUDA_CSV | off" TriggerValue +88 TAUEVENT 0 "TAU_PAPI_MULTIPLEXING | off" TriggerValue +89 TAUEVENT 0 "TAU_PROFILE | off" TriggerValue +90 TAUEVENT 0 "TAU_PROFILE_FORMAT | profile" TriggerValue +91 TAUEVENT 0 "TAU_RECYCLE_THREADS | off" TriggerValue +92 TAUEVENT 0 "TAU_REGION_ADDRESSES | off" TriggerValue +93 TAUEVENT 0 "TAU_SAMPLING | off" TriggerValue +94 TAUEVENT 0 "TAU_SHOW_MEMORY_FUNCTIONS | off" TriggerValue +95 TAUEVENT 0 "TAU_SIGNALS_GDB | off" TriggerValue +96 TAUEVENT 0 "TAU_SYNCHRONIZE_CLOCKS | off" TriggerValue +97 TAUEVENT 0 "TAU_THROTTLE | on" TriggerValue +98 TAUEVENT 0 "TAU_THROTTLE_NUMCALLS | 100000" TriggerValue +99 TAUEVENT 0 "TAU_THROTTLE_PERCALL | 10" TriggerValue +100 TAUEVENT 0 "TAU_TRACE | on" TriggerValue +101 TAUEVENT 0 "TAU_TRACE_FORMAT | tau" TriggerValue +102 TAUEVENT 0 "TAU_TRACK_CUDA_CDP | off" TriggerValue +103 TAUEVENT 0 "TAU_TRACK_CUDA_ENV | off" TriggerValue +104 TAUEVENT 0 "TAU_TRACK_CUDA_INSTRUCTIONS | " TriggerValue +105 TAUEVENT 0 "TAU_TRACK_CUDA_SASS | off" TriggerValue +106 TAUEVENT 0 "TAU_TRACK_HEADROOM | off" TriggerValue +107 TAUEVENT 0 "TAU_TRACK_HEAP | off" TriggerValue +108 TAUEVENT 0 "TAU_TRACK_IO_PARAMS | off" TriggerValue +109 TAUEVENT 0 "TAU_TRACK_MEMORY_FOOTPRINT | off" TriggerValue +110 TAUEVENT 0 "TAU_TRACK_MEMORY_LEAKS | off" TriggerValue +111 TAUEVENT 0 "TAU_TRACK_SIGNALS | off" TriggerValue +112 TAUEVENT 0 "TAU_TRACK_UNIFIED_MEMORY | off" TriggerValue +113 TAUEVENT 0 "TAU_VERBOSE_RANK | -1" TriggerValue +42 TAUEVENT 0 "Time in Queue (us)" TriggerValue +43 TAUEVENT 0 "Time in Submitted (us)" TriggerValue +114 TAUEVENT 0 "Timestamp | 1747726794996859" TriggerValue +115 TAUEVENT 0 "UTC Time | 2025-05-20T07:39:54Z" TriggerValue +37 TAUEVENT 0 "Unified Memory Bytes copied from Device to Host" TriggerValue +36 TAUEVENT 0 "Unified Memory Bytes copied from Host to Device" TriggerValue +38 TAUEVENT 0 "Unified Memory CPU Page Faults" TriggerValue +123 TRACER 0 "WALL_CLOCK" none +22 TAU_USER 0 "WriteBuffer " EntryExit +9 TAU_USER 0 "cl_command_queue clCreateCommandQueue(cl_context, cl_device_id, cl_command_queue_properties, cl_int *) C " EntryExit +8 TAU_USER 0 "cl_context clCreateContext(const cl_context_properties *, cl_uint, const cl_device_id *, void (*)(const char *, const void *, size_t, void *), void *, cl_int *) C " EntryExit +13 TAU_USER 0 "cl_int clBuildProgram(cl_program, cl_uint, const cl_device_id *, const char *, void (*)(cl_program, void *), void *) C " EntryExit +26 TAU_USER 0 "cl_int clEnqueueNDRangeKernel(cl_command_queue, cl_kernel, cl_uint, const size_t *, const size_t *, const size_t *, cl_uint, const cl_event *, cl_event *) C " EntryExit +20 TAU_USER 0 "cl_int clEnqueueWriteBuffer(cl_command_queue, cl_mem, cl_bool, size_t, size_t, const void *, cl_uint, const cl_event *, cl_event *) C " EntryExit +29 TAU_USER 0 "cl_int clFinish(cl_command_queue) C " EntryExit +21 TAU_USER 0 "cl_int clGetCommandQueueInfo(cl_command_queue, cl_command_queue_info, size_t, void *, size_t *) C " EntryExit +6 TAU_USER 0 "cl_int clGetDeviceIDs(cl_platform_id, cl_device_type, cl_uint, cl_device_id *, cl_uint *) C " EntryExit +7 TAU_USER 0 "cl_int clGetDeviceInfo(cl_device_id, cl_device_info, size_t, void *, size_t *) C " EntryExit +27 TAU_USER 0 "cl_int clGetKernelInfo(cl_kernel, cl_kernel_info, size_t, void *, size_t *) C " EntryExit +4 TAU_USER 0 "cl_int clGetPlatformIDs(cl_uint, cl_platform_id *, cl_uint *) C " EntryExit +5 TAU_USER 0 "cl_int clGetPlatformInfo(cl_platform_id, cl_platform_info, size_t, void *, size_t *) C " EntryExit +15 TAU_USER 0 "cl_int clGetProgramBuildInfo(cl_program, cl_device_id, cl_program_build_info, size_t, void *, size_t *) C " EntryExit +14 TAU_USER 0 "cl_int clGetProgramInfo(cl_program, cl_program_info, size_t, void *, size_t *) C " EntryExit +23 TAU_USER 0 "cl_int clReleaseCommandQueue(cl_command_queue) C " EntryExit +16 TAU_USER 0 "cl_int clReleaseContext(cl_context) C " EntryExit +28 TAU_USER 0 "cl_int clReleaseKernel(cl_kernel) C " EntryExit +18 TAU_USER 0 "cl_int clReleaseProgram(cl_program) C " EntryExit +10 TAU_USER 0 "cl_int clRetainCommandQueue(cl_command_queue) C " EntryExit +11 TAU_USER 0 "cl_int clRetainContext(cl_context) C " EntryExit +24 TAU_USER 0 "cl_int clRetainKernel(cl_kernel) C " EntryExit +25 TAU_USER 0 "cl_int clSetKernelArg(cl_kernel, cl_uint, size_t, const void *) C " EntryExit +17 TAU_USER 0 "cl_kernel clCreateKernel(cl_program, const char *, cl_int *) C " EntryExit +19 TAU_USER 0 "cl_mem clCreateBuffer(cl_context, cl_mem_flags, size_t, void *, cl_int *) C " EntryExit +12 TAU_USER 0 "cl_program clCreateProgramWithSource(cl_context, cl_uint, const char **, const size_t *, cl_int *) C " EntryExit +30 TAU_USER 0 "fill " EntryExit +31 TAU_USER 0 "matrix_mul " EntryExit +116 TAUEVENT 0 "pid | 2856919" TriggerValue +3 TAU_DEFAULT 0 "taupreload_main " EntryExit +117 TAUEVENT 0 "tid | 2856919" TriggerValue +118 TAUEVENT 0 "username | jhano" TriggerValue diff --git a/tau.slog2 b/tau.slog2 new file mode 100644 index 0000000000000000000000000000000000000000..a5b6e9fc9999c64042d4ed0dcc27b1de9dc07b10 Binary files /dev/null and b/tau.slog2 differ diff --git a/tau.trc b/tau.trc new file mode 100644 index 0000000000000000000000000000000000000000..3ad2cfa7ed53b86ec3e11a1d45b11394605ffe4b Binary files /dev/null and b/tau.trc differ diff --git a/tau_profile_fast.txt b/tau_profile_fast.txt new file mode 100644 index 0000000000000000000000000000000000000000..24730fba9f83d71f530231207ee5ef831860a6f7 --- /dev/null +++ b/tau_profile_fast.txt @@ -0,0 +1,73 @@ +Reading Profile files in profile.* + +FUNCTION SUMMARY (total): +--------------------------------------------------------------------------------------- +%Time Exclusive Inclusive #Call #Subrs Inclusive Name + msec total msec usec/call +--------------------------------------------------------------------------------------- +100.0 936 3,683 2 5 1841522 .TAU application + 49.7 726 1,830 1 158 1830958 taupreload_main + 24.6 904 904 1 0 904273 matrix_mul + 24.5 902 902 1 0 902003 cl_int clFinish(cl_command_queue) C + 2.9 107 107 1 0 107413 cl_context clCreateContext(const cl_context_properties *, cl_uint, const cl_device_id *, void (*)(const char *, const void *, size_t, void *), void *, cl_int *) C + 1.8 66 66 2 0 33116 cl_int clGetPlatformIDs(cl_uint, cl_platform_id *, cl_uint *) C + 0.4 14 14 9 0 1624 cl_int clBuildProgram(cl_program, cl_uint, const cl_device_id *, const char *, void (*)(cl_program, void *), void *) C + 0.3 11 11 2 0 5759 cl_int clEnqueueWriteBuffer(cl_command_queue, cl_mem, cl_bool, size_t, size_t, const void *, cl_uint, const cl_event *, cl_event *) C + 0.3 10 10 2 0 5494 WriteBuffer + 0.1 2 2 2 0 1276 cl_int clEnqueueNDRangeKernel(cl_command_queue, cl_kernel, cl_uint, const size_t *, const size_t *, const size_t *, cl_uint, const cl_event *, cl_event *) C + 0.0 0.143 0.143 1 0 143 fill + 0.0 0.118 0.118 9 0 13 cl_kernel clCreateKernel(cl_program, const char *, cl_int *) C + 0.0 0.031 0.031 4 0 8 cl_int clGetDeviceInfo(cl_device_id, cl_device_info, size_t, void *, size_t *) C + 0.0 0.021 0.021 7 0 3 cl_int clReleaseCommandQueue(cl_command_queue) C + 0.0 0.012 0.012 18 0 1 cl_int clGetProgramInfo(cl_program, cl_program_info, size_t, void *, size_t *) C + 0.0 0.01 0.01 7 0 1 cl_int clRetainCommandQueue(cl_command_queue) C + 0.0 0.01 0.01 3 0 3 cl_mem clCreateBuffer(cl_context, cl_mem_flags, size_t, void *, cl_int *) C + 0.0 0.008 0.008 9 0 1 cl_program clCreateProgramWithSource(cl_context, cl_uint, const char **, const size_t *, cl_int *) C + 0.0 0.007 0.007 1 0 7 cl_command_queue clCreateCommandQueue(cl_context, cl_device_id, cl_command_queue_properties, cl_int *) C + 0.0 0.007 0.007 18 0 0 cl_int clGetProgramBuildInfo(cl_program, cl_device_id, cl_program_build_info, size_t, void *, size_t *) C + 0.0 0.007 0.007 17 0 0 cl_int clRetainContext(cl_context) C + 0.0 0.005 0.005 18 0 0 cl_int clReleaseContext(cl_context) C + 0.0 0.005 0.005 9 0 1 cl_int clReleaseProgram(cl_program) C + 0.0 0.002 0.002 2 0 1 cl_int clGetKernelInfo(cl_kernel, cl_kernel_info, size_t, void *, size_t *) C + 0.0 0.002 0.002 2 0 1 cl_int clGetPlatformInfo(cl_platform_id, cl_platform_info, size_t, void *, size_t *) C + 0.0 0.002 0.002 11 0 0 cl_int clSetKernelArg(cl_kernel, cl_uint, size_t, const void *) C + 0.0 0.001 0.001 2 0 0 cl_int clGetCommandQueueInfo(cl_command_queue, cl_command_queue_info, size_t, void *, size_t *) C + 0.0 0.001 0.001 2 0 0 cl_int clGetDeviceIDs(cl_platform_id, cl_device_type, cl_uint, cl_device_id *, cl_uint *) C + 0.0 0.001 0.001 1 0 1 cl_int clReleaseKernel(cl_kernel) C + 0.0 0.001 0.001 1 0 1 cl_int clRetainKernel(cl_kernel) C + +FUNCTION SUMMARY (mean): +--------------------------------------------------------------------------------------- +%Time Exclusive Inclusive #Call #Subrs Inclusive Name + msec total msec usec/call +--------------------------------------------------------------------------------------- +100.0 468 1,841 1 2.5 1841522 .TAU application + 49.7 363 915 0.5 79 1830958 taupreload_main + 24.6 452 452 0.5 0 904273 matrix_mul + 24.5 451 451 0.5 0 902003 cl_int clFinish(cl_command_queue) C + 2.9 53 53 0.5 0 107413 cl_context clCreateContext(const cl_context_properties *, cl_uint, const cl_device_id *, void (*)(const char *, const void *, size_t, void *), void *, cl_int *) C + 1.8 33 33 1 0 33116 cl_int clGetPlatformIDs(cl_uint, cl_platform_id *, cl_uint *) C + 0.4 7 7 4.5 0 1624 cl_int clBuildProgram(cl_program, cl_uint, const cl_device_id *, const char *, void (*)(cl_program, void *), void *) C + 0.3 5 5 1 0 5759 cl_int clEnqueueWriteBuffer(cl_command_queue, cl_mem, cl_bool, size_t, size_t, const void *, cl_uint, const cl_event *, cl_event *) C + 0.3 5 5 1 0 5494 WriteBuffer + 0.1 1 1 1 0 1276 cl_int clEnqueueNDRangeKernel(cl_command_queue, cl_kernel, cl_uint, const size_t *, const size_t *, const size_t *, cl_uint, const cl_event *, cl_event *) C + 0.0 0.0715 0.0715 0.5 0 143 fill + 0.0 0.059 0.059 4.5 0 13 cl_kernel clCreateKernel(cl_program, const char *, cl_int *) C + 0.0 0.0155 0.0155 2 0 8 cl_int clGetDeviceInfo(cl_device_id, cl_device_info, size_t, void *, size_t *) C + 0.0 0.0105 0.0105 3.5 0 3 cl_int clReleaseCommandQueue(cl_command_queue) C + 0.0 0.006 0.006 9 0 1 cl_int clGetProgramInfo(cl_program, cl_program_info, size_t, void *, size_t *) C + 0.0 0.005 0.005 3.5 0 1 cl_int clRetainCommandQueue(cl_command_queue) C + 0.0 0.005 0.005 1.5 0 3 cl_mem clCreateBuffer(cl_context, cl_mem_flags, size_t, void *, cl_int *) C + 0.0 0.004 0.004 4.5 0 1 cl_program clCreateProgramWithSource(cl_context, cl_uint, const char **, const size_t *, cl_int *) C + 0.0 0.0035 0.0035 0.5 0 7 cl_command_queue clCreateCommandQueue(cl_context, cl_device_id, cl_command_queue_properties, cl_int *) C + 0.0 0.0035 0.0035 9 0 0 cl_int clGetProgramBuildInfo(cl_program, cl_device_id, cl_program_build_info, size_t, void *, size_t *) C + 0.0 0.0035 0.0035 8.5 0 0 cl_int clRetainContext(cl_context) C + 0.0 0.0025 0.0025 9 0 0 cl_int clReleaseContext(cl_context) C + 0.0 0.0025 0.0025 4.5 0 1 cl_int clReleaseProgram(cl_program) C + 0.0 0.001 0.001 1 0 1 cl_int clGetKernelInfo(cl_kernel, cl_kernel_info, size_t, void *, size_t *) C + 0.0 0.001 0.001 1 0 1 cl_int clGetPlatformInfo(cl_platform_id, cl_platform_info, size_t, void *, size_t *) C + 0.0 0.001 0.001 5.5 0 0 cl_int clSetKernelArg(cl_kernel, cl_uint, size_t, const void *) C + 0.0 0.0005 0.0005 1 0 0 cl_int clGetCommandQueueInfo(cl_command_queue, cl_command_queue_info, size_t, void *, size_t *) C + 0.0 0.0005 0.0005 1 0 0 cl_int clGetDeviceIDs(cl_platform_id, cl_device_type, cl_uint, cl_device_id *, cl_uint *) C + 0.0 0.0005 0.0005 0.5 0 1 cl_int clReleaseKernel(cl_kernel) C + 0.0 0.0005 0.0005 0.5 0 1 cl_int clRetainKernel(cl_kernel) C diff --git a/tau_profile_naive.txt b/tau_profile_naive.txt new file mode 100644 index 0000000000000000000000000000000000000000..9da6b28a87745e385321ff11e01e53a6dd0fc099 --- /dev/null +++ b/tau_profile_naive.txt @@ -0,0 +1,73 @@ +Reading Profile files in profile.* + +FUNCTION SUMMARY (total): +--------------------------------------------------------------------------------------- +%Time Exclusive Inclusive #Call #Subrs Inclusive Name + msec total msec usec/call +--------------------------------------------------------------------------------------- +100.0 1,005 24,133 2 5 12066881 .TAU application + 50.0 725 12,055 1 157 12055839 taupreload_main + 45.8 11,061 11,061 1 0 11061448 cl_int clFinish(cl_command_queue) C + 45.8 11,061 11,061 1 0 11061306 matrix_mul + 0.5 126 126 2 0 63131 cl_int clGetPlatformIDs(cl_uint, cl_platform_id *, cl_uint *) C + 0.5 115 115 1 0 115365 cl_context clCreateContext(const cl_context_properties *, cl_uint, const cl_device_id *, void (*)(const char *, const void *, size_t, void *), void *, cl_int *) C + 0.1 15 15 9 0 1723 cl_int clBuildProgram(cl_program, cl_uint, const cl_device_id *, const char *, void (*)(cl_program, void *), void *) C + 0.0 11 11 2 0 5755 cl_int clEnqueueWriteBuffer(cl_command_queue, cl_mem, cl_bool, size_t, size_t, const void *, cl_uint, const cl_event *, cl_event *) C + 0.0 11 11 2 0 5503 WriteBuffer + 0.0 0.19 0.19 2 0 95 cl_int clEnqueueNDRangeKernel(cl_command_queue, cl_kernel, cl_uint, const size_t *, const size_t *, const size_t *, cl_uint, const cl_event *, cl_event *) C + 0.0 0.143 0.143 1 0 143 fill + 0.0 0.123 0.123 9 0 14 cl_kernel clCreateKernel(cl_program, const char *, cl_int *) C + 0.0 0.028 0.028 4 0 7 cl_int clGetDeviceInfo(cl_device_id, cl_device_info, size_t, void *, size_t *) C + 0.0 0.024 0.024 7 0 3 cl_int clReleaseCommandQueue(cl_command_queue) C + 0.0 0.01 0.01 18 0 1 cl_int clGetProgramInfo(cl_program, cl_program_info, size_t, void *, size_t *) C + 0.0 0.009 0.009 3 0 3 cl_mem clCreateBuffer(cl_context, cl_mem_flags, size_t, void *, cl_int *) C + 0.0 0.008 0.008 17 0 0 cl_int clRetainContext(cl_context) C + 0.0 0.006 0.006 1 0 6 cl_command_queue clCreateCommandQueue(cl_context, cl_device_id, cl_command_queue_properties, cl_int *) C + 0.0 0.006 0.006 18 0 0 cl_int clReleaseContext(cl_context) C + 0.0 0.006 0.006 9 0 1 cl_int clReleaseProgram(cl_program) C + 0.0 0.006 0.006 7 0 1 cl_int clRetainCommandQueue(cl_command_queue) C + 0.0 0.006 0.006 9 0 1 cl_program clCreateProgramWithSource(cl_context, cl_uint, const char **, const size_t *, cl_int *) C + 0.0 0.003 0.003 10 0 0 cl_int clSetKernelArg(cl_kernel, cl_uint, size_t, const void *) C + 0.0 0.002 0.002 2 0 1 cl_int clGetCommandQueueInfo(cl_command_queue, cl_command_queue_info, size_t, void *, size_t *) C + 0.0 0.001 0.001 2 0 0 cl_int clGetDeviceIDs(cl_platform_id, cl_device_type, cl_uint, cl_device_id *, cl_uint *) C + 0.0 0.001 0.001 2 0 0 cl_int clGetKernelInfo(cl_kernel, cl_kernel_info, size_t, void *, size_t *) C + 0.0 0.001 0.001 2 0 0 cl_int clGetPlatformInfo(cl_platform_id, cl_platform_info, size_t, void *, size_t *) C + 0.0 0.001 0.001 18 0 0 cl_int clGetProgramBuildInfo(cl_program, cl_device_id, cl_program_build_info, size_t, void *, size_t *) C + 0.0 0.001 0.001 1 0 1 cl_int clReleaseKernel(cl_kernel) C + 0.0 0.001 0.001 1 0 1 cl_int clRetainKernel(cl_kernel) C + +FUNCTION SUMMARY (mean): +--------------------------------------------------------------------------------------- +%Time Exclusive Inclusive #Call #Subrs Inclusive Name + msec total msec usec/call +--------------------------------------------------------------------------------------- +100.0 502 12,066 1 2.5 12066881 .TAU application + 50.0 362 6,027 0.5 78.5 12055839 taupreload_main + 45.8 5,530 5,530 0.5 0 11061448 cl_int clFinish(cl_command_queue) C + 45.8 5,530 5,530 0.5 0 11061306 matrix_mul + 0.5 63 63 1 0 63131 cl_int clGetPlatformIDs(cl_uint, cl_platform_id *, cl_uint *) C + 0.5 57 57 0.5 0 115365 cl_context clCreateContext(const cl_context_properties *, cl_uint, const cl_device_id *, void (*)(const char *, const void *, size_t, void *), void *, cl_int *) C + 0.1 7 7 4.5 0 1723 cl_int clBuildProgram(cl_program, cl_uint, const cl_device_id *, const char *, void (*)(cl_program, void *), void *) C + 0.0 5 5 1 0 5755 cl_int clEnqueueWriteBuffer(cl_command_queue, cl_mem, cl_bool, size_t, size_t, const void *, cl_uint, const cl_event *, cl_event *) C + 0.0 5 5 1 0 5503 WriteBuffer + 0.0 0.095 0.095 1 0 95 cl_int clEnqueueNDRangeKernel(cl_command_queue, cl_kernel, cl_uint, const size_t *, const size_t *, const size_t *, cl_uint, const cl_event *, cl_event *) C + 0.0 0.0716 0.0716 0.5 0 143 fill + 0.0 0.0615 0.0615 4.5 0 14 cl_kernel clCreateKernel(cl_program, const char *, cl_int *) C + 0.0 0.014 0.014 2 0 7 cl_int clGetDeviceInfo(cl_device_id, cl_device_info, size_t, void *, size_t *) C + 0.0 0.012 0.012 3.5 0 3 cl_int clReleaseCommandQueue(cl_command_queue) C + 0.0 0.005 0.005 9 0 1 cl_int clGetProgramInfo(cl_program, cl_program_info, size_t, void *, size_t *) C + 0.0 0.0045 0.0045 1.5 0 3 cl_mem clCreateBuffer(cl_context, cl_mem_flags, size_t, void *, cl_int *) C + 0.0 0.004 0.004 8.5 0 0 cl_int clRetainContext(cl_context) C + 0.0 0.003 0.003 0.5 0 6 cl_command_queue clCreateCommandQueue(cl_context, cl_device_id, cl_command_queue_properties, cl_int *) C + 0.0 0.003 0.003 9 0 0 cl_int clReleaseContext(cl_context) C + 0.0 0.003 0.003 4.5 0 1 cl_int clReleaseProgram(cl_program) C + 0.0 0.003 0.003 3.5 0 1 cl_int clRetainCommandQueue(cl_command_queue) C + 0.0 0.003 0.003 4.5 0 1 cl_program clCreateProgramWithSource(cl_context, cl_uint, const char **, const size_t *, cl_int *) C + 0.0 0.0015 0.0015 5 0 0 cl_int clSetKernelArg(cl_kernel, cl_uint, size_t, const void *) C + 0.0 0.001 0.001 1 0 1 cl_int clGetCommandQueueInfo(cl_command_queue, cl_command_queue_info, size_t, void *, size_t *) C + 0.0 0.0005 0.0005 1 0 0 cl_int clGetDeviceIDs(cl_platform_id, cl_device_type, cl_uint, cl_device_id *, cl_uint *) C + 0.0 0.0005 0.0005 1 0 0 cl_int clGetKernelInfo(cl_kernel, cl_kernel_info, size_t, void *, size_t *) C + 0.0 0.0005 0.0005 1 0 0 cl_int clGetPlatformInfo(cl_platform_id, cl_platform_info, size_t, void *, size_t *) C + 0.0 0.0005 0.0005 9 0 0 cl_int clGetProgramBuildInfo(cl_program, cl_device_id, cl_program_build_info, size_t, void *, size_t *) C + 0.0 0.0005 0.0005 0.5 0 1 cl_int clReleaseKernel(cl_kernel) C + 0.0 0.0005 0.0005 0.5 0 1 cl_int clRetainKernel(cl_kernel) C diff --git a/tau_summary.txt b/tau_summary.txt new file mode 100644 index 0000000000000000000000000000000000000000..7139307646817455a58467c71361a0de183baa4c --- /dev/null +++ b/tau_summary.txt @@ -0,0 +1,77 @@ +Reading Profile files in profile.* + +FUNCTION SUMMARY (total): +--------------------------------------------------------------------------------------- +%Time Exclusive Inclusive #Call #Subrs Inclusive Name + msec total msec usec/call +--------------------------------------------------------------------------------------- +100.0 272 21,994 2 7 10997020 .TAU application + 49.9 10,741 10,985 1 179 10985515 taupreload_main + 48.8 10,735 10,735 1 0 10735516 matrix_mul + 0.5 114 114 1 0 114368 cl_context clCreateContext(const cl_context_properties *, cl_uint, const cl_device_id *, void (*)(const char *, const void *, size_t, void *), void *, cl_int *) C + 0.5 112 112 2 0 56124 cl_int clGetPlatformIDs(cl_uint, cl_platform_id *, cl_uint *) C + 0.1 13 13 9 0 1541 cl_int clBuildProgram(cl_program, cl_uint, const cl_device_id *, const char *, void (*)(cl_program, void *), void *) C + 0.0 2 2 3 0 801 cl_int clEnqueueNDRangeKernel(cl_command_queue, cl_kernel, cl_uint, const size_t *, const size_t *, const size_t *, cl_uint, const cl_event *, cl_event *) C + 0.0 1 1 2 0 508 cl_int clEnqueueWriteBuffer(cl_command_queue, cl_mem, cl_bool, size_t, size_t, const void *, cl_uint, const cl_event *, cl_event *) C + 0.0 0.672 0.672 2 0 336 WriteBuffer + 0.0 0.127 0.127 1 0 127 cl_int clEnqueueCopyBuffer(cl_command_queue, cl_mem, cl_mem, size_t, size_t, size_t, cl_uint, const cl_event *, cl_event *) C + 0.0 0.119 0.119 9 0 13 cl_kernel clCreateKernel(cl_program, const char *, cl_int *) C + 0.0 0.0275 0.0275 2 0 14 fill + 0.0 0.0243 0.0243 1 0 24 CopyBuffer + 0.0 0.024 0.024 10 0 2 cl_int clReleaseCommandQueue(cl_command_queue) C + 0.0 0.018 0.018 4 0 4 cl_int clGetDeviceInfo(cl_device_id, cl_device_info, size_t, void *, size_t *) C + 0.0 0.011 0.011 18 0 1 cl_int clGetProgramInfo(cl_program, cl_program_info, size_t, void *, size_t *) C + 0.0 0.008 0.008 9 0 1 cl_program clCreateProgramWithSource(cl_context, cl_uint, const char **, const size_t *, cl_int *) C + 0.0 0.007 0.007 1 0 7 cl_command_queue clCreateCommandQueue(cl_context, cl_device_id, cl_command_queue_properties, cl_int *) C + 0.0 0.007 0.007 21 0 0 cl_int clReleaseContext(cl_context) C + 0.0 0.007 0.007 5 0 1 cl_mem clCreateBuffer(cl_context, cl_mem_flags, size_t, void *, cl_int *) C + 0.0 0.005 0.005 9 0 1 cl_int clReleaseProgram(cl_program) C + 0.0 0.005 0.005 9 0 1 cl_int clRetainCommandQueue(cl_command_queue) C + 0.0 0.005 0.005 20 0 0 cl_int clRetainContext(cl_context) C + 0.0 0.005 0.005 14 0 0 cl_int clSetKernelArg(cl_kernel, cl_uint, size_t, const void *) C + 0.0 0.004 0.004 18 0 0 cl_int clGetProgramBuildInfo(cl_program, cl_device_id, cl_program_build_info, size_t, void *, size_t *) C + 0.0 0.003 0.003 1 0 3 cl_int clFinish(cl_command_queue) C + 0.0 0.002 0.002 2 0 1 cl_int clGetPlatformInfo(cl_platform_id, cl_platform_info, size_t, void *, size_t *) C + 0.0 0.002 0.002 2 0 1 cl_int clRetainKernel(cl_kernel) C + 0.0 0.001 0.001 2 0 0 cl_int clGetDeviceIDs(cl_platform_id, cl_device_type, cl_uint, cl_device_id *, cl_uint *) C + 0.0 0.001 0.001 3 0 0 cl_int clGetKernelInfo(cl_kernel, cl_kernel_info, size_t, void *, size_t *) C + 0.0 0.001 0.001 2 0 0 cl_int clReleaseKernel(cl_kernel) C + 0.0 0 0 2 0 0 cl_int clGetCommandQueueInfo(cl_command_queue, cl_command_queue_info, size_t, void *, size_t *) C + +FUNCTION SUMMARY (mean): +--------------------------------------------------------------------------------------- +%Time Exclusive Inclusive #Call #Subrs Inclusive Name + msec total msec usec/call +--------------------------------------------------------------------------------------- +100.0 136 10,997 1 3.5 10997020 .TAU application + 49.9 5,370 5,492 0.5 89.5 10985515 taupreload_main + 48.8 5,367 5,367 0.5 0 10735516 matrix_mul + 0.5 57 57 0.5 0 114368 cl_context clCreateContext(const cl_context_properties *, cl_uint, const cl_device_id *, void (*)(const char *, const void *, size_t, void *), void *, cl_int *) C + 0.5 56 56 1 0 56124 cl_int clGetPlatformIDs(cl_uint, cl_platform_id *, cl_uint *) C + 0.1 6 6 4.5 0 1541 cl_int clBuildProgram(cl_program, cl_uint, const cl_device_id *, const char *, void (*)(cl_program, void *), void *) C + 0.0 1 1 1.5 0 801 cl_int clEnqueueNDRangeKernel(cl_command_queue, cl_kernel, cl_uint, const size_t *, const size_t *, const size_t *, cl_uint, const cl_event *, cl_event *) C + 0.0 0.507 0.507 1 0 508 cl_int clEnqueueWriteBuffer(cl_command_queue, cl_mem, cl_bool, size_t, size_t, const void *, cl_uint, const cl_event *, cl_event *) C + 0.0 0.336 0.336 1 0 336 WriteBuffer + 0.0 0.0635 0.0635 0.5 0 127 cl_int clEnqueueCopyBuffer(cl_command_queue, cl_mem, cl_mem, size_t, size_t, size_t, cl_uint, const cl_event *, cl_event *) C + 0.0 0.0595 0.0595 4.5 0 13 cl_kernel clCreateKernel(cl_program, const char *, cl_int *) C + 0.0 0.0138 0.0138 1 0 14 fill + 0.0 0.0121 0.0121 0.5 0 24 CopyBuffer + 0.0 0.012 0.012 5 0 2 cl_int clReleaseCommandQueue(cl_command_queue) C + 0.0 0.009 0.009 2 0 4 cl_int clGetDeviceInfo(cl_device_id, cl_device_info, size_t, void *, size_t *) C + 0.0 0.0055 0.0055 9 0 1 cl_int clGetProgramInfo(cl_program, cl_program_info, size_t, void *, size_t *) C + 0.0 0.004 0.004 4.5 0 1 cl_program clCreateProgramWithSource(cl_context, cl_uint, const char **, const size_t *, cl_int *) C + 0.0 0.0035 0.0035 0.5 0 7 cl_command_queue clCreateCommandQueue(cl_context, cl_device_id, cl_command_queue_properties, cl_int *) C + 0.0 0.0035 0.0035 10.5 0 0 cl_int clReleaseContext(cl_context) C + 0.0 0.0035 0.0035 2.5 0 1 cl_mem clCreateBuffer(cl_context, cl_mem_flags, size_t, void *, cl_int *) C + 0.0 0.0025 0.0025 4.5 0 1 cl_int clReleaseProgram(cl_program) C + 0.0 0.0025 0.0025 4.5 0 1 cl_int clRetainCommandQueue(cl_command_queue) C + 0.0 0.0025 0.0025 10 0 0 cl_int clRetainContext(cl_context) C + 0.0 0.0025 0.0025 7 0 0 cl_int clSetKernelArg(cl_kernel, cl_uint, size_t, const void *) C + 0.0 0.002 0.002 9 0 0 cl_int clGetProgramBuildInfo(cl_program, cl_device_id, cl_program_build_info, size_t, void *, size_t *) C + 0.0 0.0015 0.0015 0.5 0 3 cl_int clFinish(cl_command_queue) C + 0.0 0.001 0.001 1 0 1 cl_int clGetPlatformInfo(cl_platform_id, cl_platform_info, size_t, void *, size_t *) C + 0.0 0.001 0.001 1 0 1 cl_int clRetainKernel(cl_kernel) C + 0.0 0.0005 0.0005 1 0 0 cl_int clGetDeviceIDs(cl_platform_id, cl_device_type, cl_uint, cl_device_id *, cl_uint *) C + 0.0 0.0005 0.0005 1.5 0 0 cl_int clGetKernelInfo(cl_kernel, cl_kernel_info, size_t, void *, size_t *) C + 0.0 0.0005 0.0005 1 0 0 cl_int clReleaseKernel(cl_kernel) C + 0.0 0 0 1 0 0 cl_int clGetCommandQueueInfo(cl_command_queue, cl_command_queue_info, size_t, void *, size_t *) C diff --git a/tautrace.0.0.0.trc b/tautrace.0.0.0.trc new file mode 100644 index 0000000000000000000000000000000000000000..86d787d9f36f5aec24fea44fbd861095204f454f Binary files /dev/null and b/tautrace.0.0.0.trc differ diff --git a/tautrace.0.0.1.trc b/tautrace.0.0.1.trc new file mode 100644 index 0000000000000000000000000000000000000000..e4bd1dcb4e9142db8c1975325bb189147a5c67e3 Binary files /dev/null and b/tautrace.0.0.1.trc differ