diff --git a/P2/Makefile b/P2/Makefile
index a69e083f85e5b1661f77d4428a3c5a321f5ca9a6..9cd92f65bb187388a19a83033ad143a9a24e6be4 100644
--- a/P2/Makefile
+++ b/P2/Makefile
@@ -1,7 +1,7 @@
 CXX = mpic++
 CXXFLAGS = -std=c++17 -Wall -Wextra -O0
-TARGET = distributedtests
-OBJ = matrix.o distributedmatrix.o distributedtests.o mlp_sgd_distributed.o globals.o
+TARGET = profile
+OBJ = matrix.o distributedmatrix.o profile.o globals.o
 HEADERS = abstractmatrix.hpp matrix.hpp distributedmatrix.hpp globals.hpp
 
 all:
@@ -16,6 +16,9 @@ matrix.o: matrix.cpp matrix.hpp abstractmatrix.hpp
 distributedmatrix.o: distributedmatrix.cpp distributedmatrix.hpp matrix.hpp abstractmatrix.hpp
 	$(CXX) $(CXXFLAGS) -c distributedmatrix.cpp
 
+# Compile the NVTX profiling driver; rebuilt when profile.cpp or a listed matrix header changes.
+profile.o: profile.cpp distributedmatrix.hpp matrix.hpp abstractmatrix.hpp
+	$(CXX) $(CXXFLAGS) -c profile.cpp
 distributedtests.o: distributedtests.cpp distributedmatrix.hpp matrix.hpp abstractmatrix.hpp
 	$(CXX) $(CXXFLAGS) -c distributedtests.cpp
 
diff --git a/P2/profile.cpp b/P2/profile.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..acb5ee6dc677df848fc98991ed90f3d5d24ed31d
--- /dev/null
+++ b/P2/profile.cpp
@@ -0,0 +1,104 @@
+#include <iostream>
+#include <mpi.h>
+#include <cassert>
+#include <cmath>
+#include <nvToolsExt.h>
+#include <functional>
+#include "matrix.hpp"
+#include "distributedmatrix.hpp"
+
+// Returns true iff a and b have the same shape and every entry pair differs by at most tol.
+bool matricesEqual(const Matrix& a, const Matrix& b, double tol = 1e-8) {
+    if (a.numRows() != b.numRows() || a.numCols() != b.numCols()) return false;
+    for (int i = 0; i < a.numRows(); ++i) {
+        for (int j = 0; j < a.numCols(); ++j) {
+            if (!(std::abs(a.get(i, j) - b.get(i, j)) <= tol)) return false;  // negated <= so a NaN difference fails
+        }
+    }
+    return true;
+}
+
+// Builds A (rowsA x colsA) and B (rowsB x colsA) from deterministic sin/cos values,
+// runs distA.multiplyTransposed(distB) inside an NVTX range, and compares it with the
+// serial reference A * B^T (tolerance 1e-8). On a mismatch the detecting rank
+// reports to stderr and aborts the whole MPI job.
+void testMultiplyTransposed(int rowsA, int colsA, int rowsB) {
+    int rank, numProcs;
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+    MPI_Comm_size(MPI_COMM_WORLD, &numProcs);
+
+    Matrix matrix1Full(rowsA, colsA);
+    Matrix matrix2Full(rowsB, colsA);
+    for (int i = 0; i < rowsA; i++) {
+        for (int j = 0; j < colsA; j++) {
+            matrix1Full.set(i, j, sin(i * colsA + j));
+        }
+    }
+    for (int i = 0; i < rowsB; i++) {
+        for (int j = 0; j < colsA; j++) {
+            matrix2Full.set(i, j, cos(i * colsA + j));
+        }
+    }
+
+    DistributedMatrix distA(matrix1Full, numProcs);
+    DistributedMatrix distB(matrix2Full, numProcs);
+
+    // Serial reference, computed before the NVTX range is opened.
+    Matrix expected = matrix1Full * matrix2Full.transpose();
+    if (rank == 0) {
+        std::cout << "Expected result [0][0]: " << expected.get(0, 0) << std::endl;
+    }
+
+    nvtxRangePushA("DistributedMatrix::multiplyTransposed");
+    Matrix result = distA.multiplyTransposed(distB);
+    nvtxRangePop();
+    if (rank == 0) {
+        std::cout << "Result [0][0]: " << result.get(0, 0) << std::endl;
+    }
+
+    // Explicit check instead of assert(): it still runs when NDEBUG is defined.
+    if (!matricesEqual(result, expected, 1e-8)) {
+        std::cerr << "Rank " << rank << ": multiplyTransposed result differs from expected" << std::endl;
+        MPI_Abort(MPI_COMM_WORLD, 1);
+    } else if (rank == 0) {
+        std::cout << "Test passed! Matrices are equal." << std::endl;
+    }
+}
+
+// Entry point: initializes MPI if needed, runs the multiplyTransposed profiling
+// test once, then finalizes MPI and returns 0. A std::exception thrown by the
+// test is reported by rank 0 and the MPI job is aborted with error code 1.
+int main(int argc, char** argv) {
+    // Only call MPI_Init when MPI has not been initialized yet.
+    int initialized = 0;
+    MPI_Initialized(&initialized);
+    if (!initialized) {
+        MPI_Init(&argc, &argv);
+    }
+
+    int rank;
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+    const bool isRoot = (rank == 0);  // only rank 0 prints progress messages
+
+    if (isRoot) {
+        std::cout << "Starting MultiplyTransposed tests..." << std::endl;
+    }
+
+    try {
+        // Problem size: A is 512x256 and B is 512x256, so A * B^T is 512x512.
+        testMultiplyTransposed(512, 256, 512);
+        if (isRoot) {
+            std::cout << "Tests passed successfully!" << std::endl;
+        }
+    } catch (const std::exception& e) {
+        if (isRoot) {
+            std::cerr << "Test failed with exception: " << e.what() << std::endl;
+        }
+        // MPI_Abort is issued on MPI_COMM_WORLD, i.e. for every rank, not just this one.
+        MPI_Abort(MPI_COMM_WORLD, 1);
+    }
+
+    // MPI is finalized unconditionally, whether or not this function initialized it.
+    MPI_Finalize();
+    return 0;
+}