// Implementation of DistributedMatrix: a matrix distributed across MPI
// processes in contiguous column blocks.
// The headers below are assumed; "distributedmatrix.hpp" stands in for
// whatever header declares Matrix and DistributedMatrix in this project.
#include <mpi.h>
#include <algorithm>
#include <functional>
#include <stdexcept>
#include <vector>
#include "distributedmatrix.hpp"
// Constructor: split the columns of `matrix` into contiguous blocks, one per
// process, and keep only this process's block.
DistributedMatrix::DistributedMatrix(const Matrix& matrix, int numProcesses)
    : localData(0, 0) {
    this->globalRows = matrix.numRows();
    this->globalCols = matrix.numCols();
    this->numProcesses = numProcesses;
    MPI_Comm_rank(MPI_COMM_WORLD, &this->rank);  // was hardcoded to 0, which breaks the distribution
    // Block-column distribution: the first (globalCols % numProcesses)
    // processes receive one extra column.
    int baseCols = globalCols / numProcesses;
    int remainingCols = globalCols % numProcesses;
    this->localCols = baseCols + (rank < remainingCols ? 1 : 0);
    this->startCol = rank * baseCols + std::min(rank, remainingCols);
    this->localData = Matrix(this->globalRows, this->localCols);
    // Copy this process's columns out of the full input matrix.
    for (int i = 0; i < this->globalRows; ++i) {
        for (int j = 0; j < this->localCols; ++j) {
            this->localData.set(i, j, matrix.get(i, this->startCol + j));
        }
    }
}
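// Example of the distribution above: with globalCols = 10 and numProcesses = 3,
// baseCols = 3 and remainingCols = 1, so the ranks own localCols = {4, 3, 3}
// starting at startCol = {0, 4, 7}.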
// Copy constructor: duplicates the distribution metadata and the local block
// (the local block was previously left empty, which lost the data).
DistributedMatrix::DistributedMatrix(const DistributedMatrix& other)
    : localData(other.localData) {
    this->globalRows = other.globalRows;
    this->globalCols = other.globalCols;
    this->localCols = other.localCols;
    this->startCol = other.startCol;
    this->numProcesses = other.numProcesses;
    this->rank = other.rank;
}
// Global-index accessors: only columns owned by this process are addressable.
double DistributedMatrix::get(int i, int j) const {
    if (j < startCol || j >= startCol + localCols) {
        throw std::out_of_range("column is not stored on this process");
    }
    return localData.get(i, j - startCol);
}

void DistributedMatrix::set(int i, int j, double value) {
    if (j < startCol || j >= startCol + localCols) {
        throw std::out_of_range("column is not stored on this process");
    }
    localData.set(i, j - startCol, value);
}
int DistributedMatrix::globalColIndex(int localColIndex) const {
    return startCol + localColIndex;
}

int DistributedMatrix::localColIndex(int globalColIndex) const {
    if (globalColIndex >= startCol && globalColIndex < startCol + localCols) {
        return globalColIndex - startCol;
    }
    return -1;  // column is not owned by this process
}
// Find which process owns a global column. Note: this uses MPI_Allgather and
// is therefore a collective call; every process must enter it together.
int DistributedMatrix::ownerProcess(int globalColIndex) const {
    std::vector<int> localStartProcesses(numProcesses);
    MPI_Allgather(&startCol, 1, MPI_INT, localStartProcesses.data(), 1, MPI_INT, MPI_COMM_WORLD);
    for (int p = 1; p < numProcesses; ++p) {
        if (globalColIndex >= localStartProcesses[p - 1] && globalColIndex < localStartProcesses[p]) {
            return p - 1;
        }
    }
    return numProcesses - 1;  // the index falls in the last process's block
}
const Matrix& DistributedMatrix::getLocalData() const {
    return localData;
}
Matrix DistributedMatrix::gather() const {
    // Pack the local column block row-major.
    std::vector<double> localBuffer(localCols * globalRows);
    for (int i = 0; i < globalRows; ++i) {
        for (int j = 0; j < localCols; ++j) {
            localBuffer[i * localCols + j] = localData.get(i, j);
        }
    }
    // Per-process receive counts and displacements mirror the constructor's
    // block-column distribution.
    std::vector<int> counts(numProcesses);
    std::vector<int> displacements(numProcesses);
    int remainingCols = globalCols % numProcesses;
    for (int p = 0; p < numProcesses; ++p) {
        int p_cols = (globalCols / numProcesses) + (p < remainingCols ? 1 : 0);
        counts[p] = globalRows * p_cols;
        displacements[p] = (p == 0) ? 0 : displacements[p - 1] + counts[p - 1];
    }
    std::vector<double> buffer(globalRows * globalCols);
    MPI_Allgatherv(
        localBuffer.data(),
        localCols * globalRows,
        MPI_DOUBLE,
        buffer.data(),
        counts.data(),
        displacements.data(),
        MPI_DOUBLE,
        MPI_COMM_WORLD
    );
    // Unpack each process's row-major block into its global column range.
    Matrix fullMatrix(globalRows, globalCols);
    int currentCol = 0;
    for (int p = 0; p < numProcesses; ++p) {
        int cols = (globalCols / numProcesses) + (p < remainingCols ? 1 : 0);
        for (int j = 0; j < cols; ++j) {
            for (int i = 0; i < globalRows; ++i) {
                double val = buffer[displacements[p] + i * cols + j];
                fullMatrix.set(i, currentCol, val);
            }
            ++currentCol;
        }
    }
    return fullMatrix;
}
// Apply an element-wise function to the local block; the distribution of the
// result is unchanged and no communication is needed.
DistributedMatrix DistributedMatrix::apply(const std::function<double(double)>& func) const {
    DistributedMatrix result(*this);
    for (int i = 0; i < localData.numRows(); ++i) {
        for (int j = 0; j < localData.numCols(); ++j) {
            double value = localData.get(i, j);
            result.localData.set(i, j, func(value));
        }
    }
    return result;
}
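// Combine two matrices element-wise with `func`. Both operands are assumed to
// share the same column distribution, so only local blocks are touched.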
DistributedMatrix DistributedMatrix::applyBinary(
    const DistributedMatrix& a,
    const DistributedMatrix& b,
    const std::function<double(double, double)>& func) {
    DistributedMatrix result(a);
    for (int i = 0; i < a.localData.numRows(); ++i) {
        for (int j = 0; j < a.localData.numCols(); ++j) {
            double valA = a.localData.get(i, j);
            double valB = b.localData.get(i, j);
            result.localData.set(i, j, func(valA, valB));
        }
    }
    return result;
}
// Sum of all entries: each process sums its local block, then the partial
// sums are combined with an all-reduce so every process gets the total.
double DistributedMatrix::sum() const {
    double localSum = 0.0;
    for (int i = 0; i < globalRows; ++i) {
        for (int j = 0; j < localCols; ++j) {
            localSum += localData.get(i, j);
        }
    }
    double globalSum = 0.0;
    MPI_Allreduce(&localSum, &globalSum, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
    return globalSum;
}
// Multiply a replicated matrix by a column-distributed matrix. Each process
// computes only its own block of result columns, so no communication is
// needed; left.numCols() must equal right's global row count.
DistributedMatrix multiply(const Matrix& left, const DistributedMatrix& right) {
    int globalRows = left.numRows();
    int localCols = right.localCols;
    Matrix resultMatrix(globalRows, right.globalCols);
    DistributedMatrix result(resultMatrix, right.numProcesses);
    for (int i = 0; i < globalRows; ++i) {
        for (int j = 0; j < localCols; ++j) {
            double sum = 0.0;
            for (int k = 0; k < left.numCols(); ++k) {
                sum += left.get(i, k) * right.localData.get(k, j);
            }
            result.localData.set(i, j, sum);
        }
    }
    return result;
}
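// Example of the dimension flow above: with `left` 4x6 (replicated) and
// `right` 6x10 distributed over 3 processes (localCols = {4, 3, 3}), each
// process computes a 4x{4,3,3} block of the 4x10 product.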
// Compute (*this) * other^T. Each process multiplies its local column block
// by the transposed block of `other`, producing a partial product that is
// summed across processes with an all-reduce. Both operands must share the
// same column distribution. The result is replicated on every process.
Matrix DistributedMatrix::multiplyTransposed(const DistributedMatrix& other) const {
    Matrix partial = this->localData * other.getLocalData().transpose();
    std::vector<double> buffer(this->globalRows * other.globalRows);
    MPI_Allreduce(
        partial.getData().data(),
        buffer.data(),
        globalRows * other.globalRows,
        MPI_DOUBLE,
        MPI_SUM,
        MPI_COMM_WORLD
    );
    Matrix fullMatrix(globalRows, other.globalRows);
    for (int i = 0; i < globalRows; ++i) {
        for (int j = 0; j < other.globalRows; ++j) {
            fullMatrix.set(i, j, buffer[i * other.globalRows + j]);
        }
    }
    return fullMatrix;
}
// Fragment from the original listing: broadcast a replicated matrix from
// process `src` to all processes. The enclosing signature was missing, so the
// function name and parameters below are an assumed reconstruction.
void broadcastMatrix(Matrix* matrix, int src) {
    MPI_Bcast(const_cast<double*>(matrix->getData().data()),
              matrix->numRows() * matrix->numCols(),
              MPI_DOUBLE, src, MPI_COMM_WORLD);
}
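// ---------------------------------------------------------------------------
// Minimal usage sketch (not part of the original listing; kept in a comment
// so the file still compiles). It assumes Matrix exposes the
// numRows()/numCols()/get()/set() interface used above and that the program
// is launched with mpirun/mpiexec.
//
// int main(int argc, char** argv) {
//     MPI_Init(&argc, &argv);
//     int size;
//     MPI_Comm_size(MPI_COMM_WORLD, &size);
//
//     Matrix m(4, 6);  // the same full matrix is built on every process
//     for (int i = 0; i < 4; ++i)
//         for (int j = 0; j < 6; ++j)
//             m.set(i, j, i + j);
//
//     DistributedMatrix dm(m, size);  // columns are split across processes
//     DistributedMatrix sq = dm.apply([](double x) { return x * x; });
//     double total = sq.sum();        // collective; every process gets the sum
//     Matrix full = sq.gather();      // collective; reassembles the 4x6 matrix
//
//     MPI_Finalize();
//     return 0;
// }
// ---------------------------------------------------------------------------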