Better_Software_Header_MobileBetter_Software_Header_Web

Find what you need - explore our website and developer resources

Little Trouble in Big Data - Part 1

how to use mmap() to load a large data set into RAM

/// Opens the preprocessed bed file read-only and memory-maps its contents.
///
/// On success ppBedFd holds the open file descriptor and ppBedMap points at
/// the start of a read-only, shared mapping of ppBedSize bytes.
///
/// @param preprocessedBedFile  Path of the preprocessed bed file to map.
/// @throws a string (message including the path) if open() fails.
/// @throws const char * (fixed message) if mmap() fails.
void Data::mapPreprocessBedFile(const string &preprocessedBedFile)
{
    // Expected file size in bytes. Each factor is widened to size_t *before*
    // multiplying so the intermediate product is computed in size_t rather
    // than in the (narrower) type of the counters, which could overflow.
    const size_t ppBedSize = size_t(numInds) * size_t(numIncdSnps) * sizeof(float);

    // Open the preprocessed bed file; open() reports failure with -1.
    ppBedFd = open(preprocessedBedFile.c_str(), O_RDONLY);
    if (ppBedFd == -1)
        throw("Error: Failed to open preprocessed bed file [" + preprocessedBedFile + "]");

    // Check mmap()'s void * result against MAP_FAILED before converting it, so
    // the member never ends up holding the failure sentinel.
    // NOTE(review): ppBedFd is left open when mmap() fails - confirm that the
    // owner of this object closes it.
    void *const mapping = mmap(nullptr, ppBedSize, PROT_READ, MAP_SHARED, ppBedFd, 0);
    if (mapping == MAP_FAILED)
        throw("Error: Failed to mmap preprocessed bed file");

    // void * -> float * is a static_cast; reinterpret_cast is not needed here.
    ppBedMap = static_cast<float *>(mapping);

    ...
}
/// Holds the state needed to access the memory-mapped preprocessed bed file.
class Data
{
public:
    Data();

    // mmap related data

    /// File descriptor of the preprocessed bed file; -1 while no file is open
    /// (the same value open() uses to signal failure).
    int ppBedFd = -1;

    /// Start of the mapped file contents viewed as floats; nullptr until mapped.
    float *ppBedMap = nullptr;

    /// Matrix view over the mapped floats; mapPreprocessBedFile() re-seats it
    /// onto ppBedMap with placement new once the mapping exists.
    Map<MatrixXf> mappedZ;
};

void Data::mapPreprocessBedFile(const string &preprocessedBedFile)
{
    ...

    ppBedMap = reinterpret_cast<float *>(mmap(nullptr, ppBedSize, PROT_READ, MAP_SHARED, ppBedFd, 0));
    if (ppBedMap == MAP_FAILED)
        throw("Error: Failed to mmap preprocessed bed file");

    new (&mappedZ) Map<MatrixXf>(ppBedMap, numRows, numCols);
}

1 Comment

27 - May - 2019

Rolf Eike Beer

const size_t ppBedSize = size_t(numInds) * size_t(numIncdSnps) * sizeof(float);
ppBedMap = reinterpret_cast<float *>(mmap(...
const size_t ppBedSize = static_cast<size_t>(numInds) * static_cast<size_t>(numIncdSnps) * sizeof(float);
ppBedMap = static_cast<float *>(mmap(...
SeanHarmer

Sean Harmer

Managing Director KDAB UK

Learn Modern C++

Learn more