diff --git a/src/RdaReader/include/RdaReader.hpp b/src/RdaReader/include/RdaReader.hpp
index a06635e..18b893e 100644
--- a/src/RdaReader/include/RdaReader.hpp
+++ b/src/RdaReader/include/RdaReader.hpp
@@ -6,11 +6,25 @@
 #include
 #include
 #include
+#include
 
 class RdaReader {
 public:
     RdaReader();
     RdaReader(std::function logger);
+
+    size_t readDataset(
+        const std::vector &datasetName,
+        const std::filesystem::path &rootDir,
+        const std::function output
+    );
+    size_t readDataset(
+        const std::vector &datasetName,
+        const std::filesystem::path &rootDir,
+        const std::function output,
+        std::mutex &outputMutex
+    );
+
     size_t readDataset(
         const std::vector &datasetName,
         const std::vector &sharedIndex,
diff --git a/src/RdaReader/src/RdaReader.cpp b/src/RdaReader/src/RdaReader.cpp
index e9d8ae9..78129da 100644
--- a/src/RdaReader/src/RdaReader.cpp
+++ b/src/RdaReader/src/RdaReader.cpp
@@ -83,4 +83,57 @@ size_t RdaReader::readDataset(const std::vector &datasetName, const std::v
 size_t RdaReader::readDataset(const std::vector &datasetName, const std::vector &sharedIndex, const std::vector &rdas, const std::function output) {
     std::mutex dummyMutex;
     return readDataset(datasetName, sharedIndex, rdas, output, dummyMutex);
+}
+
+// Reads the whole file at filePath into memory; yields an empty optional when the stream is not good() after opening.
+inline std::optional> readSharedIndex(const std::filesystem::path &filePath) {
+    std::ifstream sharedIndexFile(filePath, std::ios::binary | std::ios::ate);
+    if(!sharedIndexFile.good()) {
+        return {};
+    }
+    std::vector sharedIndexData(sharedIndexFile.tellg());
+    sharedIndexFile.seekg(0, std::ios::beg);
+
+    sharedIndexFile.read(sharedIndexData.data(), sharedIndexData.size());
+    return sharedIndexData;
+}
+
+// Directory-based overload: loads "sharedindex.shi" from rootDir, opens every ".rda" file found directly in
+// rootDir (the iteration is not recursive) and forwards to the stream-based overload with the caller's output
+// and outputMutex. Returns that overload's count, or 0 after logging when the index or the dataset is missing.
+size_t RdaReader::readDataset(const std::vector &datasetName, const std::filesystem::path &rootDir, const std::function output, std::mutex &outputMutex) {
+    const std::filesystem::path sharedIndexPath(rootDir / "sharedindex.shi");
+    std::vector sharedIndexData;
+    if(auto data = readSharedIndex(sharedIndexPath)) {
+        sharedIndexData = data.value();
+    } else {
+        log("cannot find '" + sharedIndexPath.string() + "'\n");
+        return 0;
+    }
+
+    std::vector rdasIfstreams;
+    for(const std::filesystem::directory_entry &file : std::filesystem::directory_iterator(rootDir)) {
+        if(file.path().extension() == ".rda") rdasIfstreams.emplace_back(std::ifstream(file.path(), std::ios::binary));
+    }
+
+    std::vector rdaRefs;
+    for(std::ifstream &stream : rdasIfstreams) {
+        rdaRefs.push_back(&stream);
+    }
+
+    // Forward the caller-supplied sink; a hard-coded std::cout writer here would silently ignore `output`.
+    if(size_t totalRead = readDataset(datasetName, sharedIndexData, rdaRefs, output, outputMutex)) {
+        log("Found a total of " + std::to_string(totalRead) + " entries\n");
+        return totalRead;
+    } else {
+        // datasetName need not be NUL-terminated, so build the string from the range instead of from data().
+        log("Cannot find '" + std::string(datasetName.begin(), datasetName.end()) + "' in the shared index\n");
+        return 0;
+    }
+}
+
+// Convenience overload for callers that have no shared output lock: supplies a function-local mutex.
+size_t RdaReader::readDataset(const std::vector &datasetName, const std::filesystem::path &rootDir, const std::function output) {
+    std::mutex dummyMutex;
+    return readDataset(datasetName, rootDir, output, dummyMutex);
 }
\ No newline at end of file
diff --git a/src/rdaExtractor/src/rdaExtractor.cpp b/src/rdaExtractor/src/rdaExtractor.cpp
index afa7693..bb13fef 100644
--- a/src/rdaExtractor/src/rdaExtractor.cpp
+++ b/src/rdaExtractor/src/rdaExtractor.cpp
@@ -52,29 +52,6 @@ int main(int argc, char **argv) {
     std::vector datasetName(datesetString.begin(), datesetString.end());
 
     const std::filesystem::path rootDirectory(argv[2]);
-    const std::filesystem::path sharedIndexPath(rootDirectory / "sharedindex.shi");
-
-    std::vector sharedIndexData;
-    if(auto data = readSharedIndex(sharedIndexPath)) {
-        sharedIndexData = data.value();
-    } else {
-        std::cerr << "cannot find '" << sharedIndexPath << "'" << std::endl;
-        return 2;
-    }
-
-    std::vector rdasIfstreams;
-    for(const std::filesystem::directory_entry &file : std::filesystem::directory_iterator(rootDirectory)) {
-        if(file.path().extension() == ".rda") rdasIfstreams.emplace_back(std::ifstream(file.path(), std::ios::binary));
-    }
-
-    std::vector rdaRefs;
-    for(std::ifstream &stream : rdasIfstreams) {
-        rdaRefs.push_back(&stream);
-    }
     RdaReader rdaReader(threadedLog);
-    if(size_t totalRead = rdaReader.readDataset(datasetName, sharedIndexData, rdaRefs, [](const char *s, size_t n){std::cout.write(s, n);})) {
-        std::cerr << "Found a total of " << totalRead << " entries" << std::endl;
-    } else {
-        std::cerr << "Cannot find '" << argv[1] << "' in the shared index" << std::endl;
-    }
+    rdaReader.readDataset(datasetName, rootDirectory, [](const char *s, size_t n){std::cout.write(s, n);});
 }
\ No newline at end of file