small refactor

This commit is contained in:
Finn Dane 2023-04-15 15:51:53 +02:00
parent 12f70eb783
commit 8d2a879785
3 changed files with 15 additions and 12 deletions

View File

@ -8,26 +8,26 @@
// Size in bytes of the value at the very start of the stream: one uint32_t,
// which getDatasetWithId reads into indexSize.
constexpr int headerSize = sizeof(uint32_t);
// Size in bytes of one index entry: two uint64_t values, which
// getDatasetWithId reads as offset and length.
constexpr int indexEntrySize = sizeof(uint64_t)*2;
// Constructors only bind the given stream to the reference member; no data is
// read at construction time.
// NOTE(review): this listing is a diff hunk rendered without +/- markers; the
// std::ifstream line is presumably the removed version and the std::istream
// line the added one -- confirm against the actual file.
BucketedZstdData::BucketedZstdData(std::ifstream &file) : file(file) {}
BucketedZstdData::BucketedZstdData(std::istream &input) : input(input) {}
std::optional<std::vector<char>> BucketedZstdData::getDatasetWithId(std::uint32_t id) {
file.seekg(0, std::ios::beg);
input.seekg(0, std::ios::beg);
uint32_t indexSize;
file.read((char *)&indexSize, sizeof(indexSize));
input.read((char *)&indexSize, sizeof(indexSize));
if(indexSize < id) return {};
//seek to index entry
file.seekg(sizeof(uint32_t) + indexEntrySize*id, std::ios::beg);
input.seekg(sizeof(uint32_t) + indexEntrySize*id, std::ios::beg);
uint64_t offset, length;
file.read((char *)&offset, sizeof(offset));
file.read((char *)&length, sizeof(length));
input.read((char *)&offset, sizeof(offset));
input.read((char *)&length, sizeof(length));
if(length == 0) return {};
file.seekg(offset + headerSize + indexSize * indexEntrySize, std::ios::beg);
input.seekg(offset + headerSize + indexSize * indexEntrySize, std::ios::beg);
std::vector<char> inBuf(length);
file.read(inBuf.data(), inBuf.size());
input.read(inBuf.data(), inBuf.size());
std::vector<char> output(ZSTD_getFrameContentSize(inBuf.data(), inBuf.size()));
if(!ZSTD_isError(ZSTD_decompress(output.data(), output.size(), inBuf.data(), inBuf.size()))) {

View File

@ -8,11 +8,11 @@
// Looks up data by a 32-bit id in an input stream.
// The stream is held by reference, not owned, so it must stay alive for as
// long as this object is used.
// NOTE(review): this listing is a diff hunk rendered without +/- markers; each
// std::ifstream/`file` line is presumably the removed counterpart of the
// std::istream/`input` line that follows it -- confirm against the actual file.
class BucketedZstdData {
public:
// Binds the stream that later lookups read from.
BucketedZstdData(std::ifstream &file);
BucketedZstdData(std::istream &input);
// Returns a single byte buffer for `id`, or an empty optional.
std::optional<std::vector<char>> getDatasetWithId(std::uint32_t id);
// Returns a list of byte buffers for `id`, or an empty optional.
std::optional<std::vector<std::vector<char>>> getEntriesByID(std::uint32_t id);
private:
// Non-owning reference to the caller's stream.
std::ifstream &file;
std::istream &input;
};
#endif

View File

@ -46,17 +46,20 @@ std::optional<std::vector<char>> readSharedIndex(const std::filesystem::path &fi
// Handles one directory entry: entries whose extension is not ".rda" are
// ignored. For an ".rda" file, it is opened in binary mode, the entries for
// `id` are requested from BucketedZstdData, `totalEntries` is increased by the
// number of entries returned, and each entry is written to std::cout followed
// by '\n' while `outputMutex` is held.
// CERRLOG is defined outside this view.
// NOTE(review): this listing is a diff hunk rendered without +/- markers; the
// paired CERRLOG lines (temporary std::string vs. fileName) are presumably the
// removed and added versions of one statement -- confirm against the actual file.
void processRDA(const std::filesystem::directory_entry &file, std::atomic_size_t &totalEntries, uint64_t id, std::mutex &outputMutex) {
if(file.path().extension() == ".rda") {
// File name without directory, kept once for the log messages below.
std::string fileName(file.path().filename());
std::ifstream fileStream(file.path(), std::ios::binary);
CERRLOG("Reading %s\n", std::string(file.path().filename()).c_str());
CERRLOG("Reading %s\n", fileName.c_str());
BucketedZstdData bucket(fileStream);
if(std::optional<std::vector<std::vector<char>>> data = bucket.getEntriesByID(id)) {
// The counter is atomic and is updated before the output lock is taken.
totalEntries += data.value().size();
// Hold the lock for the whole write loop below.
const std::lock_guard lock(outputMutex);
CERRLOG("Writing %s\n", std::string(file.path().filename()).c_str());
CERRLOG("Writing %s\n", fileName.c_str());
for(const auto &entry : data.value()) {
std::cout.write(entry.data(), entry.size()) << '\n';
}
} else {
// getEntriesByID returned an empty optional.
CERRLOG("No entries found in %s\n", fileName.c_str());
}
}
}