small refactor
This commit is contained in:
parent
12f70eb783
commit
8d2a879785
@ -8,26 +8,26 @@
|
||||
// Byte size of the leading uint32_t that getDatasetWithId reads from the start
// of the stream into `indexSize`.
constexpr int headerSize = sizeof(uint32_t);
|
||||
// Byte size of one index entry: two uint64_t values, which getDatasetWithId
// reads as `offset` followed by `length`.
constexpr int indexEntrySize = sizeof(uint64_t)*2;
|
||||
|
||||
// NOTE(review): the two constructor definitions below look like the
// before/after pair of this diff (std::ifstream -> std::istream) — confirm
// which one is live in the checked-in file.
// Stores a reference to the caller-supplied stream; nothing is read here.
BucketedZstdData::BucketedZstdData(std::ifstream &file) : file(file) {}
|
||||
// Stores a reference to the caller-supplied stream; nothing is read here.
BucketedZstdData::BucketedZstdData(std::istream &input) : input(input) {}
|
||||
|
||||
std::optional<std::vector<char>> BucketedZstdData::getDatasetWithId(std::uint32_t id) {
|
||||
file.seekg(0, std::ios::beg);
|
||||
input.seekg(0, std::ios::beg);
|
||||
|
||||
uint32_t indexSize;
|
||||
file.read((char *)&indexSize, sizeof(indexSize));
|
||||
input.read((char *)&indexSize, sizeof(indexSize));
|
||||
if(indexSize < id) return {};
|
||||
|
||||
//seek to index entry
|
||||
file.seekg(sizeof(uint32_t) + indexEntrySize*id, std::ios::beg);
|
||||
input.seekg(sizeof(uint32_t) + indexEntrySize*id, std::ios::beg);
|
||||
|
||||
uint64_t offset, length;
|
||||
file.read((char *)&offset, sizeof(offset));
|
||||
file.read((char *)&length, sizeof(length));
|
||||
input.read((char *)&offset, sizeof(offset));
|
||||
input.read((char *)&length, sizeof(length));
|
||||
if(length == 0) return {};
|
||||
|
||||
file.seekg(offset + headerSize + indexSize * indexEntrySize, std::ios::beg);
|
||||
input.seekg(offset + headerSize + indexSize * indexEntrySize, std::ios::beg);
|
||||
std::vector<char> inBuf(length);
|
||||
file.read(inBuf.data(), inBuf.size());
|
||||
input.read(inBuf.data(), inBuf.size());
|
||||
|
||||
std::vector<char> output(ZSTD_getFrameContentSize(inBuf.data(), inBuf.size()));
|
||||
if(!ZSTD_isError(ZSTD_decompress(output.data(), output.size(), inBuf.data(), inBuf.size()))) {
|
||||
|
@ -8,11 +8,11 @@
|
||||
|
||||
// Reader over a caller-supplied input stream. getDatasetWithId looks up an
// entry through an on-stream index and decompresses it with Zstd.
// The class keeps only a reference to the stream, so the stream has to stay
// alive for as long as this object is used.
// NOTE(review): this declaration is shown as a diff; the ifstream/istream
// constructor and member lines appear to be before/after pairs — confirm.
class BucketedZstdData {
|
||||
public:
|
||||
// Binds the reader to `file` (reference is stored, stream is not copied).
BucketedZstdData(std::ifstream &file);
|
||||
// Binds the reader to `input` (reference is stored, stream is not copied).
BucketedZstdData(std::istream &input);
|
||||
// Returns the decompressed bytes for index entry `id`, or an empty optional
// when the id is rejected by the index-size check or the entry length is 0.
std::optional<std::vector<char>> getDatasetWithId(std::uint32_t id);
|
||||
// Returns a list of byte buffers for `id`, or an empty optional.
// Implementation is not visible here; see the .cpp for exact semantics.
std::optional<std::vector<std::vector<char>>> getEntriesByID(std::uint32_t id);
|
||||
private:
|
||||
// Non-owning reference to the stream all reads go through.
std::ifstream &file;
|
||||
// Non-owning reference to the stream all reads go through.
std::istream &input;
|
||||
};
|
||||
|
||||
#endif
|
@ -46,17 +46,20 @@ std::optional<std::vector<char>> readSharedIndex(const std::filesystem::path &fi
|
||||
|
||||
// Processes one directory entry: if it is an ".rda" file, opens it in binary
// mode, asks BucketedZstdData for the entries belonging to `id`, adds their
// count to `totalEntries`, and writes each entry plus '\n' to std::cout while
// holding `outputMutex`. Files with any other extension are ignored.
//
// file         directory entry to inspect
// totalEntries running counter, increased by the number of entries found
// id           identifier forwarded to getEntriesByID
// outputMutex  guards the log line and the std::cout writes for this file
//
// NOTE(review): `id` is uint64_t here but getEntriesByID is declared with
// std::uint32_t, so the value narrows at the call — confirm ids fit 32 bits.
// NOTE(review): the CERRLOG lines that appear twice look like the
// before/after pairs of this diff — confirm which ones are live.
void processRDA(const std::filesystem::directory_entry &file, std::atomic_size_t &totalEntries, uint64_t id, std::mutex &outputMutex) {
|
||||
if(file.path().extension() == ".rda") {
|
||||
// Cache the file name once for the log messages below.
// NOTE(review): building std::string directly from a path depends on
// path::string_type being std::string; `.string()` is the portable form.
std::string fileName(file.path().filename());
|
||||
// NOTE(review): the open state of the stream is not checked before use.
std::ifstream fileStream(file.path(), std::ios::binary);
|
||||
CERRLOG("Reading %s\n", std::string(file.path().filename()).c_str());
|
||||
CERRLOG("Reading %s\n", fileName.c_str());
|
||||
BucketedZstdData bucket(fileStream);
|
||||
|
||||
// Empty optional means there is nothing to print for this file.
if(std::optional<std::vector<std::vector<char>>> data = bucket.getEntriesByID(id)) {
|
||||
totalEntries += data.value().size();
|
||||
// Lock is taken after the read, so only the output section is serialized.
const std::lock_guard lock(outputMutex);
|
||||
CERRLOG("Writing %s\n", std::string(file.path().filename()).c_str());
|
||||
CERRLOG("Writing %s\n", fileName.c_str());
|
||||
for(const auto &entry : data.value()) {
|
||||
// write() emits the raw bytes (no NUL-termination assumed), then a newline.
std::cout.write(entry.data(), entry.size()) << '\n';
|
||||
}
|
||||
} else {
|
||||
CERRLOG("No entries found in %s\n", fileName.c_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user