small refactor
This commit is contained in:
parent
12f70eb783
commit
8d2a879785
@ -8,26 +8,26 @@
|
|||||||
constexpr int headerSize = sizeof(uint32_t);
|
constexpr int headerSize = sizeof(uint32_t);
|
||||||
constexpr int indexEntrySize = sizeof(uint64_t)*2;
|
constexpr int indexEntrySize = sizeof(uint64_t)*2;
|
||||||
|
|
||||||
BucketedZstdData::BucketedZstdData(std::ifstream &file) : file(file) {}
|
BucketedZstdData::BucketedZstdData(std::istream &input) : input(input) {}
|
||||||
|
|
||||||
std::optional<std::vector<char>> BucketedZstdData::getDatasetWithId(std::uint32_t id) {
|
std::optional<std::vector<char>> BucketedZstdData::getDatasetWithId(std::uint32_t id) {
|
||||||
file.seekg(0, std::ios::beg);
|
input.seekg(0, std::ios::beg);
|
||||||
|
|
||||||
uint32_t indexSize;
|
uint32_t indexSize;
|
||||||
file.read((char *)&indexSize, sizeof(indexSize));
|
input.read((char *)&indexSize, sizeof(indexSize));
|
||||||
if(indexSize < id) return {};
|
if(indexSize < id) return {};
|
||||||
|
|
||||||
//seek to index entry
|
//seek to index entry
|
||||||
file.seekg(sizeof(uint32_t) + indexEntrySize*id, std::ios::beg);
|
input.seekg(sizeof(uint32_t) + indexEntrySize*id, std::ios::beg);
|
||||||
|
|
||||||
uint64_t offset, length;
|
uint64_t offset, length;
|
||||||
file.read((char *)&offset, sizeof(offset));
|
input.read((char *)&offset, sizeof(offset));
|
||||||
file.read((char *)&length, sizeof(length));
|
input.read((char *)&length, sizeof(length));
|
||||||
if(length == 0) return {};
|
if(length == 0) return {};
|
||||||
|
|
||||||
file.seekg(offset + headerSize + indexSize * indexEntrySize, std::ios::beg);
|
input.seekg(offset + headerSize + indexSize * indexEntrySize, std::ios::beg);
|
||||||
std::vector<char> inBuf(length);
|
std::vector<char> inBuf(length);
|
||||||
file.read(inBuf.data(), inBuf.size());
|
input.read(inBuf.data(), inBuf.size());
|
||||||
|
|
||||||
std::vector<char> output(ZSTD_getFrameContentSize(inBuf.data(), inBuf.size()));
|
std::vector<char> output(ZSTD_getFrameContentSize(inBuf.data(), inBuf.size()));
|
||||||
if(!ZSTD_isError(ZSTD_decompress(output.data(), output.size(), inBuf.data(), inBuf.size()))) {
|
if(!ZSTD_isError(ZSTD_decompress(output.data(), output.size(), inBuf.data(), inBuf.size()))) {
|
||||||
|
@ -8,11 +8,11 @@
|
|||||||
|
|
||||||
class BucketedZstdData {
|
class BucketedZstdData {
|
||||||
public:
|
public:
|
||||||
BucketedZstdData(std::ifstream &file);
|
BucketedZstdData(std::istream &input);
|
||||||
std::optional<std::vector<char>> getDatasetWithId(std::uint32_t id);
|
std::optional<std::vector<char>> getDatasetWithId(std::uint32_t id);
|
||||||
std::optional<std::vector<std::vector<char>>> getEntriesByID(std::uint32_t id);
|
std::optional<std::vector<std::vector<char>>> getEntriesByID(std::uint32_t id);
|
||||||
private:
|
private:
|
||||||
std::ifstream &file;
|
std::istream &input;
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
@ -46,17 +46,20 @@ std::optional<std::vector<char>> readSharedIndex(const std::filesystem::path &fi
|
|||||||
|
|
||||||
void processRDA(const std::filesystem::directory_entry &file, std::atomic_size_t &totalEntries, uint64_t id, std::mutex &outputMutex) {
|
void processRDA(const std::filesystem::directory_entry &file, std::atomic_size_t &totalEntries, uint64_t id, std::mutex &outputMutex) {
|
||||||
if(file.path().extension() == ".rda") {
|
if(file.path().extension() == ".rda") {
|
||||||
|
std::string fileName(file.path().filename());
|
||||||
std::ifstream fileStream(file.path(), std::ios::binary);
|
std::ifstream fileStream(file.path(), std::ios::binary);
|
||||||
CERRLOG("Reading %s\n", std::string(file.path().filename()).c_str());
|
CERRLOG("Reading %s\n", fileName.c_str());
|
||||||
BucketedZstdData bucket(fileStream);
|
BucketedZstdData bucket(fileStream);
|
||||||
|
|
||||||
if(std::optional<std::vector<std::vector<char>>> data = bucket.getEntriesByID(id)) {
|
if(std::optional<std::vector<std::vector<char>>> data = bucket.getEntriesByID(id)) {
|
||||||
totalEntries += data.value().size();
|
totalEntries += data.value().size();
|
||||||
const std::lock_guard lock(outputMutex);
|
const std::lock_guard lock(outputMutex);
|
||||||
CERRLOG("Writing %s\n", std::string(file.path().filename()).c_str());
|
CERRLOG("Writing %s\n", fileName.c_str());
|
||||||
for(const auto &entry : data.value()) {
|
for(const auto &entry : data.value()) {
|
||||||
std::cout.write(entry.data(), entry.size()) << '\n';
|
std::cout.write(entry.data(), entry.size()) << '\n';
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
CERRLOG("No entries found in %s\n", fileName.c_str());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user