small refactor
This commit is contained in:
		@@ -8,26 +8,26 @@
 | 
			
		||||
// Size in bytes of the uint32_t index entry count stored at the very start of the stream.
constexpr int headerSize = sizeof(uint32_t);
 | 
			
		||||
// Size in bytes of one index entry: a uint64_t offset followed by a uint64_t length.
constexpr int indexEntrySize = sizeof(uint64_t)*2;
 | 
			
		||||
 | 
			
		||||
// Binds the reader to `input`. Only a reference is stored, so the stream is not
// owned and must outlive this object. Taking std::istream (rather than
// std::ifstream) lets callers pass file streams as well as in-memory streams.
BucketedZstdData::BucketedZstdData(std::istream &input) : input(input) {}
 | 
			
		||||
 | 
			
		||||
std::optional<std::vector<char>> BucketedZstdData::getDatasetWithId(std::uint32_t id) {
 | 
			
		||||
	file.seekg(0, std::ios::beg);
 | 
			
		||||
	input.seekg(0, std::ios::beg);
 | 
			
		||||
 | 
			
		||||
	uint32_t indexSize;
 | 
			
		||||
	file.read((char *)&indexSize, sizeof(indexSize));
 | 
			
		||||
	input.read((char *)&indexSize, sizeof(indexSize));
 | 
			
		||||
	if(indexSize < id) return {};
 | 
			
		||||
 | 
			
		||||
	//seek to index entry
 | 
			
		||||
	file.seekg(sizeof(uint32_t) + indexEntrySize*id, std::ios::beg);
 | 
			
		||||
	input.seekg(sizeof(uint32_t) + indexEntrySize*id, std::ios::beg);
 | 
			
		||||
 | 
			
		||||
	uint64_t offset, length;
 | 
			
		||||
	file.read((char *)&offset, sizeof(offset));
 | 
			
		||||
	file.read((char *)&length, sizeof(length));
 | 
			
		||||
	input.read((char *)&offset, sizeof(offset));
 | 
			
		||||
	input.read((char *)&length, sizeof(length));
 | 
			
		||||
	if(length == 0) return {};
 | 
			
		||||
 | 
			
		||||
	file.seekg(offset + headerSize + indexSize * indexEntrySize, std::ios::beg);
 | 
			
		||||
	input.seekg(offset + headerSize + indexSize * indexEntrySize, std::ios::beg);
 | 
			
		||||
	std::vector<char> inBuf(length);
 | 
			
		||||
	file.read(inBuf.data(), inBuf.size());
 | 
			
		||||
	input.read(inBuf.data(), inBuf.size());
 | 
			
		||||
 | 
			
		||||
	std::vector<char> output(ZSTD_getFrameContentSize(inBuf.data(), inBuf.size()));
 | 
			
		||||
	if(!ZSTD_isError(ZSTD_decompress(output.data(), output.size(), inBuf.data(), inBuf.size()))) {
 | 
			
		||||
 
 | 
			
		||||
@@ -8,11 +8,11 @@
 | 
			
		||||
 | 
			
		||||
/// Reader for a stream that holds an index followed by zstd-compressed datasets.
///
/// The stream is held by reference only: it is not owned and must outlive the
/// reader. Lookups seek within the stream, so it must be seekable.
class BucketedZstdData {
	public:
	/// Binds the reader to any input stream (std::ifstream, std::istringstream, ...).
	BucketedZstdData(std::istream &input);
	/// Returns the decompressed dataset stored under `id`, or an empty optional
	/// when there is nothing to return for it (for example an id beyond the
	/// index, or an index entry of length zero).
	std::optional<std::vector<char>> getDatasetWithId(std::uint32_t id);
	/// Returns the entries associated with `id`; callers treat an empty
	/// optional as "no entries found".
	std::optional<std::vector<std::vector<char>>> getEntriesByID(std::uint32_t id);
	private:
	/// Non-owning reference to the stream all reads go through.
	std::istream &input;
};
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
@@ -46,17 +46,20 @@ std::optional<std::vector<char>> readSharedIndex(const std::filesystem::path &fi
 | 
			
		||||
 | 
			
		||||
void processRDA(const std::filesystem::directory_entry &file, std::atomic_size_t &totalEntries, uint64_t id, std::mutex &outputMutex) {
 | 
			
		||||
	if(file.path().extension() == ".rda") {
 | 
			
		||||
		std::string fileName(file.path().filename());
 | 
			
		||||
		std::ifstream fileStream(file.path(), std::ios::binary);
 | 
			
		||||
		CERRLOG("Reading %s\n", std::string(file.path().filename()).c_str());
 | 
			
		||||
		CERRLOG("Reading %s\n", fileName.c_str());
 | 
			
		||||
		BucketedZstdData bucket(fileStream);
 | 
			
		||||
 | 
			
		||||
		if(std::optional<std::vector<std::vector<char>>> data = bucket.getEntriesByID(id)) {
 | 
			
		||||
			totalEntries += data.value().size();
 | 
			
		||||
			const std::lock_guard lock(outputMutex);
 | 
			
		||||
			CERRLOG("Writing %s\n", std::string(file.path().filename()).c_str());
 | 
			
		||||
			CERRLOG("Writing %s\n", fileName.c_str());
 | 
			
		||||
			for(const auto &entry : data.value()) {
 | 
			
		||||
				std::cout.write(entry.data(), entry.size()) << '\n';
 | 
			
		||||
			}
 | 
			
		||||
		} else {
 | 
			
		||||
			CERRLOG("No entries found in %s\n", fileName.c_str());
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user