//
// Created by ricardo on 22-12-11.
//
|
|
#include "file_io.h"
|
|
#include "logging.h"
|
|
#include "cstdio"
|
|
#include "cstdlib"
|
|
#include "const.h"
|
|
#include "huffman.h"
|
|
#include "unistd.h"
|
|
#include "sys/stat.h"
|
|
|
|
int *FileIO::ReadCharFrequency(const std::string &fileName)
|
|
{
|
|
FILE *file = fopen(fileName.c_str(), "r");
|
|
|
|
if (file == nullptr)
|
|
{
|
|
// 文件打开失败
|
|
Logging::LoggingInfo(fileName + "is not a valid filename");
|
|
exit(0);
|
|
}
|
|
|
|
int* frequencyArray = new int[ASCII_LENGTH];
|
|
|
|
for (int i = 0; i < ASCII_LENGTH; i++)
|
|
{
|
|
// 将所有频率初始化为0
|
|
frequencyArray[i] = 0;
|
|
}
|
|
|
|
while (true)
|
|
{
|
|
int temp = fgetc(file);
|
|
|
|
if (temp == EOF)
|
|
{
|
|
// 文件结束
|
|
break;
|
|
}
|
|
|
|
if (temp >= ASCII_LENGTH || temp < 0)
|
|
{
|
|
// 读取到非法字符
|
|
Logging::LoggingWarning(
|
|
"Read illegal char " + std::to_string(temp) + " in file. Ignore it");
|
|
}
|
|
|
|
frequencyArray[temp]++;
|
|
}
|
|
|
|
fclose(file);
|
|
return frequencyArray;
|
|
}
|
|
|
|
void FileIO::WriteZipFile(const std::string &inputFile, const std::string &outputFile)
|
|
{
|
|
int* frequencyArray = FileIO::ReadCharFrequency(inputFile);
|
|
|
|
auto huffmanCode = new HuffmanCode(frequencyArray);
|
|
// 创建哈夫曼树
|
|
huffmanCode->createHuffmanTree();
|
|
auto dictionary = huffmanCode->getHuffmanCode();
|
|
|
|
FILE* input = fopen(inputFile.c_str(), "r");
|
|
FILE* output = fopen(outputFile.c_str(), "wb");
|
|
|
|
// 判断文件打开ia是否成功
|
|
if (input == nullptr)
|
|
{
|
|
Logging::LoggingError(inputFile + " is not an valid file name.");
|
|
exit(0);
|
|
}
|
|
if (output == nullptr)
|
|
{
|
|
Logging::LoggingError(outputFile + " is not an valid file name.");
|
|
exit(0);
|
|
}
|
|
|
|
// 首先写入文件的元信息
|
|
// 虽然目前元信息中部分信息还没有拿到
|
|
// 但是先把文件中的空间占据了再说
|
|
MetaData metaDataT{};
|
|
fwrite(&metaDataT, sizeof(MetaData), 1, output);
|
|
|
|
// 写入哈夫曼数组
|
|
fwrite(huffmanCode->nodes->data(), sizeof(HuffmanNode), huffmanCode->nodes->size(), output);
|
|
|
|
// 写入文件时的缓冲区
|
|
int buffer = 0;
|
|
int bufferPos = 0;
|
|
|
|
while (true)
|
|
{
|
|
int temp = fgetc(input);
|
|
|
|
// 读取到文件末尾
|
|
if (temp == EOF)
|
|
{
|
|
|
|
buffer = buffer << (32 - bufferPos);
|
|
fwrite(&buffer, sizeof(int), 1, output);
|
|
|
|
metaDataT.LastBufferUsedLength = bufferPos;
|
|
|
|
break;
|
|
}
|
|
|
|
if (temp >= ASCII_LENGTH || temp < 0)
|
|
{
|
|
// 读取到非法字符
|
|
Logging::LoggingWarning(
|
|
"Read illegal char " + std::to_string(temp) + " in file. Ignore it");
|
|
}
|
|
|
|
auto code = (*dictionary)[temp];
|
|
|
|
for (auto iter = code.begin(); iter < code.end(); iter++)
|
|
{
|
|
// 缓冲区已经满了
|
|
if (bufferPos == 32)
|
|
{
|
|
fwrite(&buffer, sizeof(int), 1, output);
|
|
bufferPos = 0;
|
|
buffer = 0;
|
|
}
|
|
|
|
buffer = (buffer << 1) + *iter;
|
|
bufferPos++;
|
|
}
|
|
}
|
|
|
|
metaDataT.HuffmanRoot = huffmanCode->root;
|
|
metaDataT.HuffmanNodeLength = (int )huffmanCode->nodes->size();
|
|
// 写入元信息
|
|
fseek(output, 0, SEEK_SET);
|
|
fwrite(&metaDataT, sizeof(MetaData), 1, output);
|
|
|
|
delete frequencyArray;
|
|
delete huffmanCode;
|
|
delete dictionary;
|
|
}
|
|
|
|
void FileIO::WriteUnzipFile(const std::string &inputFile, const std::string &outputFile)
|
|
{
|
|
FILE* input = fopen(inputFile.c_str(), "rb");
|
|
FILE* output = fopen(outputFile.c_str(), "w");
|
|
|
|
// 检查文件是否正常打开
|
|
if (input == nullptr)
|
|
{
|
|
Logging::LoggingError(inputFile + " is not a valid file name.");
|
|
exit(0);
|
|
}
|
|
if (output == nullptr)
|
|
{
|
|
Logging::LoggingError(outputFile + " is not a valid file name.");
|
|
exit(0);
|
|
}
|
|
|
|
// 读取元信息
|
|
MetaData metaData{};
|
|
fread(&metaData, sizeof(MetaData), 1, input);
|
|
|
|
// 读取哈夫曼节点数组
|
|
auto nodes = new HuffmanNode[metaData.HuffmanNodeLength];
|
|
fread(nodes, sizeof(HuffmanNode), metaData.HuffmanNodeLength, input);
|
|
|
|
// 读取文件的缓冲区
|
|
int buffer;
|
|
fread(&buffer, sizeof(int), 1, input);
|
|
int bufferPos;
|
|
int nextBuffer;
|
|
|
|
HuffmanNode node = nodes[metaData.HuffmanRoot];
|
|
|
|
while (true)
|
|
{
|
|
if (buffer == EOF)
|
|
{
|
|
// 读取结束
|
|
break;
|
|
}
|
|
|
|
// 这里为了处理最后一个缓冲区的问题
|
|
// 设置了双缓冲
|
|
size_t readResult = fread(&nextBuffer, sizeof(int), 1, input);
|
|
|
|
if (readResult != 1)
|
|
{
|
|
// 读取到文件末尾
|
|
nextBuffer = EOF;
|
|
bufferPos = metaData.LastBufferUsedLength;
|
|
}
|
|
else
|
|
{
|
|
bufferPos = 32;
|
|
}
|
|
|
|
while (bufferPos > 0)
|
|
{
|
|
if (node.data == -1)
|
|
{
|
|
// 非叶子节点
|
|
int value = (buffer >> 31) & 1;
|
|
buffer = buffer << 1;
|
|
bufferPos--;
|
|
|
|
if (value == 0)
|
|
{
|
|
node = nodes[node.lIndex];
|
|
}
|
|
else
|
|
{
|
|
node = nodes[node.rIndex];
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// 叶子节点
|
|
fputc(node.data, output);
|
|
node = nodes[metaData.HuffmanRoot];
|
|
}
|
|
}
|
|
|
|
buffer = nextBuffer;
|
|
}
|
|
|
|
delete[] nodes;
|
|
fclose(input);
|
|
fclose(output);
|
|
}
|
|
|
|
double FileIO::CalculateZipRate(const std::string &inputFileName, const std::string &outputFileName)
|
|
{
|
|
struct stat originFileStat{};
|
|
struct stat zipFileStat{};
|
|
|
|
stat(inputFileName.c_str(), &originFileStat);
|
|
stat(outputFileName.c_str(), &zipFileStat);
|
|
|
|
auto originFileSize = (double )originFileStat.st_size;
|
|
auto zipFileSize = (double )zipFileStat.st_size;
|
|
|
|
return zipFileSize / originFileSize;
|
|
}
|
|
|
|
// Open a compressed file for bit-by-bit reading.
//
// The MetaData record is read and the HuffmanNode array that follows it is
// skipped, leaving the file positioned at the first word of code bits.
// `position` is advanced by the number of bits occupied by these two header
// sections.
//
// If the file cannot be opened or has no valid header, an error is logged and
// the process terminates with a failure status.
BinaryBuffer::BinaryBuffer(std::string &inputFileName)
{
    file = fopen(inputFileName.c_str(), "rb");

    if (file == nullptr)
    {
        // Failed to open the file.
        Logging::LoggingError(inputFileName + " is not a valid file name.");
        exit(EXIT_FAILURE);
    }

    buffer = 0;
    bufferPos = 0;
    readFinishedFlag = false;
    // NOTE(review): position is accumulated below and no earlier
    // initialisation is visible in this file, so it is started from 0 here.
    // Confirm against the class declaration.
    position = 0;

    // Read the metadata at the start of the file.
    MetaData metaData{};
    if (fread(&metaData, sizeof(MetaData), 1, file) != 1 || metaData.HuffmanNodeLength < 0)
    {
        Logging::LoggingError(inputFileName + " is not a valid compressed file.");
        fclose(file);
        file = nullptr;
        exit(EXIT_FAILURE);
    }
    position = position + static_cast<int>(sizeof(MetaData)) * 8;

    // The node array is not needed here, so seek past it rather than reading
    // it into a stack array whose size comes from the file.
    const long nodeTableBytes = static_cast<long>(sizeof(HuffmanNode)) * metaData.HuffmanNodeLength;
    if (fseek(file, nodeTableBytes, SEEK_CUR) != 0)
    {
        Logging::LoggingError(inputFileName + " is not a valid compressed file.");
        fclose(file);
        file = nullptr;
        exit(EXIT_FAILURE);
    }
    position = position + static_cast<int>(sizeof(HuffmanNode)) * metaData.HuffmanNodeLength * 8;
}
|
|
|
|
// Close the underlying file and clear the stored handle.
BinaryBuffer::~BinaryBuffer()
{
    FILE *handle = file;
    file = nullptr;
    fclose(handle);
}
|
|
|
|
char BinaryBuffer::read()
|
|
{
|
|
if (readFinishedFlag)
|
|
{
|
|
return -1;
|
|
}
|
|
|
|
if (bufferPos == 0)
|
|
{
|
|
// 当前缓冲区读取结束
|
|
int result = (int )fread(&buffer, sizeof(int), 1, file);
|
|
|
|
if (result == 0)
|
|
{
|
|
readFinishedFlag = true;
|
|
// 文件读取结束
|
|
return -1;
|
|
}
|
|
|
|
bufferPos = 32;
|
|
}
|
|
|
|
int result = (buffer >> 31) & 1;
|
|
buffer = buffer << 1;
|
|
bufferPos--;
|
|
position++;
|
|
return (char )result;
|
|
}
|