add: data-structure-lab & compiler-lab

2024-10-30 17:23:52 +08:00
commit eb8c4fa451
35 changed files with 4266 additions and 0 deletions
--- a/zip-unzip-search/.gitignore
+++ b/zip-unzip-search/.gitignore
@@ -0,0 +1,3 @@
+.idea/
+cmake-*/
+build/
--- a/zip-unzip-search/CMakeLists.txt
+++ b/zip-unzip-search/CMakeLists.txt
@@ -0,0 +1,10 @@
+# Build script for the zip_unzip_search executable.
+cmake_minimum_required(VERSION 3.22)
+project(zip_unzip_search)
+
+# Request the C++11 standard for all targets
+set(CMAKE_CXX_STANDARD 11)
+
+# Project headers live in include/
+include_directories(${PROJECT_SOURCE_DIR}/include)
+
+# Collect the source files found in src/ into the SRCS variable
+aux_source_directory(${PROJECT_SOURCE_DIR}/src SRCS)
+
+# The executable is built from main.cpp plus the collected sources
+add_executable(zip_unzip_search main.cpp ${SRCS})
--- a/zip-unzip-search/include/const.h
+++ b/zip-unzip-search/include/const.h
@@ -0,0 +1,11 @@
+//
+// Created by ricardo on 22-12-11.
+//
+
+#ifndef ZIP_UNZIP_SEARCH_CONST_H
+#define ZIP_UNZIP_SEARCH_CONST_H
+
+// Number of ASCII character codes (0-127); used as the size of per-character tables
+#define ASCII_LENGTH 128
+
+#endif //ZIP_UNZIP_SEARCH_CONST_H
--- a/zip-unzip-search/include/file_io.h
+++ b/zip-unzip-search/include/file_io.h
@@ -0,0 +1,82 @@
+//
+// Created by ricardo on 22-12-11.
+//
+
+#ifndef ZIP_UNZIP_SEARCH_FILE_IO_H
+#define ZIP_UNZIP_SEARCH_FILE_IO_H
+#include "string"
+
+/**
+ * File metadata record.
+ * It is written and read as raw bytes at the start of a compressed file,
+ * so its layout is part of the file format.
+ */
+struct MetaData
+{
+    /**
+     * Length of the Huffman tree node array
+     */
+    int HuffmanNodeLength;
+    /**
+     * Index of the Huffman tree root node within the node array
+     */
+    int HuffmanRoot;
+    /**
+     * Number of bits that are actually used in the last buffer word of the file
+     */
+    int LastBufferUsedLength;
+};
+
+/**
+ * Static helpers for compressing and decompressing files.
+ */
+class FileIO
+{
+public:
+    /**
+     * Count how often each character occurs in a file
+     * @param fileName name of the file
+     * @return character frequency array; the caller is responsible for freeing it
+     */
+    static int* ReadCharFrequency(const std::string& fileName);
+
+    /**
+     * Write the compressed file
+     * @param inputFile name of the file to compress
+     * @param outputFile name of the compressed file to produce
+     */
+    static void WriteZipFile(const std::string& inputFile, const std::string& outputFile);
+
+    /**
+     * Write the decompressed file
+     * @param inputFile name of the compressed file
+     * @param outputFile name of the decompressed file to produce
+     */
+    static void WriteUnzipFile(const std::string& inputFile, const std::string& outputFile);
+
+    /**
+     * Compute the compression rate of a file
+     * @param inputFileName name of the original file
+     * @param outputFileName name of the compressed file
+     * @return size of the compressed file divided by the size of the original file
+     */
+    static double CalculateZipRate(const std::string& inputFileName, const std::string& outputFileName);
+};
+
+/**
+ * Bit-by-bit reader over the payload of a compressed file.
+ *
+ * The object owns the underlying FILE handle: it is opened by the
+ * constructor and closed by the destructor.
+ */
+class BinaryBuffer
+{
+public:
+    /**
+     * Open a compressed file and skip its header
+     * @param inputFileName name of the compressed file
+     */
+    explicit BinaryBuffer(std::string& inputFileName);
+
+    ~BinaryBuffer();
+
+    // Copying would leave two objects closing the same FILE handle,
+    // so the type is deliberately non-copyable.
+    BinaryBuffer(const BinaryBuffer&) = delete;
+    BinaryBuffer& operator=(const BinaryBuffer&) = delete;
+
+    /**
+     * Read the next bit
+     * @return 0 or 1, or -1 once the file is exhausted
+     */
+    char read();
+
+    /**
+     * Number of bits consumed so far, counted from the start of the file
+     */
+    int position = 0;
+
+private:
+    FILE* file = nullptr;
+
+    // current word and the number of unread bits left in it
+    int buffer = 0;
+    int bufferPos = 0;
+    bool readFinishedFlag = false;
+};
+
+#endif //ZIP_UNZIP_SEARCH_FILE_IO_H
--- a/zip-unzip-search/include/huffman.h
+++ b/zip-unzip-search/include/huffman.h
@@ -0,0 +1,103 @@
+//
+// Created by ricardo on 22-12-11.
+//
+
+#ifndef ZIP_UNZIP_SEARCH_HUFFMAN_H
+#define ZIP_UNZIP_SEARCH_HUFFMAN_H
+#include "vector"
+#include "array"
+#include "const.h"
+
+/**
+ * Node of the Huffman tree.
+ * Nodes are stored in an array and refer to each other by index; the
+ * array is written to the compressed file as raw bytes.
+ */
+struct HuffmanNode
+{
+    /**
+     * Number (id) of the node
+     */
+    int id;
+    /**
+     * Character represented by the node;
+     * the value is -1 when the node is not a leaf.
+     * Declared as signed char so the -1 sentinel compares equal to -1 even
+     * on platforms where plain char is unsigned (size and layout are the
+     * same as for char).
+     */
+    signed char data;
+    /**
+     * Frequency of the character in the file,
+     * i.e. the weight of the node in the Huffman tree
+     */
+    int frequency;
+    /**
+     * Index of the left child in the node array
+     */
+    int lIndex;
+    /**
+     * Index of the right child in the node array
+     */
+    int rIndex;
+};
+
+/**
+ * Huffman tree together with the operations that build it and derive
+ * the per-character codes from it.
+ */
+class HuffmanCode
+{
+public:
+    /**
+     * List of the nodes in the tree (owned by this object)
+     */
+    std::vector<HuffmanNode>* nodes = new std::vector<HuffmanNode>();
+
+    /**
+     * Index of the Huffman tree root node; -1 while no tree exists
+     */
+    int root = -1;
+
+    /**
+     * Create from a character frequency array
+     * @param frequencyArray array of ASCII_LENGTH frequencies
+     */
+    explicit HuffmanCode(const int * frequencyArray);
+
+    /**
+     * Create from an existing Huffman node array
+     * @param nodeArray Huffman node array
+     * @param length length of the node array
+     */
+    HuffmanCode(HuffmanNode *nodeArray, int length);
+
+    ~HuffmanCode();
+
+    // The object owns `nodes` through a raw pointer; an implicit copy would
+    // make two objects delete the same vector, so copying is disabled.
+    HuffmanCode(const HuffmanCode&) = delete;
+    HuffmanCode& operator=(const HuffmanCode&) = delete;
+
+    /**
+     * Build the Huffman tree
+     */
+    void createHuffmanTree();
+
+    /**
+     * Print the Huffman tree
+     */
+    void printHuffmanTree();
+
+    /**
+     * Compute the Huffman codes
+     * @return Huffman code dictionary indexed by character
+     */
+    std::array<std::vector<char>, ASCII_LENGTH> * getHuffmanCode();
+
+    /**
+     * Print a Huffman code dictionary
+     * @param dictionary the dictionary
+     */
+    static void printHuffmanCode(const std::array<std::vector<char>, ASCII_LENGTH>& dictionary);
+
+private:
+    /**
+     * Sort the forest list by weight
+     * @param forests list of tree roots
+     */
+    static void sortForests(std::vector<HuffmanNode>& forests);
+
+    // recursive worker of printHuffmanTree
+    void printHuffmanTreeR(int nodeId);
+
+    // recursive worker of getHuffmanCode
+    void getHuffmanCodeR(std::array<std::vector<char>, ASCII_LENGTH> &dictionary, int nodeId, std::vector<char> &code);
+
+};
+#endif //ZIP_UNZIP_SEARCH_HUFFMAN_H
--- a/zip-unzip-search/include/logging.h
+++ b/zip-unzip-search/include/logging.h
@@ -0,0 +1,31 @@
+//
+// Created by ricardo on 22-12-11.
+//
+
+#ifndef ZIP_UNZIP_SEARCH_LOGGING_H
+#define ZIP_UNZIP_SEARCH_LOGGING_H
+#include "string"
+
+/**
+ * Static helpers that print tagged log messages.
+ */
+class Logging
+{
+public:
+    /**
+     * Print an informational message
+     * @param info message text
+     */
+    static void LoggingInfo(const std::string& info);
+
+    /**
+     * Print a warning
+     * @param warning message text
+     */
+    static void LoggingWarning(const std::string& warning);
+
+    /**
+     * Print an error
+     * @param error message text
+     */
+    static void LoggingError(const std::string& error);
+};
+
+#endif //ZIP_UNZIP_SEARCH_LOGGING_H
--- a/zip-unzip-search/include/search.h
+++ b/zip-unzip-search/include/search.h
@@ -0,0 +1,55 @@
+//
+// Created by ricardo on 22-12-16.
+//
+
+#ifndef ZIP_UNZIP_SEARCH_SEARCH_H
+#define ZIP_UNZIP_SEARCH_SEARCH_H
+#include "vector"
+#include "array"
+#include "string"
+
+/**
+ * Search implementation based on the BM algorithm.
+ * The pattern is a sequence of bits (each element is 0 or 1).
+ */
+class BMSearch
+{
+public:
+    /**
+     * Prepare the shift tables for a pattern
+     * @param sample pattern as a sequence of 0/1 values
+     */
+    explicit BMSearch(std::vector<char>& sample);
+
+    ~BMSearch();
+
+    // The object owns its tables through raw pointers; an implicit copy
+    // would free them twice, so copying is disabled.
+    BMSearch(const BMSearch&) = delete;
+    BMSearch& operator=(const BMSearch&) = delete;
+
+    /**
+     * Match against a binary file
+     * @param fileName the binary file to search; its metadata and Huffman node array must be read first
+     */
+    void matchFile(std::string &fileName);
+
+private:
+    // bad-character rule table;
+    // the text is a 0/1 string, so it has two entries
+    int* badCharArray;
+
+    // good-suffix rule table
+    int* goodSuffixArray;
+
+    // private copy of the pattern
+    std::vector<char>* sample;
+
+    /**
+     * Build the bad-character table
+     * @param s pattern string
+     */
+    void generateBrokenCharArray(std::vector<char>& s);
+
+    /**
+     * Build the good-suffix table
+     * @param s pattern string
+     */
+    void generateGoodSuffixArray(std::vector<char>& s);
+
+    static int max(int a, int b);
+
+};
+
+/**
+ * Search a compressed file for a text sample.
+ * @param fileName name of the compressed file
+ * @param sample NUL-terminated text to look for
+ */
+void SearchInFile(char* fileName, char* sample);
+
+#endif //ZIP_UNZIP_SEARCH_SEARCH_H
--- a/zip-unzip-search/main.cpp
+++ b/zip-unzip-search/main.cpp
@@ -0,0 +1,67 @@
+#include "file_io.h"
+#include "logging.h"
+#include "cstring"
+#include "search.h"
+
+/**
+ * Print the command-line usage summary
+ */
+void PrintHelpMessage()
+{
+    // one entry per output line, written in order
+    static const char* const usageLines[] = {
+            "Usage: \n",
+            "Zip File: -z [In-File-Name] [Out-File-Name]\n",
+            "Unzip File: -u [In-File-Name] [Out-File-Name]\n",
+            "Search In Zip File: -s [Zip-File-Name] [Sample-String]\n",
+            "Print Help Message: -h\n",
+    };
+
+    for (const char* line : usageLines)
+    {
+        printf("%s", line);
+    }
+}
+
+
+/**
+ * Program entry point: dispatches on the mode flag in argv[1].
+ */
+int main(int argc, char *argv[])
+{
+    // "-h" on its own prints the usage text
+    if (argc == 2 && strcmp(argv[1], "-h") == 0)
+    {
+        PrintHelpMessage();
+        return 0;
+    }
+
+    // every other mode takes a flag followed by exactly two arguments
+    if (argc != 4)
+    {
+        printf("Unknown Usage!\n");
+        PrintHelpMessage();
+        return 0;
+    }
+
+    const char* mode = argv[1];
+    std::string inputFileName(argv[2]);
+    std::string outputFileName(argv[3]);
+
+    if (strcmp(mode, "-z") == 0)
+    {
+        Logging::LoggingInfo("Start Zip File: " + inputFileName + " to zip file: " + outputFileName);
+
+        FileIO::WriteZipFile(inputFileName, outputFileName);
+
+        // report the rate as a percentage
+        double zipRate = FileIO::CalculateZipRate(inputFileName, outputFileName) * 100.0;
+        Logging::LoggingInfo("The Zip Rate is: " + std::to_string(zipRate) + "%");
+        Logging::LoggingInfo("Zip Success!");
+        return 0;
+    }
+
+    if (strcmp(mode, "-u") == 0)
+    {
+        Logging::LoggingInfo("Start Unzip File: " + inputFileName + " to text file: " + outputFileName);
+
+        FileIO::WriteUnzipFile(inputFileName, outputFileName);
+
+        Logging::LoggingInfo("Unzip Success!");
+        return 0;
+    }
+
+    if (strcmp(mode, "-s") == 0)
+    {
+        // in search mode argv[3] is the sample text, not a file name
+        Logging::LoggingInfo("Start to search in file " + inputFileName);
+        SearchInFile(argv[2], argv[3]);
+        Logging::LoggingInfo("Search finished");
+        return 0;
+    }
+
+    printf("Unknown Usage!\n");
+    PrintHelpMessage();
+    return 0;
+}
--- a/zip-unzip-search/src/file_io.cpp
+++ b/zip-unzip-search/src/file_io.cpp
@@ -0,0 +1,307 @@
+//
+// Created by ricardo on 22-12-11.
+//
+#include "file_io.h"
+#include "logging.h"
+#include "cstdio"
+#include "cstdlib"
+#include "const.h"
+#include "huffman.h"
+#include "unistd.h"
+#include "sys/stat.h"
+
+/**
+ * Count how often each ASCII character occurs in a file.
+ *
+ * Bytes outside 0..ASCII_LENGTH-1 are reported with a warning and skipped.
+ * The process exits if the file cannot be opened.
+ *
+ * @param fileName name of the file to scan
+ * @return array of ASCII_LENGTH counters allocated with new[];
+ *         the caller must release it with delete[]
+ */
+int *FileIO::ReadCharFrequency(const std::string &fileName)
+{
+    FILE *file = fopen(fileName.c_str(), "r");
+
+    if (file == nullptr)
+    {
+        // the file could not be opened
+        Logging::LoggingError(fileName + " is not a valid filename");
+        exit(0);
+    }
+
+    // the trailing () value-initialises every counter to 0
+    int* frequencyArray = new int[ASCII_LENGTH]();
+
+    int temp;
+    while ((temp = fgetc(file)) != EOF)
+    {
+        if (temp >= ASCII_LENGTH || temp < 0)
+        {
+            // illegal character: it must not be used as an index into the table
+            Logging::LoggingWarning(
+                    "Read illegal char " + std::to_string(temp) + " in file. Ignore it");
+            continue;
+        }
+
+        frequencyArray[temp]++;
+    }
+
+    fclose(file);
+    return frequencyArray;
+}
+
+/**
+ * Compress a text file with Huffman coding.
+ *
+ * Output layout: a MetaData record, the Huffman node array, then the
+ * encoded bits packed most-significant-bit first into int-sized words.
+ * The last word is padded with zero bits; MetaData::LastBufferUsedLength
+ * records how many of its bits are real data.
+ * NOTE: the packing assumes int is 32 bits wide.
+ *
+ * Bytes outside the ASCII range are reported and skipped. The process
+ * exits if either file cannot be opened.
+ *
+ * @param inputFile name of the file to compress
+ * @param outputFile name of the compressed file to write
+ */
+void FileIO::WriteZipFile(const std::string &inputFile, const std::string &outputFile)
+{
+    int* frequencyArray = FileIO::ReadCharFrequency(inputFile);
+
+    // the constructor copies the frequencies into the leaf nodes,
+    // so the array can be released straight away
+    HuffmanCode huffmanCode(frequencyArray);
+    delete[] frequencyArray;
+
+    // build the Huffman tree and derive the code of every character
+    huffmanCode.createHuffmanTree();
+    auto dictionary = huffmanCode.getHuffmanCode();
+
+    // check the input before opening the output, so a bad input path
+    // does not truncate an existing output file
+    FILE* input = fopen(inputFile.c_str(), "r");
+    if (input == nullptr)
+    {
+        Logging::LoggingError(inputFile + " is not an valid file name.");
+        exit(0);
+    }
+    FILE* output = fopen(outputFile.c_str(), "wb");
+    if (output == nullptr)
+    {
+        Logging::LoggingError(outputFile + " is not an valid file name.");
+        exit(0);
+    }
+
+    // Reserve the space of the metadata record first; some of its fields
+    // are only known after encoding, so it is rewritten at the end.
+    MetaData metaDataT{};
+    fwrite(&metaDataT, sizeof(MetaData), 1, output);
+
+    // write the Huffman node array
+    fwrite(huffmanCode.nodes->data(), sizeof(HuffmanNode), huffmanCode.nodes->size(), output);
+
+    // bit buffer; unsigned so that shifting bits out of the top is well defined
+    unsigned int buffer = 0;
+    int bufferPos = 0;
+
+    int temp;
+    while ((temp = fgetc(input)) != EOF)
+    {
+        if (temp >= ASCII_LENGTH || temp < 0)
+        {
+            // illegal character: there is no code for it, so skip it
+            Logging::LoggingWarning(
+                    "Read illegal char " + std::to_string(temp) + " in file. Ignore it");
+            continue;
+        }
+
+        const std::vector<char>& code = (*dictionary)[temp];
+
+        for (char bit : code)
+        {
+            // the buffer is full: flush it
+            if (bufferPos == 32)
+            {
+                fwrite(&buffer, sizeof(buffer), 1, output);
+                bufferPos = 0;
+                buffer = 0;
+            }
+
+            buffer = (buffer << 1) | static_cast<unsigned int>(bit);
+            bufferPos++;
+        }
+    }
+
+    // Left-align the bits of the final word. Nothing to shift when it is
+    // empty (no input) or completely full.
+    if (bufferPos > 0 && bufferPos < 32)
+    {
+        buffer <<= (32 - bufferPos);
+    }
+    fwrite(&buffer, sizeof(buffer), 1, output);
+
+    // now that all fields are known, write the real metadata record
+    metaDataT.LastBufferUsedLength = bufferPos;
+    metaDataT.HuffmanRoot = huffmanCode.root;
+    metaDataT.HuffmanNodeLength = static_cast<int>(huffmanCode.nodes->size());
+    fseek(output, 0, SEEK_SET);
+    fwrite(&metaDataT, sizeof(MetaData), 1, output);
+
+    // closing flushes the output, so its size on disk is final when this returns
+    fclose(input);
+    fclose(output);
+
+    delete dictionary;
+}
+
+/**
+ * Decompress a file produced by WriteZipFile.
+ *
+ * Reads the MetaData record and the Huffman node array, then walks the
+ * tree bit by bit and writes a character each time a leaf is reached.
+ * The process exits if a file cannot be opened or the compressed file
+ * is malformed.
+ * NOTE: the unpacking assumes int is 32 bits wide.
+ *
+ * @param inputFile name of the compressed file
+ * @param outputFile name of the decompressed file to write
+ */
+void FileIO::WriteUnzipFile(const std::string &inputFile, const std::string &outputFile)
+{
+    // check the input before opening the output, so a bad input path
+    // does not truncate an existing output file
+    FILE* input = fopen(inputFile.c_str(), "rb");
+    if (input == nullptr)
+    {
+        Logging::LoggingError(inputFile + " is not a valid file name.");
+        exit(0);
+    }
+    FILE* output = fopen(outputFile.c_str(), "w");
+    if (output == nullptr)
+    {
+        Logging::LoggingError(outputFile + " is not a valid file name.");
+        exit(0);
+    }
+
+    // read and validate the metadata record
+    MetaData metaData{};
+    const bool headerOk = fread(&metaData, sizeof(MetaData), 1, input) == 1
+                          && metaData.HuffmanNodeLength > 0
+                          && metaData.HuffmanRoot >= 0
+                          && metaData.HuffmanRoot < metaData.HuffmanNodeLength
+                          && metaData.LastBufferUsedLength >= 0
+                          && metaData.LastBufferUsedLength <= 32;
+    if (!headerOk)
+    {
+        Logging::LoggingError(inputFile + " is not a valid zip file.");
+        exit(0);
+    }
+
+    // read the Huffman node array
+    const std::size_t nodeCount = static_cast<std::size_t>(metaData.HuffmanNodeLength);
+    std::vector<HuffmanNode> nodes(nodeCount);
+    if (fread(nodes.data(), sizeof(HuffmanNode), nodeCount, input) != nodeCount)
+    {
+        Logging::LoggingError(inputFile + " is not a valid zip file.");
+        exit(0);
+    }
+
+    // Two words are kept in flight: only once the read of the following
+    // word fails do we know the current word is the last one, whose
+    // number of valid bits comes from the metadata.
+    unsigned int buffer = 0;
+    bool hasBuffer = fread(&buffer, sizeof(buffer), 1, input) == 1;
+
+    int current = metaData.HuffmanRoot;
+
+    while (hasBuffer)
+    {
+        unsigned int nextBuffer = 0;
+        const bool hasNext = fread(&nextBuffer, sizeof(nextBuffer), 1, input) == 1;
+        int bitsLeft = hasNext ? 32 : metaData.LastBufferUsedLength;
+
+        while (bitsLeft > 0)
+        {
+            // take the most significant bit
+            const unsigned int bit = (buffer >> 31) & 1u;
+            buffer <<= 1;
+            bitsLeft--;
+
+            // 0 descends to the left child, 1 to the right child
+            const int child = (bit == 0) ? nodes[current].lIndex : nodes[current].rIndex;
+            if (child < 0 || child >= metaData.HuffmanNodeLength)
+            {
+                Logging::LoggingError(inputFile + " is corrupted.");
+                exit(0);
+            }
+            current = child;
+
+            // A leaf is emitted as soon as it is reached, so the character
+            // completed by the very last bit of the stream is not lost.
+            if (nodes[current].data != -1)
+            {
+                fputc(nodes[current].data, output);
+                current = metaData.HuffmanRoot;
+            }
+        }
+
+        buffer = nextBuffer;
+        hasBuffer = hasNext;
+    }
+
+    fclose(input);
+    fclose(output);
+}
+
+/**
+ * Compute the compression rate of a file.
+ *
+ * @param inputFileName name of the original file
+ * @param outputFileName name of the compressed file
+ * @return size of the compressed file divided by the size of the original
+ *         file; 0.0 when a size cannot be read or the original is empty
+ */
+double FileIO::CalculateZipRate(const std::string &inputFileName, const std::string &outputFileName)
+{
+    struct stat originFileStat{};
+    struct stat zipFileStat{};
+
+    // stat() returns non-zero when the file cannot be examined
+    if (stat(inputFileName.c_str(), &originFileStat) != 0
+        || stat(outputFileName.c_str(), &zipFileStat) != 0)
+    {
+        Logging::LoggingWarning("Cannot read the file sizes to calculate the zip rate");
+        return 0.0;
+    }
+
+    // an empty original would make the ratio a division by zero
+    if (originFileStat.st_size <= 0)
+    {
+        return 0.0;
+    }
+
+    const double originFileSize = static_cast<double>(originFileStat.st_size);
+    const double zipFileSize = static_cast<double>(zipFileStat.st_size);
+
+    return zipFileSize / originFileSize;
+}
+
+/**
+ * Open a compressed file and position the reader on the first payload bit.
+ *
+ * The MetaData record is read and the Huffman node array is skipped;
+ * `position` is advanced by the number of bits they occupy. The process
+ * exits if the file cannot be opened or its header cannot be read.
+ *
+ * @param inputFileName name of the compressed file
+ */
+BinaryBuffer::BinaryBuffer(std::string &inputFileName)
+{
+    file = fopen(inputFileName.c_str(), "rb");
+
+    if (file == nullptr)
+    {
+        // the file could not be opened
+        Logging::LoggingError(inputFileName + " is not a valid file name.");
+        exit(0);
+    }
+
+    buffer = 0;
+    bufferPos = 0;
+    readFinishedFlag = false;
+
+    // read the metadata record at the start of the file
+    MetaData metaData{};
+    if (fread(&metaData, sizeof(MetaData), 1, file) != 1 || metaData.HuffmanNodeLength < 0)
+    {
+        Logging::LoggingError(inputFileName + " is not a valid zip file.");
+        exit(0);
+    }
+    position = position + (int )sizeof(MetaData) * 8;
+
+    // The node array is not needed here, only its size: seek past it
+    // instead of reading it into a stack array sized by file contents.
+    const long nodeBytes = (long )sizeof(HuffmanNode) * metaData.HuffmanNodeLength;
+    if (fseek(file, nodeBytes, SEEK_CUR) != 0)
+    {
+        Logging::LoggingError(inputFileName + " is not a valid zip file.");
+        exit(0);
+    }
+    position = position + (int )sizeof(HuffmanNode) * metaData.HuffmanNodeLength * 8;
+}
+
+/**
+ * Close the underlying file and clear the handle.
+ */
+BinaryBuffer::~BinaryBuffer()
+{
+    FILE* handle = file;
+    file = nullptr;
+    fclose(handle);
+}
+
+/**
+ * Read the next bit of the file.
+ *
+ * Bits are taken from each int-sized word starting with the most
+ * significant one; a new word is fetched when the current one is used up.
+ * NOTE: assumes int is 32 bits wide.
+ *
+ * @return 0 or 1, or -1 once no further word can be read
+ */
+char BinaryBuffer::read()
+{
+    if (readFinishedFlag)
+    {
+        return -1;
+    }
+
+    if (bufferPos == 0)
+    {
+        // the current word is used up: fetch the next one
+        if (fread(&buffer, sizeof(int), 1, file) != 1)
+        {
+            // end of file
+            readFinishedFlag = true;
+            return -1;
+        }
+
+        bufferPos = 32;
+    }
+
+    // Shift in unsigned arithmetic: left-shifting a set bit out of a
+    // signed int is not well defined.
+    const unsigned int word = static_cast<unsigned int>(buffer);
+    const char bit = static_cast<char>((word >> 31) & 1u);
+    buffer = static_cast<int>(word << 1);
+    bufferPos--;
+    position++;
+    return bit;
+}
--- a/zip-unzip-search/src/huffman.cpp
+++ b/zip-unzip-search/src/huffman.cpp
@@ -0,0 +1,176 @@
+//
+// Created by ricardo on 22-12-11.
+//
+#include "huffman.h"
+#include "const.h"
+#include "cstdio"
+
+/**
+ * Create one childless node per ASCII character, weighted by its frequency.
+ * The node for character c gets id c.
+ * @param frequencyArray array of ASCII_LENGTH character frequencies
+ */
+HuffmanCode::HuffmanCode(const int *frequencyArray)
+{
+    int symbol = 0;
+
+    while (symbol < ASCII_LENGTH)
+    {
+        HuffmanNode leaf{};
+        leaf.id = symbol;
+        leaf.data = (char )symbol;
+        leaf.frequency = frequencyArray[symbol];
+        // -1 marks a missing child
+        leaf.lIndex = -1;
+        leaf.rIndex = -1;
+
+        nodes->push_back(leaf);
+        ++symbol;
+    }
+}
+
+/**
+ * Create from an existing node array; the nodes are copied.
+ * @param nodeArray Huffman node array
+ * @param length number of nodes in the array
+ */
+HuffmanCode::HuffmanCode(HuffmanNode *nodeArray, int length)
+{
+    // drop the empty list created by the member initialiser
+    delete nodes;
+
+    HuffmanNode* const first = nodeArray;
+    HuffmanNode* const last = nodeArray + length;
+    nodes = new std::vector<HuffmanNode>(first, last);
+}
+
+/**
+ * Release the node list owned by this object.
+ */
+HuffmanCode::~HuffmanCode()
+{
+    delete this->nodes;
+}
+
+/**
+ * Sort the forest list by ascending weight with a bubble sort.
+ * Only strictly out-of-order neighbours are swapped, so nodes of equal
+ * weight keep their relative order.
+ * @param forests list to sort in place
+ */
+void HuffmanCode::sortForests(std::vector<HuffmanNode> &forests)
+{
+    const std::size_t count = forests.size();
+    std::size_t pass = 1;
+    bool swappedInPass = true;
+
+    // stop early as soon as a whole pass performs no swap
+    while (pass < count && swappedInPass)
+    {
+        swappedInPass = false;
+        const std::size_t limit = count - pass;
+
+        for (std::size_t k = 0; k != limit; ++k)
+        {
+            HuffmanNode& lower = forests[k];
+            HuffmanNode& upper = forests[k + 1];
+
+            if (lower.frequency <= upper.frequency)
+            {
+                continue;
+            }
+
+            const HuffmanNode held = lower;
+            lower = upper;
+            upper = held;
+            swappedInPass = true;
+        }
+
+        ++pass;
+    }
+}
+
+/**
+ * Build the Huffman tree from the nodes currently in `nodes`.
+ *
+ * The two lightest trees are merged repeatedly until one tree is left;
+ * every merged node is appended to `nodes` and `root` is set to the id of
+ * the final tree. Does nothing when `nodes` is empty.
+ */
+void HuffmanCode::createHuffmanTree()
+{
+    if (nodes->empty())
+    {
+        // nothing to build; root keeps its current value
+        return;
+    }
+
+    // working list of the roots that still have to be merged
+    std::vector<HuffmanNode> forests(*nodes);
+    // id of the next node appended to the node list
+    int pos = nodes->back().id + 1;
+
+    while (forests.size() > 1)
+    {
+        // after sorting, the two lightest trees are at the front
+        sortForests(forests);
+
+        HuffmanNode node{};
+        node.frequency = forests[0].frequency + forests[1].frequency;
+        node.data = -1;
+        // the heavier tree becomes the left child,
+        // the lighter tree becomes the right child
+        node.rIndex = forests[0].id;
+        node.lIndex = forests[1].id;
+        node.id = pos;
+        pos++;
+        nodes->push_back(node);
+
+        // replace the two merged trees by the new one
+        forests.erase(forests.begin(), forests.begin() + 2);
+        forests.push_back(node);
+    }
+
+    root = forests.front().id;
+}
+
+/**
+ * Print the edges of the Huffman tree; prints nothing while root is -1.
+ */
+void HuffmanCode::printHuffmanTree()
+{
+    if (root != -1)
+    {
+        printHuffmanTreeR(root);
+    }
+}
+
+/**
+ * Print the edges below a node as "parent child" lines, left subtree first.
+ * Nodes whose weight is 0 are not expanded.
+ * @param nodeId index of the node to start from
+ */
+void HuffmanCode::printHuffmanTreeR(int nodeId)
+{
+    const HuffmanNode current = (*nodes)[nodeId];
+
+    // nodes with weight 0 are not printed
+    if (current.frequency == 0)
+    {
+        return;
+    }
+
+    const int children[2] = {current.lIndex, current.rIndex};
+
+    for (int child : children)
+    {
+        if (child == -1)
+        {
+            continue;
+        }
+
+        printf("%d %d\n", current.id, child);
+        printHuffmanTreeR(child);
+    }
+}
+
+/**
+ * Compute the Huffman code of every character.
+ *
+ * @return dictionary indexed by character, each entry holding the 0/1
+ *         sequence of that character; allocated with new, the caller must
+ *         delete it. nullptr when `root` is not a valid node index.
+ */
+std::array<std::vector<char>, ASCII_LENGTH> * HuffmanCode::getHuffmanCode()
+{
+    // `root` is public and may have been set from file contents, so the
+    // whole range is checked, not only the -1 "no tree" marker
+    if (root < 0 || root >= (int )nodes->size())
+    {
+        return nullptr;
+    }
+
+    auto dictionary = new std::array<std::vector<char>, ASCII_LENGTH>();
+
+    // path from the root to the node being visited
+    std::vector<char> code;
+
+    getHuffmanCodeR(*dictionary, root, code);
+
+    return dictionary;
+}
+
+/**
+ * Recursive worker of getHuffmanCode.
+ *
+ * Walks the subtree below nodeId; a left edge adds 0 to the path and a
+ * right edge adds 1. The path is stored in the dictionary for every node
+ * that carries a character. Invalid node indices and characters outside
+ * the dictionary are skipped, so a corrupt node array cannot cause
+ * out-of-range accesses.
+ *
+ * @param dictionary dictionary being filled
+ * @param nodeId index of the node to visit
+ * @param code path from the root to nodeId; restored before returning
+ */
+void HuffmanCode::getHuffmanCodeR(std::array<std::vector<char>, ASCII_LENGTH> &dictionary, int nodeId,
+                                  std::vector<char> &code)
+{
+    if (nodeId < 0 || nodeId >= (int )nodes->size())
+    {
+        return;
+    }
+
+    // A path in a valid tree is shorter than the node count; a longer one
+    // means the node array contains a cycle, so stop instead of recursing
+    // forever.
+    if (code.size() >= nodes->size())
+    {
+        return;
+    }
+
+    const HuffmanNode node = (*nodes)[nodeId];
+    const int symbol = node.data;
+
+    if (symbol >= 0 && symbol < ASCII_LENGTH)
+    {
+        // the node carries a character: record the path that leads to it
+        std::vector<char>& entry = dictionary[symbol];
+        entry.insert(entry.end(), code.begin(), code.end());
+    }
+
+    if (node.lIndex != -1)
+    {
+        // visit the left subtree
+        code.push_back(0);
+        getHuffmanCodeR(dictionary, node.lIndex, code);
+        code.pop_back();
+    }
+
+    if (node.rIndex != -1)
+    {
+        // visit the right subtree
+        code.push_back(1);
+        getHuffmanCodeR(dictionary, node.rIndex, code);
+        code.pop_back();
+    }
+}
+
+/**
+ * Print a Huffman code dictionary, one "index: bits" line per entry.
+ * @param dictionary dictionary to print
+ */
+void HuffmanCode::printHuffmanCode(const std::array<std::vector<char>, ASCII_LENGTH>& dictionary)
+{
+    int symbol = 0;
+
+    for (const std::vector<char>& bits : dictionary)
+    {
+        printf("%d: ", symbol);
+        ++symbol;
+
+        for (char bit : bits)
+        {
+            // 48 is the character code of '0'
+            putc(bit + 48, stdout);
+        }
+        putc('\n', stdout);
+    }
+}
--- a/zip-unzip-search/src/logging.cpp
+++ b/zip-unzip-search/src/logging.cpp
@@ -0,0 +1,20 @@
+//
+// Created by ricardo on 22-12-11.
+//
+#include "logging.h"
+#include "cstdio"
+
+/**
+ * Print an informational message on its own line, tagged "[Info]".
+ */
+void Logging::LoggingInfo(const std::string &info)
+{
+    const char* text = info.c_str();
+    printf("[Info] %s\n", text);
+}
+
+/**
+ * Print a warning on its own line, tagged "[warning]".
+ */
+void Logging::LoggingWarning(const std::string &warning)
+{
+    const char* text = warning.c_str();
+    printf("[warning] %s\n", text);
+}
+
+/**
+ * Print an error on its own line, tagged "[error]".
+ */
+void Logging::LoggingError(const std::string &error)
+{
+    const char* text = error.c_str();
+    printf("[error] %s\n", text);
+}
--- a/zip-unzip-search/src/search.cpp
+++ b/zip-unzip-search/src/search.cpp
@@ -0,0 +1,194 @@
+//
+// Created by ricardo on 22-12-16.
+//
+#include "search.h"
+#include "cstdio"
+#include "cstring"
+#include "logging.h"
+#include "file_io.h"
+#include "huffman.h"
+
+/**
+ * Copy the pattern and build both shift tables for it.
+ * @param sample pattern as a sequence of 0/1 values
+ */
+BMSearch::BMSearch(std::vector<char> &sample)
+{
+    const int patternLength = (int )sample.size();
+
+    // keep a private copy of the pattern for matching
+    this->sample = new std::vector<char>(sample);
+
+    // one bad-character entry per symbol (0 and 1),
+    // one good-suffix entry per pattern position
+    badCharArray = new int[2];
+    goodSuffixArray = new int[patternLength];
+
+    generateBrokenCharArray(sample);
+    generateGoodSuffixArray(sample);
+}
+
+
+/**
+ * Release the shift tables and the pattern copy made by the constructor.
+ */
+BMSearch::~BMSearch()
+{
+    // both tables come from new[], so they need delete[]
+    delete[] badCharArray;
+    delete[] goodSuffixArray;
+    // the constructor's copy of the pattern is owned here as well
+    delete sample;
+}
+
+/**
+ * Build the bad-character table.
+ * Each entry is the distance from the last occurrence of that symbol
+ * (the final pattern position excluded) to the end of the pattern, or
+ * the pattern length when the symbol does not occur there.
+ * @param s pattern; every element must be 0 or 1
+ */
+void BMSearch::generateBrokenCharArray(std::vector<char> &s)
+{
+    const int patternLength = (int )s.size();
+
+    // the input is a 0/1 string, so there are only two symbols
+    badCharArray[0] = patternLength;
+    badCharArray[1] = patternLength;
+
+    // later occurrences overwrite earlier ones, leaving the smallest distance
+    int distance = patternLength - 1;
+    for (int idx = 0; idx + 1 < patternLength; ++idx, --distance)
+    {
+        badCharArray[s[idx]] = distance;
+    }
+}
+
+/**
+ * Build the good-suffix table.
+ *
+ * First computes suffix[i], the length of the longest substring ending
+ * at i that is also a suffix of the whole pattern, then derives the
+ * shift for a mismatch at each pattern position. Does nothing for an
+ * empty pattern.
+ *
+ * @param s pattern
+ */
+void BMSearch::generateGoodSuffixArray(std::vector<char> &s)
+{
+    const int length = (int )s.size();
+
+    if (length == 0)
+    {
+        // no table entries exist; the code below would index position -1
+        return;
+    }
+
+    // heap-backed instead of a variable-length array, which is not standard C++
+    std::vector<int> suffix(length);
+
+    suffix[length - 1] = length;
+
+    for (int i = length - 2; i >= 0; i--)
+    {
+        // extend the match between s[..i] and the pattern's end backwards
+        int pos = i;
+        while (pos >= 0 && s[pos] == s[length - 1 - i + pos])
+        {
+            pos--;
+        }
+        suffix[i] = i - pos;
+    }
+
+    // default: no reusable part, shift by the whole pattern
+    for (int i = 0; i < length; i++)
+    {
+        goodSuffixArray[i] = length;
+    }
+
+    // case: a prefix of the pattern equals a suffix of the matched part
+    int j = 0;
+    for (int i = length - 1; i >= 0 ; i--)
+    {
+        if (suffix[i] == i + 1)
+        {
+            for (; j < length - 1 - i; j++)
+            {
+                if (goodSuffixArray[j] == length)
+                {
+                    goodSuffixArray[j] = length - 1 - i;
+                }
+            }
+        }
+    }
+
+    // case: the matched suffix occurs again inside the pattern
+    for (int i = 0; i < length - 1; i++)
+    {
+        goodSuffixArray[length - 1 - suffix[i]] = length - 1 - i;
+    }
+}
+
+/**
+ * Search the bit stream of a compressed file for the pattern.
+ *
+ * A window as long as the pattern is kept over the stream and compared
+ * from its last position backwards; after each comparison the window
+ * advances by the shift taken from the tables. Every match is logged with
+ * the reader's bit position at that moment. Does nothing for an empty
+ * pattern.
+ *
+ * @param fileName name of the compressed file
+ */
+void BMSearch::matchFile(std::string &fileName)
+{
+    const int sampleLength = (int )sample->size();
+
+    if (sampleLength == 0)
+    {
+        // the shift tables have no entries for an empty pattern
+        Logging::LoggingWarning("The search pattern is empty, nothing to match");
+        return;
+    }
+
+    BinaryBuffer buffer(fileName);
+
+    std::vector<char> inputArray;
+
+    while (true)
+    {
+        if ((int )inputArray.size() != sampleLength)
+        {
+            // The comparison runs from the end of the window, so the window
+            // has to be filled up to the pattern length first.
+            char temp = buffer.read();
+            if (temp == -1)
+            {
+                break;
+            }
+
+            inputArray.push_back(temp);
+            continue;
+        }
+
+        // compare backwards until a mismatch or the start of the pattern
+        int pos = sampleLength - 1;
+        while (pos >= 0 && (*sample)[pos] == inputArray[pos])
+        {
+            pos--;
+        }
+
+        int teleport;
+        if (pos < 0)
+        {
+            // complete match
+            Logging::LoggingInfo("Found at " + std::to_string(buffer.position));
+            teleport = goodSuffixArray[0];
+        }
+        else
+        {
+            // mismatch: take the larger of the two rule shifts
+            teleport = max(goodSuffixArray[pos],
+                           badCharArray[inputArray[pos]] - sampleLength + 1 + pos);
+        }
+
+        // advance the window by dropping the bits it leaves behind
+        inputArray.erase(inputArray.begin(), inputArray.begin() + teleport);
+    }
+}
+
+/**
+ * Return the larger of two integers.
+ */
+int BMSearch::max(int a, int b)
+{
+    if (a < b)
+    {
+        return b;
+    }
+
+    return a;
+}
+
+/**
+ * Search a compressed file for a text sample.
+ *
+ * The Huffman tree stored in the file is used to translate the sample
+ * into its bit sequence, which is printed and then searched for in the
+ * file's bit stream. The process exits if the file cannot be opened or
+ * its header is malformed; a sample that is empty or contains a
+ * non-ASCII character is reported and not searched.
+ *
+ * @param fileName name of the compressed file
+ * @param sample NUL-terminated text to look for
+ */
+void SearchInFile(char* fileName, char* sample)
+{
+    FILE* file = fopen(fileName, "rb");
+
+    if (file == nullptr)
+    {
+        Logging::LoggingError(std::string(fileName) + " is not a valid file name.");
+        exit(0);
+    }
+
+    // read the metadata record and check it before trusting its fields
+    MetaData metaData{};
+    const bool headerOk = fread(&metaData, sizeof(MetaData), 1, file) == 1
+                          && metaData.HuffmanNodeLength > 0
+                          && metaData.HuffmanRoot >= 0
+                          && metaData.HuffmanRoot < metaData.HuffmanNodeLength;
+
+    // read the Huffman node array
+    std::vector<HuffmanNode> nodes(headerOk ? metaData.HuffmanNodeLength : 0);
+    const bool nodesOk = headerOk
+                         && fread(nodes.data(), sizeof(HuffmanNode), nodes.size(), file) == nodes.size();
+    fclose(file);
+
+    if (!nodesOk)
+    {
+        Logging::LoggingError(std::string(fileName) + " is not a valid zip file.");
+        exit(0);
+    }
+
+    // rebuild the Huffman codes from the node array
+    HuffmanCode huffmanCode(nodes.data(), metaData.HuffmanNodeLength);
+    huffmanCode.root = metaData.HuffmanRoot;
+    auto dictionary = huffmanCode.getHuffmanCode();
+
+    if (dictionary == nullptr)
+    {
+        Logging::LoggingError(std::string(fileName) + " is not a valid zip file.");
+        exit(0);
+    }
+
+    // translate the sample into its Huffman bit sequence
+    std::vector<char> sampleCode;
+    const std::size_t sampleLength = strlen(sample);
+    bool encodable = true;
+
+    Logging::LoggingInfo("The binary representation of " + std::string(sample) + " is ");
+    for (std::size_t i = 0; i < sampleLength; i++)
+    {
+        // go through unsigned char: a negative char must not become an index
+        const unsigned char symbol = static_cast<unsigned char>(sample[i]);
+        if (symbol >= ASCII_LENGTH)
+        {
+            encodable = false;
+            break;
+        }
+
+        const std::vector<char>& code = (*dictionary)[symbol];
+        for (char bit : code)
+        {
+            sampleCode.push_back(bit);
+            // 48 is the character code of '0'
+            putc(bit + 48, stdout);
+        }
+    }
+    putc('\n', stdout);
+
+    delete dictionary;
+
+    if (!encodable)
+    {
+        Logging::LoggingError("The sample contains a non-ASCII character and cannot be searched");
+        return;
+    }
+    if (sampleCode.empty())
+    {
+        Logging::LoggingWarning("The sample has no binary representation, nothing to search");
+        return;
+    }
+
+    // run the search
+    BMSearch bm(sampleCode);
+
+    std::string str = std::string(fileName);
+    bm.matchFile(str);
+}