// RubbishBin/GrammarParser/LrParser.cpp

// ReSharper disable CppTooWideScopeInitStatement
// ReSharper disable CppUseStructuredBinding
#include <iostream>
#include <sstream>
#include <memory>
#include <unordered_map>
#include <string>
#include <unordered_set>
#include <algorithm>
#include <vector>
#include <list>

// End-of-input marker. It is the lookahead of the initial item built in
// Grammar::ConstructDFA and is appended to the input buffer in Grammar::Analyse.
// NOTE(review): written as a wide-character literal, but every use in this file
// converts it to a plain char.
#define END L'$'

/**
 * \brief One LR(1) item: a production Left -> Right, a dot position and a lookahead symbol.
 */
struct Expression
{
    char Left;          // left-hand symbol of the production
    char LookAhead;     // lookahead symbol carried by this item
    std::string Right;  // right-hand side; each character is one grammar symbol
    int Pos;            // index of the dot inside Right (0 = nothing consumed yet)

    /**
     * \brief Builds an item with the dot in front of the first right-hand symbol.
     * \param left left-hand symbol
     * \param right right-hand side of the production
     * \param lookAhead lookahead symbol
     */
    Expression(const char left, const std::string& right, const char lookAhead)
        : Left(left), LookAhead(lookAhead), Right(right), Pos(0)
    {
    }

    /**
     * \brief Serialises the item into a string used as its hashing key.
     * \return Left, Right, Pos (decimal) and LookAhead concatenated in that order
     */
    std::string GetHashCode() const
    {
        std::ostringstream key;

        key << Left;
        key << Right;
        key << Pos;
        key << LookAhead;

        return key.str();
    }
};

/**
 * \brief Hash functor that lets Expression be stored in unordered containers.
 *
 * The hash is taken over the string returned by Expression::GetHashCode().
 */
struct ExpressionHash
{
    std::size_t operator() (const Expression & expression) const
    {
        const std::string key = expression.GetHashCode();
        return std::hash<std::string>{}(key);
    }
};

struct ExpressionEqual
{
    bool operator() (const Expression& a, const Expression& b) const
    {
        return a.GetHashCode() == b.GetHashCode();
    }
};

/**
 * \brief One state of the automaton: a set of items plus its outgoing transitions.
 */
struct State
{
    // Items that make up this state.
    std::unordered_set<Expression, ExpressionHash, ExpressionEqual> Expressions;
    // Outgoing transitions, keyed by the grammar symbol that triggers them.
    std::unordered_map<char, std::shared_ptr<State>> Transformers;

    /**
     * \brief Creates a state holding a copy of the given item set and no transitions.
     * \param expressions items of the state
     */
    explicit State(const std::unordered_set<Expression, ExpressionHash, ExpressionEqual>& expressions)
        : Expressions(expressions)
    {
    }

    /**
     * \brief Builds a string key for the item set that does not depend on iteration order.
     *
     * Items sharing the same production and dot position are grouped; each group is
     * rendered as "Left, symbols before the dot, '^', symbols after the dot" followed by
     * its lookaheads in sorted order. The group strings are then sorted and concatenated.
     * Transitions do not take part in the key.
     *
     * \return the key string
     */
    std::string GetHashCode() const
    {
        // core item text -> every lookahead seen with that core
        std::unordered_map<std::string, std::vector<char>> lookAheadsByCore;

        for (const auto& item : Expressions)
        {
            const auto dot = static_cast<std::size_t>(item.Pos);

            std::string core(1, item.Left);
            core += item.Right.substr(0, dot);
            core += '^';
            core += item.Right.substr(dot);

            // operator[] creates the empty vector on first use.
            lookAheadsByCore[core].emplace_back(item.LookAhead);
        }

        std::vector<std::string> parts;
        for (auto& entry : lookAheadsByCore)
        {
            std::vector<char>& lookAheads = entry.second;
            std::sort(lookAheads.begin(), lookAheads.end());

            parts.emplace_back(entry.first + std::string(lookAheads.begin(), lookAheads.end()));
        }

        // Sorting removes any dependence on the hash map's iteration order.
        std::sort(parts.begin(), parts.end());

        std::string code;
        for (const auto& part : parts)
        {
            code += part;
        }

        return code;
    }
};

/**
 * \brief Hash functor for shared State pointers.
 *
 * Hashes the pointed-to state's GetHashCode() string, not the pointer value.
 * The pointer is dereferenced unconditionally.
 */
struct StateHash
{
    std::size_t operator() (const std::shared_ptr<State>& s) const
    {
        const std::string key = s->GetHashCode();
        return std::hash<std::string>{}(key);
    }
};

/**
 * \brief Equality functor for shared State pointers.
 *
 * Two pointers are equal when the states they point to yield the same
 * GetHashCode() string. Both pointers are dereferenced unconditionally.
 */
struct StateEqual
{
    bool operator() (const std::shared_ptr<State>& a, const std::shared_ptr<State>& b) const
    {
        const std::string lhs = a->GetHashCode();
        const std::string rhs = b->GetHashCode();
        return lhs == rhs;
    }
};

struct Grammar
{
    std::unordered_map<char, std::vector<std::string>> Generators;
    char Begin;
    std::shared_ptr<State> BeginState;

    std::unordered_set<char> Nonterminators;
    std::unordered_map<char, std::unordered_set<char>> FirstSet;
    std::unordered_set<std::shared_ptr<State>, StateHash, StateEqual> DFA;

    Grammar(const std::unordered_map<char, std::vector<std::string>>& map, const char begin)
    {
        Generators = map;
        Begin = begin;

        for (const auto& pair : Generators)
        {
            Nonterminators.emplace(pair.first);
        }

        // 构造FirstSet
        bool changed = true;
        while (changed)
        {
            changed = false;

            for (const auto& pair: Generators)
            {
                for (const auto&expression: pair.second)
                {
                    if (Nonterminators.count(expression.front()) != 0)
                    {
                        // 合并其他非终结符的FIRST集合
                        for (const auto&c: FirstSet[expression.front()])
                        {
                            if (FirstSet[pair.first].count(c) == 0)
                            {
                                FirstSet[pair.first].emplace(c);
                                changed = true;
                            }
                        }
                    }
                    else
                    {
                        if (FirstSet[pair.first].count(expression.front()) == 0)
                        {
                            FirstSet[pair.first].emplace(expression.front());
                            changed = true;
                        }
                    }
                }
            }
        }
    }

    ~Grammar()
    {
        DFA.clear();
    }

    /**
     * \brief 计算句子的First集合
     * \param expression 需要计算的句子
     * \return 句子的First集合
     */
    std::unordered_set<char> GetFirstSet(const std::string& expression) const
    {
        std::unordered_set<char> result;

        if (Nonterminators.count(expression.front()) != 0)
        {
            // 起手是非终结符
            for (const char c : FirstSet.at(expression.front()))
            {
                result.emplace(c);
            }
        }
        else
        {
            result.emplace(expression.front());
        }

        return result;
    }

    std::unordered_set<Expression, ExpressionHash, ExpressionEqual> ConstructClosure(const Expression& expression) const
    {
        std::unordered_set<Expression, ExpressionHash, ExpressionEqual> result;
        result.emplace(expression);

        bool changed = true;
        while (changed)
        {
            changed = false;

            for (const auto& e : result)
            {
                const char next = e.Right[e.Pos];

                if (Nonterminators.count(next) == 0)
                {
                    continue;
                }

                std::string ahead;

                for (size_t i = e.Pos + 1; i < e.Right.size(); ++i)
                {
                    ahead += e.Right[i];
                }
                ahead += e.LookAhead;

                std::unordered_set<char> lookAheadSet = GetFirstSet(ahead);

                for (const auto& i : Generators.at(next))
                {
                    for (const char lookAhead : lookAheadSet)
                    {
                        Expression newExpression(next, i, lookAhead);
                        if (result.count(newExpression) == 0)
                        {
                            result.emplace(newExpression);
                            changed = true;
                        }
                    }
                }
            }
        }

        return result;
    }

    void ConstructDFA()
    {
        const Expression begin = Expression(Begin, Generators.at(Begin).front(), END);
        int id = 0;

        BeginState = std::make_shared<State>(ConstructClosure(begin));
        DFA.emplace(BeginState);
        ++id;

        bool added = true;
        while (added)
        {
            added = false;

            for (const auto& state : DFA)
            {
                // 表示使用key 进行移进可以生成的新LR(1)句型
                std::unordered_map<char, std::vector<Expression>> nextExpressions;

                for (const auto& e : state->Expressions)
                {
                    Expression nextExpression = Expression(e);
                    if (nextExpression.Pos >= nextExpression.Right.size())
                    {
                        // 移进符号已经到达句型的末尾
                        continue;
                    }
                    nextExpression.Pos++;

                    if (nextExpressions.count(e.Right[e.Pos]) == 0)
                    {
                        std::vector<Expression> list;
                        list.emplace_back(nextExpression);
                        nextExpressions.emplace(e.Right[e.Pos], list);
                    }
                    else
                    {
                        nextExpressions.at(e.Right[e.Pos]).emplace_back(nextExpression);
                    }
                }

                for (const auto& pair : nextExpressions)
                {
                    // 针对每个构建项目集闭包
                    std::unordered_set<Expression, ExpressionHash, ExpressionEqual> closure;

                    for (const auto& i : pair.second)
                    {
                        for (const auto& e : ConstructClosure(i))
                        {
                            closure.emplace(e);
                        }
                    }

                    auto nextState = std::make_shared<State>(closure);

                    auto iter = DFA.find(nextState);
                    if (iter == DFA.end())
                    {
                        // 不存在这个项目集闭包
                        DFA.emplace(nextState);
                        state->Transformers.emplace(pair.first, nextState);

                        ++id;
                        added = true;
                    }
                    else
                    {
                        // 存在这个项目集闭包
                        state->Transformers.emplace(pair.first, *iter);
                    }
                }
            }
        }
    }

    void Analyse(const std::unordered_map<std::string, int>& numbers, const std::string& input) const
    {
        std::list<std::shared_ptr<State>> stateStack;
        stateStack.emplace_back(BeginState);

        std::string buffer(input);
        buffer += END;
        auto iter = buffer.begin();

        while (true)
        {
            const auto& top = stateStack.back();

            // 尝试进行移进
            bool acceptFlag = false;
            bool reduceFlag = false;
            for (const auto& expression : top->Expressions)
            {
                if (expression.Pos == expression.Right.size() and expression.LookAhead == *iter)
                {
                    if (expression.Left == Begin)
                    {
                        acceptFlag = true;
                        std::cout << "accept" << std::endl;
                    }
                    else
                    {
                        reduceFlag = true;
                        std::cout << numbers.at(expression.Left + expression.Right) << std::endl;

                        for (size_t i = 0; i < expression.Right.size(); ++i)
                        {
                            stateStack.pop_back();
                        }

                        stateStack.emplace_back(stateStack.back()->Transformers.at(expression.Left));
                    }
                }
            }

            if (acceptFlag)
            {
                // 接受
                break;
            }
            if (reduceFlag)
            {
                // reduce
                continue;
            }

            if (top->Transformers.count(*iter) != 0)
            {
                stateStack.emplace_back(top->Transformers.at(*iter));
                ++iter;

                std::cout << "shift" << std::endl;
                continue;
            }

            std::cout << "error" << std::endl;
            break;
        }
    }
};

/**
 * \brief Builds the automaton for a small arithmetic-expression grammar, reads one
 *        whitespace-delimited token from standard input and prints the parser's actions.
 */
int main()
{
    // Productions per nonterminal; 'S' is the start symbol.
    const std::unordered_map<char, std::vector<std::string>> productions{
        {'S', {"E"}},
        {'E', {"E+T", "E-T", "T"}},
        {'T', {"T*F", "T/F", "F"}},
        {'F', {"(E)", "n"}}
    };

    // Number printed when a production is reduced; the key is the left symbol
    // followed by the right-hand side.
    const std::unordered_map<std::string, int> productionNumbers{
        {"SE", 0},
        {"EE+T", 1},
        {"EE-T", 2},
        {"ET", 3},
        {"TT*F", 4},
        {"TT/F", 5},
        {"TF", 6},
        {"F(E)", 7},
        {"Fn", 8}
    };

    Grammar grammar(productions, 'S');
    grammar.ConstructDFA();

    std::string sentence;
    std::cin >> sentence;
    grammar.Analyse(productionNumbers, sentence);

    return 0;
}