RubbishBin/GrammarParser/LlParser.cpp

#include <iostream>
#include <vector>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <list>

#define EMPTY L'ε'
#define END L'$'

struct Grammar
{
    std::unordered_map<wchar_t, std::vector<std::wstring>> Generators;
    std::unordered_set<wchar_t> Nonterminators;
    wchar_t Begin;
    std::unordered_map<wchar_t, std::unordered_set<wchar_t>> FirstSet;
    std::unordered_map<wchar_t, std::unordered_set<wchar_t>> FollowSet;
    std::unordered_map<wchar_t, std::unordered_map<wchar_t, std::wstring>> AnalysisTable;

    Grammar(const std::unordered_map<wchar_t, std::vector<std::wstring>>&generators, const wchar_t begin)
    {
        Generators = generators;
        Begin = begin;

        for (const auto&[key, value]: generators)
        {
            Nonterminators.emplace(key);
            FirstSet.emplace(key, std::unordered_set<wchar_t>());
            FollowSet.emplace(key, std::unordered_set<wchar_t>());
            AnalysisTable.emplace(key, std::unordered_map<wchar_t, std::wstring>());
        }

        // 求非终结符的FIRST集合
        bool changed = true;
        while (changed)
        {
            changed = false;

            for (const auto&[key, value]: generators)
            {
                for (const auto&expression: value)
                {
                    if (Nonterminators.count(expression.front()) != 0)
                    {
                        // 合并其他非终结符的FIRST集合
                        for (const auto&c: FirstSet[expression.front()])
                        {
                            if (c != EMPTY and FirstSet[key].count(c) == 0)
                            {
                                FirstSet[key].emplace(c);
                                changed = true;
                            }
                        }
                    }
                    else
                    {
                        if (FirstSet[key].count(expression.front()) == 0)
                        {
                            FirstSet[key].emplace(expression.front());
                            changed = true;
                        }
                    }
                }
            }
        }

        // 求非终结符的FOLLOW集合
        changed = true;
        FollowSet[begin].emplace(END);
        while (changed)
        {
            changed = false;

            for (const auto&[key, value]: generators)
            {
                for (const auto&expression: value)
                {
                    for (auto i = expression.begin(); i != expression.end(); ++i)
                    {
                        if (Nonterminators.count(*i) != 0)
                        {
                            // 发现非终结符
                            auto next = i;
                            ++next;

                            if (next == expression.end())
                            {
                                // 非终结符在表达式末尾
                                for (const auto&c: FollowSet[key])
                                {
                                    if (FollowSet[*i].count(c) == 0)
                                    {
                                        FollowSet[*i].emplace(c);
                                        changed = true;
                                    }
                                }

                                continue;
                            }

                            std::wstring newExpression;
                            while (next != expression.end())
                            {
                                newExpression += *next;
                                ++next;
                            }

                            const auto&firstSet = GetFirstSet(newExpression);

                            for (const auto&c: firstSet)
                            {
                                if (c == EMPTY)
                                {
                                    for (const auto&j: FollowSet[key])
                                    {
                                        if (FollowSet[*i].count(j) == 0)
                                        {
                                            FollowSet[*i].emplace(j);
                                            changed = true;
                                        }
                                    }
                                }
                                else
                                {
                                    if (FollowSet[*i].count(c) == 0)
                                    {
                                        FollowSet[*i].emplace(c);
                                        changed = true;
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }

        // 生成预测分析表
        for (const auto&[key, array]: Generators)
        {
            for (const auto&expression: array)
            {
                const auto&firstSet = GetFirstSet(expression);

                for (const auto&c: firstSet)
                {
                    if (c == EMPTY)
                    {
                        for (const auto&i: FollowSet.at(key))
                        {
                            if (!AnalysisTable[key].emplace(i, expression).second)
                            {
                                std::cout << "Error, not LL(1) grammar!" << std::endl;
                            }
                        }
                    }
                    else
                    {
                        if (!AnalysisTable[key].emplace(c, expression).second)
                        {
                            std::cout << "Error, not LL(1) grammar!" << std::endl;
                        }
                    }
                }
            }
        }
    }

    std::unordered_set<wchar_t> GetFirstSet(const std::wstring&expression) const
    {
        std::unordered_set<wchar_t> set;

        if (Nonterminators.count(expression.front()) != 0)
        {
            // 起手是非终结符
            for (const auto&c: FirstSet.at(expression.front()))
            {
                // 只有所有的符号都能推出空串 才添加
                if (c == EMPTY)
                {
                    bool flag = true;
                    for (const auto&i: expression)
                    {
                        if (Nonterminators.count(i) != 0)
                        {
                            flag = FirstSet.at(i).count(EMPTY) != 0;
                        }
                        else
                        {
                            flag = i == EMPTY;
                        }

                        if (!flag)
                        {
                            break;
                        }
                    }

                    if (flag)
                    {
                        set.emplace(EMPTY);
                    }
                }
                else
                {
                    set.emplace(c);
                }
            }
        }
        else
        {
            set.emplace(expression.front());
        }

        return set;
    }

    void Analyse(const std::wstring& input, const std::unordered_map<std::wstring, int>& numbers)
    {
        std::list<wchar_t> states;
        auto i = input.begin();

        states.emplace_back(END);
        states.emplace_back(Begin);

        do
        {
            for (const auto c : states)
            {
                std::wcout << c;
            }
            std::wcout << L'\t';

            for (auto j = i; j != input.end(); ++j)
            {
                std::wcout << *j;
            }
            std::wcout << L'\t';

            if (auto top = states.back(); Nonterminators.count(top) != 0)
            {
                // 栈顶是非终结符
                if (AnalysisTable[top].count(*i) == 0)
                {
                    std::wcout << L"error" << std::endl;
                    break;
                }

                states.pop_back();
                if (AnalysisTable[top][*i].front() != EMPTY)
                {
                    // 如果是空串还是不要加了
                    for (auto j = AnalysisTable[top][*i].rbegin(); j != AnalysisTable[top][*i].rend(); ++j)
                    {
                        states.emplace_back(*j);
                    }
                }

                auto expression = top + AnalysisTable[top][*i];
                std::wcout << numbers.at(expression);
            }
            else
            {
                if (top == *i)
                {
                    if (top == END)
                    {
                        std::wcout << L"accept";
                    }
                    else
                    {
                        std::wcout << L"match";
                    }

                    states.pop_back();
                    ++i;
                }
                else
                {
                    std::wcout << L"error" << std::endl;
                    break;
                }
            }

            std::wcout << std::endl;
        }
        while (!states.empty());
    }
};

int main()
{
    const std::unordered_map<wchar_t, std::vector<std::wstring>> map = {
        {'E', {L"TA"}},
        {'A', {L"+TA", L"-TA", L"ε"}},
        {'T', {L"FB"}},
        {'B', {L"ε", L"*FB", L"/FB"}},
        {'F', {L"(E)", L"n"}}
    };

    // 通过另外的表输入产生式编号
    const std::unordered_map<std::wstring, int> numbers = {
        {L"ETA", 1},
        {L"A+TA", 2},
        {L"A-TA", 3},
        {L"Aε", 4},
        {L"TFB", 5},
        {L"B*FB", 6},
        {L"B/FB", 7},
        {L"Bε", 8},
        {L"F(E)", 9},
        {L"Fn", 10}
    };

    /**
    FIRST
    E -> n (
    A -> e - +
    T -> ( n
    B -> / * e
    F -> n (
    FOLLOW
    E -> ) $
    A -> $ )
    T -> $ ) - +
    B -> + - ) $
    F -> + - ) $ / *
     */
    Grammar grammar(map, 'E');

    std::wstring input;
    std::wcin >> input;

    input += END;

    grammar.Analyse(input, numbers);
    return 0;
}