RubbishBin/GrammarParser/LlParser.cpp

339 lines
10 KiB
C++
Raw Normal View History

2024-10-30 17:23:52 +08:00
#include <iostream>
#include <vector>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <list>
#define EMPTY L'ε'
#define END L'$'
struct Grammar
{
std::unordered_map<wchar_t, std::vector<std::wstring>> Generators;
std::unordered_set<wchar_t> Nonterminators;
wchar_t Begin;
std::unordered_map<wchar_t, std::unordered_set<wchar_t>> FirstSet;
std::unordered_map<wchar_t, std::unordered_set<wchar_t>> FollowSet;
std::unordered_map<wchar_t, std::unordered_map<wchar_t, std::wstring>> AnalysisTable;
Grammar(const std::unordered_map<wchar_t, std::vector<std::wstring>>&generators, const wchar_t begin)
{
Generators = generators;
Begin = begin;
for (const auto&[key, value]: generators)
{
Nonterminators.emplace(key);
FirstSet.emplace(key, std::unordered_set<wchar_t>());
FollowSet.emplace(key, std::unordered_set<wchar_t>());
AnalysisTable.emplace(key, std::unordered_map<wchar_t, std::wstring>());
}
// 求非终结符的FIRST集合
bool changed = true;
while (changed)
{
changed = false;
for (const auto&[key, value]: generators)
{
for (const auto&expression: value)
{
if (Nonterminators.count(expression.front()) != 0)
{
// 合并其他非终结符的FIRST集合
for (const auto&c: FirstSet[expression.front()])
{
if (c != EMPTY and FirstSet[key].count(c) == 0)
{
FirstSet[key].emplace(c);
changed = true;
}
}
}
else
{
if (FirstSet[key].count(expression.front()) == 0)
{
FirstSet[key].emplace(expression.front());
changed = true;
}
}
}
}
}
// 求非终结符的FOLLOW集合
changed = true;
FollowSet[begin].emplace(END);
while (changed)
{
changed = false;
for (const auto&[key, value]: generators)
{
for (const auto&expression: value)
{
for (auto i = expression.begin(); i != expression.end(); ++i)
{
if (Nonterminators.count(*i) != 0)
{
// 发现非终结符
auto next = i;
++next;
if (next == expression.end())
{
// 非终结符在表达式末尾
for (const auto&c: FollowSet[key])
{
if (FollowSet[*i].count(c) == 0)
{
FollowSet[*i].emplace(c);
changed = true;
}
}
continue;
}
std::wstring newExpression;
while (next != expression.end())
{
newExpression += *next;
++next;
}
const auto&firstSet = GetFirstSet(newExpression);
for (const auto&c: firstSet)
{
if (c == EMPTY)
{
for (const auto&j: FollowSet[key])
{
if (FollowSet[*i].count(j) == 0)
{
FollowSet[*i].emplace(j);
changed = true;
}
}
}
else
{
if (FollowSet[*i].count(c) == 0)
{
FollowSet[*i].emplace(c);
changed = true;
}
}
}
}
}
}
}
}
// 生成预测分析表
for (const auto&[key, array]: Generators)
{
for (const auto&expression: array)
{
const auto&firstSet = GetFirstSet(expression);
for (const auto&c: firstSet)
{
if (c == EMPTY)
{
for (const auto&i: FollowSet.at(key))
{
if (!AnalysisTable[key].emplace(i, expression).second)
{
std::cout << "Error, not LL(1) grammar!" << std::endl;
}
}
}
else
{
if (!AnalysisTable[key].emplace(c, expression).second)
{
std::cout << "Error, not LL(1) grammar!" << std::endl;
}
}
}
}
}
}
std::unordered_set<wchar_t> GetFirstSet(const std::wstring&expression) const
{
std::unordered_set<wchar_t> set;
if (Nonterminators.count(expression.front()) != 0)
{
// 起手是非终结符
for (const auto&c: FirstSet.at(expression.front()))
{
// 只有所有的符号都能推出空串 才添加
if (c == EMPTY)
{
bool flag = true;
for (const auto&i: expression)
{
if (Nonterminators.count(i) != 0)
{
flag = FirstSet.at(i).count(EMPTY) != 0;
}
else
{
flag = i == EMPTY;
}
if (!flag)
{
break;
}
}
if (flag)
{
set.emplace(EMPTY);
}
}
else
{
set.emplace(c);
}
}
}
else
{
set.emplace(expression.front());
}
return set;
}
void Analyse(const std::wstring& input, const std::unordered_map<std::wstring, int>& numbers)
{
std::list<wchar_t> states;
auto i = input.begin();
states.emplace_back(END);
states.emplace_back(Begin);
do
{
for (const auto c : states)
{
std::wcout << c;
}
std::wcout << L'\t';
for (auto j = i; j != input.end(); ++j)
{
std::wcout << *j;
}
std::wcout << L'\t';
if (auto top = states.back(); Nonterminators.count(top) != 0)
{
// 栈顶是非终结符
if (AnalysisTable[top].count(*i) == 0)
{
std::wcout << L"error" << std::endl;
break;
}
states.pop_back();
if (AnalysisTable[top][*i].front() != EMPTY)
{
// 如果是空串还是不要加了
for (auto j = AnalysisTable[top][*i].rbegin(); j != AnalysisTable[top][*i].rend(); ++j)
{
states.emplace_back(*j);
}
}
auto expression = top + AnalysisTable[top][*i];
std::wcout << numbers.at(expression);
}
else
{
if (top == *i)
{
if (top == END)
{
std::wcout << L"accept";
}
else
{
std::wcout << L"match";
}
states.pop_back();
++i;
}
else
{
std::wcout << L"error" << std::endl;
break;
}
}
std::wcout << std::endl;
}
while (!states.empty());
}
};
int main()
{
const std::unordered_map<wchar_t, std::vector<std::wstring>> map = {
{'E', {L"TA"}},
{'A', {L"+TA", L"-TA", L"ε"}},
{'T', {L"FB"}},
{'B', {L"ε", L"*FB", L"/FB"}},
{'F', {L"(E)", L"n"}}
};
// 通过另外的表输入产生式编号
const std::unordered_map<std::wstring, int> numbers = {
{L"ETA", 1},
{L"A+TA", 2},
{L"A-TA", 3},
{L"", 4},
{L"TFB", 5},
{L"B*FB", 6},
{L"B/FB", 7},
{L"", 8},
{L"F(E)", 9},
{L"Fn", 10}
};
/**
FIRST
E -> n (
A -> e - +
T -> ( n
B -> / * e
F -> n (
FOLLOW
E -> ) $
A -> $ )
T -> $ ) - +
B -> + - ) $
F -> + - ) $ / *
*/
Grammar grammar(map, 'E');
std::wstring input;
std::wcin >> input;
input += END;
grammar.Analyse(input, numbers);
return 0;
}