RubbishBin/GrammarParser/LrParser.cpp

444 lines
12 KiB
C++

// ReSharper disable CppTooWideScopeInitStatement
// ReSharper disable CppUseStructuredBinding
#include <iostream>
#include <sstream>
#include <memory>
#include <unordered_map>
#include <string>
#include <unordered_set>
#include <algorithm>
#include <vector>
#include <list>
#define END L'$'
struct Expression
{
char Left;
char LookAhead;
std::string Right;
int Pos;
Expression(const char left, const std::string& right, const char lookAhead)
{
Left = left;
LookAhead = lookAhead;
Right = std::string(right);
Pos = 0;
}
std::string GetHashCode() const
{
std::stringstream hash;
hash << Left << Right << Pos << LookAhead;
return hash.str();
}
};
struct ExpressionHash
{
std::size_t operator() (const Expression & expression) const
{
return std::hash<std::string>()(expression.GetHashCode());
}
};
struct ExpressionEqual
{
bool operator() (const Expression& a, const Expression& b) const
{
return a.GetHashCode() == b.GetHashCode();
}
};
struct State
{
std::unordered_set<Expression, ExpressionHash, ExpressionEqual> Expressions;
std::unordered_map<char, std::shared_ptr<State>> Transformers;
explicit State(const std::unordered_set<Expression, ExpressionHash, ExpressionEqual>& expressions)
{
Expressions = expressions;
}
std::string GetHashCode() const
{
std::unordered_map<std::string, std::vector<char>> map;
for (const auto &e : Expressions)
{
std::stringstream stream;
stream << e.Left;
for (size_t i =0 ; i < e.Pos; ++i)
{
stream << e.Right[i];
}
stream << '^';
for (size_t i = e.Pos; i < e.Right.size(); ++i)
{
stream << e.Right[i];
}
const auto expression = stream.str();
if (map.count(expression) == 0)
{
map[expression] = std::vector<char>();
map[expression].emplace_back(e.LookAhead);
}
else
{
map[expression].emplace_back(e.LookAhead);
}
}
std::vector<std::string> list;
for (auto& pair: map)
{
std::string hash(pair.first);
std::sort(pair.second.begin(), pair.second.end());
for (const auto c : pair.second)
{
hash += c;
}
list.emplace_back(hash);
}
std::sort(list.begin(), list.end());
std::string hash;
for (const auto& s : list)
{
hash += s;
}
return hash;
}
};
struct StateHash
{
std::size_t operator() (const std::shared_ptr<State>& s) const
{
return std::hash<std::string>()(s->GetHashCode());
}
};
struct StateEqual
{
bool operator() (const std::shared_ptr<State>& a, const std::shared_ptr<State>& b) const
{
return a->GetHashCode() == b->GetHashCode();
}
};
struct Grammar
{
std::unordered_map<char, std::vector<std::string>> Generators;
char Begin;
std::shared_ptr<State> BeginState;
std::unordered_set<char> Nonterminators;
std::unordered_map<char, std::unordered_set<char>> FirstSet;
std::unordered_set<std::shared_ptr<State>, StateHash, StateEqual> DFA;
Grammar(const std::unordered_map<char, std::vector<std::string>>& map, const char begin)
{
Generators = map;
Begin = begin;
for (const auto& pair : Generators)
{
Nonterminators.emplace(pair.first);
}
// 构造FirstSet
bool changed = true;
while (changed)
{
changed = false;
for (const auto& pair: Generators)
{
for (const auto&expression: pair.second)
{
if (Nonterminators.count(expression.front()) != 0)
{
// 合并其他非终结符的FIRST集合
for (const auto&c: FirstSet[expression.front()])
{
if (FirstSet[pair.first].count(c) == 0)
{
FirstSet[pair.first].emplace(c);
changed = true;
}
}
}
else
{
if (FirstSet[pair.first].count(expression.front()) == 0)
{
FirstSet[pair.first].emplace(expression.front());
changed = true;
}
}
}
}
}
}
~Grammar()
{
DFA.clear();
}
/**
* \brief 计算句子的First集合
* \param expression 需要计算的句子
* \return 句子的First集合
*/
std::unordered_set<char> GetFirstSet(const std::string& expression) const
{
std::unordered_set<char> result;
if (Nonterminators.count(expression.front()) != 0)
{
// 起手是非终结符
for (const char c : FirstSet.at(expression.front()))
{
result.emplace(c);
}
}
else
{
result.emplace(expression.front());
}
return result;
}
std::unordered_set<Expression, ExpressionHash, ExpressionEqual> ConstructClosure(const Expression& expression) const
{
std::unordered_set<Expression, ExpressionHash, ExpressionEqual> result;
result.emplace(expression);
bool changed = true;
while (changed)
{
changed = false;
for (const auto& e : result)
{
const char next = e.Right[e.Pos];
if (Nonterminators.count(next) == 0)
{
continue;
}
std::string ahead;
for (size_t i = e.Pos + 1; i < e.Right.size(); ++i)
{
ahead += e.Right[i];
}
ahead += e.LookAhead;
std::unordered_set<char> lookAheadSet = GetFirstSet(ahead);
for (const auto& i : Generators.at(next))
{
for (const char lookAhead : lookAheadSet)
{
Expression newExpression(next, i, lookAhead);
if (result.count(newExpression) == 0)
{
result.emplace(newExpression);
changed = true;
}
}
}
}
}
return result;
}
void ConstructDFA()
{
const Expression begin = Expression(Begin, Generators.at(Begin).front(), END);
int id = 0;
BeginState = std::make_shared<State>(ConstructClosure(begin));
DFA.emplace(BeginState);
++id;
bool added = true;
while (added)
{
added = false;
for (const auto& state : DFA)
{
// 表示使用key 进行移进可以生成的新LR(1)句型
std::unordered_map<char, std::vector<Expression>> nextExpressions;
for (const auto& e : state->Expressions)
{
Expression nextExpression = Expression(e);
if (nextExpression.Pos >= nextExpression.Right.size())
{
// 移进符号已经到达句型的末尾
continue;
}
nextExpression.Pos++;
if (nextExpressions.count(e.Right[e.Pos]) == 0)
{
std::vector<Expression> list;
list.emplace_back(nextExpression);
nextExpressions.emplace(e.Right[e.Pos], list);
}
else
{
nextExpressions.at(e.Right[e.Pos]).emplace_back(nextExpression);
}
}
for (const auto& pair : nextExpressions)
{
// 针对每个构建项目集闭包
std::unordered_set<Expression, ExpressionHash, ExpressionEqual> closure;
for (const auto& i : pair.second)
{
for (const auto& e : ConstructClosure(i))
{
closure.emplace(e);
}
}
auto nextState = std::make_shared<State>(closure);
auto iter = DFA.find(nextState);
if (iter == DFA.end())
{
// 不存在这个项目集闭包
DFA.emplace(nextState);
state->Transformers.emplace(pair.first, nextState);
++id;
added = true;
}
else
{
// 存在这个项目集闭包
state->Transformers.emplace(pair.first, *iter);
}
}
}
}
}
void Analyse(const std::unordered_map<std::string, int>& numbers, const std::string& input) const
{
std::list<std::shared_ptr<State>> stateStack;
stateStack.emplace_back(BeginState);
std::string buffer(input);
buffer += END;
auto iter = buffer.begin();
while (true)
{
const auto& top = stateStack.back();
// 尝试进行移进
bool acceptFlag = false;
bool reduceFlag = false;
for (const auto& expression : top->Expressions)
{
if (expression.Pos == expression.Right.size() and expression.LookAhead == *iter)
{
if (expression.Left == Begin)
{
acceptFlag = true;
std::cout << "accept" << std::endl;
}
else
{
reduceFlag = true;
std::cout << numbers.at(expression.Left + expression.Right) << std::endl;
for (size_t i = 0; i < expression.Right.size(); ++i)
{
stateStack.pop_back();
}
stateStack.emplace_back(stateStack.back()->Transformers.at(expression.Left));
}
}
}
if (acceptFlag)
{
// 接受
break;
}
if (reduceFlag)
{
// reduce
continue;
}
if (top->Transformers.count(*iter) != 0)
{
stateStack.emplace_back(top->Transformers.at(*iter));
++iter;
std::cout << "shift" << std::endl;
continue;
}
std::cout << "error" << std::endl;
break;
}
}
};
int main()
{
const std::unordered_map<char, std::vector<std::string>> map = {
{'S', {"E"}},
{'E', {"E+T", "E-T", "T"}},
{'T', {"T*F", "T/F", "F"}},
{'F', {"(E)", "n"}}
};
const std::unordered_map<std::string, int> numbers = {
{"SE", 0},
{"EE+T", 1},
{"EE-T", 2},
{"ET", 3},
{"TT*F", 4},
{"TT/F", 5},
{"TF", 6},
{"F(E)", 7},
{"Fn", 8}
};
Grammar grammar(map, 'S');
grammar.ConstructDFA();
std::string input;
std::cin >> input;
grammar.Analyse(numbers, input);
return 0;
}