add: data-structure-lab & compiler-lab

This commit is contained in:
2024-10-30 17:23:52 +08:00
commit eb8c4fa451
35 changed files with 4266 additions and 0 deletions

View File

@@ -0,0 +1,751 @@
#include <cstdio>
#include <list>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include <string>
/**
 * Line-oriented lexer for a C-like language.
 *
 * The input file is consumed one line at a time into Buffer. Tokens are
 * recognised at the front of the buffer, printed to stdout as
 * "<line> <KIND,text>" and removed. The public counters record how many
 * tokens of each kind (and how many errors) were seen.
 *
 * Differences from the previous implementation (all on inputs that used to
 * abort the scan or invoke undefined behaviour):
 *  - no look-ahead ever dereferences or increments the end iterator;
 *  - a block comment closed by a run of stars followed by '/' is closed;
 *  - keywords and identifiers end at the first non-identifier character
 *    (so a following tab no longer stops the whole scan);
 *  - '\r' is skipped like a blank, and any character no rule accepts is
 *    reported as "<ERROR,c>" instead of silently ending the scan.
 */
class LexicalParser
{
public:
    int LineCount = 0;
    int KeywordCount = 0;
    int IdentifierCount = 0;
    int OperatorCount = 0;
    int DelimiterCount = 0;
    int CharCount = 0;
    int StringCount = 0;
    int NumberCount = 0;
    int ErrorCount = 0;

    /// Wraps an already opened input stream; the stream is not closed here.
    explicit LexicalParser(FILE *file)
    {
        this->File = file;
    }

    /// Lexes the whole stream, printing every token and updating the counters.
    void Loop()
    {
        // True while we are inside a /* ... */ comment that spans lines.
        bool inBlockComment = false;
        while (Readline())
        {
            while (!Buffer.empty())
            {
                if (inBlockComment)
                {
                    if (!SkipBlockCommentTail())
                    {
                        // No terminator on this line: drop it and read on.
                        Buffer.clear();
                        continue;
                    }
                    inBlockComment = false;
                    if (Buffer.empty())
                    {
                        continue;
                    }
                }
                const char head = Buffer.front();
                if (head == '/')
                {
                    const char next = Peek(1);
                    if (next == '/')
                    {
                        // Line comment: the rest of the line is ignored.
                        Buffer.clear();
                        continue;
                    }
                    if (next == '*')
                    {
                        inBlockComment = true;
                        Buffer.pop_front();
                        Buffer.pop_front();
                        continue;
                    }
                }
                if (head == ' ' or head == '\t' or head == '\r')
                {
                    Buffer.pop_front();
                    continue;
                }
                if (!Parse())
                {
                    // Nothing accepts this character (e.g. '@'): report it
                    // as a one-character error and keep scanning.
                    Cursor stop = Buffer.begin();
                    ++stop;
                    Emit("ERROR", ErrorCount, stop);
                }
            }
        }
    }

private:
    using Cursor = std::list<char>::iterator;

    std::list<char> Buffer;
    FILE *File;

    /// Tries every token rule in priority order; false if none matched.
    bool Parse()
    {
        if (ParseCharacter() or ParseString())
        {
            return true;
        }
        if (ParseNumber())
        {
            return true;
        }
        if (ParseOperator() or ParseDelimiter())
        {
            return true;
        }
        if (ParseKeyword())
        {
            return true;
        }
        return ParseIdentifier();
    }

    /**
     * Appends the next input line (without its '\n') to Buffer.
     * Returns false only when end of file is hit before any character was
     * read; LineCount is incremented for every line delivered.
     */
    bool Readline()
    {
        // Whether at least one character of a final, unterminated line was read.
        bool read = false;
        while (true)
        {
            int c = fgetc(File);
            if (c == EOF)
            {
                if (read)
                {
                    LineCount++;
                }
                return read;
            }
            else if (c == '\n')
            {
                LineCount++;
                return true;
            }
            Buffer.emplace_back((char) c);
            read = true;
        }
    }

    static bool IsDigit(char c)
    {
        return c >= '0' and c <= '9';
    }

    static bool IsHexDigit(char c)
    {
        return IsDigit(c) or (c >= 'A' and c <= 'F') or (c >= 'a' and c <= 'f');
    }

    static bool IsIdentifierStart(char c)
    {
        return c == '_' or (c >= 'A' and c <= 'Z') or (c >= 'a' and c <= 'z');
    }

    static bool IsIdentifierPart(char c)
    {
        return IsIdentifierStart(c) or IsDigit(c);
    }

    static bool IsNumberSuffix(char c)
    {
        return c == 'u' or c == 'U' or c == 'l' or c == 'L' or c == 'f' or c == 'F';
    }

    /// Character `offset` places from the front of Buffer, or '\0' past the end.
    char Peek(int offset)
    {
        Cursor it = Buffer.begin();
        while (offset > 0 and it != Buffer.end())
        {
            ++it;
            --offset;
        }
        return it == Buffer.end() ? '\0' : *it;
    }

    /// True at end of line, a blank, or a character that starts an operator or delimiter.
    bool IsTokenBoundary(Cursor pos)
    {
        if (pos == Buffer.end())
        {
            return true;
        }
        return *pos == ' ' or *pos == '\t' or OperatorsMap.count(*pos) != 0
               or DelimitersSet.count(*pos) != 0;
    }

    /// Advances to the next token boundary (used to swallow a malformed token).
    Cursor SkipToBoundary(Cursor pos)
    {
        while (!IsTokenBoundary(pos))
        {
            ++pos;
        }
        return pos;
    }

    /// Advances over a run of decimal digits and dots.
    Cursor SkipDigitsAndDots(Cursor pos)
    {
        while (pos != Buffer.end() and (IsDigit(*pos) or *pos == '.'))
        {
            ++pos;
        }
        return pos;
    }

    /// If Buffer starts with `text`, sets `stop` one past the match and returns true.
    bool MatchPrefix(const std::string &text, Cursor &stop)
    {
        Cursor pos = Buffer.begin();
        for (const char expected: text)
        {
            if (pos == Buffer.end() or *pos != expected)
            {
                return false;
            }
            ++pos;
        }
        stop = pos;
        return true;
    }

    /// Prints "<line> <tag,text>" for [begin, stop), removes it and bumps `counter`.
    void Emit(const char *tag, int &counter, Cursor stop)
    {
        std::string text;
        text.append(Buffer.begin(), stop);
        printf("%d <%s,%s>\n", LineCount, tag, text.c_str());
        Buffer.erase(Buffer.begin(), stop);
        ++counter;
    }

    /**
     * Looks for the end-of-block-comment marker in Buffer. When found,
     * everything up to and including it is removed and true is returned;
     * otherwise Buffer is left untouched and false is returned.
     */
    bool SkipBlockCommentTail()
    {
        for (Cursor it = Buffer.begin(); it != Buffer.end(); ++it)
        {
            if (*it != '*')
            {
                continue;
            }
            Cursor after = it;
            ++after;
            if (after != Buffer.end() and *after == '/')
            {
                ++after;
                Buffer.erase(Buffer.begin(), after);
                return true;
            }
        }
        return false;
    }

    /// Keyword: a table entry that is not followed by another identifier character.
    bool ParseKeyword()
    {
        const auto entry = KeywordsMap.find(Buffer.front());
        if (entry == KeywordsMap.end())
        {
            return false;
        }
        for (const auto &word: entry->second)
        {
            Cursor stop;
            if (!MatchPrefix(word, stop))
            {
                continue;
            }
            if (stop == Buffer.end() or !IsIdentifierPart(*stop))
            {
                Emit("KEYWORD", KeywordCount, stop);
                return true;
            }
        }
        return false;
    }

    /// Identifier: [A-Za-z_][A-Za-z0-9_]*, taken as long as possible.
    bool ParseIdentifier()
    {
        if (!IsIdentifierStart(Buffer.front()))
        {
            return false;
        }
        Cursor pos = Buffer.begin();
        while (pos != Buffer.end() and IsIdentifierPart(*pos))
        {
            ++pos;
        }
        Emit("IDENTIFIER", IdentifierCount, pos);
        return true;
    }

    /// Delimiter: a single character from DelimitersSet.
    bool ParseDelimiter()
    {
        if (DelimitersSet.count(Buffer.front()) == 0)
        {
            return false;
        }
        Cursor stop = Buffer.begin();
        ++stop;
        Emit("DELIMITER", DelimiterCount, stop);
        return true;
    }

    /// Operator: first table entry (listed longest first) that prefixes Buffer.
    bool ParseOperator()
    {
        const auto entry = OperatorsMap.find(Buffer.front());
        if (entry == OperatorsMap.end())
        {
            return false;
        }
        for (const auto &symbol: entry->second)
        {
            Cursor stop;
            if (MatchPrefix(symbol, stop))
            {
                // What follows an operator does not need to be checked.
                Emit("OPERATOR", OperatorCount, stop);
                return true;
            }
        }
        return false;
    }

    /**
     * Shared scanner for character and string literals.
     * Accepts an optional L/u/U prefix (and u8 when allowU8 is set) directly
     * before the opening quote. A literal that is not closed on the current
     * line consumes the rest of the line and is reported as an error.
     * Returns false when Buffer does not start with such a literal.
     */
    bool ParseQuoted(char quote, const char *tag, int &counter, bool allowU8)
    {
        const char head = Buffer.front();
        int prefixLength;
        if (head == quote)
        {
            prefixLength = 0;
        }
        else if ((head == 'L' or head == 'u' or head == 'U') and Peek(1) == quote)
        {
            prefixLength = 1;
        }
        else if (allowU8 and head == 'u' and Peek(1) == '8' and Peek(2) == quote)
        {
            prefixLength = 2;
        }
        else
        {
            return false;
        }
        // Step over the prefix and the opening quote.
        Cursor pos = Buffer.begin();
        for (int skipped = 0; skipped <= prefixLength; ++skipped)
        {
            ++pos;
        }
        while (pos != Buffer.end() and *pos != quote)
        {
            if (*pos == '\\')
            {
                // Skip the escaped character, unless the line ends right here.
                ++pos;
                if (pos == Buffer.end())
                {
                    break;
                }
            }
            ++pos;
        }
        if (pos == Buffer.end())
        {
            // Literal is not closed on this line.
            Emit("ERROR", ErrorCount, pos);
            return true;
        }
        ++pos;
        Emit(tag, counter, pos);
        return true;
    }

    bool ParseCharacter()
    {
        return ParseQuoted('\'', "CHARCON", CharCount, false);
    }

    bool ParseString()
    {
        return ParseQuoted('"', "STRING", StringCount, true);
    }

    /**
     * Numeric constant: decimal digits and dots, optional exponent with
     * optional sign, optional u/l/f suffix letters; "0x"/"0X" is handed to
     * ParseHexadecimalNumber. A number that is malformed or not followed by
     * a token boundary is reported as an error up to the next boundary.
     * Returns false only when Buffer does not start a number (including a
     * '.' that is not followed by a digit, i.e. the member-access operator).
     * NOTE(review): several dots ("1.2.3") are still accepted as one NUMBER,
     * exactly as before.
     */
    bool ParseNumber()
    {
        const char head = Buffer.front();
        if (!IsDigit(head) and head != '.')
        {
            return false;
        }
        const char second = Peek(1);
        if (head == '0' and (second == 'x' or second == 'X'))
        {
            ParseHexadecimalNumber();
            return true;
        }
        // Tell a leading decimal point from the member-access operator.
        if (head == '.' and !IsDigit(second))
        {
            return false;
        }
        Cursor pos = Buffer.begin();
        if (head == '.')
        {
            ++pos;
        }
        pos = SkipDigitsAndDots(pos);
        if (pos != Buffer.end() and (*pos == 'e' or *pos == 'E'))
        {
            ++pos;
            if (pos != Buffer.end() and (*pos == '+' or *pos == '-'))
            {
                ++pos;
            }
            if (pos == Buffer.end() or !IsDigit(*pos))
            {
                // Exponent without digits.
                Emit("ERROR", ErrorCount, SkipToBoundary(pos));
                return true;
            }
            pos = SkipDigitsAndDots(pos);
        }
        while (pos != Buffer.end() and IsNumberSuffix(*pos))
        {
            ++pos;
        }
        if (!IsTokenBoundary(pos))
        {
            Emit("ERROR", ErrorCount, SkipToBoundary(pos));
            return true;
        }
        Emit("NUMBER", NumberCount, pos);
        return true;
    }

    /// Hexadecimal constant; the caller has verified that Buffer starts with 0x or 0X.
    void ParseHexadecimalNumber()
    {
        Cursor pos = Buffer.begin();
        ++pos;
        ++pos;
        while (pos != Buffer.end() and IsHexDigit(*pos))
        {
            ++pos;
        }
        if (IsTokenBoundary(pos))
        {
            Emit("NUMBER", NumberCount, pos);
        }
        else
        {
            Emit("ERROR", ErrorCount, SkipToBoundary(pos));
        }
    }

    // Keywords grouped by their first character.
    const std::unordered_map<char, std::vector<std::string>> KeywordsMap = {
        {'a', {"auto"}},
        {'b', {"break"}},
        {'c', {"case", "char", "const", "continue"}},
        {'d', {"double", "default", "do"}},
        {'e', {"else", "extern", "enum"}},
        {'f', {"float", "for"}},
        {'g', {"goto"}},
        {'i', {"if", "int"}},
        {'l', {"long"}},
        {'s', {"struct", "static", "switch", "short", "signed", "sizeof"}},
        {'r', {"register", "return"}},
        {'t', {"typedef",}},
        {'u', {"union", "unsigned"}},
        {'v', {"void", "volatile"}},
        {'w', {"while"}}
    };
    // Operators grouped by first character; longer spellings come first so
    // that the first match is the longest one.
    const std::unordered_map<char, std::vector<std::string>> OperatorsMap = {
        {'+', {"++", "+=", "+"}},
        {'-', {"--", "-=", "->", "-"}},
        {'*', {"*=", "*"}},
        {'/', {"/=", "/"}},
        {'%', {"%=", "%"}},
        {'=', {"==", "="}},
        {'!', {"!=", "!"}},
        {'>', {">>=", ">>", ">=", ">"}},
        {'<', {"<<=", "<<", "<=", "<"}},
        {'&', {"&&", "&=", "&"}},
        {'|', {"||", "|=", "|"}},
        {'^', {"^=", "^"}},
        {'.', {"."}},
        {'~', {"~"}}
    };
    const std::unordered_set<char> DelimitersSet = {
        ';', ',', ':', '?', '(', ')', '[', ']', '{', '}'
    };
};
/**
 * Entry point. Expects exactly one argument: the path of the source file to
 * lex. Prints every token, then the line count, the per-kind token counts
 * and the error count.
 *
 * Returns 0 on success and 1 when the argument is missing or the file
 * cannot be opened (previously argv[1] was used before argc was checked and
 * the lexer was run on a null FILE* after the failure message).
 */
int main(int argc, char *argv[])
{
    if (argc != 2)
    {
        printf("Failed to open source File.\n");
        return 1;
    }
    FILE *source_file = fopen(argv[1], "r");
    if (source_file == nullptr)
    {
        printf("Failed to open source File.\n");
        return 1;
    }
    LexicalParser parser(source_file);
    parser.Loop();
    printf("%d\n", parser.LineCount);
    printf("%d %d %d %d %d %d %d\n", parser.KeywordCount,
           parser.IdentifierCount,
           parser.OperatorCount,
           parser.DelimiterCount,
           parser.CharCount,
           parser.StringCount,
           parser.NumberCount);
    // No trailing newline after the error count, as before.
    printf("%d", parser.ErrorCount);
    fclose(source_file);
    return 0;
}

27
LexicalParser/pl_start.l Normal file
View File

@@ -0,0 +1,27 @@
/* Simple lexical analyzer */
/* Function: recognize every string that ends with the lowercase letters "ab" (strings contain only upper- and lower-case letters) and print 'Hit!' for it */
/* Instructions: add your code between the begin and end markers below; mind the formatting */
/* Hint: you only need to make sure valid input (strings ending in ab) produces a result; invalid input is covered by the . rule */
%{
#include <stdio.h>
%}
%%
/* begin */
[a-zA-Z]*ab {printf("%s: Hit!\n", yytext);}
/* end */
\n {}
. {}
%%
/* Returning 1 tells the scanner there is no further input after the current file. */
int yywrap() { return 1; }
/* Scans the file named by argv[1] when an argument is given; otherwise yyin is not assigned here. */
int main(int argc, char **argv)
{
if (argc > 1) {
if (!(yyin = fopen(argv[1], "r"))) {
perror(argv[1]);
return 1;
}
}
/* Keep calling the scanner until it returns 0. */
while (yylex());
return 0;
}

117
LexicalParser/pl_test.l Normal file
View File

@@ -0,0 +1,117 @@
/* PL lexical analyzer */
/* Function: recognize every token supported by PL and print its token class */
/* Instructions: add code between the begin and end markers below. Identifier and integer-constant recognition are already implemented; you need to complete the rest. Good luck! */
/* Hint: rules are matched in order, from top to bottom, so arrange them sensibly */
%{
#include <stdio.h>
%}
/* begin */
/* Integer constant: optional minus sign and a digit run not starting with 0, or a lone 0 */
INTCON [\-]?[1-9][0-9]*|0
/* Identifier: a letter followed by letters or digits */
IDENT [A-Za-z][A-Za-z0-9]*
/* Character constant: anything except a single quote, between single quotes */
CHARCON \'[^\']*\'
/* Reserved words */
OFSYM of
ARRAYSYM array
PROGRAMSYM program
MODSYM mod
ANDSYM and
ORSYM or
NOTSYM not
BEGINSYM begin
ENDSYM end
IFSYM if
THENSYM then
ELSESYM else
WHILESYM while
DOSYM do
CALLSYM call
CONSTSYM const
TYPESYM type
VARSYM var
PROCSYM procedure
/* Two-character operators */
NEQ \<\>
LEQ \<\=
GEQ \>\=
BECOME \:\=
/* Single-character operators and punctuation */
PLUS \+
MINUS \-
TIMES \*
DIVSYM \/
EQL \=
LSS \<
GTR \>
LBRACK \[
RBRACK \]
LPAREN \(
RPAREN \)
COMMA \,
SEMICOLON \;
PERIOD \.
COLON \:
/* Single characters that are reported as ERROR */
ERROR [\~\!\@\#\$\%\^\&\_\\]
/* end */
%%
/* begin */
{OFSYM} {printf("%s: OFSYM\n", yytext);}
{ARRAYSYM} {printf("%s: ARRAYSYM\n", yytext);}
{PROGRAMSYM} {printf("%s: PROGRAMSYM\n", yytext);}
{MODSYM} {printf("%s: MODSYM\n", yytext);}
{ANDSYM} {printf("%s: ANDSYM\n", yytext);}
{ORSYM} {printf("%s: ORSYM\n", yytext);}
{NOTSYM} {printf("%s: NOTSYM\n", yytext);}
{BEGINSYM} {printf("%s: BEGINSYM\n", yytext);}
{ENDSYM} {printf("%s: ENDSYM\n", yytext);}
{IFSYM} {printf("%s: IFSYM\n", yytext);}
{THENSYM} {printf("%s: THENSYM\n", yytext);}
{ELSESYM} {printf("%s: ELSESYM\n", yytext);}
{WHILESYM} {printf("%s: WHILESYM\n", yytext);}
{DOSYM} {printf("%s: DOSYM\n", yytext);}
{CALLSYM} {printf("%s: CALLSYM\n", yytext);}
{CONSTSYM} {printf("%s: CONSTSYM\n", yytext);}
{TYPESYM} {printf("%s: TYPESYM\n", yytext);}
{VARSYM} {printf("%s: VARSYM\n", yytext);}
{PROCSYM} {printf("%s: PROCSYM\n", yytext);}
{NEQ} {printf("%s: NEQ\n", yytext);}
{LEQ} {printf("%s: LEQ\n", yytext);}
{GEQ} {printf("%s: GEQ\n", yytext);}
{BECOME} {printf("%s: BECOME\n", yytext);}
{PLUS} {printf("%s: PLUS\n", yytext);}
{MINUS} {printf("%s: MINUS\n", yytext);}
{TIMES} {printf("%s: TIMES\n", yytext);}
{DIVSYM} {printf("%s: DIVSYM\n", yytext);}
{EQL} {printf("%s: EQL\n", yytext);}
{LSS} {printf("%s: LSS\n", yytext);}
{GTR} {printf("%s: GTR\n", yytext);}
{LBRACK} {printf("%s: LBRACK\n", yytext);}
{RBRACK} {printf("%s: RBRACK\n", yytext);}
{LPAREN} {printf("%s: LPAREN\n", yytext);}
{RPAREN} {printf("%s: RPAREN\n", yytext);}
{COMMA} {printf("%s: COMMA\n", yytext);}
{SEMICOLON} {printf("%s: SEMICOLON\n", yytext);}
{PERIOD} {printf("%s: PERIOD\n", yytext);}
{COLON} {printf("%s: COLON\n", yytext);}
{CHARCON} {printf("%s: CHARCON\n", yytext);}
{INTCON} {printf("%s: INTCON\n", yytext);}
{IDENT} {printf("%s: IDENT\n", yytext);}
{ERROR} {printf("%s: ERROR\n", yytext);}
/* end */
\n {}
. {}
%%
/* Returning 1 tells the scanner there is no further input after the current file. */
int yywrap() { return 1; }
/* Scans the file named by argv[1] when an argument is given; otherwise yyin is not assigned here. */
int main(int argc, char **argv)
{
if (argc > 1) {
if (!(yyin = fopen(argv[1], "r"))) {
perror(argv[1]);
return 1;
}
}
/* Keep calling the scanner until it returns 0. */
while (yylex());
return 0;
}