add: data-structure-lab & compiler-lab

2024-10-30 17:23:52 +08:00
commit eb8c4fa451
35 changed files with 4266 additions and 0 deletions
--- a/LexicalParser/LexicalParser.cpp
+++ b/LexicalParser/LexicalParser.cpp
@@ -0,0 +1,751 @@
+#include <cstdio>
+#include <list>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+#include <string>
+
+/**
+ * Line-oriented lexical analyzer for a C-like language.
+ *
+ * The input stream is read one line at a time. For every token found, one record of the
+ * form "<line> <KIND,text>" is printed to stdout and the matching public counter is
+ * incremented. Comments ("//" and "/ * ... * /" spanning any number of lines) and blanks
+ * produce no record.
+ *
+ * Error handling: malformed numbers, unterminated character/string literals and characters
+ * that start no known token are printed as ERROR records and counted in ErrorCount;
+ * scanning then continues with the rest of the input.
+ */
+class LexicalParser
+{
+public:
+    // Lines read so far. While a line is being scanned this is also the number printed
+    // in front of each of its records (first line is 1).
+    int LineCount = 0;
+    // Totals per record kind, updated as the records are printed.
+    int KeywordCount = 0;
+    int IdentifierCount = 0;
+    int OperatorCount = 0;
+    int DelimiterCount = 0;
+    int CharCount = 0;
+    int StringCount = 0;
+    int NumberCount = 0;
+    int ErrorCount = 0;
+
+    /**
+     * @param file Stream to tokenize. It is only read here; opening and closing it stays
+     *             with the caller. A null stream is treated as empty input.
+     */
+    explicit LexicalParser(FILE *file) : File(file)
+    {
+    }
+
+    /**
+     * Tokenizes the whole stream, printing one record per token and updating the counters.
+     */
+    void Loop()
+    {
+        // True while the scan position is inside a block comment opened on an earlier
+        // (or the current) line and not yet closed.
+        bool inBlockComment = false;
+
+        while (Readline())
+        {
+            while (not Buffer.empty())
+            {
+                if (inBlockComment)
+                {
+                    if (DropThroughCommentEnd())
+                    {
+                        inBlockComment = false;
+                    }
+                    else
+                    {
+                        // No terminator on this line: the whole remainder is comment text.
+                        Buffer.clear();
+                    }
+                    continue;
+                }
+
+                const char head = Buffer.front();
+
+                if (head == '/')
+                {
+                    const char next = Peek(1);
+                    if (next == '/')
+                    {
+                        // Line comment: nothing else on this line is a token.
+                        Buffer.clear();
+                        continue;
+                    }
+                    if (next == '*')
+                    {
+                        // Drop the opener itself so that "/*/" is not taken for a
+                        // complete comment.
+                        inBlockComment = true;
+                        Buffer.pop_front();
+                        Buffer.pop_front();
+                        continue;
+                    }
+                }
+
+                if (IsBlank(head))
+                {
+                    Buffer.pop_front();
+                    continue;
+                }
+
+                if (not Parse())
+                {
+                    // No token starts with this character (for example '@' or '#').
+                    // Report it on its own and keep going instead of abandoning the file.
+                    Buffer.pop_front();
+                    Emit("ERROR", ErrorCount, std::string(1, head));
+                }
+            }
+        }
+    }
+
+
+private:
+    // Unconsumed remainder of the current line (without the trailing '\n').
+    std::list<char> Buffer;
+    FILE *File;
+
+    // ---- character classes -------------------------------------------------------------
+
+    static bool IsBlank(char c)
+    {
+        // '\r' is included so that files with CRLF line endings scan cleanly.
+        return c == ' ' or c == '\t' or c == '\r' or c == '\v' or c == '\f';
+    }
+
+    static bool IsDigit(char c)
+    {
+        return c >= '0' and c <= '9';
+    }
+
+    static bool IsHexDigit(char c)
+    {
+        return IsDigit(c) or (c >= 'A' and c <= 'F') or (c >= 'a' and c <= 'f');
+    }
+
+    static bool IsIdentifierStart(char c)
+    {
+        return c == '_' or (c >= 'A' and c <= 'Z') or (c >= 'a' and c <= 'z');
+    }
+
+    static bool IsIdentifierChar(char c)
+    {
+        return IsIdentifierStart(c) or IsDigit(c);
+    }
+
+    static bool IsNumberSuffix(char c)
+    {
+        switch (c)
+        {
+            case 'u':
+            case 'U':
+            case 'l':
+            case 'L':
+            case 'f':
+            case 'F':
+                return true;
+            default:
+                return false;
+        }
+    }
+
+    // ---- buffer helpers ----------------------------------------------------------------
+
+    // Character `offset` positions into Buffer, or '\0' when the line is shorter than that.
+    // Callers only compare the result against specific non-NUL characters.
+    char Peek(size_t offset) const
+    {
+        auto it = Buffer.begin();
+        for (; offset > 0 and it != Buffer.end(); --offset)
+        {
+            ++it;
+        }
+        return it == Buffer.end() ? '\0' : *it;
+    }
+
+    // Removes [Buffer.begin(), stop) from Buffer and returns it as text.
+    std::string Take(std::list<char>::iterator stop)
+    {
+        std::string text(Buffer.begin(), stop);
+        Buffer.erase(Buffer.begin(), stop);
+        return text;
+    }
+
+    // Prints one record for `text` and bumps the counter that belongs to its kind.
+    void Emit(const char *kind, int &counter, const std::string &text)
+    {
+        ++counter;
+        printf("%d <%s,%s>\n", LineCount, kind, text.c_str());
+    }
+
+    // True when `pos` is a place where a number may legally stop: end of line, a blank,
+    // the first character of an operator, or a delimiter.
+    bool IsTokenEnd(std::list<char>::iterator pos)
+    {
+        return pos == Buffer.end() or IsBlank(*pos) or OperatorsMap.count(*pos) != 0
+               or DelimitersSet.count(*pos) != 0;
+    }
+
+    // Reports everything from the start of Buffer up to the next token end (searching
+    // from `pos`) as a single ERROR record.
+    void EmitMalformed(std::list<char>::iterator pos)
+    {
+        while (not IsTokenEnd(pos))
+        {
+            ++pos;
+        }
+        Emit("ERROR", ErrorCount, Take(pos));
+    }
+
+    // True when Buffer begins with `word`; `stop` then points just past the match.
+    bool StartsWith(const std::string &word, std::list<char>::iterator &stop)
+    {
+        auto it = Buffer.begin();
+        for (const char expected: word)
+        {
+            if (it == Buffer.end() or *it != expected)
+            {
+                return false;
+            }
+            ++it;
+        }
+        stop = it;
+        return true;
+    }
+
+    // Removes everything up to and including the first "*/" in Buffer.
+    // Returns false, leaving Buffer untouched, when the line holds no terminator.
+    bool DropThroughCommentEnd()
+    {
+        // Tracking "previous character was '*'" handles runs such as "**/" correctly.
+        bool afterStar = false;
+        for (auto it = Buffer.begin(); it != Buffer.end(); ++it)
+        {
+            if (afterStar and *it == '/')
+            {
+                Buffer.erase(Buffer.begin(), ++it);
+                return true;
+            }
+            afterStar = (*it == '*');
+        }
+        return false;
+    }
+
+    // Advances `pos` over any run of decimal digits and dots.
+    // NOTE: dots are not counted, so text such as "1.2.3" is accepted as one number.
+    std::list<char>::iterator SkipDigitsAndDots(std::list<char>::iterator pos)
+    {
+        while (pos != Buffer.end() and (IsDigit(*pos) or *pos == '.'))
+        {
+            ++pos;
+        }
+        return pos;
+    }
+
+    // ---- token recognizers -------------------------------------------------------------
+    // Each recognizer requires a non-empty Buffer. It returns true after consuming text
+    // and printing a record (possibly an ERROR record), false when Buffer does not start
+    // with that kind of token.
+
+    // Tries the recognizers in priority order. Literals come before identifiers so that
+    // prefixes such as L'x' or u8"..." are not split; numbers come before operators so
+    // that ".5" is a number while a lone '.' is an operator.
+    bool Parse()
+    {
+        return ParseCharacter() or ParseString() or ParseNumber() or ParseOperator()
+               or ParseDelimiter() or ParseKeyword() or ParseIdentifier();
+    }
+
+    // Reads the next line into Buffer (without the '\n'). Returns false once the stream
+    // has nothing more to give. A final line without '\n' still counts as a line.
+    bool Readline()
+    {
+        if (File == nullptr)
+        {
+            return false;
+        }
+
+        // Whether any character was stored before EOF was reached.
+        bool sawCharacter = false;
+        for (;;)
+        {
+            const int c = fgetc(File);
+
+            if (c == EOF)
+            {
+                if (sawCharacter)
+                {
+                    ++LineCount;
+                }
+                return sawCharacter;
+            }
+            if (c == '\n')
+            {
+                ++LineCount;
+                return true;
+            }
+
+            Buffer.push_back(static_cast<char>(c));
+            sawCharacter = true;
+        }
+    }
+
+    bool ParseKeyword()
+    {
+        const auto bucket = KeywordsMap.find(Buffer.front());
+        if (bucket == KeywordsMap.end())
+        {
+            return false;
+        }
+
+        for (const std::string &word: bucket->second)
+        {
+            std::list<char>::iterator stop;
+            // The match only counts when the word is not merely the beginning of a longer
+            // identifier ("int" inside "integer", "do" inside "double").
+            if (StartsWith(word, stop) and (stop == Buffer.end() or not IsIdentifierChar(*stop)))
+            {
+                Emit("KEYWORD", KeywordCount, Take(stop));
+                return true;
+            }
+        }
+
+        return false;
+    }
+
+    bool ParseIdentifier()
+    {
+        if (not IsIdentifierStart(Buffer.front()))
+        {
+            return false;
+        }
+
+        // Longest run of identifier characters; whatever follows is scanned as the
+        // next token.
+        auto stop = Buffer.begin();
+        while (stop != Buffer.end() and IsIdentifierChar(*stop))
+        {
+            ++stop;
+        }
+
+        Emit("IDENTIFIER", IdentifierCount, Take(stop));
+        return true;
+    }
+
+    bool ParseDelimiter()
+    {
+        const char head = Buffer.front();
+        if (DelimitersSet.count(head) == 0)
+        {
+            return false;
+        }
+
+        Buffer.pop_front();
+        Emit("DELIMITER", DelimiterCount, std::string(1, head));
+        return true;
+    }
+
+    bool ParseOperator()
+    {
+        const auto bucket = OperatorsMap.find(Buffer.front());
+        if (bucket == OperatorsMap.end())
+        {
+            return false;
+        }
+
+        // Candidates are listed longest first, so the first hit is the longest operator.
+        // What follows an operator does not need to be checked.
+        for (const std::string &symbol: bucket->second)
+        {
+            std::list<char>::iterator stop;
+            if (StartsWith(symbol, stop))
+            {
+                Emit("OPERATOR", OperatorCount, Take(stop));
+                return true;
+            }
+        }
+
+        return false;
+    }
+
+    // Shared scanner for character and string literals.
+    //   quote    - the delimiter, '\'' or '"'
+    //   allowU8  - whether the two-character prefix "u8" is recognized (strings only)
+    //   kind     - record kind printed for a well-formed literal
+    //   counter  - counter bumped for a well-formed literal
+    // A literal that is not closed on its own line turns the whole rest of the line into
+    // one ERROR record. The contents of the literal are not validated.
+    bool ParseQuoted(char quote, bool allowU8, const char *kind, int &counter)
+    {
+        // Optional encoding prefix: L, u or U directly before the quote, or u8 for strings.
+        size_t prefixLength = 0;
+        const char head = Buffer.front();
+        if (head == 'L' or head == 'u' or head == 'U')
+        {
+            if (Peek(1) == quote)
+            {
+                prefixLength = 1;
+            }
+            else if (allowU8 and head == 'u' and Peek(1) == '8' and Peek(2) == quote)
+            {
+                prefixLength = 2;
+            }
+        }
+
+        auto pos = Buffer.begin();
+        for (size_t i = 0; i < prefixLength; ++i)
+        {
+            ++pos;
+        }
+
+        if (*pos != quote)
+        {
+            return false;
+        }
+        ++pos;
+
+        bool closed = false;
+        while (pos != Buffer.end())
+        {
+            if (*pos == quote)
+            {
+                ++pos;
+                closed = true;
+                break;
+            }
+
+            if (*pos == '\\')
+            {
+                // Skip the escaped character as well; a backslash that ends the line
+                // leaves the literal unterminated.
+                ++pos;
+                if (pos == Buffer.end())
+                {
+                    break;
+                }
+            }
+
+            ++pos;
+        }
+
+        if (not closed)
+        {
+            Emit("ERROR", ErrorCount, Take(Buffer.end()));
+            return true;
+        }
+
+        Emit(kind, counter, Take(pos));
+        return true;
+    }
+
+    bool ParseCharacter()
+    {
+        return ParseQuoted('\'', false, "CHARCON", CharCount);
+    }
+
+    bool ParseString()
+    {
+        return ParseQuoted('"', true, "STRING", StringCount);
+    }
+
+    // Decimal and floating-point constants; hexadecimal ones are handed to
+    // ParseHexadecimalNumber. Accepted shape: digits and dots, an optional exponent
+    // (e/E, optional sign, at least one digit), more digits and dots, then any run of
+    // the suffix letters u, U, l, L, f, F. Anything else glued to the number makes the
+    // whole run up to the next token end an ERROR record.
+    bool ParseNumber()
+    {
+        const char head = Buffer.front();
+        if (not IsDigit(head) and head != '.')
+        {
+            return false;
+        }
+
+        if (head == '0' and (Peek(1) == 'x' or Peek(1) == 'X'))
+        {
+            ParseHexadecimalNumber();
+            return true;
+        }
+
+        // A '.' that is not followed by a digit is the member-access operator.
+        if (head == '.' and not IsDigit(Peek(1)))
+        {
+            return false;
+        }
+
+        auto pos = SkipDigitsAndDots(Buffer.begin());
+
+        if (pos != Buffer.end() and (*pos == 'e' or *pos == 'E'))
+        {
+            ++pos;
+
+            if (pos != Buffer.end() and (*pos == '+' or *pos == '-'))
+            {
+                ++pos;
+            }
+
+            if (pos == Buffer.end() or not IsDigit(*pos))
+            {
+                // Exponent marker without any exponent digits.
+                EmitMalformed(pos);
+                return true;
+            }
+        }
+
+        pos = SkipDigitsAndDots(pos);
+
+        while (pos != Buffer.end() and IsNumberSuffix(*pos))
+        {
+            ++pos;
+        }
+
+        if (not IsTokenEnd(pos))
+        {
+            EmitMalformed(pos);
+            return true;
+        }
+
+        Emit("NUMBER", NumberCount, Take(pos));
+        return true;
+    }
+
+    // Scans a constant that starts with "0x" or "0X"; the caller has already checked
+    // the prefix. Hex digits must run up to a token end, otherwise the whole run is an
+    // ERROR record. NOTE: integer suffixes are not accepted after hex digits, and a bare
+    // "0x" is reported as a NUMBER.
+    void ParseHexadecimalNumber()
+    {
+        auto pos = Buffer.begin();
+        // Step over the two prefix characters without running off a short buffer.
+        for (int skipped = 0; skipped < 2 and pos != Buffer.end(); ++skipped)
+        {
+            ++pos;
+        }
+
+        while (pos != Buffer.end() and IsHexDigit(*pos))
+        {
+            ++pos;
+        }
+
+        if (not IsTokenEnd(pos))
+        {
+            EmitMalformed(pos);
+            return;
+        }
+
+        Emit("NUMBER", NumberCount, Take(pos));
+    }
+
+    // Keywords bucketed by their first character.
+    const std::unordered_map<char, std::vector<std::string>> KeywordsMap = {
+            {'a', {"auto"}},
+            {'b', {"break"}},
+            {'c', {"case",     "char",    "const",  "continue"}},
+            {'d', {"double",   "default", "do"}},
+            {'e', {"else",     "extern",  "enum"}},
+            {'f', {"float",    "for"}},
+            {'g', {"goto"}},
+            {'i', {"if",       "int"}},
+            {'l', {"long"}},
+            {'s', {"struct",   "static",  "switch", "short", "signed", "sizeof"}},
+            {'r', {"register", "return"}},
+            {'t', {"typedef",}},
+            {'u', {"union",    "unsigned"}},
+            {'v', {"void",     "volatile"}},
+            {'w', {"while"}}
+    };
+
+    // Operators bucketed by their first character. Within a bucket longer operators come
+    // first; ParseOperator relies on that order to pick the longest match.
+    const std::unordered_map<char, std::vector<std::string>> OperatorsMap = {
+            {'+', {"++",  "+=", "+"}},
+            {'-', {"--",  "-=", "->", "-"}},
+            {'*', {"*=",  "*"}},
+            {'/', {"/=",  "/"}},
+            {'%', {"%=",  "%"}},
+            {'=', {"==",  "="}},
+            {'!', {"!=",  "!"}},
+            {'>', {">>=", ">>", ">=", ">"}},
+            {'<', {"<<=", "<<", "<=", "<"}},
+            {'&', {"&&",  "&=", "&"}},
+            {'|', {"||",  "|=", "|"}},
+            {'^', {"^=",  "^"}},
+            {'.', {"."}},
+            {'~', {"~"}}
+    };
+
+    // Single-character delimiters.
+    const std::unordered_set<char> DelimitersSet = {
+            ';', ',', ':', '?', '(', ')', '[', ']', '{', '}'
+    };
+};
+
+/**
+ * Entry point: tokenizes the file named by the single command-line argument and prints
+ * the token records followed by a summary (line count, the seven per-kind token counts,
+ * then the error count).
+ *
+ * @return 0 on success, 1 when the argument count is wrong or the file cannot be opened.
+ */
+int main(int argc, char *argv[])
+{
+    // argv[1] may only be read once argc is known to include it.
+    FILE *source_file = (argc == 2) ? fopen(argv[1], "r") : nullptr;
+
+    if (source_file == nullptr)
+    {
+        // Stop here: scanning or closing a null stream would crash.
+        printf("Failed to open source File.\n");
+        return 1;
+    }
+
+    LexicalParser parser(source_file);
+
+    parser.Loop();
+
+    printf("%d\n", parser.LineCount);
+    printf("%d %d %d %d %d %d %d\n", parser.KeywordCount,
+           parser.IdentifierCount,
+           parser.OperatorCount,
+           parser.DelimiterCount,
+           parser.CharCount,
+           parser.StringCount,
+           parser.NumberCount);
+    printf("%d", parser.ErrorCount);
+
+    fclose(source_file);
+    return 0;
+}
--- a/LexicalParser/pl_start.l
+++ b/LexicalParser/pl_start.l
@@ -0,0 +1,27 @@
+ /* Simple lexical analyzer */
+ /* Function: recognizes every string (made only of upper- and lower-case letters) that ends with lowercase "ab" and prints 'Hit!' */
+ /* Note: add code between the begin and end markers below; mind the formatting */
+ /* Hint: only valid input (strings ending in ab) has to produce a result; invalid input is covered by the '.' rule */
+%{
+#include <stdio.h>
+%}
+
+%%
+ /* begin */
+[a-zA-Z]*ab    {printf("%s: Hit!\n", yytext);}
+ /* end */
+ /* Newlines and any other single character are consumed without output. */
+\n				{}
+.				{}
+%%
+/* Always returns 1; the scanner's end-of-input hook. */
+int yywrap()
+{
+	return 1;
+}
+/* Runs the scanner over the file named by argv[1]; without an argument yyin is left as it is. */
+int main(int argc, char **argv)
+{
+	if (argc > 1) {
+		yyin = fopen(argv[1], "r");
+		if (yyin == NULL) {
+			perror(argv[1]);
+			return 1;
+		}
+	}
+
+	/* Keep calling the scanner until it returns 0. */
+	while (yylex() != 0) {
+	}
+	return 0;
+}
--- a/LexicalParser/pl_test.l
+++ b/LexicalParser/pl_test.l
@@ -0,0 +1,117 @@
+ /* PL lexical analyzer */
+ /* Function: recognizes every token supported by PL and reports its token class */
+ /* Note: add code between the begin and end markers below; identifier and integer-constant recognition is already implemented, you need to complete the rest. Good luck! */
+ /* Hint: rules are matched in order, from top to bottom, so arrange them carefully */
+%{
+#include <stdio.h>
+%}
+ /* begin */
+ /* INTCON: an optional '-' followed by digits with no leading zero, or a single 0. */
+ /* Because the sign is part of the pattern, "-5" is scanned as one INTCON, not MINUS then INTCON. */
+INTCON			[\-]?[1-9][0-9]*|0
+IDENT			[A-Za-z][A-Za-z0-9]*
+ /* CHARCON: anything between two single quotes; no escape sequences. */
+CHARCON         \'[^\']*\'
+OFSYM           of
+ARRAYSYM		array
+PROGRAMSYM 		program
+MODSYM			mod
+ANDSYM			and
+ORSYM			or
+NOTSYM			not
+BEGINSYM 		begin
+ENDSYM			end
+IFSYM			if
+THENSYM			then
+ELSESYM			else
+WHILESYM		while
+DOSYM			do
+CALLSYM 		call
+CONSTSYM		const
+TYPESYM			type
+VARSYM			var
+PROCSYM			procedure
+NEQ				\<\>
+LEQ				\<\=
+GEQ				\>\=
+BECOME			\:\=
+PLUS            \+
+MINUS           \-
+TIMES           \*
+DIVSYM          \/
+EQL				\=
+LSS				\<
+GTR				\>
+LBRACK			\[
+RBRACK			\]
+LPAREN			\(
+RPAREN			\)
+COMMA			\,
+SEMICOLON		\;
+PERIOD			\.
+COLON			\:
+ERROR			[\~\!\@\#\$\%\^\&\_\\]
+ 
+ /* end */
+
+%%
+ /* begin */
+ /* Keyword rules are listed before IDENT: when a keyword and IDENT match the same text, the earlier rule is chosen. */
+{OFSYM}         {printf("%s: OFSYM\n", yytext);}
+{ARRAYSYM}		{printf("%s: ARRAYSYM\n", yytext);}
+{PROGRAMSYM} 	{printf("%s: PROGRAMSYM\n", yytext);}
+{MODSYM}		{printf("%s: MODSYM\n", yytext);}
+{ANDSYM}		{printf("%s: ANDSYM\n", yytext);}
+{ORSYM}			{printf("%s: ORSYM\n", yytext);}
+{NOTSYM}		{printf("%s: NOTSYM\n", yytext);}
+{BEGINSYM}  	{printf("%s: BEGINSYM\n", yytext);}
+{ENDSYM}		{printf("%s: ENDSYM\n", yytext);}
+{IFSYM}			{printf("%s: IFSYM\n", yytext);}
+{THENSYM}		{printf("%s: THENSYM\n", yytext);}
+{ELSESYM}		{printf("%s: ELSESYM\n", yytext);}
+{WHILESYM} 		{printf("%s: WHILESYM\n", yytext);}
+{DOSYM} 		{printf("%s: DOSYM\n", yytext);}
+{CALLSYM}		{printf("%s: CALLSYM\n", yytext);}
+{CONSTSYM}		{printf("%s: CONSTSYM\n", yytext);}
+{TYPESYM}		{printf("%s: TYPESYM\n", yytext);}
+{VARSYM}		{printf("%s: VARSYM\n", yytext);}
+{PROCSYM}		{printf("%s: PROCSYM\n", yytext);}
+
+{NEQ}			{printf("%s: NEQ\n", yytext);}
+{LEQ}			{printf("%s: LEQ\n", yytext);}
+{GEQ}			{printf("%s: GEQ\n", yytext);}
+{BECOME}		{printf("%s: BECOME\n", yytext);}
+
+{PLUS}          {printf("%s: PLUS\n", yytext);}
+{MINUS}         {printf("%s: MINUS\n", yytext);}
+{TIMES}         {printf("%s: TIMES\n", yytext);}
+{DIVSYM}        {printf("%s: DIVSYM\n", yytext);}
+{EQL}			{printf("%s: EQL\n", yytext);}
+{LSS}			{printf("%s: LSS\n", yytext);}
+{GTR}			{printf("%s: GTR\n", yytext);}
+{LBRACK}		{printf("%s: LBRACK\n", yytext);}
+{RBRACK}		{printf("%s: RBRACK\n", yytext);}
+{LPAREN}		{printf("%s: LPAREN\n", yytext);}
+{RPAREN}		{printf("%s: RPAREN\n", yytext);}
+{COMMA}			{printf("%s: COMMA\n", yytext);}
+{SEMICOLON}		{printf("%s: SEMICOLON\n", yytext);}
+{PERIOD}		{printf("%s: PERIOD\n", yytext);}
+{COLON}			{printf("%s: COLON\n", yytext);}
+
+{CHARCON}       {printf("%s: CHARCON\n", yytext);}
+{INTCON}		{printf("%s: INTCON\n", yytext);}
+{IDENT}			{printf("%s: IDENT\n", yytext);}
+{ERROR}			{printf("%s: ERROR\n", yytext);}
+ /* end */
+
+ /* Newlines and any character not matched above are consumed without output. */
+\n				{}
+.				{}
+%%
+/* Always returns 1; the scanner's end-of-input hook. */
+int yywrap()
+{
+	return 1;
+}
+/* Runs the scanner over the file named by argv[1]; without an argument yyin is left as it is. */
+int main(int argc, char **argv)
+{
+	if (argc > 1) {
+		yyin = fopen(argv[1], "r");
+		if (yyin == NULL) {
+			perror(argv[1]);
+			return 1;
+		}
+	}
+
+	/* Keep calling the scanner until it returns 0. */
+	while (yylex() != 0) {
+	}
+	return 0;
+}