// NOTE(review): the include targets and template arguments in this file were
// reconstructed from the names the code uses (FILE/printf, std::list,
// std::string, std::unordered_map, std::unordered_set, std::vector) — confirm
// against the project's original header list.
#include <cstdio>
#include <list>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>

/**
 * Line-oriented lexer for C-like source text.
 *
 * The lexer reads one physical line at a time from a C stdio stream into
 * `Buffer`, then repeatedly strips a single token (or comment / whitespace)
 * from the front of the buffer. Every recognised token is reported on stdout
 * and tallied in one of the public counters below.
 */
class LexicalParser {
public:
    int LineCount = 0;        ///< Lines read so far; doubles as the current line number.
    int KeywordCount = 0;     ///< Tokens matched against KeywordsMap.
    int IdentifierCount = 0;  ///< Identifier tokens.
    int OperatorCount = 0;    ///< Tokens matched against OperatorsMap.
    int DelimiterCount = 0;   ///< Characters found in DelimitersSet.
    int CharCount = 0;        ///< Properly closed character literals.
    int StringCount = 0;      ///< Properly closed string literals.
    int NumberCount = 0;      ///< Well-formed numeric literals.
    int ErrorCount = 0;       ///< Malformed tokens ('@', unclosed literals, bad numbers).

    /// @param file Stream to read from; not owned, the caller closes it.
    explicit LexicalParser(FILE *file) : File(file) {}

    /**
     * Tokenises the whole stream.
     *
     * Comments (`// ...` and `/* ... *\/`, which may span lines) and blanks
     * (space, tab) are skipped. A stray '@' is reported as an error token.
     *
     * Returns early, leaving the rest of the input unread, as soon as no
     * sub-parser recognises the text at the front of the buffer.
     */
    void Loop() {
        bool inBlockComment = false;
        while (Readline()) {
            while (!Buffer.empty()) {
                if (inBlockComment) {
                    // Either drops everything up to and including "*/", or the
                    // whole line when the comment continues on the next one.
                    inBlockComment = !SkipBlockComment();
                    continue;
                }

                const char head = Buffer.front();
                if (head == '/' && Buffer.size() > 1) {
                    auto next = Buffer.begin();
                    ++next;
                    if (*next == '/') {
                        // Single-line comment: the rest of the line is ignored.
                        Buffer.clear();
                        continue;
                    }
                    if (*next == '*') {
                        inBlockComment = true;
                        Buffer.pop_front();
                        Buffer.pop_front();
                        continue;
                    }
                }

                if (head == ' ' || head == '\t') {
                    Buffer.pop_front();
                    continue;
                }

                // Special-cased illegal character.
                if (head == '@') {
                    Buffer.pop_front();
                    ErrorCount++;
                    Emit("@");
                    continue;
                }

                if (!Parse()) {
                    return;
                }
            }
        }
    }

private:
    using Cursor = std::list<char>::iterator;

    std::list<char> Buffer;  ///< Unconsumed characters of the current line (no '\n').
    FILE *File;

    // ---------------------------------------------------------------- helpers

    static bool IsDigit(char c) { return c >= '0' && c <= '9'; }

    static bool IsHexDigit(char c) {
        return IsDigit(c) || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f');
    }

    static bool IsIdentifierStart(char c) {
        return c == '_' || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
    }

    static bool IsIdentifierPart(char c) { return IsIdentifierStart(c) || IsDigit(c); }

    /// Integer / floating suffix letters accepted after a number.
    static bool IsNumberSuffix(char c) {
        switch (c) {
            case 'u': case 'U': case 'l': case 'L': case 'f': case 'F':
                return true;
            default:
                return false;
        }
    }

    /**
     * True when `pos` is a place where a token may legally end: end of line,
     * a blank, or the first character of an operator or delimiter.
     */
    bool AtTokenBoundary(Cursor pos) {
        if (pos == Buffer.end()) {
            return true;
        }
        return *pos == ' ' || *pos == '\t' ||
               OperatorsMap.count(*pos) != 0 || DelimitersSet.count(*pos) != 0;
    }

    /// Removes [Buffer.begin(), last) from the buffer and returns it as text.
    std::string Consume(Cursor last) {
        std::string lexeme(Buffer.begin(), last);
        Buffer.erase(Buffer.begin(), last);
        return lexeme;
    }

    /**
     * Reports one token on stdout.
     *
     * NOTE(review): the format string has no conversion for the lexeme, so only
     * the line number is printed; printf evaluates and ignores the surplus
     * argument. This looks as if a token tag went missing from the format
     * string — confirm the intended output before relying on it. The lexeme is
     * still passed so restoring the format is a one-line change.
     */
    void Emit(const std::string &lexeme) const {
        printf("%d \n", LineCount, lexeme.c_str());
    }

    /// Extends `pos` to the next token boundary and reports the span as an error.
    void RejectMalformed(Cursor pos) {
        while (!AtTokenBoundary(pos)) {
            ++pos;
        }
        ErrorCount++;
        Emit(Consume(pos));
    }

    /**
     * Checks whether the buffer starts with `text`.
     * On success `after` is set to the position just past the match.
     */
    bool MatchPrefix(const std::string &text, Cursor &after) {
        Cursor pos = Buffer.begin();
        for (const char expected : text) {
            if (pos == Buffer.end() || *pos != expected) {
                return false;
            }
            ++pos;
        }
        after = pos;
        return true;
    }

    /**
     * Called while inside a block comment. Looks for an adjacent "*\/" pair.
     * @return true when the terminator was found (text through it is dropped);
     *         false when the comment runs past this line (the line is dropped).
     */
    bool SkipBlockComment() {
        // Tracking the previous character lets "**/" terminate the comment and
        // keeps the iterator from ever being advanced past end().
        char previous = '\0';
        for (Cursor it = Buffer.begin(); it != Buffer.end(); ++it) {
            if (previous == '*' && *it == '/') {
                ++it;
                Buffer.erase(Buffer.begin(), it);
                return true;
            }
            previous = *it;
        }
        Buffer.clear();
        return false;
    }

    /// Advances past a run of digits and '.' characters.
    Cursor SkipDigitsAndDots(Cursor pos) {
        while (pos != Buffer.end() && (IsDigit(*pos) || *pos == '.')) {
            ++pos;
        }
        return pos;
    }

    // ------------------------------------------------------------ sub-parsers

    /**
     * Tries each token class in priority order on the front of the buffer.
     * @return false when nothing matched (the buffer is left untouched).
     */
    bool Parse() {
        if (ParseCharacter() || ParseString()) {
            return true;
        }
        if (ParseNumber()) {
            return true;
        }
        if (ParseOperator() || ParseDelimiter()) {
            return true;
        }
        if (ParseKeyword()) {
            return true;
        }
        return ParseIdentifier();
    }

    /**
     * Appends the next line (without its '\n') to the buffer.
     * @return false only at end of file with nothing read; a final line that
     *         lacks a trailing newline still counts as a line.
     */
    bool Readline() {
        if (File == nullptr) {
            return false;
        }
        bool readAnything = false;
        while (true) {
            const int c = fgetc(File);
            if (c == EOF) {
                if (readAnything) {
                    LineCount++;
                }
                return readAnything;
            }
            if (c == '\n') {
                LineCount++;
                return true;
            }
            Buffer.emplace_back(static_cast<char>(c));
            readAnything = true;
        }
    }

    /**
     * Matches a keyword from KeywordsMap that is followed by a token boundary.
     * Tab now counts as a boundary, consistent with the number parsers.
     */
    bool ParseKeyword() {
        const auto entry = KeywordsMap.find(Buffer.front());
        if (entry == KeywordsMap.end()) {
            return false;
        }
        for (const auto &keyword : entry->second) {
            Cursor after = Buffer.begin();
            if (MatchPrefix(keyword, after) && AtTokenBoundary(after)) {
                KeywordCount++;
                Emit(Consume(after));
                return true;
            }
        }
        return false;
    }

    /// Matches `[A-Za-z_][A-Za-z0-9_]*` followed by a token boundary.
    bool ParseIdentifier() {
        Cursor pos = Buffer.begin();
        if (!IsIdentifierStart(*pos)) {
            return false;
        }
        while (pos != Buffer.end() && IsIdentifierPart(*pos)) {
            ++pos;
        }
        if (!AtTokenBoundary(pos)) {
            return false;
        }
        IdentifierCount++;
        Emit(Consume(pos));
        return true;
    }

    /// Matches a single character from DelimitersSet.
    bool ParseDelimiter() {
        const char head = Buffer.front();
        if (DelimitersSet.count(head) == 0) {
            return false;
        }
        DelimiterCount++;
        Emit(std::string(1, head));
        Buffer.pop_front();
        return true;
    }

    /**
     * Matches the first operator spelling that prefixes the buffer. Spellings
     * are listed longest-first per leading character, so this is a
     * longest-match; what follows the operator is deliberately not checked.
     */
    bool ParseOperator() {
        const auto entry = OperatorsMap.find(Buffer.front());
        if (entry == OperatorsMap.end()) {
            return false;
        }
        for (const auto &spelling : entry->second) {
            Cursor after = Buffer.begin();
            if (MatchPrefix(spelling, after)) {
                OperatorCount++;
                Emit(Consume(after));
                return true;
            }
        }
        return false;
    }

    /**
     * Shared scanner for quoted literals. The buffer must start at the opening
     * quote; `prefix` is an already-removed encoding prefix to put back in
     * front of the reported lexeme.
     *
     * A literal that is not closed on this line (including one that ends in a
     * dangling backslash) consumes the rest of the line and counts as an error.
     *
     * @param counter Tally to bump for a properly closed literal.
     * @return false when the buffer does not start with `quote`.
     */
    bool ParseQuoted(char quote, const std::string &prefix, int &counter) {
        Cursor pos = Buffer.begin();
        if (*pos != quote) {
            return false;
        }
        ++pos;

        bool closed = false;
        while (pos != Buffer.end() && !closed) {
            if (*pos == quote) {
                closed = true;
            } else if (*pos == '\\') {
                // Skip the backslash; the escaped character is skipped below.
                ++pos;
                if (pos == Buffer.end()) {
                    break;
                }
            }
            ++pos;
        }

        const std::string lexeme = prefix + Consume(pos);
        if (closed) {
            counter++;
        } else {
            ErrorCount++;
        }
        Emit(lexeme);
        return true;
    }

    /// Character literal with an optional L / u / U prefix.
    bool ParseCharacter() {
        std::string prefix;
        Cursor second = Buffer.begin();
        ++second;
        const char head = Buffer.front();
        // The prefix is only stripped when a quote really follows, so plain
        // identifiers starting with these letters are left alone.
        if (second != Buffer.end() && *second == '\'' &&
            (head == 'L' || head == 'u' || head == 'U')) {
            prefix += head;
            Buffer.pop_front();
        }
        return ParseQuoted('\'', prefix, CharCount);
    }

    /// String literal with an optional L / u / U / u8 prefix.
    bool ParseString() {
        std::string prefix;
        Cursor second = Buffer.begin();
        ++second;
        const char head = Buffer.front();
        if (second != Buffer.end() && (head == 'u' || head == 'U' || head == 'L')) {
            if (*second == '"') {
                prefix += head;
                Buffer.pop_front();
            } else if (head == 'u' && *second == '8') {
                Cursor third = second;
                ++third;
                if (third != Buffer.end() && *third == '"') {
                    prefix = "u8";
                    Buffer.erase(Buffer.begin(), third);
                }
            }
        }
        return ParseQuoted('"', prefix, StringCount);
    }

    /**
     * Decimal integer / floating literal, or a hexadecimal literal via
     * ParseHexadecimalNumber().
     *
     * Shape accepted: [digits and dots] [e|E [+|-] digits-and-dots] [suffixes],
     * followed by a token boundary. Anything else that starts like a number is
     * consumed up to the next boundary and counted as an error.
     *
     * NOTE(review): runs of digits and dots are not validated further, so text
     * such as "1.2.3" is counted as a number.
     *
     * @return false when the buffer does not start a number (this includes a
     *         '.' that is not followed by a digit, i.e. the member operator).
     */
    bool ParseNumber() {
        const Cursor first = Buffer.begin();
        Cursor second = first;
        ++second;
        const bool hasSecond = second != Buffer.end();

        if (!IsDigit(*first) && *first != '.') {
            return false;
        }
        if (*first == '0' && hasSecond && (*second == 'x' || *second == 'X')) {
            ParseHexadecimalNumber();
            return true;
        }

        Cursor pos = first;
        if (*first == '.') {
            // Tell a leading decimal point apart from the member-access operator.
            if (!hasSecond || !IsDigit(*second)) {
                return false;
            }
            ++pos;
        }

        pos = SkipDigitsAndDots(pos);

        if (pos != Buffer.end() && (*pos == 'e' || *pos == 'E')) {
            ++pos;
            if (pos != Buffer.end() && (*pos == '+' || *pos == '-')) {
                ++pos;
            }
            if (pos == Buffer.end() || !IsDigit(*pos)) {
                // Exponent marker without digits.
                RejectMalformed(pos);
                return true;
            }
        }

        pos = SkipDigitsAndDots(pos);

        while (pos != Buffer.end() && IsNumberSuffix(*pos)) {
            ++pos;
        }

        if (!AtTokenBoundary(pos)) {
            RejectMalformed(pos);
            return true;
        }

        NumberCount++;
        Emit(Consume(pos));
        return true;
    }

    /**
     * Hexadecimal literal. The caller has verified that the buffer starts with
     * "0x" or "0X". Hex digits must run up to a token boundary, otherwise the
     * text up to the next boundary is counted as an error.
     */
    void ParseHexadecimalNumber() {
        Cursor pos = Buffer.begin();
        ++pos;  // '0'
        ++pos;  // 'x' / 'X'
        while (pos != Buffer.end() && IsHexDigit(*pos)) {
            ++pos;
        }
        if (!AtTokenBoundary(pos)) {
            RejectMalformed(pos);
            return;
        }
        NumberCount++;
        Emit(Consume(pos));
    }

    // ----------------------------------------------------------------- tables

    /// Keywords, bucketed by first character.
    const std::unordered_map<char, std::vector<std::string>> KeywordsMap = {
            {'a', {"auto"}},
            {'b', {"break"}},
            {'c', {"case", "char", "const", "continue"}},
            {'d', {"double", "default", "do"}},
            {'e', {"else", "extern", "enum"}},
            {'f', {"float", "for"}},
            {'g', {"goto"}},
            {'i', {"if", "int"}},
            {'l', {"long"}},
            {'s', {"struct", "static", "switch", "short", "signed", "sizeof"}},
            {'r', {"register", "return"}},
            {'t', {"typedef",}},
            {'u', {"union", "unsigned"}},
            {'v', {"void", "volatile"}},
            {'w', {"while"}}
    };

    /// Operator spellings, bucketed by first character, longest first.
    const std::unordered_map<char, std::vector<std::string>> OperatorsMap = {
            {'+', {"++", "+=", "+"}},
            {'-', {"--", "-=", "->", "-"}},
            {'*', {"*=", "*"}},
            {'/', {"/=", "/"}},
            {'%', {"%=", "%"}},
            {'=', {"==", "="}},
            {'!', {"!=", "!"}},
            {'>', {">>=", ">>", ">=", ">"}},
            {'<', {"<<=", "<<", "<=", "<"}},
            {'&', {"&&", "&=", "&"}},
            {'|', {"||", "|=", "|"}},
            {'^', {"^=", "^"}},
            {'.', {"."}},
            {'~', {"~"}}
    };

    /// Single-character delimiters.
    const std::unordered_set<char> DelimitersSet = {
            ';', ',', ':', '?', '(', ')', '[', ']', '{', '}'
    };
};

/**
 * Usage: <program> <source-file>
 *
 * Prints one line per token while lexing, then the line count, the seven
 * per-class token counts, and the error count.
 */
int main(int argc, char *argv[]) {
    // argv[1] is only meaningful when exactly one path was given, so the
    // argument count is checked before the file is opened.
    FILE *source_file = (argc == 2) ? fopen(argv[1], "r") : nullptr;
    if (source_file == nullptr) {
        printf("Failed to open source File.\n");
        return 1;
    }

    LexicalParser parser(source_file);
    parser.Loop();

    printf("%d\n", parser.LineCount);
    printf("%d %d %d %d %d %d %d\n", parser.KeywordCount, parser.IdentifierCount,
           parser.OperatorCount, parser.DelimiterCount, parser.CharCount,
           parser.StringCount, parser.NumberCount);
    printf("%d", parser.ErrorCount);

    fclose(source_file);
    return 0;
}