21/08/14 20:28
TheDarkJuster
L' altra sera ero felice che il mio lexer funzionasse bene, così ho deciso di modificare il lexer: vector<Token> è diventato vector<Token>*. La modifica ha avuto successo, così ho deciso di memorizzare nei token anche il numero di carattere nella riga corrispondente a quel token e qui sono cominciati i problemi: lancio l' esecuzione del programma e mi viene stampato a video numero commenti: 0 e numero commenti multiriga: 0 e non c'è nessun token nel vettore. Non riesco a capire cosa non va, aiuto.
Altra domanda: credete che dovrei usare shared_ptr invece che passarmi il puntatore? Che benefici avrei nel farlo?
P.S. Non ho capito perchè abbia funzionato durante varie prove, ma il problema è legato al fatto che nella classe ci sia vector<Token>* anzichè vector<Token>. A dire la verità mi scoccia parecchia il fatto che anche se ho ricompilato e provato più volte il problema sia sorto in ritardo e ancora di più avere nella classe lexer un vector<Token>. C'è una alternativa più "elegante"?
/* * Lexer.h * * Created on: 14/08/2014 * Author: Denis */ #ifndef __LEXER__ #define __LEXER__ #include "Token.h" #include <string.h> #include <vector> #include <memory> using namespace std; #define EMPTY_BUFFER -1 #define NULL_BUFFER -2 #define NO_ERRORS 0 enum LexReading { MultiLineComment, SingleLineComment, Number, String, Identifier, Source }; class Lexer { public: Lexer(char*, vector<Token>*); short Analyze(void); //lexer data char* buffer; unsigned long bufferLength; //lex result vector<Token>* Tokens; //statistics unsigned long MultiLineFoundComments; unsigned long SingleLineFoundComments; }; #endif /* __LEXER__ */
#include "Lexer.h" Lexer::Lexer(char* buffer, vector<Token>* tokens) { this->buffer = buffer; //save the buffer pointer this->bufferLength = strlen(buffer); //save the buffer length this->Tokens = tokens; //save the pointer //setup everything this->MultiLineFoundComments = 0L; this->SingleLineFoundComments = 0L; } short Lexer::Analyze(void) { //check if the buffer is null or empty and return the proper error if so if (this->buffer == (char*)NULL) return NULL_BUFFER; else if (this->bufferLength == 0) return EMPTY_BUFFER; //what is the lexer reading? LexReading currentlyReading; unsigned long currentBufferCharacter = 0L; //the character that the lexer will read unsigned long currentBufferRow = 1L; //the user will know the exact line of an error (or warning) if any unsigned long currentBufferColumn = 1L; //the user will know the exact line of an error (or warning) if any vector<char> temp; //a temporary vector to store the characters of identifiers, numbers, strings and chars while (currentBufferCharacter < this->bufferLength) { switch (currentlyReading) { case MultiLineComment: //if this is the end of the comment if ((this->buffer[currentBufferCharacter] == '*') && (this->buffer[currentBufferCharacter + 1] == '/')) { currentlyReading = Source; //the lexer is going to read source code again this->MultiLineFoundComments++; //update the number of multi line comments lexed currentBufferCharacter++; //the lexer won't read the / character the next step } else if (this->buffer[currentBufferCharacter] == '\n') { currentBufferRow++; //update the row number currentBufferColumn = 1L; //update the column number } //else do nothing, I don't care about comments break; case SingleLineComment: //if this is the end of the line if (this->buffer[currentBufferCharacter] == '\n') { currentlyReading = Source; //the lexer is going to read source code again this->SingleLineFoundComments++; //update the number of single line comments lexed currentBufferRow++; //update the row number currentBufferColumn = 1L; //update the column number } //else do nothing, I don't care about comments break; case Number: //if this is the end of the number if (((this->buffer[currentBufferCharacter] < 48) || (this->buffer[currentBufferCharacter] > 57)) && ((this->buffer[currentBufferCharacter] != '.') && (this->buffer[currentBufferCharacter] != 'D') && (this->buffer[currentBufferCharacter] != 'X') && (this->buffer[currentBufferCharacter] != 'B') && (this->buffer[currentBufferCharacter] != 'd') && (this->buffer[currentBufferCharacter] != 'x') && (this->buffer[currentBufferCharacter] != 'b') && (this->buffer[currentBufferCharacter] != 'a')&& (this->buffer[currentBufferCharacter] != 'A') && (this->buffer[currentBufferCharacter] != 'b') && (this->buffer[currentBufferCharacter] != 'C') && (this->buffer[currentBufferCharacter] != 'c') && (this->buffer[currentBufferCharacter] != 'd') && (this->buffer[currentBufferCharacter] != 'D') && (this->buffer[currentBufferCharacter] != 'e') && (this->buffer[currentBufferCharacter] != 'E') && (this->buffer[currentBufferCharacter] != 'f') && (this->buffer[currentBufferCharacter] != 'F'))) { //then save the number stored in temp size_t characters = temp.size(); char* numberStringFromTemp = new char[characters + 1]; numberStringFromTemp[characters] = (char)0x00; size_t currentChar; for (currentChar = 0L; currentChar < characters; currentChar++) numberStringFromTemp[currentChar] = temp[currentChar]; //create the token Token numberToken; numberToken.type = TNUMBER; numberToken.row = currentBufferRow; numberToken.column = currentBufferColumn; numberToken.data = numberStringFromTemp; this->Tokens->push_back(numberToken); //save the new token temp.clear(); //clear the temp buffer after having saved the number stored in currentlyReading = Source; //prepare the lexer for the next character currentBufferCharacter--; //the lexer have to re-read the current character } else { //else save the character to the temp temp.push_back(this->buffer[currentBufferCharacter]); //store the read character } break; case String: //if this is the end of a string if (this->buffer[currentBufferCharacter] == '"') { //then save the string stored in temp size_t characters = temp.size(); char* stringFromTemp = new char[characters + 1]; stringFromTemp[characters] = (char)0x00; size_t currentChar; for (currentChar = 0L; currentChar < characters; currentChar++) stringFromTemp[currentChar] = temp[currentChar]; //create the token Token stringToken; stringToken.type = TSTRING; stringToken.row = currentBufferRow; stringToken.column = currentBufferColumn; stringToken.data = stringFromTemp; this->Tokens->push_back(stringToken); //save the new token temp.clear(); //clear the temp buffer after having saved the string stored in currentlyReading = Source; //prepare the lexer for the next character } else if (this->buffer[currentBufferCharacter] == '\n') { temp.push_back('\'); //store the read character temp.push_back('n'); //as it should be stored currentBufferRow++; //update the row number currentBufferColumn = 1L; //update the column number /* FUCK YOU! STUPID USER! */ } else if (this->buffer[currentBufferCharacter] == '\t') { temp.push_back('\'); //store the read character temp.push_back('t'); //as it should be stored /* FUCK YOU! STUPID USER! */ } else if (this->buffer[currentBufferCharacter] == '\b') { temp.push_back('\'); //store the read character temp.push_back('b'); //as it should be stored /* FUCK YOU! STUPID USER! */ } else if (this->buffer[currentBufferCharacter] == '\f') { temp.push_back('\'); //store the read character temp.push_back('f'); //as it should be stored /* FUCK YOU! STUPID USER! */ } else if (this->buffer[currentBufferCharacter] == '\v') { temp.push_back('\'); //store the read character temp.push_back('v'); //as it should be stored /* FUCK YOU! STUPID USER! */ } else if (this->buffer[currentBufferCharacter] == '\r') { temp.push_back('\'); //store the read character temp.push_back('r'); //as it should be stored /* FUCK YOU! STUPID USER! */ } else if (this->buffer[currentBufferCharacter] == '\?') { temp.push_back('\'); //store the read character temp.push_back('?'); //as it should be stored /* FUCK YOU! STUPID USER! */ } else { //else save the character to the temp temp.push_back(this->buffer[currentBufferCharacter]); //store the read character } break; case Identifier: //if this is a character that is part of an identifier if (((this->buffer[currentBufferCharacter] >= 65) && (this->buffer[currentBufferCharacter] <= 90)) || ((this->buffer[currentBufferCharacter] >= 97) && (this->buffer[currentBufferCharacter] <= 122)) || (this->buffer[currentBufferCharacter] == '_')) { temp.push_back(this->buffer[currentBufferCharacter]); //store the read character } else { //save the string stored in temp size_t characters = temp.size(); char* identifierStringFromTemp = new char[characters + 1]; identifierStringFromTemp[characters] = (char)0x00; size_t currentChar; for (currentChar = 0L; currentChar < characters; currentChar++) identifierStringFromTemp[currentChar] = temp[currentChar]; //create the token Token identifierToken; identifierToken.type = TIDENTIFIER; identifierToken.row = currentBufferRow; identifierToken.column = currentBufferColumn; identifierToken.data = identifierStringFromTemp; this->Tokens->push_back(identifierToken); //save the new token temp.clear(); //clear the temp buffer after having saved the identifier stored in currentlyReading = Source; //prepare the lexer for the next character currentBufferCharacter--; //the lexer have to re-read the current character } break; case Source: if (this->buffer[currentBufferCharacter] == '\n') { currentBufferRow++; //update the row number currentBufferColumn = 1L; //update the column number } else if (this->buffer[currentBufferCharacter] == '#') //single line comments aren't C-like { currentlyReading = SingleLineComment; //the lexer is going to read a single line comment } else if ((this->buffer[currentBufferCharacter] == '/') && (this->buffer[currentBufferCharacter + 1] == '*')) //multi line comments are C-like { currentlyReading = MultiLineComment; //the lexer is going to read a multi line comment currentBufferCharacter++; //the lexer won't read the * simbol } else if (this->buffer[currentBufferCharacter] == '"') //start of a string { currentlyReading = String; //the lexer is going to read a string } else if ((this->buffer[currentBufferCharacter] >= 48) && (this->buffer[currentBufferCharacter] <= 57)) // ASCII code of 0 is 48 and of 9 is 57 { currentlyReading = Number; //the lexer is going to read a number currentBufferCharacter--; //a little trick: i want the lexer to read this character again (when the lexer will expect a number) } else if (((this->buffer[currentBufferCharacter] >= 65) && (this->buffer[currentBufferCharacter] <= 90)) || ((this->buffer[currentBufferCharacter] >= 97) && (this->buffer[currentBufferCharacter] <= 122)) || (this->buffer[currentBufferCharacter] == '_')) { currentlyReading = Identifier; //the lexer is going to read an identifier currentBufferCharacter--; //the same little trick: i want the lexer to read this character again (when the lexer will expect an identifier) } else if (this->buffer[currentBufferCharacter] == ';') { Token newToken; newToken.type = TDOTCOMMA; newToken.row = currentBufferRow; newToken.column = currentBufferColumn; newToken.data = (char*)NULL; this->Tokens->push_back(newToken); } else if (this->buffer[currentBufferCharacter] == '.') { Token newToken; newToken.type = TDOT; newToken.row = currentBufferRow; newToken.column = currentBufferColumn; newToken.data = (char*)NULL; this->Tokens->push_back(newToken); } else if (this->buffer[currentBufferCharacter] == ',') { Token newToken; newToken.type = TCOMMA; newToken.row = currentBufferRow; newToken.column = currentBufferColumn; newToken.data = (char*)NULL; this->Tokens->push_back(newToken); } else if (this->buffer[currentBufferCharacter] == '{') { Token newToken; newToken.type = TLBRACE; newToken.row = currentBufferRow; newToken.column = currentBufferColumn; newToken.data = (char*)NULL; this->Tokens->push_back(newToken); } else if (this->buffer[currentBufferCharacter] == '}') { Token newToken; newToken.type = TRBRACE; newToken.row = currentBufferRow; newToken.column = currentBufferColumn; newToken.data = (char*)NULL; this->Tokens->push_back(newToken); } else if (this->buffer[currentBufferCharacter] == '(') { Token newToken; newToken.type = TLPAREN; newToken.row = currentBufferRow; newToken.column = currentBufferColumn; newToken.data = (char*)NULL; this->Tokens->push_back(newToken); } else if (this->buffer[currentBufferCharacter] == ')') { Token newToken; newToken.type = TRPAREN; newToken.row = currentBufferRow; newToken.column = currentBufferColumn; newToken.data = (char*)NULL; this->Tokens->push_back(newToken); } else if (this->buffer[currentBufferCharacter] == '[') { Token newToken; newToken.type = TLSQRPAREN; newToken.row = currentBufferRow; newToken.column = currentBufferColumn; newToken.data = (char*)NULL; this->Tokens->push_back(newToken); } else if (this->buffer[currentBufferCharacter] == ']') { Token newToken; newToken.type = TRSQRPAREN; newToken.row = currentBufferRow; newToken.column = currentBufferColumn; newToken.data = (char*)NULL; this->Tokens->push_back(newToken); } break; default: break; } currentBufferColumn++; //update the number of the character in the current line currentBufferCharacter++; //next time I'll read the next character } //check if the lexer reached the end unexpectly if ((currentlyReading != Source) && (currentlyReading != SingleLineComment)) { if (currentlyReading == Identifier) { size_t characters = temp.size(); char* identifierStringFromTemp = new char[characters + 1]; identifierStringFromTemp[characters] = (char)0x00; size_t currentChar; for (currentChar = 0L; currentChar < characters; currentChar++) identifierStringFromTemp[currentChar] = temp[currentChar]; //create the token Token identifierToken; identifierToken.type = TIDENTIFIER; identifierToken.row = currentBufferRow; identifierToken.column = currentBufferColumn; identifierToken.data = identifierStringFromTemp; this->Tokens->push_back(identifierToken); //save the new token /* FUCK YOU! STUPID USER! */ } else if (currentlyReading == Number) { size_t characters = temp.size(); char* numberStringFromTemp = new char [characters + 1]; numberStringFromTemp[characters] = (char)0x00; size_t currentChar; for (currentChar = 0L; currentChar < characters; currentChar++) numberStringFromTemp[currentChar] = temp[currentChar]; //create the token Token numberToken; numberToken.type = TNUMBER; numberToken.row = currentBufferRow; numberToken.column = currentBufferColumn; numberToken.data = numberStringFromTemp; this->Tokens->push_back(numberToken); //save the new token /* FUCK YOU! STUPID USER! */ } else if (currentlyReading == String) { //return an error } else { //MultiLineComment //return an error } } //the lexer's job is done return NO_ERRORS; }
#include <stdio.h> #include <string.h> #include <vector> #include "Lexer.h" int main(int argc, char** argv) { char* src = "/* sd */ \"This should be displayed \"...# /*s5d*/\n/*cd \"This should not be displayed\"*/#f\n55 6.5H\n5.9B /* 5.7 */ 5f, 5,f\n \"a slash and a n should be displayed: \n\" \"'\" #comment\n\n "; vector<Token> TokensList; Lexer Analyzer(src, &TokensList); if (Analyzer.Analyze() != 0) { printf("Si e' verificato un errore!"); } else { printf("Commenti multilinea: %u\nCommenti: %u\n", Analyzer.MultiLineFoundComments, Analyzer.SingleLineFoundComments ); } size_t i; for (i = 0; i < TokensList.size(); i++) { printf("\n\nToken numer %u: \nType: ", (i + 1)); if (TokensList[i].type == TIDENTIFIER) { printf("identifier\nValue: "); puts(TokensList[i].data); } else if (TokensList[i].type == TNUMBER) { printf("number\nValue: "); puts(TokensList[i].data); } else if (TokensList[i].type == TCHAR) { printf("character\nValue: "); puts(TokensList[i].data); } else if (TokensList[i].type == TSTRING) { printf("string\nValue: "); puts(TokensList[i].data); } } return 0; }
Altra domanda: credete che dovrei usare shared_ptr invece che passarmi il puntatore? Che benefici avrei nel farlo?
P.S. Non ho capito perchè abbia funzionato durante varie prove, ma il problema è legato al fatto che nella classe ci sia vector<Token>* anzichè vector<Token>. A dire la verità mi scoccia parecchia il fatto che anche se ho ricompilato e provato più volte il problema sia sorto in ritardo e ancora di più avere nella classe lexer un vector<Token>. C'è una alternativa più "elegante"?
Ultima modifica effettuata da TheDarkJuster 21/08/14 21:01
aaa