Changeset 614

Show
Ignore:
Timestamp:
07/25/08 08:02:42 (4 months ago)
Author:
weyrick
Message:

Get dumpTokens() working to some degree, for testing the lexer. Add debug_visitor.

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • trunk/rphp/compiler/pDriver.cpp

    r611 r614  
    1818*/ 
    1919 
     20#include <iostream> 
     21#include <iomanip> 
     22#include <fstream> 
    2023#include <string> 
    21 #include <iostream> 
    22  
     24#include <unicode/unistr.h> 
     25#include <unicode/ustream.h> 
     26 
     27#include "parser/phplexer.h" 
    2328#include "parser/rphp_parser.h" 
    24 #include "parser/phplexer.h" 
    2529#include "pDriver.h" 
    2630 
     31using namespace std; 
     32 
    2733namespace rphp { 
    2834 
    29     void pDriver::compile(std::string fileName) { 
    30         std::cout << "if this were real, you'd be running your file: " << fileName << " by now!" << std::endl; 
    31         const UnicodeString content( "print();" ); 
    32  
    33         rphp::parser parser; 
    34         rphp::Lexer lexer( &parser, content ); 
     35    /** 
     36    * print the token with the same text as php tokens - so they can be compared with 
     37    * the result of get_token_all (see test-tokenize.php) 
     38    **/ 
     39    void printToken(int token, const Lexer& lexer, const UnicodeString& content) 
     40    { 
     41        int begin = lexer.tokenBegin(); 
     42        int end = lexer.tokenEnd(); 
     43        //UnicodeString tokenText = content.replace(begin, end-begin+1,"\n", "\\n"); 
     44        UnicodeString tokenText(content, begin, end-begin+1); 
     45        if (token == parser::Token_INLINE_HTML) { 
     46            cout << tokenText << " T_INLINE_HTML" << endl; 
     47        } else if (token == parser::Token_OPEN_TAG) { 
     48            cout << tokenText << " T_OPEN_TAG" << endl; 
     49        } else if (token == parser::Token_CLOSE_TAG) { 
     50            cout << tokenText << " T_CLOSE_TAG" << endl; 
     51        } else if (token == parser::Token_ECHO) { 
     52            cout << tokenText << " T_ECHO" << endl; 
     53        } else if (token == parser::Token_WHITESPACE) { 
     54            cout << tokenText << " T_WHITESPACE" << endl; 
     55        } else if (token == parser::Token_CONSTANT_ENCAPSED_STRING) { 
     56            cout << tokenText << " T_CONSTANT_ENCAPSED_STRING" << endl; 
     57        } else if (token == parser::Token_SEMICOLON) { 
     58            cout << tokenText << " ;" << endl; 
     59        } else if (token == parser::Token_VARIABLE) { 
     60            cout << tokenText << " T_VARIABLE" << endl; 
     61        } else if (token == parser::Token_DOUBLE_QUOTE) { 
     62            cout << tokenText << " \"" << endl; 
     63        } else if (token == parser::Token_ENCAPSED_AND_WHITESPACE) { 
     64            cout << tokenText << " T_ENCAPSED_AND_WHITESPACE" << endl; 
     65        } else if (token == parser::Token_OBJECT_OPERATOR) { 
     66            cout << tokenText << " T_OBJECT_OPERATOR" << endl; 
     67        } else if (token == parser::Token_LBRACKET) { 
     68            cout << tokenText << " [" << endl; 
     69        } else if (token == parser::Token_RBRACKET) { 
     70            cout << tokenText << " ]" << endl; 
     71        } else if (token == parser::Token_NUM_STRING) { 
     72            cout << tokenText << " T_NUM_STRING" << endl; 
     73        } else if (token == parser::Token_STRING) { 
     74            cout << tokenText << " T_STRING" << endl; 
     75        } else if (token == parser::Token_ASSIGN) { 
     76            cout << tokenText << " =" << endl; 
     77        } else if (token == parser::Token_DNUMBER) { 
     78            cout << tokenText << " T_DNUMBER" << endl; 
     79        } else if (token == parser::Token_LNUMBER) { 
     80            cout << tokenText << " T_LNUMBER" << endl; 
     81        } else if (token == parser::Token_PLUS) { 
     82            cout << tokenText << " +" << endl; 
     83        } else if (token == parser::Token_MINUS) { 
     84            cout << tokenText << " -" << endl; 
     85        } else if (token == parser::Token_CONCAT) { 
     86            cout << tokenText << " ." << endl; 
     87        } else if (token == parser::Token_INC) { 
     88            cout << tokenText << " T_INC" << endl; 
     89        } else if (token == parser::Token_DEC) { 
     90            cout << tokenText << " T_DEC" << endl; 
     91        } else if (token == parser::Token_IS_EQUAL) { 
     92            cout << tokenText << " T_IS_EQUAL" << endl; 
     93        } else if (token == parser::Token_IS_NOT_EQUAL) { 
     94            cout << tokenText << " T_IS_NOT_EQUAL" << endl; 
     95        } else if (token == parser::Token_IS_IDENTICAL) { 
     96            cout << tokenText << " T_IS_IDENTICAL" << endl; 
     97        } else if (token == parser::Token_IS_NOT_IDENTICAL) { 
     98            cout << tokenText << " T_IS_NOT_IDENTICAL" << endl; 
     99        } else if (token == parser::Token_IS_SMALLER) { 
     100            cout << tokenText << " <" << endl; 
     101        } else if (token == parser::Token_IS_GREATER) { 
     102            cout << tokenText << " >" << endl; 
     103        } else if (token == parser::Token_IS_SMALLER_OR_EQUAL) { 
     104            cout << tokenText << " T_IS_SMALLER_OR_EQUAL" << endl; 
     105        } else if (token == parser::Token_IS_GREATER_OR_EQUAL) { 
     106            cout << tokenText << " T_IS_GREATER_OR_EQUAL" << endl; 
     107        } else if (token == parser::Token_BOOLEAN_OR) { 
     108            cout << tokenText << " T_BOOLEAN_OR" << endl; 
     109        } else if (token == parser::Token_BOOLEAN_AND) { 
     110            cout << tokenText << " T_BOOLEAN_AND" << endl; 
     111        } else if (token == parser::Token_PLUS_ASSIGN) { 
     112            cout << tokenText << " T_PLUS_EQUAL" << endl; 
     113        } else if (token == parser::Token_MINUS_ASSIGN) { 
     114            cout << tokenText << " T_MINUS_EQUAL" << endl; 
     115        } else if (token == parser::Token_MUL_ASSIGN) { 
     116            cout << tokenText << " T_MUL_EQUAL" << endl; 
     117        } else if (token == parser::Token_DIV_ASSIGN) { 
     118            cout << tokenText << " T_DIV_EQUAL" << endl; 
     119        } else if (token == parser::Token_CONCAT_ASSIGN) { 
     120            cout << tokenText << " T_CONCAT_EQUAL" << endl; 
     121        } else if (token == parser::Token_MOD_ASSIGN) { 
     122            cout << tokenText << " T_MOD_EQUAL" << endl; 
     123        } else if (token == parser::Token_AND_ASSIGN) { 
     124            cout << tokenText << " T_AND_EQUAL" << endl; 
     125        } else if (token == parser::Token_OR_ASSIGN) { 
     126            cout << tokenText << " T_OR_EQUAL" << endl; 
     127        } else if (token == parser::Token_XOR_ASSIGN) { 
     128            cout << tokenText << " T_XOR_EQUAL" << endl; 
     129        } else if (token == parser::Token_SL_ASSIGN) { 
     130            cout << tokenText << " T_SL_EQUAL" << endl; 
     131        } else if (token == parser::Token_SR_ASSIGN) { 
     132            cout << tokenText << " T_SR_EQUAL" << endl; 
     133        } else if (token == parser::Token_BANG) { 
     134            cout << tokenText << " !" << endl; 
     135        } else if (token == parser::Token_QUESTION) { 
     136            cout << tokenText << " ?" << endl; 
     137        } else if (token == parser::Token_COLON) { 
     138            cout << tokenText << " :" << endl; 
     139        } else if (token == parser::Token_BIT_AND) { 
     140            cout << tokenText << " &" << endl; 
     141        } else if (token == parser::Token_BIT_OR) { 
     142            cout << tokenText << " |" << endl; 
     143        } else if (token == parser::Token_BIT_XOR) { 
     144            cout << tokenText << " ^" << endl; 
     145        } else if (token == parser::Token_SL) { 
     146            cout << tokenText << " T_SL" << endl; 
     147        } else if (token == parser::Token_SR) { 
     148            cout << tokenText << " T_SR" << endl; 
     149        } else if (token == parser::Token_MUL) { 
     150            cout << tokenText << " *" << endl; 
     151        } else if (token == parser::Token_DIV) { 
     152            cout << tokenText << " /" << endl; 
     153        } else if (token == parser::Token_MOD) { 
     154            cout << tokenText << " %" << endl; 
     155        } else if (token == parser::Token_TILDE) { 
     156            cout << tokenText << " ~" << endl; 
     157        } else if (token == parser::Token_LPAREN) { 
     158            cout << tokenText << " (" << endl; 
     159        } else if (token == parser::Token_RPAREN) { 
     160            cout << tokenText << " )" << endl; 
     161        } else if (token == parser::Token_LBRACE) { 
     162            cout << tokenText << " {" << endl; 
     163        } else if (token == parser::Token_RBRACE) { 
     164            cout << tokenText << " }" << endl; 
     165        } else if (token == parser::Token_COMMA) { 
     166            cout << tokenText << " ," << endl; 
     167        } else if (token == parser::Token_AT) { 
     168            cout << tokenText << " @" << endl; 
     169        } else if (token == parser::Token_INCLUDE) { 
     170            cout << tokenText << " T_INCLUDE" << endl; 
     171        } else if (token == parser::Token_INCLUDE_ONCE) { 
     172            cout << tokenText << " T_INCLUDE_ONCE" << endl; 
     173        } else if (token == parser::Token_EVAL) { 
     174            cout << tokenText << " T_EVAL" << endl; 
     175        } else if (token == parser::Token_REQUIRE) { 
     176            cout << tokenText << " T_REQUIRE" << endl; 
     177        } else if (token == parser::Token_REQUIRE_ONCE) { 
     178            cout << tokenText << " T_REQUIRE_ONCE" << endl; 
     179        } else if (token == parser::Token_PRINT) { 
     180            cout << tokenText << " T_PRINT" << endl; 
     181        } else if (token == parser::Token_ABSTRACT) { 
     182            cout << tokenText << " T_ABSTRACT" << endl; 
     183        } else if (token == parser::Token_BREAK) { 
     184            cout << tokenText << " T_BREAK" << endl; 
     185        } else if (token == parser::Token_CASE) { 
     186            cout << tokenText << " T_CASE" << endl; 
     187        } else if (token == parser::Token_CATCH) { 
     188            cout << tokenText << " T_CATCH" << endl; 
     189        } else if (token == parser::Token_CLASS) { 
     190            cout << tokenText << " T_CLASS" << endl; 
     191        } else if (token == parser::Token_CONST) { 
     192            cout << tokenText << " T_CONST" << endl; 
     193        } else if (token == parser::Token_CONTINUE) { 
     194            cout << tokenText << " T_CONTINUE" << endl; 
     195        } else if (token == parser::Token_DEFAULT) { 
     196            cout << tokenText << " T_DEFAULT" << endl; 
     197        } else if (token == parser::Token_DO) { 
     198            cout << tokenText << " T_DO" << endl; 
     199        } else if (token == parser::Token_ELSE) { 
     200            cout << tokenText << " T_ELSE" << endl; 
     201        } else if (token == parser::Token_EXTENDS) { 
     202            cout << tokenText << " T_EXTENDS" << endl; 
     203        } else if (token == parser::Token_FINAL) { 
     204            cout << tokenText << " T_FINAL" << endl; 
     205        } else if (token == parser::Token_FOR) { 
     206            cout << tokenText << " T_FOR" << endl; 
     207        } else if (token == parser::Token_IF) { 
     208            cout << tokenText << " T_IF" << endl; 
     209        } else if (token == parser::Token_IMPLEMENTS) { 
     210            cout << tokenText << " T_IMPLEMENTS" << endl; 
     211        } else if (token == parser::Token_INSTANCEOF) { 
     212            cout << tokenText << " T_INSTANCEOF" << endl; 
     213        } else if (token == parser::Token_INTERFACE) { 
     214            cout << tokenText << " T_INTERFACE" << endl; 
     215        } else if (token == parser::Token_NEW) { 
     216            cout << tokenText << " T_NEW" << endl; 
     217        } else if (token == parser::Token_PRIVATE) { 
     218            cout << tokenText << " T_PRIVATE" << endl; 
     219        } else if (token == parser::Token_PROTECTED) { 
     220            cout << tokenText << " T_PROTECTED" << endl; 
     221        } else if (token == parser::Token_PUBLIC) { 
     222            cout << tokenText << " T_PUBLIC" << endl; 
     223        } else if (token == parser::Token_RETURN) { 
     224            cout << tokenText << " T_RETURN" << endl; 
     225        } else if (token == parser::Token_STATIC) { 
     226            cout << tokenText << " T_STATIC" << endl; 
     227        } else if (token == parser::Token_SWITCH) { 
     228            cout << tokenText << " T_SWITCH" << endl; 
     229        } else if (token == parser::Token_THROW) { 
     230            cout << tokenText << " T_THROW" << endl; 
     231        } else if (token == parser::Token_TRY) { 
     232            cout << tokenText << " T_TRY" << endl; 
     233        } else if (token == parser::Token_WHILE) { 
     234            cout << tokenText << " T_WHILE" << endl; 
     235        } else if (token == parser::Token_INT_CAST) { 
     236            cout << tokenText << " T_INT_CAST" << endl; 
     237        } else if (token == parser::Token_DOUBLE_CAST) { 
     238            cout << tokenText << " T_DOUBLE_CAST" << endl; 
     239        } else if (token == parser::Token_STRING_CAST) { 
     240            cout << tokenText << " T_STRING_CAST" << endl; 
     241        } else if (token == parser::Token_ARRAY_CAST) { 
     242            cout << tokenText << " T_ARRAY_CAST" << endl; 
     243        } else if (token == parser::Token_OBJECT_CAST) { 
     244            cout << tokenText << " T_OBJECT_CAST" << endl; 
     245        } else if (token == parser::Token_BOOL_CAST) { 
     246            cout << tokenText << " T_BOOL_CAST" << endl; 
     247        } else if (token == parser::Token_UNSET_CAST) { 
     248            cout << tokenText << " T_UNSET_CAST" << endl; 
     249        } else if (token == parser::Token_CLONE) { 
     250            cout << tokenText << " T_CLONE" << endl; 
     251        } else if (token == parser::Token_EXIT) { 
     252            cout << tokenText << " T_EXIT" << endl; 
     253        } else if (token == parser::Token_ELSEIF) { 
     254            cout << tokenText << " T_ELSEIF" << endl; 
     255        } else if (token == parser::Token_ENDIF) { 
     256            cout << tokenText << " T_ENDIF" << endl; 
     257        } else if (token == parser::Token_ENDWHILE) { 
     258            cout << tokenText << " T_ENDWHILE" << endl; 
     259        } else if (token == parser::Token_ENDFOR) { 
     260            cout << tokenText << " T_ENDFOR" << endl; 
     261        } else if (token == parser::Token_FOREACH) { 
     262            cout << tokenText << " T_FOREACH" << endl; 
     263        } else if (token == parser::Token_ENDFOREACH) { 
     264            cout << tokenText << " T_ENDFOREACH" << endl; 
     265        } else if (token == parser::Token_DECLARE) { 
     266            cout << tokenText << " T_DECLARE" << endl; 
     267        } else if (token == parser::Token_ENDDECLARE) { 
     268            cout << tokenText << " T_ENDDECLARE" << endl; 
     269        } else if (token == parser::Token_AS) { 
     270            cout << tokenText << " T_AS" << endl; 
     271        } else if (token == parser::Token_ENDSWITCH) { 
     272            cout << tokenText << " T_ENDSWITCH" << endl; 
     273        } else if (token == parser::Token_FUNCTION) { 
     274            cout << tokenText << " T_FUNCTION" << endl; 
     275        } else if (token == parser::Token_USE) { 
     276            cout << tokenText << " T_USE" << endl; 
     277        } else if (token == parser::Token_GLOBAL) { 
     278            cout << tokenText << " T_GLOBAL" << endl; 
     279        } else if (token == parser::Token_VAR) { 
     280            cout << tokenText << " T_VAR" << endl; 
     281        } else if (token == parser::Token_UNSET) { 
     282            cout << tokenText << " T_UNSET" << endl; 
     283        } else if (token == parser::Token_ISSET) { 
     284            cout << tokenText << " T_ISSET" << endl; 
     285        } else if (token == parser::Token_ISSET) { 
     286            cout << tokenText << " T_ISSET" << endl; 
     287        } else if (token == parser::Token_EMPTY) { 
     288            cout << tokenText << " T_EMPTY" << endl; 
     289        } else if (token == parser::Token_HALT_COMPILER) { 
     290            cout << tokenText << " T_HALT_COMPILER" << endl; 
     291        } else if (token == parser::Token_DOUBLE_ARROW) { 
     292            cout << tokenText << " T_DOUBLE_ARROW" << endl; 
     293        } else if (token == parser::Token_LIST) { 
     294            cout << tokenText << " T_LIST" << endl; 
     295        } else if (token == parser::Token_ARRAY) { 
     296            cout << tokenText << " T_ARRAY" << endl; 
     297        } else if (token == parser::Token_CLASS_C) { 
     298            cout << tokenText << " T_CLASS_C" << endl; 
     299        } else if (token == parser::Token_METHOD_C) { 
     300            cout << tokenText << " T_METHOD_C" << endl; 
     301        } else if (token == parser::Token_FUNC_C) { 
     302            cout << tokenText << " T_FUNC_C" << endl; 
     303        } else if (token == parser::Token_LINE) { 
     304            cout << tokenText << " T_LINE" << endl; 
     305        } else if (token == parser::Token_FILE) { 
     306            cout << tokenText << " T_FILE" << endl; 
     307        } else if (token == parser::Token_COMMENT) { 
     308            cout << tokenText << " T_COMMENT" << endl; 
     309        } else if (token == parser::Token_DOC_COMMENT) { 
     310            cout << tokenText << " T_DOC_COMMENT" << endl; 
     311        } else if (token == parser::Token_PAAMAYIM_NEKUDOTAYIM) { 
     312            cout << tokenText << " T_DOUBLE_COLON" << endl; 
     313        } else if (token == parser::Token_OPEN_TAG_WITH_ECHO) { 
     314            cout << tokenText << " T_OPEN_TAG_WITH_ECHO" << endl; 
     315        } else if (token == parser::Token_CURLY_OPEN) { 
     316            cout << tokenText << " T_CURLY_OPEN" << endl; 
     317        } else if (token == parser::Token_STRING_VARNAME) { 
     318            cout << tokenText << " T_STRING_VARNAME" << endl; 
     319        } else if (token == parser::Token_DOLLAR_OPEN_CURLY_BRACES) { 
     320            cout << tokenText << " T_DOLLAR_OPEN_CURLY_BRACES" << endl; 
     321        } else if (token == parser::Token_DOLLAR) { 
     322            cout << tokenText << " $" << endl; 
     323        } else if (token == parser::Token_LOGICAL_XOR) { 
     324            cout << tokenText << " T_LOGICAL_XOR" << endl; 
     325        } else if (token == parser::Token_LOGICAL_AND) { 
     326            cout << tokenText << " T_LOGICAL_AND" << endl; 
     327        } else if (token == parser::Token_LOGICAL_OR) { 
     328            cout << tokenText << " T_LOGICAL_OR" << endl; 
     329        } else if (token == parser::Token_START_HEREDOC) { 
     330            cout << tokenText << " T_START_HEREDOC" << endl; 
     331        } else if (token == parser::Token_END_HEREDOC) { 
     332            cout << tokenText << " T_END_HEREDOC" << endl; 
     333        } else if (token == parser::Token_BACKTICK) { 
     334            cout << tokenText << " `" << endl; 
     335        } else if (token == 0) { 
     336            cout << tokenText << " end of file" << endl; 
     337        } else { 
     338            cout << tokenText << " unknown token" << token; 
     339        } 
    35340    } 
     341     
     342    void pDriver::dumpTokens(string fileName) { 
     343 
     344        ifstream inFile; 
     345     
     346        inFile.open(fileName.c_str(), ifstream::in); 
     347        if (!inFile) { 
     348            cout << "Unable to open file: " << endl; 
     349            exit(1); // terminate with error 
     350        } 
     351 
     352        UnicodeString contents; 
     353        char buf[512]; 
     354        while (inFile) { 
     355            inFile.getline(buf, 512); 
     356            //cout << "read: " << buf << endl; 
     357            contents += buf; 
     358        } 
     359         
     360        inFile.close(); 
     361         
     362        Lexer lexer(0, contents); 
     363        int token; 
     364        while ((token = lexer.nextTokenKind())) { 
     365            printToken(token, lexer, contents); 
     366        } 
     367        printToken(token, lexer, contents); 
     368 
     369    } 
    36370 
    37371} 
  • trunk/rphp/compiler/pDriver.h

    r611 r614  
    2121#define RPHP_PDRIVER_H_ 
    2222 
     23#include <string> 
    2324 
    2425namespace rphp { 
     
    2829 
    2930        public: 
    30             void compile(std::string fileName); 
     31            void dumpTokens(std::string fileName); 
    3132 
    3233    }; 
  • trunk/rphp/compiler/parser/phplexer.cpp

    r606 r614  
    101101    } 
    102102    int pos = m_curpos; 
    103     #ifdef THOMAS_TEMP_DISABLED 
    104     // TODO temp. disabled code, tokenBegin was not in use 
    105103    m_tokenBegin = m_curpos; 
    106     #endif 
    107104    switch ( state() ) 
    108105    { 
     
    149146                while (m_curpos < m_contentSize && lookAt( pos ) == ' ') { 
    150147                    if ( lookAt( pos ) == '\n') createNewline(m_curpos); 
    151                     pos; // weiterspringen!!!! 
     148                    pos++; // weiterspringen!!!! 
    152149                    m_curpos++; 
    153150                } 
  • trunk/rphp/compiler/parser/phplexer.h

    r606 r614  
    6565    bool isHeredocEnd(const UChar32& it, int pos); 
    6666 
    67     UChar32 lookAt( int pos ){ return m_content.char32At( m_curpos ); } 
     67    UChar32 lookAt( int pos ){ return m_content.char32At( pos ); } 
    6868 
    6969    std::vector<int> m_state; // was: QStack<int> 
  • trunk/rphp/frontend/cli/main.cpp

    r612 r614  
    3838    std::vector<std::string> infiles = vm["input-file"].as< std::vector<std::string> >(); 
    3939    for (std::vector<std::string>::iterator it = infiles.begin(); it!=infiles.end(); ++it) { 
    40         driver.compile(*it); 
     40        driver.dumpTokens(*it); 
    4141    } 
    4242