Changeset 640
- Timestamp:
- 08/26/08 15:13:49 (3 months ago)
- Files:
-
- trunk/rphp/compiler/CMakeLists.txt (modified) (1 diff)
- trunk/rphp/compiler/pAST.h (added)
- trunk/rphp/compiler/pASTVisitors.cpp (added)
- trunk/rphp/compiler/pASTVisitors.h (added)
- trunk/rphp/compiler/pDriver.cpp (modified) (4 diffs)
- trunk/rphp/compiler/pLexers.h (modified) (4 diffs)
- trunk/rphp/compiler/pParser.h (modified) (7 diffs)
- trunk/rphp/compiler/pTokens.h (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
trunk/rphp/compiler/CMakeLists.txt
r637 r640 23 23 24 24 set(EVAL_SRC_FILES 25 pASTVisitors.cpp 25 26 pDriver.cpp 26 27 ) trunk/rphp/compiler/pDriver.cpp
r639 r640 22 22 #include <fstream> 23 23 #include <string> 24 #include <sstream> 24 25 #include <unicode/unistr.h> 25 26 #include <unicode/ustream.h> … … 306 307 pLangLexer lexer(tokens); 307 308 309 string tokID; 310 stringstream val; 311 308 312 std::string::iterator source_it = contents.begin(); 309 313 for (tokIteratorType iter = lexer.begin(source_it, contents.end()); iter != lexer.end(); ++iter) 310 314 { 311 std::cout << "tok: " << (*iter).id() << " >" << (*iter).value() << "<" << std::endl; 312 if ((*iter).id() == 0) 313 break; 315 if ((*iter).id() == 0) { 316 // if we didn't match, we switch to state 1 which is our skip_toks (i.e. whitespace, comments) 317 iter.set_state(1); 318 // if we still haven't matched, then we have a lexer error or end of input 319 if ((*iter).id() == 0) 320 break; 321 val.str(""); 322 if ((*iter).id() != T_WHITESPACE) 323 val << (*iter).value(); 324 tokID = getTokenDescription((*iter).id()); 325 if (tokID.size() == 0) 326 tokID = val.str(); 327 std::cout << val.str() << " " << tokID << std::endl; 328 // always switch back 329 iter.set_state(0); 330 } 331 else { 332 // matched 333 val.str(""); 334 if ((*iter).id() != T_WHITESPACE) 335 val << (*iter).value(); 336 tokID = getTokenDescription((*iter).id()); 337 if (tokID.size() == 0) 338 tokID = val.str(); 339 std::cout << val.str() << " " << tokID << std::endl; 340 } 314 341 } 315 342 … … 330 357 331 358 void pDriver::dumpAST(string fileName) { 332 359 /* 333 360 ifstream inFile; 334 361 … … 347 374 bool r = phrase_parse(iter, end, parser, in_state(ws)[tokens.skip_toks]); 348 375 349 if (r && iter == end) 350 { 351 std::cout << "-------------------------\n"; 352 std::cout << "Parsing succeeded\n"; 353 std::cout << "-------------------------\n"; 354 } 355 else 356 { 357 std::cout << "-------------------------\n"; 376 if (!r || iter != end) { 358 377 std::cout << "Parsing failed\n"; 359 std::cout << "-------------------------\n";360 }361 362 /*363 ifstream inFile;364 365 inFile.open(fileName.c_str(), ifstream::in);366 if (!inFile) {367 cout << "Unable to open file: " << endl;368 exit(1); // terminate with error369 }370 371 UnicodeString contents;372 char buf[512];373 while (inFile) {374 inFile.getline(buf, 512);375 //cout << "read: " << buf << endl;376 contents += buf;377 }378 379 inFile.close();380 381 parser p;382 p.set_token_stream( new parser::token_stream_type() );383 p.set_memory_pool( new parser::memory_pool_type() );384 p.setDebug( true );385 386 p.tokenize(contents);387 start_ast* phpAst;388 bool matched = p.parse_start(&phpAst);389 if( matched )390 {391 std::cout << "Successfully parsed" << std::endl;392 debug_visitor dv;393 dv.visit_start(phpAst);394 }else395 {396 //*ast = 0;397 //std::cout << p.expected_symbol(ast_node::Kind_start, "start");398 std::cout << "Couldn't parse content" << std::endl;399 378 } 400 379 */ trunk/rphp/compiler/pLexers.h
r637 r640 44 44 { 45 45 46 identifier = "[a-zA-Z_][a-zA-Z0-9_]*"; 47 variable = ("\\$[a-zA-Z_][a-zA-Z0-9_]*", T_VARIABLE); 48 constant = "[0-9]+"; 46 if_ = token_def<omitted>("if", T_IF); 47 while_ = token_def<omitted>("while", T_WHILE); 48 else_ = token_def<omitted>("else", T_ELSE); 49 echo = token_def<omitted>("echo", T_ECHO); 49 50 50 if_ = ("if", T_IF); 51 while_ = "while"; 52 else_ = "else"; 51 identifier = token_def<std::string>("[a-zA-Z_][a-zA-Z0-9_]*", T_IDENTIFIER); 52 53 //dqstring = token_def<omitted>("\"([^\"\\\\]|\\\\.)*\"", T_CONSTANT_ENCAPSED_STRING); 54 dqstring = token_def<omitted>("[\"][^\"]*[\"]", T_CONSTANT_ENCAPSED_STRING); 55 variable = token_def<std::string>("\\$[a-zA-Z_][a-zA-Z0-9_]*", T_VARIABLE); 56 lnumber = token_def<unsigned int>("[0-9]+", T_LNUMBER); 57 58 opentag = token_def<omitted>("<\\?", T_OPEN_TAG); 59 closetag = token_def<omitted>("\\?>", T_CLOSE_TAG); 53 60 54 61 skip_toks 55 = token_def<std::string>("[ \\t\\n]+" )56 | token_def<std::string>("\\/\\*[^*]*\\*+([^/*][^*]*\\*+)*\\/" )57 | token_def<std::string>("\\/\\/.*$" );62 = token_def<std::string>("[ \\t\\n]+", T_WHITESPACE) 63 | token_def<std::string>("\\/\\*[^*]*\\*+([^/*][^*]*\\*+)*\\/", T_ML_COMMENT) 64 | token_def<std::string>("\\/\\/.*$", T_SL_COMMENT); 58 65 59 66 // associate the tokens and the token set with the lexer 60 self += token_def<>('(') | ')' | '{' | '}' | '=' | ';' | constant; 61 self += if_ | while_ | else_ | identifier | variable; 67 self += token_def<>('(') | ')' | '{' | '}' | '=' | ';'; 68 self += if_ | while_ | else_ | echo | opentag | closetag; 69 self += identifier | variable | lnumber | dqstring; 62 70 63 71 // whitespace tokens in WS lexer state … … 71 79 72 80 // these tokens have no value 73 token_def<omitted> if_, while_, else_ ;81 token_def<omitted> if_, while_, else_, echo, opentag, closetag, dqstring; 74 82 75 83 // tokens with string value … … 77 85 78 86 // tokens with int value 79 token_def<unsigned int> constant; 87 // TODO: change this to pInt 88 token_def<unsigned int> lnumber; 80 89 81 90 // token set to be used as the skip parser (whitespace and comments) … … 105 114 typedef lexer<pLangTokens>::iterator_type tokIteratorType; 106 115 116 const char* getTokenDescription(const std::size_t t) { 117 118 switch (t) { 119 case T_VARIABLE: 120 return "T_VARIABLE"; 121 case T_WHITESPACE: 122 return "T_WHITESPACE"; 123 case T_ML_COMMENT: 124 return "T_ML_COMMENT"; 125 case T_SL_COMMENT: 126 return "T_SL_COMMENT"; 127 case T_ECHO: 128 return "T_ECHO"; 129 case T_OPEN_TAG: 130 return "T_OPEN_TAG"; 131 case T_CLOSE_TAG: 132 return "T_CLOSE_TAG"; 133 case T_LNUMBER: 134 return "T_LNUMBER"; 135 case T_INLINE_HTML: 136 return "T_INLINE_HTML"; 137 case T_IF: 138 return "T_IF"; 139 case T_ELSE: 140 return "T_ELSE"; 141 case T_WHILE: 142 return "T_WHILE"; 143 case T_IDENTIFIER: 144 return "T_IDENTIFIER"; 145 case T_CONSTANT_ENCAPSED_STRING: 146 return "T_CONSTANT_ENCAPSED_STRING"; 147 } 148 return ""; 149 150 } 107 151 108 152 } // namespace trunk/rphp/compiler/pParser.h
r639 r640 21 21 #define RPHP_PPARSER_H_ 22 22 23 //#define BOOST_SPIRIT_LEXERTL_DEBUG 24 //#define BOOST_SPIRIT_DEBUG 25 23 26 #include <boost/spirit/include/qi.hpp> 24 27 #include <boost/spirit/include/phoenix_core.hpp> … … 30 33 31 34 #include "pLexers.h" 35 #include "pAST.h" 32 36 33 37 using namespace boost::spirit; … … 72 76 template <typename TokenDef> 73 77 rphpLangGrammarDef(TokenDef const& tok) 74 : rphpLangGrammarDef::base_type( program, "program")78 : rphpLangGrammarDef::base_type(module, "module") 75 79 { 76 program 77 = +block 80 using boost::spirit::arg_names::_1; 81 using boost::spirit::arg_names::_2; 82 using boost::spirit::arg_names::_3; 83 using boost::spirit::arg_names::_4; 84 85 module 86 = *statement 78 87 ; 79 88 80 block89 statement_block 81 90 = '{' >> *statement >> '}' 82 91 ; 83 92 84 93 statement 85 = assignment 94 = statement_block 95 | assignment 86 96 | if_stmt 87 97 | while_stmt 98 | echo_stmt 88 99 ; 89 100 … … 91 102 = (tok.identifier >> '=' >> expression >> ';') 92 103 [ 93 std::cout << val("assignment statement to: ") << _1 << " \n"104 std::cout << val("assignment statement to: ") << _1 << " from "<< _2 << "\n" 94 105 ] 95 106 ; 96 107 97 108 if_stmt 98 = ( tok.if_ >> '(' >> expression > > ')' >>block99 >> -(tok.else_ >> block)109 = ( tok.if_ >> '(' >> expression > ')' >> statement_block 110 >> -(tok.else_ >> statement_block) 100 111 ) 101 112 [ … … 105 116 106 117 while_stmt 107 = (tok.while_ >> '(' >> expression >> ')' >> block)118 = (tok.while_ >> '(' >> expression >> ')' >> statement_block) 108 119 [ 109 120 std::cout << val("while expression: ") << _1 << "\n" 121 ] 122 ; 123 124 echo_stmt 125 = (tok.echo >> expression >> ';') 126 [ 127 std::cout << val("echo: ") << _1 << "\n" 110 128 ] 111 129 ; … … 117 135 = tok.identifier [ _val = _1 ] 118 136 | tok.variable [ _val = _1 ] 119 | tok. constant[ _val = _1 ]137 | tok.lnumber [ _val = _1 ] 120 138 ; 121 139 122 //BOOST_SPIRIT_DEBUG_NODE(program); 123 124 program.name("program"); 125 block.name("block"); 140 module.name("module"); 141 statement_block.name("statement block"); 126 142 statement.name("statement"); 127 143 assignment.name("assignment"); 128 144 if_stmt.name("if_stmt"); 129 145 while_stmt.name("while_stmt"); 146 echo_stmt.name("echo_stmt"); 130 147 131 on_error<fail>(program, parse_error_handler(_4, _3, _2)); 132 /* 133 on_error<fail>(block, error_handler(_4, _3, _2)); 134 on_error<fail>(statement, error_handler(_4, _3, _2)); 135 on_error<fail>(assignment, error_handler(_4, _3, _2)); 136 on_error<fail>(if_stmt, error_handler(_4, _3, _2)); 137 on_error<fail>(while_stmt, error_handler(_4, _3, _2)); 138 on_error<fail>(expression, error_handler(_4, _3, _2)); 139 */ 148 on_error<fail>(module, parse_error_handler(_4, _3, _2)); 140 149 141 150 } … … 144 153 typedef typename base_type::skipper_type skipper_type; 145 154 146 rule<Iterator, skipper_type> program, block, statement;155 rule<Iterator, skipper_type> module, statement, statement_block; 147 156 rule<Iterator, skipper_type> assignment, if_stmt; 148 rule<Iterator, skipper_type> while_stmt ;157 rule<Iterator, skipper_type> while_stmt, echo_stmt; 149 158 150 159 // the expression is the only rule having a return value trunk/rphp/compiler/pTokens.h
r637 r640 23 23 namespace rphp { 24 24 25 enum languageTokenList { 25 typedef enum { 26 26 27 T_VARIABLE = 1000, 27 T_IF 28 }; 28 T_WHITESPACE, 29 T_ML_COMMENT, 30 T_SL_COMMENT, 31 T_ECHO, 32 T_OPEN_TAG, 33 T_CLOSE_TAG, 34 T_LNUMBER, 35 T_INLINE_HTML, 36 T_IF, 37 T_ELSE, 38 T_WHILE, 39 T_IDENTIFIER, 40 T_CONSTANT_ENCAPSED_STRING 41 } languageTokenIDType; 29 42 30 43 }
