crocopat.cpp

Go to the documentation of this file.
00001 /*
00002  * CrocoPat is a tool for relational programming.
00003  * This file is part of CrocoPat. 
00004  *
00005  * Copyright (C) 2002-2008  Dirk Beyer
00006  *
00007  * CrocoPat is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU Lesser General Public License
00009  * as published by the Free Software Foundation; either
00010  * version 2.1 of the License, or (at your option) any later version.
00011  *
00012  * CrocoPat is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015  * Lesser General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU Lesser General Public License
00018  * along with CrocoPat; if not, write to the Free Software
00019  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00020  *
00021  * Please find the GNU Lesser General Public License in file
00022  * License_LGPL.txt or at http://www.gnu.org/licenses/lgpl.txt
00023  *
00024  * Author:
00025  * Dirk Beyer (firstname.lastname@sfu.ca)
00026  * Simon Fraser University
00027  *
00028  * With contributions of: Andreas Noack, Michael Vogel
00029  */
00030 
00031 #include "bddBdd.h"
00032 #include "relStatement.h"
00033 #include "FlexLexer.h"
00034 #include "relReaderWriter.h"
00035 
00036 #include <fstream>
00037 #include <sstream>
00038 #include <cstdlib>
00039 #include <cstdio>
00040 #include <cmath>
00041 #include <csignal>
00042 #include <unistd.h>
00043 #include <time.h>
00044 using namespace std;
00045 
00047 extern int yyparse(void);
00048 
00050 yyFlexLexer               gScanner;
00051 int                       gNoParseErrs       = 0;
00052 bddSymTab*                gSymTab            = NULL;  // For parser+relExpression.
00053 relStatement*             gSyntaxTree        = NULL;
00054 map<string, relDataType*> gVariables;
00055 bool                      gPrintWarnings     = true;
00056 
00058 const unsigned gRSFLineLength   = 100000; // Default: 100000 chars per RSF line max.
00059 const char*    gRSFDelimiter    = " \t\f\r\v\n";
00060 char*          gReadPos         =   NULL; // Reading position for input data.
00061 const unsigned gAttributeNum    =   1000; // Default:   1000 internal attributes.
00062 const char     gAttributePrefix =    '.'; // Prefix for internal attributes.
00063 set<string>    gValueUniverse;
00064 
00066 clock_t gStart = 0;
00067 
00069 char**  gArgv;
00070 int     gArgc;
00071 
00072 
00074 double 
00075 string2double(string pStr)
00076 {
00077   return atof(pStr.c_str());
00078 }
00079 
00081 string 
00082 double2string(double pNum)
00083 {
00084   string result;
00085   stringstream lStringStream;
00086   lStringStream << pNum;
00087   lStringStream >> result;
00088   return result;
00089 }
00090 
00092 string 
00093 unsigned2string(unsigned pUnsigned)
00094 {
00095   string result;
00096   stringstream lStringStream;
00097   lStringStream << pUnsigned;
00098   lStringStream >> result;
00099   return result;
00100 }
00101 
00104 double elapsed() {
00105   clock_t lNow = clock();
00106   double lElapsed = ((double) (lNow - gStart)) / CLOCKS_PER_SEC;
00107   gStart = lNow;
00108   return lElapsed;
00109 }
00110 
00114 void 
00115 signal_handler(int pSignal)
00116 {
00117   cerr << "\nSignal 'program interrupt' (SIGINT) received." << endl
00118        << "Terminating the program." << endl;
00119   exit(EXIT_FAILURE);
00120 }
00121 
00126 bool
00127 readValue(string& pValue, int pLineNo)
00128 {
00129   pValue = "";
00130   char* lEntry = NULL;
00131   bool inString = false;   // 'true' if we have seen the opening '"'.
00132   do {
00133     lEntry = strtok(gReadPos, gRSFDelimiter); // Read entry.
00134     gReadPos = NULL;                          // Use stored position next time.
00135     if (lEntry == NULL) {                     // No more values at this line.
00136       if (inString && gPrintWarnings) {
00137         cerr << "RSF reader warning at line " << pLineNo
00138              << ": Closing double quote for string missing." << endl;
00139       }
00140       return false;
00141     }
00142 
00143     if (inString) {
00144       pValue += ' ' + string(lEntry);         // Append.
00145     } else {
00146       pValue = string(lEntry);                // New.
00147     }
00148     if (inString) {
00149       // If the last char is the double quote.
00150       if (pValue[pValue.length()-1] == '"') {
00151         if (pValue[pValue.length()-2] != '\\') {
00152           inString = false;                     // Finish string scanning.
00153         }
00154       }
00155     } else {
00156       // If the first char is the double quote, and
00157       //   if size > 1  then  last char is not the double quote.
00158       if ( pValue[0] == '"' &&
00159            (!(pValue.size() > 1) || pValue[pValue.length()-1] != '"') ) {
00160         inString = true;                      // Start string scanning.
00161       } 
00162     }
00163   } while (inString);
00164   return true;
00165 }
00166 
00169 void
00170 file2vector(istream& pDataInStream, vector< vector<string> >& pRelationVector)
00171 {
00172   char* lLine = new char[gRSFLineLength];
00173   while (pDataInStream.good()) {
00174     pDataInStream.getline(lLine, gRSFLineLength);
00175     const int lLineLength = strlen(lLine);
00176     if (lLine[0] == '.') {                      // '.' found -> EOF.
00177       delete lLine;
00178       return;
00179     }
00180     vector<string> lRow;
00181     if (lLine[0] == '#') {                      // '#' found -> comment.
00182       pRelationVector.push_back(lRow);          // Add empty row.
00183       continue;
00184     }
00185     if (lLineLength == gRSFLineLength-1) {
00186       cerr << "RSF reader error at line " << pRelationVector.size() + 1 
00187            << ": Maximum line length exceeded." << endl;
00188       exit(EXIT_FAILURE);
00189     }
00190     gReadPos = lLine;
00191     string lValue;
00192     while (readValue(lValue, pRelationVector.size() + 1))
00193     {
00194       if (lValue[0] == '"'  &&  lValue[lValue.length()-1] == '"') {
00195         // Cut the quotes ('"') at begin and end of the string.
00196         lValue = string(lValue, 1, lValue.length() - 2);
00197         // Remember that it was quoted (for output of RSF relations).
00198         gSymTab->setQuoted(lValue);
00199       }
00200       if (lRow.size() == 0) {
00201         // (lRow.size() == 0) -> (lValue) is the name of the relation.
00202         map<string, relDataType*>::const_iterator 
00203           lVarIt = gVariables.find(lValue);
00204         if (lVarIt == gVariables.end()) {  // New relation.
00205           gVariables[lValue] = new bddRelation(gSymTab, false);
00206         }
00207       } else {
00208         // (lRow.size() > 0) -> Add value to value universe.
00209         gValueUniverse.insert(lValue);
00210       }
00211       // Add value to row vector.
00212       lRow.push_back(lValue);
00213     }
00214     pRelationVector.push_back(lRow);     // Add empty rows, too.
00215   }
00216   delete lLine;
00217   return;
00218 }
00219 
00227 void 
00228 createBddRelation(vector< vector<string> >& pRelation)
00229 {
00230   // Insert relation tuples to BDD.
00231   for( unsigned i = 0; i < pRelation.size(); ++i )
00232   {
00233     if( pRelation[i].size() != 0 ) {    // Skip empty rows.
00234       // Find the BDD for the current relation variable with name (*it)[0].
00235       // Unite all tuples to the current relation.
00236       string lVar = pRelation[i][0];
00237       map<string, relDataType*>::const_iterator lVarIt = gVariables.find(lVar);
00238       assert(lVarIt != gVariables.end());  // Must be declared.
00239       assert(lVarIt->second != NULL);
00240       bddRelation* lResult = dynamic_cast<bddRelation*>(lVarIt->second);
00241       if (lResult == NULL) {
00242         cerr << "RSF reader error at line " << i+1 << ":" << endl
00243              << "'" << lVarIt->first 
00244              << "' is a predefined standard variable." << endl;
00245         exit(EXIT_FAILURE);
00246       }
00247 
00248       // Some type checking: Track and check arity.
00249       int lArity = pRelation[i].size()-1;
00250       if (lResult->mArity == -1) {
00251         lResult->mArity = lArity;
00252       } else {
00253         if (lResult->mArity != lArity) {
00254           if (gPrintWarnings) {
00255             cerr << "RSF reader warning at line " << i+1 << ": Arity mismatch." << endl
00256                  << "'" << lVar << "' was initialized with arity " << lResult->mArity 
00257                  << " but now gets a tuple of arity " << lArity << "." << endl;
00258           }
00259         }
00260       }
00261 
00262       // Build the tuple-BDD for one row of the input file.
00263       bddRelation bddTuple(gSymTab, true);
00264       // First position of the tuple is the relation name, don't include.
00265       for( unsigned j = 1; j < pRelation[i].size(); ++j)
00266       {
00267         if( j > gAttributeNum ) {
00268           cerr << "RSF reader error at line " << i+1 
00269                << ": Maximum arity (" << gAttributeNum << ") exceeded." << endl;
00270           exit(EXIT_FAILURE);
00271         }
00272         bddTuple.intersect( 
00273                            bddRelation::mkAttributeValue(gSymTab,
00274                                                          gAttributePrefix + unsigned2string(j-1),
00275                                                          pRelation[i][j] )
00276                            );
00277       }
00278       lResult->unite(bddTuple);
00279     }
00280   }
00281 }
00282 
00284 #define STRINGIFY(x) #x
00285 #define EXPAND(x) STRINGIFY(x)
00286 void
00287 printVersion(void) 
00288 {
00289   cout << "CrocoPat 2.1.4, 2008-02-15 "
00290        << "(Rev " << EXPAND(REVISION) << ", Build " << EXPAND(BUILDTIME) << ")." << endl
00291        << "Copyright (C) 2002-2008  Dirk Beyer (Simon Fraser University)." << endl
00292        << "CrocoPat is free software, released under the GNU LGPL." << endl;
00293 }
00294 
00297 void
00298 printHelp(void) 
00299 {
00300   cout << endl
00301        << "This is CrocoPat, a tool for relational programming." << endl
00302        << endl
00303        << "Usage: crocopat [OPTION]... FILE [ARGUMENT]..." << endl
00304        << "Execute RML (Relation Manipulation Language) program FILE." << endl
00305        << "ARGUMENTs are passed to the RML program." << endl
00306        << "Options:" << endl
00307        << "  -e           do not read RSF data from stdin." << endl
00308        << "  -h           display this help message and exit." << endl
00309        << "  -m NUMBER    approximate memory for BDD package in MB (default 50)." << endl
00310        << "  -q           quiet mode, supress warnings." << endl
00311        << "  -v           print version information and exit." << endl
00312        << endl
00313        << "Input data are read from stdin, unless option -e is given." << endl
00314        << endl
00315        << "http://www.cs.sfu.ca/~dbeyer/CrocoPat/" << endl 
00316        << endl
00317        << "Report bugs to Dirk Beyer." << endl
00318        << endl;
00319 }
00320 
00323 int 
00324 crocopat(int argc, char *argv []) 
00325 {
00326   // Time measurement.
00327   gStart = clock();
00328 
00329   // Initialize input stream. Default: stdin.
00330   istream* gDataInStream = &cin;
00331 
00332   // Initial value for BDD pkg size.
00333   int gBddPkgSizeMB = 50;   // Default: 50 MB. Changed by cmd line option.
00334 
00335   // Handle command line options.
00336   int c;
00337   while ( (c = getopt(argc, argv, "ehm:qv")) != -1 ) {
00338     switch (c) {
00339     case 'e':
00340       // No input data.
00341       gDataInStream = NULL;
00342       break;
00343     case 'h':
00344       printHelp();
00345       exit(EXIT_SUCCESS);
00346     case 'm':
00347       // Memory size for BDD package.
00348       gBddPkgSizeMB = atoi(optarg);
00349       assert(gBddPkgSizeMB > 0);
00350       break;
00351     case 'q':   
00352       gPrintWarnings = false;
00353       break;
00354     case 'v':   
00355       printVersion();
00356       exit(EXIT_SUCCESS);
00357     }
00358   }
00359 
00360   
00361   // If no program file is given, print help text.
00362   if (optind >= argc) {
00363     printHelp();
00364     exit(EXIT_SUCCESS);
00365   }
00366   // Initialize program input stream.
00367   ifstream lProgramStream(argv[optind], ios::in);
00368   istream* gProgramStream = &lProgramStream;
00369   if ( !lProgramStream.good() ) {
00370     cerr << "Error: Cannot open program file '" << argv[optind] << "'." << endl;
00371     exit(EXIT_FAILURE);
00372   }
00373   
00374   // Ignore the rest of the command line now,
00375   //   may be the arguments are used by the program.
00376   // User should access argv[optind+1] by number 1.
00377   // Initialize global variables for access by the interpreter.
00378   gArgv = argv + optind;
00379   gArgc = argc - optind;
00380 
00381   // Install the signal handler for keyboard interrupt (C-c).
00382   signal(SIGINT, &signal_handler);
00383 
00384   // Initialize symbol table with (gAttributeNum) internal attributes.
00385   {
00386     gSymTab = new bddSymTab();
00387     // Add internal variables to symtab.
00388     //   '.X0', ..., '.Xn'.
00389     for( unsigned i = 0; i < gAttributeNum; ++i) {
00390       gSymTab->addAttribute( gAttributePrefix + unsigned2string(i) ); 
00391     }
00392   }
00393   // Initialize BDD package.
00394   {
00395     // Default: ca. 2000000 BDD nodes (50 MB);
00396     //unsigned lNrNodes  = gBddPkgSizeMB * (1024 * 1024) / 36;
00397     unsigned lNrNodes  = gBddPkgSizeMB * 30000;
00398     unsigned lHashSize = (unsigned) ( log((float)lNrNodes) / log(2.0) );
00399     // Initialize BDD package.
00400     bddBdd::init(lNrNodes, lHashSize, lHashSize, lHashSize - 4);
00401   }
00402   // Declare predefined (internal or constant) variables.
00403   {
00404     // Variables for exit status and number of arguments.
00405     gVariables["exitStatus"]  = new relNumber(0);
00406     gVariables["argCount"]    = new relNumber(gArgc);
00407 
00408     // Declare predefined relations. Constant, cannot be changed by user.
00409     gVariables["TRUE"]  = new bddRelationConst(bddRelation(gSymTab, true));
00410     gVariables["FALSE"] = new bddRelationConst(bddRelation(gSymTab, false));
00411     // These are initialized later.
00412     gVariables["="]     = new bddRelationConst(bddRelation(gSymTab, false));
00413     gVariables["!="]    = new bddRelationConst(bddRelation(gSymTab, false));
00414     gVariables["<"]     = new bddRelationConst(bddRelation(gSymTab, false));
00415     gVariables["<="]    = new bddRelationConst(bddRelation(gSymTab, false));
00416     gVariables[">"]     = new bddRelationConst(bddRelation(gSymTab, false));
00417     gVariables[">="]    = new bddRelationConst(bddRelation(gSymTab, false));
00418   }
00419   
00420 
00421   {
00422     vector< vector<string> > lRelationVector;
00423 
00424     // Read relation from data input stream.
00425     if (gDataInStream != NULL) {  // NULL pointer means don't read any input data.
00426       file2vector(*gDataInStream, lRelationVector);
00427     }
00428 
00429 
00430     // Read syntax tree (program parser).
00431     {
00432       // Initialize scanner input.
00433       gScanner.yyrestart( gProgramStream );
00434       // Parse error?
00435       if( yyparse() || gNoParseErrs > 0) {
00436         exit(EXIT_FAILURE);
00437       }
00438       lProgramStream.close();
00439     }
00440     // Now the syntax tree is contained in 'gSyntaxTree'.
00441 
00442 
00443     // Now we prepare the symbol table to support real BDD operations.
00444     gSymTab->initValueUniverse(gValueUniverse);
00445 
00446     // Transform relation from vector to BDD representation.
00447     createBddRelation(lRelationVector);
00448     // Now all relations are assigned to relational variables.
00449   }
00450 
00451 
00452   // Initialize predefined constant relations.
00453   {
00454     delete gVariables["="];
00455     delete gVariables["!="];
00456     delete gVariables["<"];
00457     delete gVariables["<="];
00458     delete gVariables[">"];
00459     delete gVariables[">="];
00460 
00461     bddRelation
00462     lRelEqual = bddRelation::mkEqual(gSymTab, 
00463                                      gAttributePrefix + unsigned2string(0),
00464                                      gAttributePrefix + unsigned2string(1));
00465     gVariables["="]     = new bddRelationConst(lRelEqual);
00466 
00467     bddRelation 
00468     lRelTmp = lRelEqual;
00469     lRelTmp.complement();
00470     gVariables["!="]     = new bddRelationConst(lRelTmp);
00471 
00472     bddRelation lRelLess  = bddRelation::mkLess(gSymTab, 
00473                                                 gAttributePrefix + unsigned2string(0),
00474                                                 gAttributePrefix + unsigned2string(1));
00475     gVariables["<"]     = new bddRelationConst(lRelLess);
00476 
00477     lRelTmp = lRelLess;
00478     lRelTmp.unite(lRelEqual);
00479     gVariables["<="]    = new bddRelationConst(lRelTmp);
00480 
00481     lRelTmp.complement();
00482     gVariables[">"]     = new bddRelationConst(lRelTmp);
00483 
00484     lRelTmp.unite(lRelEqual);
00485     gVariables[">="]    = new bddRelationConst(lRelTmp);
00486   }
00487 
00488   // Interpret syntax tree.
00489   gSyntaxTree->interpret(gSymTab);
00490 
00491 
00492   // Free memory.
00493   {
00494     // Free relation variables.
00495     for( map<string, relDataType*>::iterator 
00496            lIt = gVariables.begin();
00497          lIt != gVariables.end();
00498          ++lIt)
00499     {
00500       delete lIt->second;
00501     }
00502     gVariables.clear();
00503     
00504     // Free syntax tree.
00505     delete gSyntaxTree;
00506     
00507     // Uninitialize BDD storage.
00508     bddBdd::done();
00509 
00510     // Free symbol table.
00511     delete gSymTab;
00512   }
00513 
00514   // For controlling deallocation.
00515   if( relObject::GetCount() != 0 )
00516   {
00517     cerr << "######################################################" << endl
00518          << "Internal error: We still have " << relObject::GetCount()
00519          << " relObjects after deallocation (should be 0)." << endl;
00520   }
00521   
00522   return(EXIT_SUCCESS);
00523 };
00524 
00526   

Generated on Fri Jun 6 22:21:09 2008 for CrocoPat by  doxygen 1.5.1