Main Page | Namespace List | Class Hierarchy | Alphabetical List | Compound List | File List | Compound Members | File Members

tr-stat.cc

Go to the documentation of this file.
00001 // Generate statistics from UCB traces
00002 // All we need to know: 
00003 // 
00004 // (1) client request streams: 
00005 //     <time> <clientID> <serverID> <URL_ID> 
00006 // (2) server page mod stream(s):
00007 //     <serverID> <URL_ID> <PageSize> <access times>
00008 //
00009 // Parts of this code are derived from Steven Gribble's UCB trace parsing code
00010 // 
00011 // $Header: /nfs/jade/vint/CVSROOT/ns-2/indep-utils/webtrace-conv/nlanr/tr-stat.cc,v 1.2 1999/07/09 21:19:08 haoboy Exp $
00012 
00013 #include <stdio.h>
00014 #include <stdlib.h>
00015 #include <string.h>
00016 #include <ctype.h>
00017 #include <time.h>
00018 #include <sys/types.h>
00019 #include <sys/socket.h>
00020 #include <netinet/in.h>
00021 #include <arpa/inet.h>
00022 #include <tcl.h>
00023 
00024 #include "logparse.h"
00025 
00026 Tcl_HashTable cidHash;  // Client id (IP, port) hash
00027 int client = 0;         // client sequence number
00028 
00029 Tcl_HashTable sidHash;  // server id (IP, port) hash
00030 int server = 0;         // server sequence number
00031 
00032 Tcl_HashTable urlHash;  // URL id hash
00033 int url = 0;            // URL sequence number
00034 int* umap;              // URL mapping table, used for url sort
// Per-URL record: identity, owning server, page size and access count.
struct URL {
        // A record is built with its access count already at 1.
        URL(int url_id, int server_id, int page_size)
                : access(1),
                  id(url_id),
                  sid(server_id),
                  size(page_size)
        {
        }

        int access;     // access counts
        int id;         // URL sequence number
        int sid;        // server sequence number
        int size;       // page size
};
00041 
// Output streams: cf is the request log, sf is the page log.
FILE *cf;
FILE *sf;

// Trace timing state; each starts at -1.
double initTime  = -1;  // timestamp of the first entry analysed
double duration  = -1;  // optional limit on how much of the trace to process
double startTime = -1;  // optional offset before which entries are skipped
00046 
// One logged client request: when it happened and which client, server
// and URL (all as sequence numbers) were involved.
struct ReqLog {
        double time;
        unsigned int cid;
        unsigned int sid;
        unsigned int url;

        // Default construction leaves the fields unset.
        ReqLog() {}

        ReqLog(double when, unsigned int client_id,
               unsigned int server_id, unsigned int url_id)
                : time(when), cid(client_id), sid(server_id), url(url_id) {}
};

// Request log buffer, number of entries in use, and allocated capacity.
ReqLog *rlog = NULL;
unsigned int num_rlog = 0;
unsigned int sz_rlog = 0;
00056 
00057 int compare(const void *a1, const void *b1)
00058 {
00059         const ReqLog *a = (const ReqLog*)a1, *b = (const ReqLog*)b1;
00060         return (a->time > b->time) ? 1 : 
00061                 (a->time == b->time) ? 0 : -1;
00062 }
00063 
00064 void sort_rlog()
00065 {
00066         qsort((void *)rlog, num_rlog, sizeof(ReqLog), compare);
00067         double t = rlog[0].time;
00068         for (unsigned int i = 0; i < num_rlog; i++) {
00069                 rlog[i].time -= t;
00070                 fprintf(cf, "%f %d %d %d\n", rlog[i].time, 
00071                         rlog[i].cid, rlog[i].sid, umap[rlog[i].url]);
00072         }
00073         delete []umap;
00074         // Record trace duration and # of unique urls
00075         fprintf(cf, "i %f %u\n", rlog[num_rlog-1].time, url);
00076 }
00077 
00078 int compare_url(const void* a1, const void* b1)
00079 {
00080         const URL **a = (const URL**)a1, **b = (const URL**)b1;
00081         return ((*a)->access > (*b)->access) ? -1:
00082                 ((*a)->access == (*b)->access) ? 0 : 1;
00083 }
00084 
00085 void sort_url()
00086 {
00087         // XXX use an interval member of Tcl_HashTable
00088         URL** tbl = new URL*[urlHash.numEntries];
00089         Tcl_HashEntry *he;
00090         Tcl_HashSearch hs;
00091         int i = 0, sz = urlHash.numEntries;
00092         for (he = Tcl_FirstHashEntry(&urlHash, &hs);
00093              he != NULL;
00094              he = Tcl_NextHashEntry(&hs))
00095                 tbl[i++] = (URL*)Tcl_GetHashValue(he);
00096         Tcl_DeleteHashTable(&urlHash);
00097 
00098         // sort using access frequencies
00099         qsort((void *)tbl, sz, sizeof(URL*), compare_url);
00100         umap = new int[url];
00101         // write sorted url to page table
00102         for (i = 0; i < sz; i++) {
00103                 umap[tbl[i]->id] = i;
00104                 fprintf(sf, "%d %d %d %u\n", tbl[i]->sid, i,
00105                         tbl[i]->size, tbl[i]->access);
00106                 delete tbl[i];
00107         }
00108         delete []tbl;
00109 }
00110 
00111 double lf_analyze(lf_entry& lfe)
00112 {
00113         double time;
00114         int ne, cid, sid, uid;
00115         Tcl_HashEntry *he;
00116 
00117         time = lfe.rt;
00118 
00119         if (initTime < 0) {
00120                 initTime = time;
00121                 time = 0;
00122         } else 
00123                 time -= initTime;
00124 
00125         // If a trace start time is required, don't do anything
00126         if ((startTime > 0) && (time < startTime)) 
00127                 return -1;
00128 
00129         // Ignore pages with size 0
00130         if (lfe.size == 0) 
00131                 return -1;
00132 
00133         // check client id
00134         if (!(he = Tcl_FindHashEntry(&cidHash, (const char *)lfe.cid))) {
00135                 // new client, allocate a client id
00136                 he = Tcl_CreateHashEntry(&cidHash, (const char *)lfe.cid, &ne);
00137                 Tcl_SetHashValue(he, ++client);
00138                 cid = client;
00139         } else {
00140                 // existing entry, find its client seqno
00141                 cid = (int)Tcl_GetHashValue(he);
00142         }
00143 
00144         // check server id
00145         if (!(he = Tcl_FindHashEntry(&sidHash, lfe.sid))) {
00146                 // new server, assign a server id
00147                 he = Tcl_CreateHashEntry(&sidHash, lfe.sid, &ne);
00148                 Tcl_SetHashValue(he, ++server);
00149                 sid = server;
00150         } else {
00151                 // existing entry, find its client seqno
00152                 sid = (int)Tcl_GetHashValue(he);
00153         }
00154 
00155         // check url id
00156         if (!(he = Tcl_FindHashEntry(&urlHash, lfe.url))) {
00157                 // new client, allocate a client id
00158                 he = Tcl_CreateHashEntry(&urlHash, lfe.url, &ne);
00159                 URL* u = new URL(++url, sid, lfe.size);
00160                 Tcl_SetHashValue(he, (const char*)u);
00161                 uid = u->id;
00162                 //fprintf(sf, "%d %d %ld\n", sid, u->id, lfe.rhl+lfe.rdl);
00163         } else {
00164                 // existing entry, find its client seqno
00165                 URL* u = (URL*)Tcl_GetHashValue(he);
00166                 u->access++;
00167                 uid = u->id;
00168         }
00169 
00170         rlog[num_rlog++] = ReqLog(time, cid, sid, uid);
00171         //fprintf(cf, "%f %d %d %d\n", time, cid, sid, uid);
00172 
00173         if (startTime > 0) 
00174                 return time - startTime;
00175         else 
00176                 return time;
00177 }
00178 
00179 int main(int argc, char**argv)
00180 {
00181         lf_entry lfntree;
00182         int      ret;
00183         double   ctime;
00184 
00185         // Init tcl
00186         Tcl_Interp *interp = Tcl_CreateInterp();
00187         if (Tcl_Init(interp) == TCL_ERROR) {
00188                 printf("%s\n", interp->result);
00189                 abort();
00190         }
00191         Tcl_InitHashTable(&cidHash, TCL_ONE_WORD_KEYS);
00192         Tcl_InitHashTable(&sidHash, TCL_STRING_KEYS);
00193         Tcl_InitHashTable(&urlHash, TCL_STRING_KEYS);
00194 
00195         if ((cf = fopen("reqlog", "w")) == NULL) {
00196                 printf("cannot open request log.\n");
00197                 exit(1);
00198         }
00199         if ((sf = fopen("pglog", "w")) == NULL) {
00200                 printf("cannot open page log.\n");
00201                 exit(1);
00202         }
00203 
00204         if ((argc < 2) || (argc > 4)) {
00205                 printf("Usage: %s <trace size> [<time duration>] [<start_time>]\n", argv[0]);
00206                 return 1;
00207         }
00208         if (argc >= 3) {
00209                 duration = strtod(argv[2], NULL);
00210                 if (argc == 4) {
00211                         startTime = strtod(argv[3], NULL);
00212                         printf("start time = %f\n", startTime);
00213                 }
00214         }
00215 
00216         sz_rlog = strtoul(argv[1], NULL, 10);
00217         rlog = new ReqLog[sz_rlog];
00218 
00219         while(1) {
00220                 ret = lf_get_next_entry(stdin, lfntree);
00221                 if (ret > 0) {
00222                         if (ret == 1) {
00223                                 /* EOF */
00224                                 break;
00225                         }
00226                         fprintf(stderr, "Failed to get next entry.\n");
00227                         exit(1);
00228                 } else if (ret < 0) {
00229                         // Unusable entry, i.e., cache miss, cgi-bin, etc.
00230                         continue;
00231                 }
00232                 // Analyse one log entry
00233                 ctime = lf_analyze(lfntree);
00234                 delete []lfntree.url;
00235                 delete []lfntree.sid;
00236                 if ((duration > 0) && (ctime > duration))
00237                         break;
00238         }
00239         Tcl_DeleteHashTable(&cidHash);
00240         Tcl_DeleteHashTable(&sidHash);
00241 
00242         fprintf(stderr, "sort url\n");
00243         sort_url();
00244         fclose(sf);
00245 
00246         fprintf(stderr, "sort requests\n");
00247         sort_rlog();
00248         fclose(cf);
00249 
00250         fprintf(stderr, 
00251                 "%d unique clients, %d unique servers, %d unique urls.\n", 
00252                 client, server, url);
00253         return 0;
00254 }

Generated on Tue Apr 20 12:14:37 2004 for NS2.26SourcesOriginal by doxygen 1.3.3