Main Page | Namespace List | Class Hierarchy | Alphabetical List | Compound List | File List | Compound Members | File Members

tr-stat.cc

Go to the documentation of this file.
00001 // Generate statistics from UCB traces
00002 // All we need to know: 
00003 // 
00004 // (1) client request streams: 
00005 //     <time> <clientID> <serverID> <URL_ID> 
00006 // (2) server page mod stream(s):
00007 //     <serverID> <URL_ID> <PageSize>
00008 //
00009 // Part of the code comes from Steven Gribble's UCB trace parse codes
00010 // 
00011 // $Header: /nfs/jade/vint/CVSROOT/ns-2/indep-utils/webtrace-conv/dec/tr-stat.cc,v 1.2 1999/07/09 21:19:04 haoboy Exp $
00012 
00013 #include <stdio.h>
00014 #include <stdlib.h>
00015 #include <string.h>
00016 #include <ctype.h>
00017 #include <time.h>
00018 #include <sys/types.h>
00019 #include <sys/socket.h>
00020 #include <netinet/in.h>
00021 #include <arpa/inet.h>
00022 #include <tcl.h>
00023 
00024 #include "proxytrace.h"
00025 
00026 FILE *cf, *sf;
00027 double initTime = -1;
00028 double duration = -1;
00029 double startTime = -1;
00030 
00031 Tcl_HashTable cidHash;  // Client id (IP, port) hash
00032 static int client = 0;  // client sequence number
00033 
00034 Tcl_HashTable sidHash;  // server id (IP, port) hash
00035 static int server = 0;  // server sequence number
00036 
00037 Tcl_HashTable urlHash;  // URL id hash
00038 static int url = 0;     // URL sequence number
00039 static int* umap;       // URL mapping table, used for url sort
00040 
00041 ReqLog* rlog = NULL;
00042 unsigned int num_rlog = 0, sz_rlog = 0;
00043 
00044 static int compare(const void *a1, const void *b1)
00045 {
00046         const ReqLog *a = (const ReqLog*)a1, *b = (const ReqLog*)b1;
00047         return (a->time > b->time) ? 1 : 
00048                 (a->time == b->time) ? 0 : -1;
00049 }
00050 
00051 void sort_rlog()
00052 {
00053         qsort((void *)rlog, num_rlog, sizeof(ReqLog), compare);
00054         double t = rlog[0].time;
00055         for (unsigned int i = 0; i < num_rlog; i++) {
00056                 rlog[i].time -= t;
00057                 fprintf(cf, "%f %d %d %d\n", rlog[i].time, 
00058                         rlog[i].cid, rlog[i].sid, umap[rlog[i].url]);
00059         }
00060         // Record trace duration and # of unique urls
00061         fprintf(cf, "i %f %u\n", rlog[num_rlog-1].time, url);
00062 
00063         fprintf(stderr, 
00064                 "%d unique clients, %d unique servers, %d unique urls.\n", 
00065                 client, server, url);
00066 }
00067 
00068 static int compare_url(const void* a1, const void* b1)
00069 {
00070         const URL **a = (const URL**)a1, **b = (const URL**)b1;
00071         return ((*a)->access > (*b)->access) ? -1:
00072                 ((*a)->access == (*b)->access) ? 0 : 1;
00073 }
00074 
00075 void sort_url()
00076 {
00077         // XXX use an interval member of Tcl_HashTable
00078         URL** tbl = new URL*[urlHash.numEntries];
00079         Tcl_HashEntry *he;
00080         Tcl_HashSearch hs;
00081         int i = 0, sz = urlHash.numEntries;
00082         for (he = Tcl_FirstHashEntry(&urlHash, &hs);
00083              he != NULL;
00084              he = Tcl_NextHashEntry(&hs))
00085                 tbl[i++] = (URL*)Tcl_GetHashValue(he);
00086         Tcl_DeleteHashTable(&urlHash);
00087 
00088         // sort using access frequencies
00089         qsort((void *)tbl, sz, sizeof(URL*), compare_url);
00090         umap = new int[url];
00091         // write sorted url to page table
00092         for (i = 0; i < sz; i++) {
00093                 umap[tbl[i]->id] = i;
00094                 fprintf(sf, "%d %d %d %u\n", tbl[i]->sid, i,
00095                         tbl[i]->size, tbl[i]->access);
00096                 delete tbl[i];
00097         }
00098         delete []tbl;
00099 }
00100 
00101 const unsigned long MAX_FILESIZE = 10000000;
00102 
00103 double lf_analyze(TEntry& lfe)
00104 {
00105         double time;
00106         int ne, cid, sid, uid;
00107         Tcl_HashEntry *he;
00108 
00109         // Filter out entries with 'post', 'head' etc. only keep 'get'
00110         // Also filter out 
00111         if (lfe.tail.method != METHOD_GET)
00112                 return -1;
00113         if ((lfe.tail.flags & QUERY_FOUND_FLAG) || 
00114             (lfe.tail.flags & CGI_BIN_FLAG))
00115                 return -1;
00116         if ((lfe.tail.status != 200) && (lfe.tail.status != 304))
00117                 return -1;
00118 
00119         // We don't consider pages with size 0
00120         if (lfe.head.size == 0)
00121                 return -1;
00122         // We don't consider file size larger than 10MB
00123         if (lfe.head.size > MAX_FILESIZE)
00124                 return -1;
00125 
00126         time = (double)lfe.head.time_sec + (double)lfe.head.time_usec/(double)1000000.0;
00127 
00128         if (initTime < 0) {
00129                 initTime = time;
00130                 time = 0;
00131         } else 
00132                 time -= initTime;
00133 
00134         // If a trace start time is required, don't do anything
00135         if ((startTime > 0) && (time < startTime)) 
00136                 return -1;
00137 
00138         // check client id
00139         if (!(he = Tcl_FindHashEntry(&cidHash, (const char *)lfe.head.client))) {
00140                 // new client, allocate a client id
00141                 he = Tcl_CreateHashEntry(&cidHash, (const char *)lfe.head.client, &ne);
00142                 Tcl_SetHashValue(he, ++client);
00143                 cid = client;
00144         } else {
00145                 // existing entry, find its client seqno
00146                 cid = (int)Tcl_GetHashValue(he);
00147         }
00148 
00149         // check server id
00150         if (!(he = Tcl_FindHashEntry(&sidHash, (const char *)lfe.head.server))) {
00151                 // new client, allocate a client id
00152                 he = Tcl_CreateHashEntry(&sidHash, (const char *)lfe.head.server, &ne);
00153                 Tcl_SetHashValue(he, ++server);
00154                 sid = server;
00155         } else {
00156                 // existing entry, find its client seqno
00157                 sid = (int)Tcl_GetHashValue(he);
00158         }
00159 
00160         // check url id
00161         if (!(he = Tcl_FindHashEntry(&urlHash, (const char*)lfe.url))) {
00162                 // new client, allocate a client id
00163                 he = Tcl_CreateHashEntry(&urlHash, (const char*)lfe.url, &ne);
00164                 URL* u = new URL(++url, sid, lfe.head.size);
00165                 Tcl_SetHashValue(he, (const char*)u);
00166                 uid = u->id;
00167         } else {
00168                 // existing entry, find its client seqno
00169                 URL* u = (URL*)Tcl_GetHashValue(he);
00170                 u->access++;
00171                 uid = u->id;
00172         }
00173 
00174         rlog[num_rlog++] = ReqLog(time, cid, sid, uid);
00175         //fprintf(cf, "%f %d %d %d\n", time, cid, sid, uid);
00176 
00177         if (startTime > 0) 
00178                 return time - startTime;
00179         else 
00180                 return time;
00181 }

Generated on Tue Apr 20 12:14:37 2004 for NS2.26SourcesOriginal by doxygen 1.3.3