00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #include <stdio.h>
00014 #include <stdlib.h>
00015 #include <string.h>
00016 #include <ctype.h>
00017 #include <time.h>
00018 #include <sys/types.h>
00019 #include <sys/socket.h>
00020 #include <netinet/in.h>
00021 #include <arpa/inet.h>
00022 #include <tcl.h>
00023
00024 #include "proxytrace.h"
00025
00026 FILE *cf, *sf;
00027 double initTime = -1;
00028 double duration = -1;
00029 double startTime = -1;
00030
00031 Tcl_HashTable cidHash;
00032 static int client = 0;
00033
00034 Tcl_HashTable sidHash;
00035 static int server = 0;
00036
00037 Tcl_HashTable urlHash;
00038 static int url = 0;
00039 static int* umap;
00040
00041 ReqLog* rlog = NULL;
00042 unsigned int num_rlog = 0, sz_rlog = 0;
00043
00044 static int compare(const void *a1, const void *b1)
00045 {
00046 const ReqLog *a = (const ReqLog*)a1, *b = (const ReqLog*)b1;
00047 return (a->time > b->time) ? 1 :
00048 (a->time == b->time) ? 0 : -1;
00049 }
00050
00051 void sort_rlog()
00052 {
00053 qsort((void *)rlog, num_rlog, sizeof(ReqLog), compare);
00054 double t = rlog[0].time;
00055 for (unsigned int i = 0; i < num_rlog; i++) {
00056 rlog[i].time -= t;
00057 fprintf(cf, "%f %d %d %d\n", rlog[i].time,
00058 rlog[i].cid, rlog[i].sid, umap[rlog[i].url]);
00059 }
00060
00061 fprintf(cf, "i %f %u\n", rlog[num_rlog-1].time, url);
00062
00063 fprintf(stderr,
00064 "%d unique clients, %d unique servers, %d unique urls.\n",
00065 client, server, url);
00066 }
00067
00068 static int compare_url(const void* a1, const void* b1)
00069 {
00070 const URL **a = (const URL**)a1, **b = (const URL**)b1;
00071 return ((*a)->access > (*b)->access) ? -1:
00072 ((*a)->access == (*b)->access) ? 0 : 1;
00073 }
00074
00075 void sort_url()
00076 {
00077
00078 URL** tbl = new URL*[urlHash.numEntries];
00079 Tcl_HashEntry *he;
00080 Tcl_HashSearch hs;
00081 int i = 0, sz = urlHash.numEntries;
00082 for (he = Tcl_FirstHashEntry(&urlHash, &hs);
00083 he != NULL;
00084 he = Tcl_NextHashEntry(&hs))
00085 tbl[i++] = (URL*)Tcl_GetHashValue(he);
00086 Tcl_DeleteHashTable(&urlHash);
00087
00088
00089 qsort((void *)tbl, sz, sizeof(URL*), compare_url);
00090 umap = new int[url];
00091
00092 for (i = 0; i < sz; i++) {
00093 umap[tbl[i]->id] = i;
00094 fprintf(sf, "%d %d %d %u\n", tbl[i]->sid, i,
00095 tbl[i]->size, tbl[i]->access);
00096 delete tbl[i];
00097 }
00098 delete []tbl;
00099 }
00100
00101 const unsigned long MAX_FILESIZE = 10000000;
00102
00103 double lf_analyze(TEntry& lfe)
00104 {
00105 double time;
00106 int ne, cid, sid, uid;
00107 Tcl_HashEntry *he;
00108
00109
00110
00111 if (lfe.tail.method != METHOD_GET)
00112 return -1;
00113 if ((lfe.tail.flags & QUERY_FOUND_FLAG) ||
00114 (lfe.tail.flags & CGI_BIN_FLAG))
00115 return -1;
00116 if ((lfe.tail.status != 200) && (lfe.tail.status != 304))
00117 return -1;
00118
00119
00120 if (lfe.head.size == 0)
00121 return -1;
00122
00123 if (lfe.head.size > MAX_FILESIZE)
00124 return -1;
00125
00126 time = (double)lfe.head.time_sec + (double)lfe.head.time_usec/(double)1000000.0;
00127
00128 if (initTime < 0) {
00129 initTime = time;
00130 time = 0;
00131 } else
00132 time -= initTime;
00133
00134
00135 if ((startTime > 0) && (time < startTime))
00136 return -1;
00137
00138
00139 if (!(he = Tcl_FindHashEntry(&cidHash, (const char *)lfe.head.client))) {
00140
00141 he = Tcl_CreateHashEntry(&cidHash, (const char *)lfe.head.client, &ne);
00142 Tcl_SetHashValue(he, ++client);
00143 cid = client;
00144 } else {
00145
00146 cid = (int)Tcl_GetHashValue(he);
00147 }
00148
00149
00150 if (!(he = Tcl_FindHashEntry(&sidHash, (const char *)lfe.head.server))) {
00151
00152 he = Tcl_CreateHashEntry(&sidHash, (const char *)lfe.head.server, &ne);
00153 Tcl_SetHashValue(he, ++server);
00154 sid = server;
00155 } else {
00156
00157 sid = (int)Tcl_GetHashValue(he);
00158 }
00159
00160
00161 if (!(he = Tcl_FindHashEntry(&urlHash, (const char*)lfe.url))) {
00162
00163 he = Tcl_CreateHashEntry(&urlHash, (const char*)lfe.url, &ne);
00164 URL* u = new URL(++url, sid, lfe.head.size);
00165 Tcl_SetHashValue(he, (const char*)u);
00166 uid = u->id;
00167 } else {
00168
00169 URL* u = (URL*)Tcl_GetHashValue(he);
00170 u->access++;
00171 uid = u->id;
00172 }
00173
00174 rlog[num_rlog++] = ReqLog(time, cid, sid, uid);
00175
00176
00177 if (startTime > 0)
00178 return time - startTime;
00179 else
00180 return time;
00181 }