00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #include <stdio.h>
00014 #include <stdlib.h>
00015 #include <string.h>
00016 #include <ctype.h>
00017 #include <time.h>
00018 #include <sys/types.h>
00019 #include <sys/socket.h>
00020 #include <netinet/in.h>
00021 #include <arpa/inet.h>
00022 #include <tcl.h>
00023
00024 #include "logparse.h"
00025
00026 Tcl_HashTable cidHash;
00027 int client = 0;
00028
00029 Tcl_HashTable sidHash;
00030 int server = 0;
00031
00032 Tcl_HashTable urlHash;
00033 int url = 0;
00034 int* umap;
/* Per-URL record kept as the value of each urlHash entry. */
struct URL {
	int access;	/* request count; a freshly created record starts at 1 */
	int id;		/* id given to the URL when the record is created */
	int sid, size;	/* server id and page size supplied at creation */

	URL(int i, int sd, int sz)
		: access(1), id(i), sid(sd), size(sz)
	{
	}
};
00041
/* Output streams: cf receives the request log, sf the page log. */
FILE *cf = NULL;
FILE *sf = NULL;

/* Timestamp of the first entry seen; negative until that entry arrives. */
double initTime = -1.0;
/* Optional limit on the converted time span; not positive means no limit. */
double duration = -1.0;
/* Optional offset before which entries are skipped; not positive means none. */
double startTime = -1.0;
00046
/* One request: when it happened and which client, server and URL ids. */
struct ReqLog {
	double time;
	unsigned int cid, sid, url;

	/* Leaves every field uninitialised so large arrays are cheap to build. */
	ReqLog() {}

	ReqLog(double t, unsigned int c, unsigned int s, unsigned int u)
		: time(t), cid(c), sid(s), url(u)
	{
	}
};
00054 ReqLog* rlog = NULL;
00055 unsigned int num_rlog = 0, sz_rlog = 0;
00056
00057 int compare(const void *a1, const void *b1)
00058 {
00059 const ReqLog *a = (const ReqLog*)a1, *b = (const ReqLog*)b1;
00060 return (a->time > b->time) ? 1 :
00061 (a->time == b->time) ? 0 : -1;
00062 }
00063
00064 void sort_rlog()
00065 {
00066 qsort((void *)rlog, num_rlog, sizeof(ReqLog), compare);
00067 double t = rlog[0].time;
00068 for (unsigned int i = 0; i < num_rlog; i++) {
00069 rlog[i].time -= t;
00070 fprintf(cf, "%f %d %d %d\n", rlog[i].time,
00071 rlog[i].cid, rlog[i].sid, umap[rlog[i].url]);
00072 }
00073 delete []umap;
00074
00075 fprintf(cf, "i %f %u\n", rlog[num_rlog-1].time, url);
00076 }
00077
00078 int compare_url(const void* a1, const void* b1)
00079 {
00080 const URL **a = (const URL**)a1, **b = (const URL**)b1;
00081 return ((*a)->access > (*b)->access) ? -1:
00082 ((*a)->access == (*b)->access) ? 0 : 1;
00083 }
00084
00085 void sort_url()
00086 {
00087
00088 URL** tbl = new URL*[urlHash.numEntries];
00089 Tcl_HashEntry *he;
00090 Tcl_HashSearch hs;
00091 int i = 0, sz = urlHash.numEntries;
00092 for (he = Tcl_FirstHashEntry(&urlHash, &hs);
00093 he != NULL;
00094 he = Tcl_NextHashEntry(&hs))
00095 tbl[i++] = (URL*)Tcl_GetHashValue(he);
00096 Tcl_DeleteHashTable(&urlHash);
00097
00098
00099 qsort((void *)tbl, sz, sizeof(URL*), compare_url);
00100 umap = new int[url];
00101
00102 for (i = 0; i < sz; i++) {
00103 umap[tbl[i]->id] = i;
00104 fprintf(sf, "%d %d %d %u\n", tbl[i]->sid, i,
00105 tbl[i]->size, tbl[i]->access);
00106 delete tbl[i];
00107 }
00108 delete []tbl;
00109 }
00110
00111 double lf_analyze(lf_entry& lfe)
00112 {
00113 double time;
00114 int ne, cid, sid, uid;
00115 Tcl_HashEntry *he;
00116
00117 time = lfe.rt;
00118
00119 if (initTime < 0) {
00120 initTime = time;
00121 time = 0;
00122 } else
00123 time -= initTime;
00124
00125
00126 if ((startTime > 0) && (time < startTime))
00127 return -1;
00128
00129
00130 if (lfe.size == 0)
00131 return -1;
00132
00133
00134 if (!(he = Tcl_FindHashEntry(&cidHash, (const char *)lfe.cid))) {
00135
00136 he = Tcl_CreateHashEntry(&cidHash, (const char *)lfe.cid, &ne);
00137 Tcl_SetHashValue(he, ++client);
00138 cid = client;
00139 } else {
00140
00141 cid = (int)Tcl_GetHashValue(he);
00142 }
00143
00144
00145 if (!(he = Tcl_FindHashEntry(&sidHash, lfe.sid))) {
00146
00147 he = Tcl_CreateHashEntry(&sidHash, lfe.sid, &ne);
00148 Tcl_SetHashValue(he, ++server);
00149 sid = server;
00150 } else {
00151
00152 sid = (int)Tcl_GetHashValue(he);
00153 }
00154
00155
00156 if (!(he = Tcl_FindHashEntry(&urlHash, lfe.url))) {
00157
00158 he = Tcl_CreateHashEntry(&urlHash, lfe.url, &ne);
00159 URL* u = new URL(++url, sid, lfe.size);
00160 Tcl_SetHashValue(he, (const char*)u);
00161 uid = u->id;
00162
00163 } else {
00164
00165 URL* u = (URL*)Tcl_GetHashValue(he);
00166 u->access++;
00167 uid = u->id;
00168 }
00169
00170 rlog[num_rlog++] = ReqLog(time, cid, sid, uid);
00171
00172
00173 if (startTime > 0)
00174 return time - startTime;
00175 else
00176 return time;
00177 }
00178
00179 int main(int argc, char**argv)
00180 {
00181 lf_entry lfntree;
00182 int ret;
00183 double ctime;
00184
00185
00186 Tcl_Interp *interp = Tcl_CreateInterp();
00187 if (Tcl_Init(interp) == TCL_ERROR) {
00188 printf("%s\n", interp->result);
00189 abort();
00190 }
00191 Tcl_InitHashTable(&cidHash, TCL_ONE_WORD_KEYS);
00192 Tcl_InitHashTable(&sidHash, TCL_STRING_KEYS);
00193 Tcl_InitHashTable(&urlHash, TCL_STRING_KEYS);
00194
00195 if ((cf = fopen("reqlog", "w")) == NULL) {
00196 printf("cannot open request log.\n");
00197 exit(1);
00198 }
00199 if ((sf = fopen("pglog", "w")) == NULL) {
00200 printf("cannot open page log.\n");
00201 exit(1);
00202 }
00203
00204 if ((argc < 2) || (argc > 4)) {
00205 printf("Usage: %s <trace size> [<time duration>] [<start_time>]\n", argv[0]);
00206 return 1;
00207 }
00208 if (argc >= 3) {
00209 duration = strtod(argv[2], NULL);
00210 if (argc == 4) {
00211 startTime = strtod(argv[3], NULL);
00212 printf("start time = %f\n", startTime);
00213 }
00214 }
00215
00216 sz_rlog = strtoul(argv[1], NULL, 10);
00217 rlog = new ReqLog[sz_rlog];
00218
00219 while(1) {
00220 ret = lf_get_next_entry(stdin, lfntree);
00221 if (ret > 0) {
00222 if (ret == 1) {
00223
00224 break;
00225 }
00226 fprintf(stderr, "Failed to get next entry.\n");
00227 exit(1);
00228 } else if (ret < 0) {
00229
00230 continue;
00231 }
00232
00233 ctime = lf_analyze(lfntree);
00234 delete []lfntree.url;
00235 delete []lfntree.sid;
00236 if ((duration > 0) && (ctime > duration))
00237 break;
00238 }
00239 Tcl_DeleteHashTable(&cidHash);
00240 Tcl_DeleteHashTable(&sidHash);
00241
00242 fprintf(stderr, "sort url\n");
00243 sort_url();
00244 fclose(sf);
00245
00246 fprintf(stderr, "sort requests\n");
00247 sort_rlog();
00248 fclose(cf);
00249
00250 fprintf(stderr,
00251 "%d unique clients, %d unique servers, %d unique urls.\n",
00252 client, server, url);
00253 return 0;
00254 }