Main Page | Namespace List | Class Hierarchy | Alphabetical List | Compound List | File List | Compound Members | File Members

http_connect.c

Go to the documentation of this file.
00001 /*
00002 http_connect
00003 
00004 Copyright July 5, 2001, The University of North Carolina at Chapel Hill 
00005 
00006 All rights reserved.  No part of this software may be sold or
00007 distributed in any form or by any means without the prior written
00008 permission of the Department of Computer Science, University of North
00009 Carolina at Chapel Hill.  Distribution and use of this software is
00010 subject to the Software License Agreement incorporated in this
00011 software. By having, retaining or using a copy of this software, you
00012 agree to be subject to  the terms of the Software License Agreement.
00013 
00014 *******
00015 
00016 Software License Agreement
00017 
00018 Permission is given to copy http_connect, and its files (the Software)
00019 and to use them locally, as long as foregoing Copyright Notice is not
00020 removed and the Software name is retained unaltered.  By opening,
00021 possessing, retaining, using, or having a copy of  the Software, you
00022 are deemed to have agreed to the terms of this Software License
00023 Agreement.
00024 
00025 The Software is provided strictly on an "as is" basis without warranty
00026 of any kind.   Neither the University of North Carolina at Chapel
00027 Hill, its faculty, staff or students, nor anyone else who has been
00028 involved in the creation, production or delivery of the Software
00029 shall be liable for any direct, indirect, consequential or incidental
00030 damages arising out of the use or inability to use the Software even
00031 if such entities or persons may be advised of the possibility of such
00032 damages.
00033 
00034 No part of this software may be sold or distributed in any form or by
00035 any means without the prior written permission of the Department of
00036 Computer Science, University of North Carolina at Chapel Hill.  Your
00037 use of the Software is limited to non-commercial, not-for-profit uses
00038 and activities.  To secure permission to make any other use of the
00039 Software, you should contact the person named below.
00040 
00041 
00042 Contact person:
00043 
00044     Frank D. Smith, University of North Carolina at Chapel Hill
00045         email: smithfd@cs.unc.edu
00046         phone: 919-962-1884
00047         fax:   919-962-1799
00048 */
00049 
00050 /* 
00051    This program performs an analysis of tcpdump output (the ASCII print
00052    lines that tcpdump generates to stdout) and produces a summary of the
00053    TCP connections used for HTTP.  It assumes that the tcpdump has been
00054    filtered for packets that are from TCP source port 80 and that
00055    the result has been sorted so that packets are in ascending time
00056    order within each TCP connection.  The script given below will properly
00057    prepare the input for this program given a tcpdump binary file that 
00058    may contain more than just HTTP packets (the file extensions are
00059    just examples, the program does not make any assumptions about input
00060    file names).  Note that this filtering produces a UNI-DIRECTIONAL 
00061    trace containing only those TCP segments sent from the server to the
00062    client.
00063 
00064    The output is a file with summary records for each connection (basically
00065    the data summarized into response and request lengths).  There is
00066    also a <file name>.log file with summary counts.
00067 
00068    To get usage information, invoke the program with the -h switch.
00069 
00070    WARNING! THIS PROGRAM DEPENDS ON THE FORMAT OF THE OUTPUT OF TCPDUMP
00071    AS PRINTED ON STDOUT (ASCII).  IT HAS BEEN TESTED ONLY ON THE 
00072    FREEBSD VERSION OF TCPDUMP CORRESPONDING TO RELEASES UP THROUGH 4.1
00073 
00074    KNOWN BUG: Does not process tcpdump output for ECN flags correctly.
00075 
00076 #! /bin/sh
00077 tcpdump -n -tt -r $1 tcp src port 80 > $1.http-srv
00078 sort -s -o $1.http-srv-sort +1 -2 +3 -4 +0 -1 -T /tmp $1.http-srv
00079 
00080 */
00081 
00082 #include <stdlib.h>
00083 #include <stdio.h>
00084 #include <math.h>
00085 #include <sys/time.h>
00086 
00087 #define min(a,b) ((a) <= (b) ? (a) : (b))
00088 #define max(a,b) ((a) >= (b) ? (a) : (b))
00089 
00090 int report_ACK_err = 1;
00091 
00092 void Usage(char *s)
00093 {
00094   /* Print the synopsis for program name s on stderr, then exit(-1); never returns. */
00095   fprintf (stderr, "\nUsage: %s\n", s);
00096   fputs ("    [-w file_name] (name for output file)\n"
00097          "    [-r file_name] (name for input file)\n"
00098          "If either -w or -r is omitted, stdout(stdin) is used\n" "\n", stderr);
00099   exit(-1);
00100 }
00101 
00102   FILE *dumpFP, *outFP;  /* tcpdump text input (-r file or stdin); summary output (-w file or stdout) */
00103   FILE *logFP;           /* log stream; main opens it as <output_name>.log */
00104 
00105   struct timeval recvTime;          /* NOTE(review): not referenced in the code reviewed here */
00106   struct timeval lastTime = {0,0};  /* NOTE(review): not referenced in the code reviewed here */
00107 /* Fields of one tcpdump line; main fills them via sscanf "%s" with no field width, so over-long tokens overflow. */
00108   char ts[20]; /* ASCII timestamp in tcpdump output */
00109                /* max. ssssssssss.mmmmmm characters + EOL */
00110   char sh[25]; /* ASCII source host.port in tcpdump output */
00111                /* max. hhh.hhh.hhh.hhh.ppppp + EOL */
00112   char gt[3];  /* ">" symbol in tcpdump output */
00113   char lt[3];  /* "<" symbol in tcpdump output */
00114   char dh[25]; /* ASCII destination host.port in tcpdump output */
00115                /* max. hhh.hhh.hhh.hhh.ppppp + EOL */
00116   char fl[5];  /* ASCII TCP flags field in tcpdump output */
00117   char p1[50]; /* ASCII first field after flags in tcpdump output */
00118   char p2[50]; /* ASCII second field after flags */
00119   char p3[50]; /* ASCII third field after flags */
00120 
00121 /* These are read from the tcpdump records (main's comments say parse_dump_record() initializes them for each line) */
00122   unsigned long begin_seq, end_seq, seq_bytes, new_ack;
00123   unsigned long current_synseq;  /* cleared to 0 by main whenever the connection tuple changes */
00124 
00125   int has_seq, has_ack;  /* main tests these against 1 before trusting end_seq / new_ack */
00126 
00127 /* In TCP, ACKs should be monotonically increasing so the length of the current
00128    request in a sequence can be computed as the difference between the
00129    ACK value marking the end of the previous request and the ACK value
00130    marking the end of the current request.  Unfortunately, out-of-order
00131    segments can cause ACKs to appear as if they "go backward".  For segments
00132    without data (ACK_ONLY), simply ignoring the "backward" ACK is fine. 
00133    In many cases where there is data along with a "backward" ACK,
00134    this is the result of (unnecessary) retransmission of segments
00135    containing data and those cases are handled properly.  In other cases,
00136    the results can be incorrect for HTTP 1.1 connections.  Each case of
00137    suspected ACK mis-ordering that might lead to erroneous request or
00138    response lengths is noted for off-line investigation.
00139    */
00140 
00141   unsigned long current_request_end, last_request_end;  /* ACK values ending the current / previous request */
00142 
00143 /* In TCP segments the sequence numbers we see may NOT be monotonically
00144    increasing (retransmission, out-of-order).  Instead we record the 
00145    largest value seen at any point.  Then the length of the current
00146    response in a sequence can be computed as the difference between the
00147    (largest) sequence at the end of the previous response and the
00148    (largest) sequence at the end of the current response.  */
00149 
00150   unsigned long current_response_end, last_response_end;  /* largest sequence values ending the current / previous response */
00151 
00152 /* Various counters for logging connection summary information */
00153 /* NOTE(review): none of these totals is updated in the part of main reviewed; presumably the log_xxx routines maintain them -- confirm */
00154   int syn_count = 0; /* connections beginning with SYN in trace */
00155   int req_count = 0; /* number of identified requests */
00156   int rsp_count = 0; /* number of identified responses */
00157   int fin_count = 0; /* connections ending with FIN in trace */
00158   int rst_count = 0; /* connections ending with Reset (no FIN) */
00159   int trm_count = 0; /* connections ending with no Reset or FIN */
00160   int err_count = 0; /* connections with at least one suspected error */
00161   int act_req_count = 0; /* connections beginning in a request */
00162   int act_rsp_count = 0; /* connections beginning in a response */
00163   int pending_fin_count = 0; /* partial with only FIN(s) */
00164   int pending_rst_count = 0; /* partial with only Reset(s) */
00165   int pending_ack_count = 0; /* partial with only ACK(s) */
00166   int pending_oth_count = 0; /* partial -- others */
00167   int pending_cmb_count = 0; /* partials with FIN, Reset, ACK combinations */
00168 
00169 /* Various counters for events within a single connection */
00170   /* the following apply only when state == PENDING; see pending_xxx_counts
00171      above for explanations.  main bumps them in its PENDING case and zeroes them on a new connection tuple */
00172   int have_pending_acks = 0;
00173   int have_pending_fins = 0;
00174   int have_pending_rsts = 0;
00175   int have_pending_othr = 0;
00176   /* the following apply only when state != PENDING */
00177   int have_ACK_error = 0;   /* seen "backwards" ACK */
00178   int have_value_error = 0; /* seen suspect ACK or sequence # */
00179   int have_FINdata_error = 0; /* seen data after FIN; while set, main's FIN_SENT case does not call error_state() again */
00180 
00181   enum states {PENDING, SYN_SENT, FIN_SENT, RESET, IN_REQUEST, IN_RESPONSE};
00182 /* per-connection processing states; each is described in the comment ahead of the state switch in main */
00183   enum states connection_state = PENDING;  /* state of the connection now being processed */
00184   enum states last_state = PENDING;        /* state held before the most recent change */
00185 /* classification of the current tcpdump record; main's comments say parse_dump_record() sets input_type */
00186   enum inputs {SYN, FIN, RST, ACK_ONLY, DATA_ACK};
00187   enum inputs input_type;
00188 
00189   char current_src[25] = "";  /* source host.port of the connection in progress; main compares sh against it */
00190   char src_host[25];          /* NOTE(review): presumably filled by get_host_port() from the source address -- confirm */
00191   char src_port[10];          /* NOTE(review): presumably filled by get_host_port() from the source address -- confirm */
00192 
00193   char current_dst[25] = "";  /* destination host.port of the connection in progress; main compares dh against it */
00194   char dst_host[25];          /* NOTE(review): presumably filled by get_host_port() from the destination address -- confirm */
00195   char dst_port[10];          /* NOTE(review): presumably filled by get_host_port() from the destination address -- confirm */
00196 
00197 
00198 /* A request is considered to start at the timestamp on the tcpdump record
00199    containing the first advance in the ACK field (a) following the connection
00200    establishment or (b) while the connection is sending response data. */
00201 
00202   char start_request_time[20];  /* same size as ts; main copies ts into it with strcpy */
00203 
00204 /* A response is considered to start at the timestamp on the tcpdump record
00205    containing the first advance in the sequence number field following a
00206    period when the ACK has been advanced (receiving request data).  */
00207 
00208   char start_response_time[20];  /* same size as ts; main copies ts into it with strcpy */
00209 
00210 /* A response is considered to end at the timestamp on the tcpdump record 
00211    that LAST advanced the data sequence number before the response ends 
00212    (a new request starts, a FIN is sent, etc.).  Since we would have to
00213    look ahead in the trace to be sure the current record has the LAST 
00214    advance, we go ahead and assume it is the last and record its 
00215    timestamp as response_end_time.  This way, when we know the sequence
00216    number will not advance more for this response from processing some 
00217    subsequent record in the trace (FIN or ACK advance), the last timestamp
00218    has already been saved.  Similarly, a request is considered to end at
00219    the timestamp on the last record of the request. */
00220   
00221   char response_end_time[20];
00222   char request_end_time[20];
00223 
00224   char FIN_sent_time[20];   /* time of the first FIN; main stores ts only while this is still "" */
00225   char RST_sent_time[20];   /* time of the first Reset; main stores ts only while this is still "" */
00226   char last_connection_time[20];  /* NOTE(review): not referenced in the code reviewed here */
00227 
00228   char input_name[256] = "";   /* -r argument; "" makes main read stdin */
00229   char output_name[256] = "";  /* -w argument; "" makes main write stdout */
00230   char log_name[256] = "";     /* output_name with ".log" appended (built in main) */
00231 /* main copies the argv strings into the names above with strcpy/strcat and no length check */
00232   char new_line[500];  /* assumes no tcpdump output line is longer */
00233 
00234   long elapsed;  /* NOTE(review): presumably holds an elapsed_ms() result -- not used in the code reviewed here */
00235 
00236   int new_address = 0;  /* set to 1 by main on the first record of a new connection tuple; cleared in its PENDING/ACK_ONLY case */
00237   int rc = 0;           /* result of parse_dump_record(); a negative value makes main skip the record */
00238 
00239 void error_line(char *s);  
00240 void error_state(char *s);     /* main passes a message when new data follows a FIN */
00241 int parse_dump_record(void);   /* a negative return makes main skip the current record */
00242 void init_connection(void);    /* run by main on a SYN while in PENDING state */
00243 void init_active(void);        /* run by main when a connection is first seen mid-request or mid-response */
00244 void check_tuple_reuse(void);  /* run by main when a SYN arrives in SYN_SENT, FIN_SENT or RESET state */
00245 int check_ACK_advance(unsigned long old_ack);
00246 void begin_REQ(void);
00247 void more_REQ(void);
00248 void begin_RSP(void);
00249 void more_RSP(void);
00250 void log_REQ(void);
00251 void log_RSP(void);
00252 void log_SYN(void);
00253 void log_END(char *how);
00254 void log_ACT(char *how);       /* main passes "REQ" or "RSP" */
00255 void log_nosyn(void);
00256 void log_connection(void);     /* called by main each time the connection tuple changes */
00257 void log_log(void);
00258 long elapsed_ms(char *end, char *start);
00259 void get_host_port(char *adr, char *host, char *port);
00260 int  get_sequence(char *p, unsigned long *begin, unsigned long *end,
00261                            unsigned long *bytes);
00262 
00263 void main (int argc, char* argv[])
00264 {
00265   int i;
00266 
00267   /* Parse the command line */
00268   i = 1;
00269   while (i < argc) {
00270     if (strcmp (argv[i], "-r") == 0) {
00271       /* -r flag is followed by name of file to read */
00272       if (++i >= argc) Usage (argv[0]);
00273       strcpy (input_name, argv[i]);
00274     }
00275     else if (strcmp (argv[i], "-w") == 0) {
00276       /* -w flag is followed by the name of file to write */
00277       if (++i >= argc) Usage (argv[0]);
00278       strcpy (output_name, argv[i]);
00279     }
00280     else 
00281       Usage (argv[0]);
00282     i++;
00283   }
00284 
00285   /* Open files */
00286   /* Note: program is written to also be used as a filter */
00287 
00288   if (strcmp(output_name, "") == 0) 
00289     /* if no explicit output file named with -w, use stdout */
00290      outFP = stdout;
00291   else 
00292      {
00293       if ((outFP = fopen (output_name, "w")) == NULL) {
00294           fprintf (stderr, "error opening %s\n", output_name);
00295           exit (-1);
00296           }
00297      }
00298 
00299   if (strcmp(input_name, "") == 0)
00300     /* if no explicit input file named with -r, use stdin */
00301      dumpFP = stdin;
00302   else 
00303      {
00304       if ((dumpFP = fopen (input_name, "r")) == NULL) {
00305           fprintf (stderr, "error opening %s\n", input_name);
00306           exit (-1);
00307          }
00308      }
00309   
00310   strcpy(log_name, output_name);
00311   strcat(log_name, ".log");
00312   if ((logFP = fopen (log_name, "w")) == NULL) {
00313       fprintf (stderr, "error opening %s\n", log_name);
00314       exit (-1);
00315      }
00316 
00317   /* begin main loop; once through loop for each line in the tcpdump */
00318   /* a <continue> anywhere in the loop (usually after an error case)
00319      implies beginning of processing a new tcpdump record */
00320 
00321   while (!feof (dumpFP)) {
00322 
00323 /* printf("State is %d, last_state is %d\n", connection_state, last_state); */
00324 
00325     /* Get and parse line of tcpdump file */
00326 
00327     fgets (new_line, sizeof(new_line), dumpFP);
00328 
00329     /* get line pieces; this works because there are always 8 or more 
00330        fields separated by white space in tcpdump ASCII-format lines */
00331     sscanf (new_line, "%s %s %s %s %s %s %s %s %s",
00332                       &ts, &lt, &sh, &gt, &dh, &fl, &p1, &p2, &p3);
00333 
00334    /* If any part of the connection tuple (source host.port,destination
00335       host.port) differs from the current values, there are no more 
00336       tcpdump records for that connection so treat as end of connection.
00337       The action taken at the end of a connection depends on the current
00338       state of processing in that connection */  
00339 
00340    if ((strcmp(current_src, sh) != 0) ||  /* new source host/port */
00341        (strcmp(current_dst, dh) != 0))    /* new dest. host/port */
00342        {
00343         log_connection(); 
00344 
00345         /* begin processing this record as being from a potential new
00346            TCP connection so initialize connection's state */
00347 
00348         strcpy(current_src, sh);  /* new connection tuple */
00349         strcpy(current_dst, dh);  /* host and port for src & dest */
00350 
00351         have_pending_acks = 0;
00352         have_pending_fins = 0;
00353         have_pending_rsts = 0;
00354         have_pending_othr = 0;
00355 
00356         current_synseq = 0;
00357         connection_state = PENDING;  /* unconnected pending a SYN */
00358         last_state = PENDING;
00359         new_address = 1;        /* true only for very first record
00360                                    from a different TCP connection -- 
00361                                    avoids multiple error messages */
00362        }
00363    
00364    /* break dump record into essential data fields; initializes the
00365       following variables: begin_seq, end_seq, seq_bytes, new_ack,
00366                            has_ack, has_seq, input_type.
00367       Also checks for suspect sequence and ACK values.
00368    */
00369 
00370    if ((rc = parse_dump_record()) < 0) 
00371        continue;
00372 
00373    /* processing records from a TCP connections is based on a notion of
00374       the current "state" of the connection.  The defined states are
00375         PENDING := record is from different connection than before but
00376                    a beginning SYN has not yet been identified.
00377         SYN_SENT:= have identified SYN sent from source port 80 (server)
00378         FIN_SENT:= have identified a FIN sent from source port 80 (server)
00379                    This terminates any data from server.
00380         RESET   := have identified a Reset sent from source port 80 (server)
00381                    This means that the server should not accept any more
00382                    client data.
00383         IN_REQUEST := processing ACKs sent from source port 80 (server)
00384                       in response to data (request) from the client.  In
00385                       this state, need to identify end of client data (request)
00386                       and start of server data (response).
00387         IN_RESPONSE := processing data sequence #s from source port 80
00388                        In this state, need to identify the end of server data
00389                        (response) and, possibly the beginning of a new request.
00390       Whenever the state changes, the prior state is also noted.
00391 
00392       Fundamental to all of this is the notion that a server cannot  
00393       possibly be sending data in response to a request unless that
00394       data is accompanied or preceded by an advance in the ACK sequence
00395       indicating receipt of the request data.  Similarly, we assume that
00396       any new data sent by a server that follows or is accompanied by an
00397       advance in the ACK sequence number is a response to the request 
00398       that caused the ACK sequence to advance.  Put another way, response
00399       data (sequence # advance) marks the end of a request and ACK 
00400       advance marks the end of a response.  Of course other events such
00401       as FIN or Reset can mark ends also.  The use of ACK advance to mark
00402       the end of a response assumes that HTTP/1.1 browsers don't overlap 
00403       requests on a single TCP connection even if they may "batch" requests, 
00404       i.e., a new request will not be generated until the response has 
00405       been received. If requests and responses are batched but not 
00406       overlapped this will understate the number of objects requested 
00407       and overstate request and response sizes.
00408 
00409       This use of advancing sequence numbers (ACK or data) to mark
00410       requests and responses is disturbed by segment reorderings in 
00411       the network.  In some cases, such as reordering of only data segments
00412       in a response, there is no problem since only the highest value seen
00413       is used.  Reordering of ACKs (especially with data) presents real
00414       problems since boundaries between requests and responses are missed
00415       which can result in overstating request and response sizes.  For this
00416       reason, all such cases of ACK misordering are logged and reported.
00417    */
00418 
00419    switch (connection_state)
00420 
00421      /* a <break> anywhere ends processing of the current tcpdump record 
00422         by ending the switch (which continues the main read loop) */
00423 
00424       {
00425        case PENDING:
00426          {
00427           switch (input_type)
00428              {
00429              case FIN:
00430                 {
00431                  /* Ignore random FIN before something useful */ 
00432 
00433                  have_pending_fins++;
00434                  break;
00435                 }
00436 
00437              case SYN:
00438                 {
00439                   /* normal connection start, initialize connection state */
00440 
00441                  init_connection();
00442                  break;
00443                 }
00444 
00445              case RST:
00446                 {
00447                   /* Ignore random Reset before something useful */
00448 
00449                  have_pending_rsts++;
00450                  break;
00451                 }
00452 
00453              /* The trace may start after a connection is established and we
00454                 do not see a SYN.  Determine if the connection is most likely
00455                 in a request or in a response and log its status.  As before,
00456                 a request is indicated if the ACK sequence (after the first
00457                 one in the trace) is advancing and a response is indicated if
00458                 the data sequence number is advancing. */
00459 
00460              case ACK_ONLY:
00461                 {
00462                   /* ignore initial ACK (has absolute value, not relative) */
00463 
00464                  if (new_address == 1)
00465                    {
00466                     new_address = 0;
00467                     have_pending_acks++;
00468                    }
00469                  else
00470                     {
00471                      /* If ACK advances, the connection is in a request */
00472 
00473                      if ((new_ack > 2) &&
00474                          (new_ack < 16384))
00475                         {
00476                          log_ACT("REQ");
00477 
00478                          last_request_end = 1;
00479                          current_request_end = new_ack;
00480 
00481                          last_response_end = 1;
00482                          current_response_end = 1;
00483 
00484                        /* record the request start time as beginning at the 
00485                           tcpdump time stamp on this record */
00486 
00487                           strcpy(start_request_time, ts);
00488                           strcpy(request_end_time, ts);
00489 
00490                           last_state = connection_state;
00491                           connection_state = IN_REQUEST;
00492  
00493                           init_active();  /* initialize connection state */
00494                          }
00495                      else /* ignore ACKs that are not advancing */
00496                         have_pending_acks++;
00497                     }
00498                  break;
00499                 }
00500 
00501              case DATA_ACK:          
00502                 {
00503                  /* If data sequence advances, connection is in response */
00504 
00505                  if ((seq_bytes > 1) &&
00506                      (seq_bytes < 65535))
00507                     {
00508                      log_ACT("RSP");
00509 
00510                      /* assume tcpdump relative addressing and initialize */
00511 
00512                      last_request_end = 1;    
00513                      current_request_end = 1;
00514 
00515                      last_response_end = 0;     
00516                      current_response_end = seq_bytes;
00517 
00518                      /* record timestamp of tcpdump record as current
00519                         value of both start and end times of response
00520                         (in case no later end time is found) */
00521 
00522                      strcpy(start_response_time, ts); 
00523                      strcpy(response_end_time, ts);
00524 
00525                      last_state = connection_state;
00526                      connection_state = IN_RESPONSE;
00527  
00528                      init_active();
00529                     }
00530                  else /* ignore data lengths of 0 or 1 */
00531                     have_pending_othr++;
00532                  break;
00533                 }
00534              default:
00535                 break;
00536              }  /* end switch on input_type */
00537           break;
00538          } /* end case PENDING */
00539 
00540        case SYN_SENT:
00541          {
00542 
00543          /* Treat this case as the establishment of a connection.  Usually
00544             the first activity on the connection will be request data from
00545             the client, but some servers appear to "pre-send" data (maybe
00546             the headers) speculatively before ACKing any client data. */
00547 
00548           switch (input_type)
00549              {
00550              case FIN:
00551 
00552               /* A FIN marks the end of either or both request and response */
00553 
00554                 {
00555                  if ((has_ack == 1) &&
00556                      (new_ack > (current_request_end + 1)))
00557                                /* ignore possible ACK of FIN */
00558 
00559                    { /* this record had advanced the ACK sequence so
00560                         save current request data and log it (since
00561                         the FIN ends the connection it also ends the 
00562                         request). */
00563                
00564                      begin_REQ();
00565                      log_REQ();
00566                     }
00567 
00568                  /* If the data sequence number advances, then there was
00569                     response data.  Save the current response info. and
00570                     log it (since the FIN ends the connection, it also
00571                     ends the response). */
00572 
00573                  if ((has_seq == 1) &&
00574                      (end_seq > current_response_end))  
00575                     {
00576                      begin_RSP();
00577                      log_RSP();
00578                     }
00579                  last_state = SYN_SENT;
00580                  connection_state = FIN_SENT;
00581 
00582                  /* record timestamp of first FIN seen on connection */
00583                  if (strcmp(FIN_sent_time, "") == 0)
00584                     strcpy(FIN_sent_time, ts);
00585                  break;
00586                 }
00587 
00588              case SYN:
00589                 {
00590                   /* In some cases the same host/port pairs are reused in a
00591                      single trace; check for plausible reuse */
00592 
00593                  check_tuple_reuse();
00594                  break;
00595                 }
00596 
00597              case RST:
00598                 {
00599                  connection_state = RESET;
00600                  last_state = SYN_SENT;
00601 
00602                  /* record timestamp of first Reset on the connection */
00603 
00604                  if (strcmp(RST_sent_time, "") == 0)
00605                     strcpy(RST_sent_time, ts);
00606                  break;
00607                 }
00608 
00609              case ACK_ONLY:
00610                 {
00611                  /* since there is no data sequence # present in the record
00612                     and, therefore, the server is not sending data so
00613                     all the client's data may not have arrived.  Note the
00614                     current extent of ACKed client data and change state */
00615 
00616                  if (new_ack > (current_request_end + 1))
00617                     begin_REQ();
00618                  break;
00619                 }
00620 
00621              case DATA_ACK:          
00622                 {
00623                 /* check for presence of data sequence # in the common cases
00624                    in SYN_SENT state, data presence indicates that 
00625                    request data has been received and server is sending data
00626                    that should be a response */
00627 
00628                  if (new_ack > (current_request_end + 1)) 
00629                     {/* this record had advanced the ACK sequence so
00630                      save current request info. and change state */
00631 
00632                      begin_REQ();
00633 
00634                      /* the server's data sequence may not have advanced;
00635                         but if it has, treat it as the start of a response
00636                         and the end of the client (request) data */
00637 
00638                      if (end_seq > current_response_end)
00639                         {
00640                           /* start of response ends the request, log
00641                           it and change state to look for end of response */  
00642 
00643                          log_REQ();
00644                          begin_RSP();
00645                         }
00646                     }
00647                  else 
00648                    /* some servers appear to send response data immediately
00649                       on completing the TCP connection without receiving 
00650                       any request data */
00651 
00652                     if ((end_seq > last_response_end) && 
00653                         (seq_bytes > 0))
00654                        begin_RSP();
00655                 break;
00656                 }
00657              default:
00658                 break;
00659              } /* end switch on input type */
00660           break;
00661          } /* end case SYN_SENT */
00662 
00663        case FIN_SENT:
00664           {
00665            switch (input_type)
00666               {
00667                case SYN:
00668                   {
00669                   /* In some cases the same host/port pairs are reused in a
00670                      single trace; check for plausible reuse */
00671 
00672                    check_tuple_reuse();
00673                    break;
00674                   }
00675 
00676                 case FIN:  /* ignore multiple FINs */
00677                    break;
00678 
00679                 case RST:  /* ignore reset after FIN */
00680                    break;
00681 
00682                 case ACK_ONLY:
00683                     /* If there is an ACK (only) coming after a FIN is sent, it
00684                       is either the normal one in the 4-way termination or it
00685                       is to ACK more request data which will be ignored (there
00686                       can be no response).  
00687                       In either case, it is ignored here. */
00688 
00689                    break;
00690 
00691                 case DATA_ACK:
00692                   {
00693 
00694               /* All others may have sequence number fields.  Treat them as
00695                  errors if they advance the sequence number after a FIN.  
00696                  The usual case here is just retransmissions (including 
00697                  retransmission of the FIN) which should not advance it
00698                  because the highest sequence number possible was on the FIN */
00699 
00700                    if ((end_seq > (current_response_end + 2)) &&
00701                        (have_FINdata_error == 0))
00702                       {
00703                        error_state("new data in FIN_SENT state");
00704                        have_FINdata_error = 1;
00705                       }
00706                    break;
00707                   }
00708                 default:
00709                    break;
00710               } /* end switch on input type */
00711             break; 
00712           } /* end case FIN_SENT */
00713 
00714        case RESET:
00715            {
00716             /* A Reset nominally means an abnormal close of the connection 
00717                with any queued data discarded before transmitting it.  We
00718                observe, however, that under some (unknown) circumstances
00719                segments that advance the data sequence number continue in 
00720                the trace after the Reset.  If these are part of a response, 
00721                it makes sense to count them in the response size since 
00722                the server obviously sent them and the network has to handle 
00723                them.  This also means that a FIN following the Reset may be
00724                a valid indication of the end of a response. Just ignore 
00725                anything else unexpected (e.g., Reset) */
00726 
00727             switch (input_type)
00728                {
00729                 case RST:
00730                    break;
00731 
00732                 case SYN:
00733                   {
00734                   /* In some cases the same host/port pairs are reused in a
00735                      single trace; check for plausible reuse */
00736 
00737                    check_tuple_reuse();
00738                    break;
00739                   }
00740 
00741                 case ACK_ONLY:
00742 
00743                /* an ACK might advance possibly indicating a request.  However,
00744                   since the connection is Reset and there should be no new
00745                   response, just ignore it */
00746 
00747                     break;
00748 
00749                 case FIN:
00750                   {
00751 
00752             /* Only if the state is IN_RESPONSE do we try to continue looking
00753                for the end of the response.  FIN is treated as the true end
00754                of a response. */
00755 
00756                    if (last_state == IN_RESPONSE)
00757                       {/* ends the current response, ignore any ACK */
00758                        if ((has_seq == 1) &&
00759                            (end_seq > current_response_end)) 
00760                           more_RSP();
00761                        log_RSP();
00762                   
00763                        last_state = RESET;
00764                        connection_state = FIN_SENT;
00765 
00766                        /* record timestamp of first FIN on connection */
00767 
00768                        if (strcmp(FIN_sent_time, "") == 0)
00769                           strcpy(FIN_sent_time, ts);
00770                       }
00771                     break;
00772                    }
00773 
00774                case DATA_ACK:
00775                  {
00776                 /* segments with data may advance the data sequence number as 
00777                    more parts of the response.  They may also advance the ACK
00778                    which normally would indicate a new request.  However,
00779                    after a Reset, it probably will be ignored by the 
00780                    server so it is also ignored here except to mark the end
00781                    of the response. */
00782 
00783                    if (last_state == IN_RESPONSE)
00784                       {
00785                        if (new_ack > (last_request_end + 1)) /* new request */
00786                           { /* implies end of current response */
00787                            log_RSP(); 
00788                            last_state = RESET; /* will ignore all else */
00789                            break; 
00790                           }
00791  
00792                      /* As long as the data sequence # advances, continue to
00793                         save info about the current response. */
00794 
00795                        if ((end_seq > current_response_end) &&
00796                            (seq_bytes > 0))
00797                           more_RSP();
00798                       }
00799                   break;
00800                  }
00801                default:
00802                   break;
00803                } /* end switch on input type */
00804             break;
00805            } /* end case RESET */
00806 
00807        case IN_RESPONSE:
00808            {
00809             /* In this state, look for events that will indicate the end of
00810                the response data (ACK advances for request, FIN, Reset).  If
00811                the event is ACK advance, this also initiates the start of a
00812                new request.  */
00813 
00814             switch (input_type)
00815                {
00816                 case FIN:
00817                   {         
00818                    /* FIN is complicated since the segment that carries it may
00819                     also advance the ACK (new request), advance the data
00820                     sequence # and end the connection.  If the ACK advances,
00821                     any sequence # advance is for the new request; otherwise
00822                     a sequence # advance extends and completes the current 
00823                     response. */
00824 
00825                    if (has_ack == 1) 
00826                       {
00827                        if ((rc = check_ACK_advance(current_request_end)) < 0)
00828                           break;
00829                       }
00830 
00831                    /* The ACK advance amount has to be greater than 1 to be
00832                      considered a real new request.  This is primarily to
00833                     filter out just an ACK for a FIN from the client.  Note
00834                     the ACK must advance so duplicate ACKs are ignored. */
00835 
00836                    if ((has_ack == 1) &&
00837                        (new_ack > (current_request_end + 1)))
00838                       { /* implies end of current response, begin request 
00839                            Log the info. for the current response. */
00840 
00841                        log_RSP();  /*end of previous response */
00842 
00843                        /* on FIN, assume end of request also since server is
00844                           closing the connection. */
00845 
00846                        begin_REQ();
00847                        log_REQ(); 
00848 
00849                        /* if the data sequence # also advances, this segment
00850                           (only -- because of the FIN) carries the last part
00851                           of the response */
00852  
00853                        if ((has_seq == 1) &&
00854                            (end_seq > last_response_end)) 
00855                           {/* response begins and ends in the FIN segment */
00856                            begin_RSP();
00857                            log_RSP();  /* also end of any response */
00858                           }
00859 
00860                        /* Note that the null "else" here is the case that the
00861                           data sequence # does not advance (there is no 
00862                           response to the request) -- probably an HTTP/1.1
00863                           browser attempting multiple requests on a TCP 
00864                           connection where the server doesn't play nice. */
00865 
00866                       }
00867                    else 
00868                       /* the ACK did not advance so no request; if the data
00869                        sequence # advances, it extends the current response */
00870 
00871                       {
00872                        if ((has_seq == 1) &&
00873                            (end_seq > current_response_end)) 
00874                           more_RSP();
00875                        log_RSP();    /* FIN always implies end of response */
00876                       }
00877 
00878                    last_state = IN_RESPONSE;
00879                    connection_state = FIN_SENT;
00880 
00881                    if (strcmp(FIN_sent_time, "") == 0)
00882                       strcpy(FIN_sent_time, ts);
00883                    break;
00884                   }
00885 
00886                case RST:
00887 
00888                  /* Look for a Reset and only change state to RESET to
00889                    continue processing.  This is explained in comments in 
00890                    processing for the RESET state when the last_state is
00891                    IN_RESPONSE. */
00892 
00893                  {/* ignore any advance in seq# on Reset */
00894                   /* it may not be wise to trust ACK on RST */
00895 
00896                   last_state = IN_RESPONSE;
00897                   connection_state = RESET;   
00898 
00899                   if (strcmp(RST_sent_time, "") == 0)
00900                      strcpy(RST_sent_time, ts);
00901                   break;
00902                  }
00903 
00904                case SYN:
00905                  {
00906                   /* In some cases the same host/port pairs are reused in a
00907                      single trace; check for plausible reuse */
00908 
00909                   check_tuple_reuse();
00910                   break;
00911                  }
00912 
00913                case ACK_ONLY:
00914 
00915                  /* Begin checking for cases where the ACK may advance
00916                     and really indicate that a new request starts. */
00917 
00918                  {
00919                   /* The ACK advance amount has to be greater than 1 to be
00920                      considered a real new request.  This is primarily to
00921                      filter out just an ACK for a FIN from the client.  Note
00922                      the ACK must advance so duplicate or OOO
00923                      ACKs are ignored. */
00924 
00925                   if (new_ack > (last_request_end + 1)) 
00926                      { /* implies end of current response, begin request.
00927                          Log the info. for the current response and change
00928                          states. */
00929                       log_RSP();
00930 
00931                     /* since there is no data sequence # present in the record
00932                     and, therefore, the server is not sending data so
00933                     all the client's data may not have arrived.  Note the
00934                     current extent of ACKed client data and start time */
00935 
00936                       begin_REQ();
00937                      }
00938                   break;
00939                  }
00940 
00941                case DATA_ACK:
00942 
00943             /* cases with data and ACKs have two important sub-cases: (a) 
00944                ACK sequence number advances -- implies the start of a
00945                new request and the end of the current response; if the
00946                data sequence number also advances, it is assumed that
00947                all the request data has been received and the first of
00948                the response data is contained in the segment. (b) the
00949                ACK sequence number does not advance so any advance in
00950                the data sequence number is more of the current response. */
00951 
00952                {
00953                 if ((rc = check_ACK_advance(last_request_end)) < 0)
00954                    break;
00955 
00956                  /* The ACK advance amount has to be greater than 1 to be
00957                     considered a real new request.  This is primarily to
00958                     filter out just an ACK for a FIN from the client.  Note
00959                     the ACK must advance so duplicate ACKs are ignored. */
00960 
00961                 if (new_ack > (last_request_end + 1)) 
00962                    { /* implies end of current response, begin request 
00963                          Log the info. for the current response. */
00964 
00965                     log_RSP();  
00966                     begin_REQ();
00967 
00968                     /* If there is also new data (sequence # advances), then
00969                        the request is considered completed and a new response
00970                        has started but not necessarily completed. */
00971 
00972                     if ((end_seq > current_response_end) &&
00973                         (seq_bytes > 0))
00974                        {/* Log info. about this request */
00975                         log_REQ();
00976                         begin_RSP();
00977                        }
00978                    }
00979                 else
00980                     /* the ACK did not advance so no request; if the data
00981                        sequence # advances, it extends the current response */
00982 
00983                     if (end_seq > current_response_end) 
00984                        more_RSP();
00985                 break;
00986                }
00987              default:
00988                 break;
00989             } /* end switch on input type */
00990            break;
00991           }  /* end case IN_RESPONSE */
00992 
00993        case IN_REQUEST:
00994            {
00995             /* In this state check for events that indicate the end of the
00996                request data (advance of data sequence #, FIN, Reset).  An
00997                advance in the data sequence # also marks the beginning of
00998                the response to the request */
00999 
01000             switch (input_type)
01001                {
01002                 case FIN:
01003 
01004             /* FIN is somewhat less complicated here since the segment
01005                implies that the server will send no more data.  Treat this
01006                as effectively ending the request.  If the ACK advances, it
01007                is considered as part of the request and if the data 
01008                sequence # advances, it is the response data (all in this
01009                segment). */
01010 
01011                 { /* FIN can ACK more of current request */
01012 
01013                   /* The ACK advance amount has to be greater than 1 to be
01014                     considered as extending the request.  This is primarily 
01015                     to filter out just an ACK for a FIN from the client.  Note
01016                     the ACK must advance so duplicate ACKs are ignored. */
01017 
01018                  if ((has_ack == 1) &&
01019                      (new_ack > (current_request_end + 1)))
01020                     more_REQ();
01021                  else
01022                     if (has_ack == 1)
01023                        {
01024                         if ((rc = check_ACK_advance(current_request_end)) < 0)
01025                            break;
01026                        }
01027 
01028                  log_REQ();  /* on FIN, assume end of request */
01029 
01030                 /* the server's data sequence may not have advanced;
01031                    but if it has, treat it as the complete response */
01032 
01033                  if ((has_seq == 1) &&
01034                      (end_seq > last_response_end))  
01035                     {
01036                      begin_RSP();
01037                      log_RSP();  /* also end of any response */
01038                     }
01039 
01040                  last_state = IN_REQUEST;
01041                  connection_state = FIN_SENT;
01042 
01043                  if (strcmp(FIN_sent_time, "") == 0)
01044                     strcpy(FIN_sent_time, ts);
01045                  break;
01046                 }
01047 
01048              case RST:
01049                {
01050                 /* Treat a Reset as ending the request with no response */
01051                 /* ignore any advance in ack on Reset */
01052                 /* treat as ending any request */
01053 
01054                 log_REQ();                
01055                 last_state = IN_REQUEST;
01056                 connection_state = RESET;
01057 
01058                 if (strcmp(RST_sent_time, "") == 0)
01059                    strcpy(RST_sent_time, ts);
01060                 break;
01061                }
01062 
01063              case SYN:
01064                 {
01065                  /* In some cases the same host/port pairs are reused in a
01066                     single trace; check for plausible reuse */
01067 
01068                  check_tuple_reuse();
01069                  break;
01070                 }
01071 
01072              case ACK_ONLY:
01073                 {
01074                  /* The ACK advance amount has to be greater than 1 to be
01075                     considered as extending the request.  This is primarily 
01076                     to filter out just an ACK for a FIN from the client.  Note
01077                     the ACK must advance so duplicate or OOO 
01078                     ACKs are ignored. */
01079 
01080                  if (new_ack > (current_request_end + 1))
01081                     more_REQ();
01082                  break;
01083                 }
01084 
01085              case DATA_ACK:
01086                {
01087                 /* cases with data and ACKs have two important sub-cases: 
01088                   (a) ACK sequence number advances which extends the amount
01089                   of request data, and (b) the data sequence # advances
01090                   which ends the request and is the beginning of the 
01091                   response to that request. */
01092                 /* The ACK advance amount has to be greater than 1 to be
01093                    considered as extending the request.  This is primarily 
01094                    to filter out just an ACK for a FIN from the client.  Note
01095                    the ACK must advance so duplicate ACKs are ignored. */
01096 
01097                 if (new_ack > (current_request_end + 1))
01098                    more_REQ();
01099                 else
01100                     if ((rc = check_ACK_advance(current_request_end)) < 0)
01101                        break;
01102 
01103                 /* the server's data sequence may not have advanced;
01104                    but if it has, treat it as the start of a response
01105                    and the end of the request data */
01106 
01107                 if ((end_seq > last_response_end) && 
01108                     (seq_bytes > 0))
01109                    {
01110                     /* record info. for current request and change state 
01111                        to look for the end of the response*/
01112 
01113                     log_REQ();
01114                     begin_RSP();
01115                    }
01116                 break;
01117                }
01118              default:
01119                 break;
01120             } /* end switch on input type */ 
01121           break;
01122          }  /* end case IN_REQUEST */
01123 
01124        default:
01125        break;
01126       } /* end switch on connection state */
01127 
01128    /* save the last known time for the connection from current record */
01129    strcpy(last_connection_time, ts); 
01130 
01131  }  /* end main loop */  
01132   log_log();
01133   close (dumpFP);
01134   close (outFP);
01135   close (logFP);
01136 } /* end main() */
01137 
01138 
01139 /* Initialize connection state when a new connection is recognized by
01140    the presence of a SYN. */ 
01141 
01142 void init_connection(void)
01143 {
01144  log_SYN();                /* log start of connection */
01145  connection_state = SYN_SENT;  /* new connection state */
01146 
01147  /* save SYN sequence number for duplicate detection */
01148  if (has_seq == 1)
01149      current_synseq = begin_seq;
01150  else
01151      error_line ("SYN without valid sequence #");
01152 
01153  /* assume tcpdump relative addressing and initialize */
01154  /* note that initializing to 1 instead of 0 adjusts for ACK
01155     being the next expected sequence number */
01156 
01157  last_request_end = 1;       /* need "last" and "current" */
01158  last_response_end = 1;      /* values since there may be */
01159  current_response_end = 1;   /* > 1 request per connection */
01160  current_request_end = 1;
01161 
01162  strcpy(FIN_sent_time, "");
01163  strcpy(RST_sent_time, "");
01164  strcpy(last_connection_time, "");
01165 
01166  have_ACK_error = 0;
01167  have_value_error = 0;
01168  have_FINdata_error = 0;
01169 }
01170 
01171 /* Initialize connection state when a new connection is recognized by a
01172    change in the host/port 4-tuple, there is no SYN but request or 
01173    response activity can be determined.  Request or response state is
01174    initialized in the main program when the type of activity is determined */ 
01175 
01176 void init_active(void)
01177 {
01178  strcpy(FIN_sent_time, "");
01179  strcpy(RST_sent_time, "");
01180  strcpy(last_connection_time, "");
01181 
01182  have_ACK_error = 0;
01183  have_value_error = 0;
01184  have_FINdata_error = 0;
01185 
01186 }
01187 
01188 /* Check for some indications of possible errors 
01189    such as the ACK appearing to move back -- may indicate
01190    out-of-order or something worse. */
01191 
01192 int check_ACK_advance(unsigned long old_ack)
01193 {
01194  
01195  if ((new_ack < old_ack) &&
01196       report_ACK_err)
01197     {
01198      if (have_ACK_error == 0)
01199         {
01200          error_state("ACK error -- backward");
01201          have_ACK_error = 1;
01202         }
01203      return(-1);
01204     }
01205  else
01206     return (0);
01207 }
01208 
01209 /* Check to see if reuse of a connection in a trace is reasonable */
01210 
01211 void check_tuple_reuse(void)
01212 {
01213  /* ignore duplicate SYNs (have same SYN sequence number) */
01214 
01215  if ((has_seq == 1) &&
01216      (current_synseq != begin_seq))
01217     {
01218       /* if a non-duplicate SYN comes before any other activity,
01219          treat it as terminating the incomplete connection and starting
01220          another */
01221      
01222      if ((connection_state == SYN_SENT) ||
01223          ((connection_state == RESET) && (last_state == SYN_SENT)))
01224         {
01225          log_END("TRM");
01226          init_connection();
01227          return;
01228         }
01229 
01230 
01231     /* It is quite possible to have a valid new
01232     connection that reuses the same source and destination
01233     IP.port address 4-tuple.  Treat this as valid if 
01234     there has been a FIN from the server or at least
01235     2*MSL has passed since seeing any previous packet
01236     from this connection (in case we missed the FIN).  If there
01237     was a FIN from the server, allow for the case it was an
01238     active close by the server (normal for HTTP 1.0), the 
01239     stack is a BSD derivative, and SO_REUSEADDR is is use
01240     (see Stevens vol. 1, pp 245) */ 
01241 
01242      if (connection_state == FIN_SENT)
01243         elapsed = 60001;  /* force beyond 2MSL test */
01244      else        
01245         elapsed = elapsed_ms(ts, last_connection_time);
01246 
01247      if (elapsed < 60000)  /* MSL of 30 seconds, minimum */
01248         error_state("Non-duplicate SYN in connection");
01249      else
01250         {
01251          /* perform all actions associated with the end of
01252             one connection and the beginning of the next 
01253             (see state PENDING) */
01254          switch (connection_state)
01255             {
01256             case FIN_SENT:
01257                  log_END("FIN");
01258                  break;
01259             case RESET:
01260                  if (last_state == IN_RESPONSE)
01261                     log_RSP();
01262 
01263                  log_END("RST");
01264                  break;
01265             case IN_RESPONSE:
01266                  log_RSP();
01267                  log_END("TRM");
01268                  break;
01269             case IN_REQUEST:
01270                  log_REQ();
01271                  log_END("TRM");
01272                  break;
01273             default:
01274                  break;
01275             }
01276          init_connection();
01277         }
01278     }
01279 }
01280 
01281 /* Record information about the beginning of a new request */
01282 
01283 void begin_REQ(void)
01284 {
01285  current_request_end = new_ack;
01286 
01287  /* record the request start time as beginning at the 
01288     tcpdump time stamp on this record */
01289 
01290  strcpy(start_request_time, ts);
01291  strcpy(request_end_time, ts);
01292 
01293  last_state = connection_state;
01294  connection_state = IN_REQUEST;
01295 }
01296 
01297 /* Record information about an in-progress request */
01298 
01299 void more_REQ(void)
01300 {
01301  current_request_end = new_ack;
01302  strcpy(request_end_time, ts);
01303 }
01304 
01305 /* Record information about the beginning of a new response */
01306 
01307 void begin_RSP(void)
01308 {
01309  current_response_end = end_seq;
01310 
01311  /* record timestamp of tcpdump record as current
01312  value of both start and end times of response
01313  (in case no later end time is found) */
01314 
01315  strcpy(start_response_time, ts); 
01316  strcpy(response_end_time, ts);
01317 
01318  last_state = connection_state;
01319  connection_state = IN_RESPONSE;
01320 }
01321 
01322 /* Record information about an in-progress response */
01323 
01324 void more_RSP(void)
01325 {
01326  current_response_end = end_seq;
01327  /* save the new (potential) response end time */
01328  strcpy(response_end_time, ts);
01329 }
01330 
01331 
01332 /* Breaks dump record into essential data fields; initializes the
01333    following variables: begin_seq, end_seq, seq_bytes, new_ack,
01334                         has_ack, has_seq, input_type.
01335    Also checks for suspect sequence and ACK values.
01336 */
01337 
01338 int parse_dump_record()
01339 {
01340     begin_seq = end_seq = seq_bytes = new_ack = 0;
01341     has_ack = has_seq = 0;
01342 
01343    /* The following flag combinations are flagged because they (a) make no
01344       real sense for SYNs and (b) should be very rare. */
01345 
01346     if ((strcmp(fl, "SFRP") == 0) ||
01347         (strcmp(fl, "SFR") == 0) ||
01348         (strcmp(fl, "SFP") == 0) ||
01349         (strcmp(fl, "SF") == 0) ||
01350         (strcmp(fl, "SRP") == 0) ||
01351         (strcmp(fl, "SR") == 0))
01352        {
01353          /* If out of PENDING state (initial SYN recognized), just note
01354             the error, ignore it and continue.  */
01355 
01356         if (connection_state != PENDING)
01357             error_line ("SYN in combination with F or R");
01358         return(-1);
01359        }
01360 
01361     /* In tcpdump format, the fields coming after flags are, in order,
01362        data-seqno (format "bbb:eee(ccc)") and ack (format "ack xxx").
01363        Both the data-seqno and ack fields may not be present if they
01364        do not have valid information.  If the data-seqno field is not
01365        present, the first field after the flag is the string "ack";
01366        if both are not present the field after the flag is the 
01367        string "win".  Also check to see if tcpdump used absolute instead
01368        of relative values */
01369 
01370     if (strcmp(p1, "ack") == 0)  /* ack and no data sequence no. */
01371        {
01372         has_seq = 0;
01373         has_ack = 1; 
01374         new_ack = strtoul(p2, (char **)NULL, 10);
01375        }
01376     else
01377        {
01378         if (strcmp(p1, "win") == 0) /* no ack, no data */
01379            {
01380             has_ack = 0;
01381             has_seq = 0;
01382            }
01383         else 
01384            {
01385             /* assume it is a valid sequence number field */
01386             /* parse sequence field in header */
01387             /* sequence field format is
01388                <begin_seq #>:<end_seq #>(<seq_bytes count>) */
01389 
01390             if ((rc = get_sequence(p1, &begin_seq, &end_seq, &seq_bytes)) < 0)
01391                {
01392                 error_line ("invalid sequence # field");
01393                 return (-1);
01394                }
01395             has_seq = 1;
01396 
01397             /* check the field following the data sequence # for an ACK */
01398 
01399             if (strcmp(p2, "ack") == 0)  
01400                {
01401                 has_ack = 1; 
01402                 new_ack = strtoul(p3, (char **)NULL, 10);
01403                }
01404             else
01405                has_ack = 0;
01406            }
01407        }
01408 
01409    /* classify flag combinations into equivalence classes for later steps */
01410 
01411       if ((strcmp(fl, "F") == 0) || 
01412           (strcmp(fl, "FP") == 0) ||
01413           (strcmp(fl, "FR") == 0) ||
01414           (strcmp(fl, "FRP") == 0))
01415          input_type = FIN;
01416       else
01417          {
01418           if ((strcmp(fl, "R") == 0) ||
01419               (strcmp(fl, "RP") == 0))
01420              input_type = RST;
01421           else
01422              {
01423               if ((strcmp(fl, "S") == 0) || 
01424                   (strcmp(fl, "SP") == 0))
01425                  input_type = SYN;
01426               else
01427                 {
01428                  if ((has_ack == 1) &&
01429                      (has_seq == 0))
01430                     input_type = ACK_ONLY;
01431                  else
01432                     if ((has_seq == 1) &&
01433                         (has_ack == 1))
01434                        input_type = DATA_ACK;
01435                     else
01436                        {   
01437                         error_line("Unexpected Data/ACK combination");
01438                         return (-1);
01439                        }
01440                 }
01441              }
01442          }
01443 
01444 
01445       if ((connection_state == IN_RESPONSE) ||
01446           (connection_state == IN_REQUEST) ||
01447           (connection_state == SYN_SENT) ||
01448           ((connection_state == RESET) && (last_state == IN_RESPONSE)))
01449          {   
01450            /* allow for gaps of up to one normal TCP window */
01451 
01452           if (((input_type == FIN) ||
01453                (input_type == DATA_ACK)) &&
01454                (end_seq > (current_response_end + 65535)))
01455              {
01456               if (have_value_error == 0)
01457                  {
01458                   error_line ("suspect sequence # value");
01459                   have_value_error = 1;
01460                  }
01461               return (-1);
01462              }
01463 
01464           /* ACK is required for each 2 segments with space for gaps */
01465 
01466           if (((input_type == FIN) ||
01467                (input_type == ACK_ONLY) ||
01468                (input_type == DATA_ACK)) &&
01469                (new_ack > (current_request_end + 16384)))
01470              {
01471               if (have_value_error == 0)
01472                  {
01473                   error_line ("suspect ACK value");
01474                   have_value_error = 1;
01475                  }
01476               return (-1);
01477              }
01478          }
01479  return(0);
01480 }
01481 
01482 /* Output data associated with ending a connection */
01483 
01484 void log_connection(void)
01485 {
01486 /* if no more tcpdump records found while processing an http
01487    request, log (perhaps incomplete) client request */  
01488 
01489    if (connection_state == IN_REQUEST)
01490       log_REQ();
01491    else
01492       {
01493        /* if no more records found while processing an http 
01494           response, log (perhaps incomplete) response information */
01495 
01496        if ((connection_state == IN_RESPONSE) ||
01497            ((connection_state == RESET) && last_state == IN_RESPONSE))
01498           { /* don't log if just ACKed 1 (assume  FIN) */
01499            if (current_response_end > (last_response_end + 1))
01500                log_RSP();
01501           }
01502       }
01503 
01504    /* make log entry indicating type of connection termination;
01505       entry for connection is made only if a valid start (SYN) was
01506       previously recognized */ 
01507 
01508    if (connection_state != PENDING)  /* saw SYN */
01509       {
01510        if (connection_state == FIN_SENT) 
01511            log_END("FIN");
01512        else
01513           {
01514            if (connection_state == RESET) 
01515                log_END("RST");
01516            else
01517                log_END("TRM");
01518           }
01519       }
01520    else
01521       {
01522        if (((have_pending_fins > 0) +
01523             (have_pending_rsts > 0) +
01524             (have_pending_othr > 0) +
01525             (have_pending_acks > 0)) > 1)
01526           pending_cmb_count++;
01527        else
01528           {
01529            pending_fin_count += (have_pending_fins > 0);
01530            pending_rst_count += (have_pending_rsts > 0);
01531            pending_ack_count += (have_pending_acks > 0);
01532            pending_oth_count += (have_pending_othr > 0);
01533           }
01534       }
01535 }
01536 
01537 
01538 void log_log(void)
01539 {
01540  fprintf(logFP, "Input tcpdump file: %s \n", input_name);
01541  fprintf(logFP, "Output connection file: %s \n", output_name);
01542  fprintf(logFP, "   SYNs     %8d \n", syn_count);
01543  fprintf(logFP, "   REQs     %8d \n", req_count);
01544  fprintf(logFP, "   ACT-REQs %8d \n", act_req_count);
01545  fprintf(logFP, "   RSPs     %8d \n", rsp_count);
01546  fprintf(logFP, "   ACT-RSPs %8d \n", act_rsp_count);
01547  fprintf(logFP, "   FINs     %8d \n", fin_count);
01548  fprintf(logFP, "   RSTs     %8d \n", rst_count);
01549  fprintf(logFP, "   TRMs     %8d \n", trm_count);
01550  fprintf(logFP, "   ERRs     %8d \n", err_count);
01551  fprintf(logFP, "Partial Connections:\n");
01552  fprintf(logFP, " FIN only   %8d \n", pending_fin_count);
01553  fprintf(logFP, " RST only   %8d \n", pending_rst_count);
01554  fprintf(logFP, " ACK only   %8d \n", pending_ack_count);
01555  fprintf(logFP, " Combos     %8d \n", pending_cmb_count);
01556  fprintf(logFP, " Other      %8d \n", pending_oth_count);
01557 }
01558 
01559 /* A set of event-specific data logging functions.  A critical part of
01560    the logging functions for Requests and Responses is to save the 
01561    "current" value of the sequence number (ACK or data) that marks the
01562    end of it as the "last" value.  This is done to tell when the 
01563    sequence number advances again for multiple request/response pairs
01564    in a connection and to allow computing its size as (current - last). */
01565 
01566 void log_REQ(void)
01567 {
01568 /* parse sourse host/port */
01569   get_host_port(current_src, src_host, src_port);
01570 
01571 /* parse destination host/port */
01572   get_host_port(current_dst, dst_host, dst_port);
01573 
01574   /* for requests we log the request start time  -- the tcpdump 
01575      timestamp on the first record associated with a request -- 
01576      along with the TCP connection information and the size of the 
01577      request data */
01578 
01579   fprintf(outFP, "%s %-15s %5s > %-15s %4s: REQ %12d  %s\n", 
01580                                     start_request_time,  
01581                                     dst_host, dst_port, src_host, src_port,  
01582                                     current_request_end - last_request_end,
01583                                     request_end_time);
01584   /* IMPORTANT */
01585   last_request_end = current_request_end;
01586   req_count++;
01587 }
01588 
01589 void log_RSP(void)
01590 {
01591 /* parse sourse host/port */
01592   get_host_port(current_src, src_host, src_port);
01593 
01594 /* parse destination host/port */
01595   get_host_port(current_dst, dst_host, dst_port);
01596 
01597   /* for responses we log the response end time  -- the tcpdump 
01598      timestamp on the last record associated with a response -- 
01599      along with the TCP connection information, the size of the 
01600      response data, and the response start time -- the tcpdump
01601      timestamp on the first record associated with the response. */
01602 
01603   fprintf(outFP, "%s %-15s %5s > %-15s %4s: RSP %12d  %s\n", 
01604                                    response_end_time, 
01605                                    dst_host, dst_port, src_host, src_port,  
01606                                    current_response_end - last_response_end,
01607                                    start_response_time);
01608 #ifdef FOO
01609   fprintf(outFP, "%s %-15s %5s > %-15s %4s RSP %d %s\n", start_response_time, 
01610                                    src_host, src_port, dst_host, dst_port, 
01611                                    current_response_end - last_response_end,
01612                                    response_end_time);
01613   fprintf(outFP, "%s %s > %s RSP %d\n", start_response_time, current_src, 
01614                                    current_dst, 
01615                                    current_response_end - last_response_end);
01616 #endif
01617   /* IMPORTANT */
01618   last_response_end = current_response_end;
01619   rsp_count++;
01620 }
01621 
01622 void log_SYN(void)
01623 {
01624 /* parse sourse host/port */
01625   get_host_port(current_src, src_host, src_port);
01626 
01627 /* parse destination host/port */
01628   get_host_port(current_dst, dst_host, dst_port);
01629 
01630   fprintf(outFP, "%s %-15s %5s > %-15s %4s: SYN\n", ts, 
01631                                      dst_host, dst_port, src_host, src_port);  
01632   syn_count++;
01633 }
01634 
01635 void log_END(char *how)
01636 {
01637   char logical_end_time[20];
01638   
01639 /* parse sourse host/port */
01640   get_host_port(current_src, src_host, src_port);
01641 
01642 /* parse destination host/port */
01643   get_host_port(current_dst, dst_host, dst_port);
01644 
01645   if (strcmp(how, "FIN") == 0)
01646      {
01647       fin_count++;
01648       strcpy(logical_end_time, FIN_sent_time);
01649      }
01650   else
01651     {
01652      if (strcmp(how, "RST") == 0)
01653         {
01654          rst_count++;
01655          strcpy(logical_end_time, RST_sent_time);
01656         }
01657      else
01658         if (strcmp(how, "TRM") == 0)
01659            {
01660             trm_count++;
01661             strcpy(logical_end_time, last_connection_time);
01662            }
01663     }
01664 
01665   /* for termination of a connection we record the tcpdump timestamp of
01666      the last record of any kind associated with that conneciton along
01667      with the TCP connection 4-tuple and the way the connection ended
01668      (FIN, Reset, or just no more records in the trace). */
01669 
01670   fprintf(outFP, "%s %-15s %5s > %-15s %4s: %s               %s\n", 
01671                                     last_connection_time, 
01672                                     dst_host, dst_port, src_host, src_port,  
01673                                     how, logical_end_time);
01674 }
01675 
01676 void log_ACT(char *how)
01677 {
01678 /* parse sourse host/port */
01679   get_host_port(current_src, src_host, src_port);
01680 
01681 /* parse destination host/port */
01682   get_host_port(current_dst, dst_host, dst_port);
01683 
01684   /* for activity on a SYN-less connection we record the tcpdump timestamp
01685      of the first record of activiy associated with that conneciton along
01686      with the TCP connection 4-tuple and the way the connection started
01687      (Request or Response). */
01688 
01689   fprintf(outFP, "%s %-15s %5s > %-15s %4s: ACT-%s\n", ts, 
01690                                     dst_host, dst_port, src_host, src_port,  
01691                                     how);
01692   if (strcmp(how, "REQ") == 0)
01693      act_req_count++;
01694   else
01695      if (strcmp(how, "RSP") == 0)
01696         act_rsp_count++;
01697 }
01698 
01699 void error_line(char * s)
01700 {
01701 /* parse sourse host/port */
01702   get_host_port(sh, src_host, src_port);
01703 
01704 /* parse destination host/port */
01705   get_host_port(dh, dst_host, dst_port);
01706 
01707   fprintf(outFP, "%s %-15s %5s > %-15s %4s: ERR: %s\n", ts, 
01708                                    dst_host, dst_port, src_host, src_port, s);
01709   err_count++;
01710 }
01711 
/*
 * Write an "ERR:" record carrying message s to outFP and count it.
 *
 * The record format and bookkeeping are exactly those of error_line(),
 * so this simply delegates to it instead of keeping a duplicate body.
 */
void error_state(char * s)
{
    error_line(s);
}
01724 
/*
 * Split an address of the form "host.port" (optionally followed by
 * ':' and further text) into its host and port parts.
 *
 * adr   - address string; the LAST '.' separates host from port.
 * host  - receives everything before the last '.'.
 * port  - receives everything after the last '.', up to but not
 *         including the first ':' if one follows.
 *
 * Only the first 49 characters of adr are considered, so host and port
 * each receive at most 49 characters plus the terminator; the caller's
 * buffers must be at least that large.  If adr is NULL both outputs are
 * set to "".  If adr contains no '.', host receives the whole (possibly
 * truncated) string and port is set to "".
 */
void get_host_port(char *adr, char *host, char *port)
{
    char adr_field[50];
    char *last_dot;
    char *port_start;
    char *colon;

    if (adr == NULL) {
        host[0] = '\0';
        port[0] = '\0';
        return;
    }

    /* bounded working copy: truncates instead of overflowing */
    snprintf(adr_field, sizeof adr_field, "%s", adr);

    /* break string at '.' separating host and port fields (last in string) */
    last_dot = strrchr(adr_field, '.');
    if (last_dot == NULL) {
        /* no separator at all: treat everything as the host */
        strcpy(host, adr_field);
        port[0] = '\0';
        return;
    }

    *last_dot = '\0';          /* replace '.' with string terminator */
    strcpy(host, adr_field);   /* copies host name up to terminator */

    port_start = last_dot + 1; /* 1st char of the port field */
    colon = strchr(port_start, ':'); /* ':' may follow a dst port */
    if (colon != NULL)
        *colon = '\0';
    strcpy(port, port_start);
}
01743 
/*
 * Parse a sequence field of the form "begin:end(bytes)".
 *
 * p      - text to parse; it is read in place and not modified.
 * begin  - receives the number before the first ':'.
 * end    - receives the number between that ':' and the next '('.
 * bytes  - receives the number between that '(' and the next ')'.
 *
 * Returns 0 on success, or -1 if p is NULL or one of the delimiters
 * ':', '(' or ')' is missing (searched for in that order).  On failure,
 * outputs belonging to fields before the missing delimiter have already
 * been stored; the remaining ones are left untouched.
 *
 * Each number is converted with strtoul() in base 10; a field without
 * digits yields 0.  Conversion stops at the delimiter by itself because
 * a delimiter is never a digit, a sign or white space, so no private
 * copy of the text (and no length limit on it) is needed.
 */
int get_sequence(char *p, unsigned long *begin, unsigned long *end,
                           unsigned long *bytes)
{
    const char *colon;
    const char *open_paren;
    const char *close_paren;

    if (p == NULL)
        return (-1);

    colon = strchr(p, ':');
    if (colon == NULL)
        return (-1);
    *begin = strtoul(p, (char **)NULL, 10);

    open_paren = strchr(colon + 1, '(');
    if (open_paren == NULL)
        return (-1);
    *end = strtoul(colon + 1, (char **)NULL, 10);

    close_paren = strchr(open_paren + 1, ')');
    if (close_paren == NULL)
        return (-1);
    *bytes = strtoul(open_paren + 1, (char **)NULL, 10);

    return (0);
}
01775 
/*--------------------------------------------------------------*/
/* subtract two timevals (t1 - t0) with result in tdiff         */
/* tdiff, t1 and t0 are all pointers to struct timeval          */
/*                                                              */
/* When the microsecond difference is negative, one second is   */
/* borrowed so that tdiff->tv_usec ends up non-negative.        */
/*--------------------------------------------------------------*/
static void
tvsub(struct timeval *tdiff, struct timeval *t1, struct timeval *t0)
{
        tdiff->tv_sec = t1->tv_sec - t0->tv_sec;
        tdiff->tv_usec = t1->tv_usec - t0->tv_usec;
        if (tdiff->tv_usec < 0)
           {
            /* borrow one second */
            tdiff->tv_sec--;
            tdiff->tv_usec += 1000000;
           }
}
01793 
/*--------------------------------------------------------------*/
/* helper for elapsed_ms(): convert "seconds.microseconds" text */
/* into a struct timeval.  The text is read in place.  A NULL   */
/* string yields 0.0; a string without '.' yields 0 for the     */
/* microseconds part.  The digits after the '.' are taken as a  */
/* plain integer count of microseconds.                         */
/*--------------------------------------------------------------*/
static void
parse_timestamp(const char *text, struct timeval *tv)
{
 const char *dot;

 tv->tv_sec = 0;
 tv->tv_usec = 0;
 if (text == NULL)
    return;

 /* conversion stops by itself at the '.' */
 tv->tv_sec = strtol(text, (char **)NULL, 10);

 dot = strchr(text, '.');
 if (dot != NULL)
    tv->tv_usec = strtol(dot + 1, (char **)NULL, 10);
}

/*--------------------------------------------------------------*/
/* compute the elapsed time in milliseconds to end              */
/* from some past time given by start (both are timevals        */
/* formatted as "seconds.microseconds" text)                    */
/*                                                              */
/* The strings are parsed in place, so there is no limit on     */
/* their length, and a missing '.' is treated as zero           */
/* microseconds.  The microsecond remainder is truncated toward */
/* zero when converted to milliseconds.                         */
/*--------------------------------------------------------------*/
long elapsed_ms(char *end, char *start)
{
 struct timeval delta, end_time, start_time;
 long elapsed_time;

 parse_timestamp(end, &end_time);
 parse_timestamp(start, &start_time);

 tvsub(&delta, &end_time, &start_time);
 /* express as milliseconds */
 elapsed_time = (delta.tv_sec * 1000) + (delta.tv_usec/1000);
 return (elapsed_time);
}
01826 

Generated on Tue Apr 20 12:14:19 2004 for NS2.26SourcesOriginal by doxygen 1.3.3