diff -r -u webalizer-2.01-10.init/Makefile.in webalizer-2.01-10/Makefile.in --- webalizer-2.01-10.init/Makefile.in 2000-10-16 21:15:53.000000000 -0700 +++ webalizer-2.01-10/Makefile.in 2007-01-23 23:02:18.000000000 -0800 @@ -23,8 +23,8 @@ MANDIR = @mandir@/man1 ETCDIR = @ETCDIR@ CC = @CC@ -CFLAGS = @CFLAGS@ -LIBS = @LIBS@ +CFLAGS = @CFLAGS@ -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -DMODIFY_LOG_LINES -Wall -Werror +LIBS = @LIBS@ -lpcre DEFS = -DETCDIR=\"@ETCDIR@\" @DEFS@ @OPTS@ LDFLAGS= @LDFLAGS@ INSTALL= @INSTALL@ diff -r -u webalizer-2.01-10.init/Makefile.std webalizer-2.01-10/Makefile.std --- webalizer-2.01-10.init/Makefile.std 2000-10-16 21:15:53.000000000 -0700 +++ webalizer-2.01-10/Makefile.std 2007-01-23 23:02:18.000000000 -0800 @@ -24,7 +24,7 @@ MANDIR = ${prefix}/man/man1 ETCDIR = /etc CC = gcc -CFLAGS = -Wall -O2 +CFLAGS = -Wall -O2 -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -DMODIFY_LOG_LINES LIBS = -lgd -lpng -lz -lm LDFLAGS= -L/usr/local/lib diff -r -u webalizer-2.01-10.init/country-codes.txt webalizer-2.01-10/country-codes.txt --- webalizer-2.01-10.init/country-codes.txt 2000-09-28 20:49:25.000000000 -0700 +++ webalizer-2.01-10/country-codes.txt 2007-01-23 23:02:18.000000000 -0800 @@ -1,7 +1,8 @@ +AC Ascension Island AD Andorra AE United Arab Emirates AF Afghanistan -AG Antigua and Barbuda +AG Antigua & Barbuda AI Anguilla AL Albania AM Armenia @@ -14,7 +15,7 @@ AU Australia AW Aruba AZ Azerbaijan -BA Bosnia and Herzegovina +BA Bosnia & Herzegovina BB Barbados BD Bangladesh BE Belgium @@ -35,10 +36,11 @@ BZ Belize CA Canada CC Cocos (Keeling) Islands +CD Congo, Dem. Rep. of the CF Central African Republic -CG Congo +CG Congo, Republic of CH Switzerland -CI Cote D'Ivoire (Ivory Coast) +CI Cote D'Ivoire CK Cook Islands CL Chile CM Cameroon @@ -67,7 +69,7 @@ FI Finland FJ Fiji FK Falkland Islands (Malvinas) -FM Micronesia +FM Micronesia, Fed. 
State of FO Faroe Islands FR France FX France, Metropolitan @@ -76,6 +78,7 @@ GD Grenada GE Georgia GF French Guiana +GG Guernsey GH Ghana GI Gibraltar GL Greenland @@ -84,13 +87,13 @@ GP Guadeloupe GQ Equatorial Guinea GR Greece -GS S. Georgia and S. Sandwich Isls. +GS S.Georgia & S.Sandwich Isls. GT Guatemala GU Guam GW Guinea-Bissau GY Guyana HK Hong Kong -HM Heard and McDonald Islands +HM Heard & McDonald Islands HN Honduras HR Croatia (Hrvatska) HT Haiti @@ -98,12 +101,14 @@ ID Indonesia IE Ireland IL Israel +IM Isle of Man IN India -IO British Indian Ocean Territory +IO British Indian Ocean Terr. IQ Iraq IR Iran IS Iceland IT Italy +JE Jersey JM Jamaica JO Jordan JP Japan @@ -112,12 +117,13 @@ KH Cambodia KI Kiribati KM Comoros -KN Saint Kitts and Nevis +KN Saint Kitts & Nevis KP Korea (North) KR Korea (South) KW Kuwait KY Cayman Islands KZ Kazakhstan +LA Lao People's Dem. Republic LA Laos LB Lebanon LC Saint Lucia @@ -128,15 +134,15 @@ LT Lithuania LU Luxembourg LV Latvia -LY Libya +LY Libyan Arab Jamahiriya MA Morocco MC Monaco -MD Moldova +MD Moldova, Republic of MG Madagascar MH Marshall Islands -MK Macedonia +MK Macedonia, Republic of ML Mali -MM Myanmar +MM Burma (Myanmar) MN Mongolia MO Macau MP Northern Mariana Islands @@ -162,7 +168,7 @@ NR Nauru NT Neutral Zone NU Niue -NZ New Zealand (Aotearoa) +NZ New Zealand OM Oman PA Panama PE Peru @@ -171,38 +177,39 @@ PH Philippines PK Pakistan PL Poland -PM St. Pierre and Miquelon -PN Pitcairn +PM St. Pierre & Miquelon +PN Pitcairn Island PR Puerto Rico +PS Palestinian Territories PT Portugal PW Palau PY Paraguay QA Qatar -RE Reunion +RE Reunion Island RO Romania RU Russian Federation RW Rwanda SA Saudi Arabia -Sb Solomon Islands +SB Solomon Islands SC Seychelles SD Sudan SE Sweden SG Singapore SH St. Helena SI Slovenia -SJ Svalbard and Jan Mayen Islands +SJ Svalbard & Jan Mayen Isls. 
SK Slovak Republic SL Sierra Leone SM San Marino SN Senegal SO Somalia SR Suriname -ST Sao Tome and Principe +ST Sao Tome & Principe SU USSR (former) SV El Salvador -SY Syria +SY Syrian Arab Republic SZ Swaziland -TC Turks and Caicos Islands +TC Turks & Caicos Islands TD Chad TF French Southern Territories TG Togo @@ -214,7 +221,7 @@ TO Tonga TP East Timor TR Turkey -TT Trinidad and Tobago +TT Trinidad & Tobago TV Tuvalu TW Taiwan TZ Tanzania @@ -225,14 +232,14 @@ US United States UY Uruguay UZ Uzbekistan -VA Vatican City State (Holy See) -VC Saint Vincent and the Grenadines +VA Holy See (Vatican City) +VC Saint Vincent & the Grenadines VE Venezuela -VG Virgin Islands (British) -VI Virgin Islands (U.S.) -VN Viet Nam +VG Virgin Islands (UK) +VI Virgin Islands (US) +VN Vietnam VU Vanuatu -WF Wallis and Futuna Islands +WF Wallis & Futuna Islands WS Samoa YE Yemen YT Mayotte @@ -241,12 +248,20 @@ ZM Zambia ZR Zaire ZW Zimbabwe -COM US Commercial -EDU US Educational -GOV US Government -INT International -MIL US Military -NET Network -ORG Non-Profit Organization -ARPA Old style Arpanet -NATO Nato field +BIZ Business (biz) +COM Commercial (com) +EDU Educational (edu) +GOV US Government (gov) +INT International (int) +MIL US Military (mil) +NET Network (net) +ORG Non-Profit Organization (org) +PRO Professional firm (pro) +ARPA Old style Arpanet (arpa) +INFO Information (info) +NAME Family name (name) +NATO Nato field (nato) +A1 Anonymous Proxy +A2 Satellite Provider +AP Asia/Pacific Region +EU Europe Region diff -r -u webalizer-2.01-10.init/dns_resolv.c webalizer-2.01-10/dns_resolv.c --- webalizer-2.01-10.init/dns_resolv.c 2002-04-16 13:46:20.000000000 -0700 +++ webalizer-2.01-10/dns_resolv.c 2007-01-23 23:02:18.000000000 -0800 @@ -178,7 +178,7 @@ int i; int save_verbose=verbose; - u_long listEntries = 0; + u_int64_t listEntries = 0; struct sigaction sigPipeAction; struct stat dbStat; @@ -338,7 +338,7 @@ if (time_me || (verbose>1)) { if (verbose<2 && time_me) printf("DNS: 
"); - printf("%lu %s ",listEntries, msg_addresses); + printf("%lld %s ",listEntries, msg_addresses); /* get processing time (end-start) */ temp_time = (float)(end_time-start_time)/CLK_TCK; diff -r -u webalizer-2.01-10.init/dns_resolv.h webalizer-2.01-10/dns_resolv.h --- webalizer-2.01-10.init/dns_resolv.h 2000-09-28 20:51:02.000000000 -0700 +++ webalizer-2.01-10/dns_resolv.h 2007-01-23 23:02:18.000000000 -0800 @@ -33,7 +33,7 @@ #define DNS_CHILD_RUNNING 0x2 #define MAXCHILD 100 /* Maximum number of DNS children */ -#define DNS_CACHE_TTL 86400*3 /* TTL of an Entry in the DNS cache */ +#define DNS_CACHE_TTL 86400*32 /* TTL of an Entry in the DNS cache */ #endif /* USE_DNS */ #endif /* _DNS_RESOLV_H */ diff -r -u webalizer-2.01-10.init/graphs.c webalizer-2.01-10/graphs.c --- webalizer-2.01-10.init/graphs.c 2001-06-15 01:34:24.000000000 -0700 +++ webalizer-2.01-10/graphs.c 2007-01-23 23:02:18.000000000 -0800 @@ -71,7 +71,7 @@ FILE *out; /* output file for PNG */ char maxvaltxt[32]; /* graph values */ float percent; /* percent storage */ -u_long julday; /* julday value */ +u_int64_t julday; /* julday value */ struct pie_data { int x; int y; /* line x,y */ int mx; int my; }; /* midpoint x,y */ @@ -88,19 +88,19 @@ int year_graph6x( char *fname, /* file name use */ char *title, /* title for graph */ int fmonth, /* begin month number */ - u_long data1[12], /* data1 (hits) */ - u_long data2[12], /* data2 (files) */ - u_long data3[12], /* data3 (sites) */ - double data4[12], /* data4 (kbytes) */ - u_long data5[12], /* data5 (views) */ - u_long data6[12]) /* data6 (visits) */ + u_int64_t data1[MH], /* data1 (hits) */ + u_int64_t data2[MH], /* data2 (files) */ + u_int64_t data3[MH], /* data3 (sites) */ + u_int64_t data4[MH], /* data4 (kbytes) */ + u_int64_t data5[MH], /* data5 (views) */ + u_int64_t data6[MH]) /* data6 (visits) */ { /* local variables */ int i,j,x1,y1,x2; int s_mth; - u_long maxval=1; + u_int64_t maxval=1; double fmaxval=0.0; /* initalize the graph */ @@ 
-132,12 +132,12 @@ 238,s_month[s_mth-1],black); /* specific array */ s_mth++; if (s_mth > 12) s_mth = 1; - if (data1[i] > maxval) maxval = data1[i]; /* get max val */ - if (data2[i] > maxval) maxval = data2[i]; - if (data5[i] > maxval) maxval = data5[i]; + if (data1[MH-12+i] > maxval) maxval = data1[MH-12+i]; /* get max val */ + if (data2[MH-12+i] > maxval) maxval = data2[MH-12+i]; + if (data5[MH-12+i] > maxval) maxval = data5[MH-12+i]; } if (maxval <= 0) maxval = 1; - sprintf(maxvaltxt, "%lu", maxval); + sprintf(maxvaltxt, "%lld", maxval); gdImageStringUp(im,gdFontSmall,8,26+(strlen(maxvaltxt)*6),maxvaltxt,black); if (graph_legend) /* print color coded legends? */ @@ -177,7 +177,7 @@ for (i=0; i<12; i++) { if (s_mth > 12) s_mth = 1; - percent = ((float)data1[s_mth++ -1] / (float)maxval); + percent = ((float)data1[MH-12+s_mth++ -1] / (float)maxval); if (percent <= 0.0) continue; x1 = 26 + (i*23); x2 = x1 + 13; @@ -191,7 +191,7 @@ for (i=0; i<12; i++) { if (s_mth > 12) s_mth = 1; - percent = ((float)data2[s_mth++ -1] / (float)maxval); + percent = ((float)data2[MH-12+s_mth++ -1] / (float)maxval); if (percent <= 0.0) continue; x1 = 29 + (i*23); x2 = x1 + 13; @@ -205,7 +205,7 @@ for (i=0; i<12; i++) { if (s_mth > 12) s_mth = 1; - percent = ((float)data5[s_mth++ -1] / (float)maxval); + percent = ((float)data5[MH-12+s_mth++ -1] / (float)maxval); if (percent <= 0.0) continue; x1 = 32 + (i*23); x2 = x1 + 13; @@ -217,11 +217,11 @@ maxval=0; for (i=0; i<12; i++) { - if (data3[i] > maxval) maxval = data3[i]; /* get max val */ - if (data6[i] > maxval) maxval = data6[i]; + if (data3[MH-12+i] > maxval) maxval = data3[MH-12+i]; /* get max val */ + if (data6[MH-12+i] > maxval) maxval = data6[MH-12+i]; } if (maxval <= 0) maxval = 1; - sprintf(maxvaltxt, "%lu", maxval); + sprintf(maxvaltxt, "%lld", maxval); gdImageStringUp(im, gdFontSmall,493,26+(strlen(maxvaltxt)*6), maxvaltxt, black); @@ -230,7 +230,7 @@ for (i=0; i<12; i++) { if (s_mth > 12) s_mth = 1; - percent = 
((float)data6[s_mth++ -1] / (float)maxval); + percent = ((float)data6[MH-12+s_mth++ -1] / (float)maxval); if (percent <= 0.0) continue; x1 = 310 + (i*15); x2 = x1 + 8; @@ -244,7 +244,7 @@ for (i=0; i<12; i++) { if (s_mth > 12) s_mth = 1; - percent = ((float)data3[s_mth++ -1] / (float)maxval); + percent = ((float)data3[MH-12+s_mth++ -1] / (float)maxval); if (percent <= 0.0) continue; x1 = 314 + (i*15); x2 = x1 + 7; @@ -255,7 +255,7 @@ fmaxval=0.0; for (i=0; i<12; i++) - if (data4[i] > fmaxval) fmaxval = data4[i]; /* get max val */ + if (data4[MH-12+i] > fmaxval) fmaxval = data4[MH-12+i];/* get max val */ if (fmaxval <= 0.0) fmaxval = 1.0; sprintf(maxvaltxt, "%.0f", fmaxval); gdImageStringUp(im, gdFontSmall,493,130+(strlen(maxvaltxt)*6), @@ -266,7 +266,7 @@ for (i=0; i<12; i++) { if (s_mth > 12) s_mth = 1; - percent = ((float)data4[s_mth++ -1] / (float)fmaxval); + percent = ((float)data4[MH-12+s_mth++ -1] / (float)fmaxval); if (percent <= 0.0) continue; x1 = 311 + (i*15); x2 = x1 + 9; @@ -299,17 +299,17 @@ char *title, /* graph title */ int month, /* graph month */ int year, /* graph year */ - u_long data1[31], /* data1 (hits) */ - u_long data2[31], /* data2 (files) */ - u_long data3[31], /* data3 (sites) */ - double data4[31], /* data4 (kbytes) */ - u_long data5[31], /* data5 (views) */ - u_long data6[31]) /* data6 (visits) */ + u_int64_t data1[31], /* data1 (hits) */ + u_int64_t data2[31], /* data2 (files) */ + u_int64_t data3[31], /* data3 (sites) */ + u_int64_t data4[31], /* data4 (kbytes) */ + u_int64_t data5[31], /* data5 (views) */ + u_int64_t data6[31]) /* data6 (visits) */ { /* local variables */ int i,j,s,x1,y1,x2; - u_long maxval=0; + u_int64_t maxval=0; double fmaxval=0.0; /* calc julian date for month */ @@ -354,7 +354,7 @@ if (data5[i] > maxval) maxval = data5[i]; } if (maxval <= 0) maxval = 1; - sprintf(maxvaltxt, "%lu", maxval); + sprintf(maxvaltxt, "%lld", maxval); gdImageStringUp(im, gdFontSmall,8,26+(strlen(maxvaltxt)*6), maxvaltxt,black); @@ 
-434,7 +434,7 @@ if (data6[i]>maxval) maxval = data6[i]; } if (maxval <= 0) maxval = 1; - sprintf(maxvaltxt, "%lu", maxval); + sprintf(maxvaltxt, "%lld", maxval); gdImageStringUp(im, gdFontSmall,8,180+(strlen(maxvaltxt)*6), maxvaltxt, black); @@ -502,14 +502,14 @@ int day_graph3( char *fname, char *title, - u_long data1[24], - u_long data2[24], - u_long data3[24]) + u_int64_t data1[24], + u_int64_t data2[24], + u_int64_t data3[24]) { /* local variables */ int i,j,s,x1,y1,x2; - u_long maxval=0; + u_int64_t maxval=0; /* initalize the graph */ init_graph(title,512,256); @@ -531,7 +531,7 @@ if (data3[i] > maxval) maxval = data3[i]; } if (maxval <= 0) maxval = 1; - sprintf(maxvaltxt, "%lu", maxval); + sprintf(maxvaltxt, "%lld", maxval); gdImageStringUp(im, gdFontSmall, 8, 26+(strlen(maxvaltxt)*6), maxvaltxt, black); @@ -607,8 +607,8 @@ /* */ /*****************************************************************/ -int pie_chart(char *fname, char *title, u_long t_val, - u_long data1[], char *legend[]) +int pie_chart(char *fname, char *title, u_int64_t t_val, + u_int64_t data1[], char *legend[]) { int i,x,percent,y=47; double s_arc=0.0; diff -r -u webalizer-2.01-10.init/graphs.h webalizer-2.01-10/graphs.h --- webalizer-2.01-10.init/graphs.h 2000-09-28 20:50:30.000000000 -0700 +++ webalizer-2.01-10/graphs.h 2007-01-23 23:02:18.000000000 -0800 @@ -1,11 +1,11 @@ #ifndef _GRAPHS_H #define _GRAPHS_H -extern int month_graph6(char *, char *, int, int, u_long *, - u_long *, u_long *, double *, u_long *, u_long *); +extern int month_graph6(char *, char *, int, int, u_int64_t *, + u_int64_t *, u_int64_t *, u_int64_t *, u_int64_t *, u_int64_t *); extern int year_graph6x(char *, char *, int, - u_long *, u_long *, u_long *, double *, u_long *, u_long *); -extern int day_graph3(char *, char *, u_long *, u_long *, u_long *); -extern int pie_chart(char *, char *, u_long, u_long *, char **); + u_int64_t *, u_int64_t *, u_int64_t *, u_int64_t *, u_int64_t *, u_int64_t *); +extern int 
day_graph3(char *, char *, u_int64_t *, u_int64_t *, u_int64_t *); +extern int pie_chart(char *, char *, u_int64_t, u_int64_t *, char **); #endif /* _GRAPHS_H */ diff -r -u webalizer-2.01-10.init/hashtab.c webalizer-2.01-10/hashtab.c --- webalizer-2.01-10.init/hashtab.c 2001-06-15 01:34:24.000000000 -0700 +++ webalizer-2.01-10/hashtab.c 2007-01-23 23:02:18.000000000 -0800 @@ -75,11 +75,12 @@ #ifdef USE_DNS DNODEPTR new_dnode(char *); /* new DNS node */ #endif /* USE_DNS */ +PNODEPTR new_pnode(char *); /* new dir node */ void update_entry(char *); /* update entry/exit */ void update_exit(char *); /* page totals */ -u_long hash(char *); /* hash function */ +u_int64_t hash(char *); /* hash function */ /* local data */ @@ -93,6 +94,7 @@ #ifdef USE_DNS DNODEPTR host_table[MAXHASH]; /* DNS hash table */ #endif /* USE_DNS */ +PNODEPTR di_htab[MAXHASH]; /* for URL dir stats */ /*********************************************/ @@ -111,6 +113,7 @@ #ifdef USE_DNS /* del_dlist(host_table); */ /* delete DNS hash table */ #endif /* USE_DNS */ + del_plist(di_htab); } /*********************************************/ @@ -154,12 +157,12 @@ int put_hnode( char *str, /* Hostname */ int type, /* obj type */ - u_long count, /* hit count */ - u_long file, /* File flag */ - double xfer, /* xfer size */ - u_long *ctr, /* counter */ - u_long visit, /* visits */ - u_long tstamp,/* timestamp */ + u_int64_t count, /* hit count */ + u_int64_t file, /* File flag */ + u_int64_t xfer, /* xfer size */ + u_int64_t *ctr, /* counter */ + u_int64_t visit, /* visits */ + u_int64_t tstamp,/* timestamp */ char *lasturl, /* lasturl */ HNODEPTR *htab) /* ptr>next */ { @@ -341,8 +344,8 @@ /* PUT_UNODE - insert/update URL node */ /*********************************************/ -int put_unode(char *str, int type, u_long count, double xfer, - u_long *ctr, u_long entry, u_long exit, UNODEPTR *htab) +int put_unode(char *str, int type, u_int64_t count, u_int64_t xfer, + u_int64_t *ctr, u_int64_t entry, u_int64_t 
exit, UNODEPTR *htab) { UNODEPTR cptr,nptr; @@ -468,7 +471,7 @@ /* PUT_RNODE - insert/update referrer node */ /*********************************************/ -int put_rnode(char *str, int type, u_long count, u_long *ctr, RNODEPTR *htab) +int put_rnode(char *str, int type, u_int64_t count, u_int64_t *ctr, RNODEPTR *htab) { RNODEPTR cptr,nptr; @@ -516,7 +519,7 @@ if (nptr!=NULL) { if (type==OBJ_GRP) nptr->flag=OBJ_GRP; - else if (isinlist(hidden_refs,nptr->string)!=NULL) + else if (isinlist(hidden_refs,skip_proto(nptr->string))!=NULL) nptr->flag=OBJ_HIDE; } return nptr==NULL; @@ -587,7 +590,7 @@ /* PUT_ANODE - insert/update user agent node */ /*********************************************/ -int put_anode(char *str, int type, u_long count, u_long *ctr, ANODEPTR *htab) +int put_anode(char *str, int type, u_int64_t count, u_int64_t *ctr, ANODEPTR *htab) { ANODEPTR cptr,nptr; @@ -702,7 +705,7 @@ /* PUT_SNODE - insert/update search str node */ /*********************************************/ -int put_snode(char *str, u_long count, SNODEPTR *htab) +int put_snode(char *str, u_int64_t count, SNODEPTR *htab) { SNODEPTR cptr,nptr; @@ -810,12 +813,12 @@ int put_inode( char *str, /* ident str */ int type, /* obj type */ - u_long count, /* hit count */ - u_long file, /* File flag */ - double xfer, /* xfer size */ - u_long *ctr, /* counter */ - u_long visit, /* visits */ - u_long tstamp,/* timestamp */ + u_int64_t count, /* hit count */ + u_int64_t file, /* File flag */ + u_int64_t xfer, /* xfer size */ + u_int64_t *ctr, /* counter */ + u_int64_t visit, /* visits */ + u_int64_t tstamp,/* timestamp */ INODEPTR *htab) /* hashtable */ { INODEPTR cptr,nptr; @@ -1044,14 +1047,148 @@ #endif /* USE_DNS */ /*********************************************/ -/* HASH - return hash value for string */ +/* NEW_PNODE - dir node creation */ /*********************************************/ -u_long hash(char *str) +PNODEPTR new_pnode(char *str) { - u_long hashval; - for (hashval = 0; *str != '\0'; 
str++) - hashval = *str + 31 * hashval; + PNODEPTR newptr; + char *sptr; + + if (strlen(str) >= MAXURLH) + { + if (verbose) + { + fprintf(stderr,"[new_pnode] %s (%d)",msg_big_one,strlen(str)); + if (debug_mode) + fprintf(stderr,":\n--> %s",str); + fprintf(stderr,"\n"); + } + str[MAXURLH-1]=0; + } + + if ( (sptr=malloc(strlen(str)+1))==NULL) return (PNODEPTR)NULL; + strcpy(sptr,str); + + if (( newptr = malloc(sizeof(struct pnode))) != NULL) + { + newptr->string=sptr; + newptr->count = 0; + newptr->flag = OBJ_REG; + } + else free(sptr); + return newptr; +} + +/*********************************************/ +/* PUT_PNODE - insert/update dir node */ +/*********************************************/ + +int put_pnode(char *str, int type, u_int64_t count, u_int64_t xfer, + u_int64_t *ctr, PNODEPTR *htab) +{ + PNODEPTR cptr,nptr; + + if (str[0]=='-') return 0; + + /* check if hashed */ + if ( (cptr = htab[hash(str)]) == NULL) + { + /* not hashed */ + if ( (nptr=new_pnode(str)) != NULL) + { + nptr->flag = type; + nptr->count= count; + nptr->xfer = xfer; + nptr->next = NULL; + htab[hash(str)] = nptr; + (*ctr)++; + } + } + else + { + /* hashed */ + while (cptr != NULL) + { + if (strcmp(cptr->string,str)==0) + { + if (type==cptr->flag) + { + /* found... bump counter */ + cptr->count+=count; + cptr->xfer += xfer; + return 0; + } + } + cptr = cptr->next; + } + /* not found... 
*/ + if ( (nptr = new_pnode(str)) != NULL) + { + nptr->flag = type; + nptr->count= count; + nptr->xfer = xfer; + nptr->next = htab[hash(str)]; + htab[hash(str)]=nptr; + (*ctr)++; + } + } + if (nptr!=NULL) + { + if (isinlist(hidden_dirs,nptr->string)!=NULL) + nptr->flag=OBJ_HIDE; + } + return nptr==NULL; +} + +/*********************************************/ +/* DEL_PLIST - delete dir hash table */ +/*********************************************/ + +void del_plist(PNODEPTR *htab) +{ + /* free memory used by hash table */ + PNODEPTR aptr,temp; + int i; + + for (i=0;inext; + free (aptr->string); /* free up dir string memory */ + free (aptr); /* free up dir struct node */ + aptr = temp; + } + htab[i]=NULL; + } + } +} + +/************************************************************/ +/* HASH - return 64 bit FNV hash value for string */ +/* */ +/* See http://www.isthe.com/chongo/tech/comp/fnv/index.html */ +/************************************************************/ + +u_int64_t hash(char *str) +{ + u_int64_t hashval; + + /* perform a 64 bit Fowler/Noll/Vo FNV-1a hash onb the string */ + for (hashval = FNV1A_64_INIT; *str != '\0'; str++) { + /* xor the bottom with the current octet */ + hashval ^= (u_int64_t)*str; +#if defined(NO_FNV_GCC_OPTIMIZATION) + hashval *= FNV_64_PRIME; +#else + hashval += (hashval << 1) + (hashval << 4) + (hashval << 5) + + (hashval << 7) + (hashval << 8) + (hashval << 40); +#endif + } return hashval % MAXHASH; } @@ -1133,7 +1270,7 @@ /* MONTH_UPDATE_EXIT - eom exit page update */ /*********************************************/ -void month_update_exit(u_long tstamp) +void month_update_exit(u_int64_t tstamp) { HNODEPTR nptr; int i; @@ -1157,10 +1294,10 @@ /* TOT_VISIT - calculate total visits */ /*********************************************/ -u_long tot_visit(HNODEPTR *list) +u_int64_t tot_visit(HNODEPTR *list) { HNODEPTR hptr; - u_long tot=0; + u_int64_t tot=0; int i; for (i=0;i[%s]\n",msg_hlnk_hs); if (ntop_urls || ntop_urlsK) 
fprintf(out_fp,"[%s]\n",msg_hlnk_u); + if (ntop_dirs || ntop_dirsK) + fprintf(out_fp,"[%s]\n",msg_hlnk_d); if (ntop_entry) fprintf(out_fp,"[%s]\n",msg_hlnk_en); if (ntop_exit) @@ -508,8 +537,8 @@ void month_total_table() { int i,days_in_month; - u_long max_files=0,max_hits=0,max_visits=0,max_pages=0; - double max_xfer=0.0; + u_int64_t max_files=0,max_hits=0,max_visits=0,max_pages=0; + u_int64_t max_xfer=0; days_in_month=(l_day-f_day)+1; for (i=0;i<31;i++) @@ -528,102 +557,146 @@ fprintf(out_fp,"\n"); /* Total Hits */ fprintf(out_fp,"%s\n" \ - "%lu" \ + "%llu" \ "\n",msg_mtot_th,t_hit); + /* Normal Hits */ + fprintf(out_fp,"" \ + "%s\n" \ + "%lld" \ + "\n",msg_mtot_nh,t_code2xx304); + /* Abnormal Hits */ + fprintf(out_fp,"" \ + "%s\n" \ + "%lld" \ + "\n",msg_mtot_anh,t_hit-t_code2xx304); + fprintf(out_fp,"\n"); /* Total Files */ fprintf(out_fp,"%s\n" \ - "%lu" \ + "%lld" \ "\n",msg_mtot_tf,t_file); /* Total Pages */ fprintf(out_fp,"%s %s\n" \ - "%lu" \ + "%lld" \ "\n",msg_h_total, msg_h_pages, t_page); /* Total Visits */ fprintf(out_fp,"%s %s\n" \ - "%lu" \ + "%lld" \ "\n",msg_h_total, msg_h_visits, t_visit); /* Total XFer */ fprintf(out_fp,"%s\n" \ - "%.0f" \ + "%lld" \ "\n",msg_mtot_tx,t_xfer/1024); fprintf(out_fp,"\n"); /**********************************************/ /* Unique Sites */ fprintf(out_fp,"" \ "%s\n" \ - "%lu" \ + "%lld" \ "\n",msg_mtot_us,t_site); /* Unique URL's */ fprintf(out_fp,"" \ "%s\n" \ - "%lu" \ + "%lld" \ "\n",msg_mtot_uu,t_url); /* Unique Referrers */ if (t_ref != 0) fprintf(out_fp,"" \ "%s\n" \ - "%lu" \ + "%lld" \ "\n",msg_mtot_ur,t_ref); /* Unique Usernames */ if (t_user != 0) fprintf(out_fp,"" \ "%s\n" \ - "%lu" \ + "%lld" \ "\n",msg_mtot_ui,t_user); /* Unique Agents */ if (t_agent != 0) fprintf(out_fp,"" \ "%s\n" \ - "%lu" \ + "%lld" \ "\n",msg_mtot_ua,t_agent); fprintf(out_fp,"\n"); /**********************************************/ /* Hourly/Daily avg/max totals */ fprintf(out_fp,"" \ ".\n"\ - "" \ + "" \ "%s \n" \ - "" \ + "" \ "%s 
\n", GREY,GREY,GREY,msg_h_avg,GREY,msg_h_max); fprintf(out_fp,"\n"); /* Max/Avg Hits per Hour */ fprintf(out_fp,"" \ "%s\n" \ - "%lu\n" \ - "%lu" \ - "\n",msg_mtot_mhh, t_hit/(24*days_in_month),mh_hit); + "%.2Lf\n"\ + "%lld" \ + "\n",msg_mtot_mhh, + (long double)t_hit/(24*days_in_month),mh_hit); /* Max/Avg Hits per Day */ fprintf(out_fp,"" \ "%s\n" \ - "%lu\n" \ - "%lu" \ - "\n",msg_mtot_mhd, t_hit/days_in_month, max_hits); + "%.2Lf\n"\ + "%llu" \ + "\n",msg_mtot_mhd, + (long double)t_hit/days_in_month, max_hits); + fprintf(out_fp,"\n"); + /* Avg Normal Hits per Hour */ + fprintf(out_fp,"" \ + "%s\n" \ + "%.2Lf\n" \ + "\n", \ + msg_mtot_mnhh, (long double)t_code2xx304/(24*days_in_month),BLACK); + /* Avg Normal Hits per Day */ + fprintf(out_fp,"" \ + "%s\n" \ + "%.2Lf\n" \ + "\n", \ + msg_mtot_mnhd, (long double)t_code2xx304/days_in_month,BLACK); + /* Avg Abormal Hits per Hour */ + fprintf(out_fp,"" \ + "%s\n" \ + "%.2Lf\n" \ + "\n", \ + msg_mtot_manhh, + (long double)(t_hit-t_code2xx304)/(24*days_in_month),BLACK); + /* Avg Abormal Hits per Day */ + fprintf(out_fp,"" \ + "%s\n" \ + "%.2Lf\n" \ + "\n", \ + msg_mtot_manhd, + (long double)(t_hit-t_code2xx304)/days_in_month,BLACK); + fprintf(out_fp,"\n"); /* Max/Avg Files per Day */ fprintf(out_fp,"" \ "%s\n" \ - "%lu\n" \ - "%lu" \ - "\n",msg_mtot_mfd, t_file/days_in_month,max_files); + "%.2Lf\n"\ + "%lld" \ + "\n",msg_mtot_mfd, + (long double)t_file/days_in_month,max_files); /* Max/Avg Pages per Day */ fprintf(out_fp,"" \ "%s\n" \ - "%lu\n" \ - "%lu" \ - "\n",msg_mtot_mpd, t_page/days_in_month,max_pages); + "%.2Lf\n"\ + "%lld" \ + "\n",msg_mtot_mpd, + (long double)t_page/days_in_month,max_pages); /* Max/Avg Visits per Day */ fprintf(out_fp,"" \ "%s\n" \ - "%lu\n" \ - "%lu" \ - "\n",msg_mtot_mvd, t_visit/days_in_month,max_visits); + "%.2Lf\n"\ + "%lld" \ + "\n",msg_mtot_mvd, + (long double)t_visit/days_in_month,max_visits); /* Max/Avg KBytes per Day */ fprintf(out_fp,"" \ "%s\n" \ - "%.0f\n" \ - "%.0f" \ + "%.2Lf\n"\ + 
"%lld" \ "\n",msg_mtot_mkd, - (t_xfer/1024)/days_in_month,max_xfer/1024); + (long double)(t_xfer/1024)/days_in_month,max_xfer/1024); fprintf(out_fp,"\n"); /**********************************************/ /* response code totals */ @@ -634,7 +707,7 @@ { if (response[i].count != 0) fprintf(out_fp,"%s\n" \ - "%lu" \ + "%lld" \ "\n", response[i].desc, response[i].count); } @@ -693,28 +766,28 @@ fprintf(out_fp,"" \ "%d\n", i+1); fprintf(out_fp,"" \ - "%lu\n" \ - "%3.02f%%\n", + "%lld\n" \ + "%3.02Lf%%\n", tm_hit[i],PCENT(tm_hit[i],t_hit)); fprintf(out_fp,"" \ - "%lu\n" \ - "%3.02f%%\n", + "%lld\n" \ + "%3.02Lf%%\n", tm_file[i],PCENT(tm_file[i],t_file)); fprintf(out_fp,"" \ - "%lu\n" \ - "%3.02f%%\n", + "%lld\n" \ + "%3.02Lf%%\n", tm_page[i],PCENT(tm_page[i],t_page)); fprintf(out_fp,"" \ - "%lu\n" \ - "%3.02f%%\n", + "%lld\n" \ + "%3.02Lf%%\n", tm_visit[i],PCENT(tm_visit[i],t_visit)); fprintf(out_fp,"" \ - "%lu\n" \ - "%3.02f%%\n", + "%lld\n" \ + "%3.02Lf%%\n", tm_site[i],PCENT(tm_site[i],t_site)); fprintf(out_fp,"" \ - "%.0f\n" \ - "%3.02f%%\n", + "%lld\n" \ + "%3.02Lf%%\n", tm_xfer[i]/1024,PCENT(tm_xfer[i],t_xfer)); } fprintf(out_fp,"\n"); @@ -729,8 +802,8 @@ void hourly_total_table() { int i,days_in_month; - u_long avg_file=0; - double avg_xfer=0.0; + u_int64_t avg_file=0; + u_int64_t avg_xfer=0; days_in_month=(l_day-f_day)+1; @@ -783,27 +856,27 @@ fprintf(out_fp,"" \ "%d\n",i); fprintf(out_fp, - "%lu\n" \ - "%lu\n" \ - "%3.02f%%\n", + "%lld\n" \ + "%lld\n" \ + "%3.02Lf%%\n", th_hit[i]/days_in_month,th_hit[i], PCENT(th_hit[i],t_hit)); fprintf(out_fp, - "%lu\n" \ - "%lu\n" \ - "%3.02f%%\n", + "%lld\n" \ + "%lld\n" \ + "%3.02Lf%%\n", th_file[i]/days_in_month,th_file[i], PCENT(th_file[i],t_file)); fprintf(out_fp, - "%lu\n" \ - "%lu\n" \ - "%3.02f%%\n", + "%lld\n" \ + "%lld\n" \ + "%3.02Lf%%\n", th_page[i]/days_in_month,th_page[i], PCENT(th_page[i],t_page)); fprintf(out_fp, - "%.0f\n" \ - "%.0f\n" \ - "%3.02f%%\n", + "%lld\n" \ + "%lld\n" \ + "%3.02Lf%%\n", 
(th_xfer[i]/days_in_month)/1024,th_xfer[i]/1024, PCENT(th_xfer[i],t_xfer)); avg_file += th_file[i]/days_in_month; @@ -819,8 +892,9 @@ void top_sites_table(int flag) { - u_long cnt=0, h_reg=0, h_grp=0, h_hid=0, tot_num; + u_int64_t cnt=0, h_reg=0, h_grp=0, h_hid=0, tot_num; int i; + u_int64_t tmp; HNODEPTR hptr, *pointer; cnt=a_ctr; pointer=h_array; @@ -837,8 +911,8 @@ } if ( (tot_num=h_reg+h_grp)==0 ) return; /* split if none */ - i=(flag)?ntop_sitesK:ntop_sites; /* Hits or KBytes?? */ - if (tot_num > i) tot_num = i; /* get max to do... */ + tmp=(flag)?ntop_sitesK:ntop_sites; /* Hits or KBytes?? */ + if (tot_num > tmp) tot_num = tmp; /* get max to do... */ if ((!flag) || (flag&&!ntop_sites)) /* now do tag */ fprintf(out_fp,"\n"); @@ -846,12 +920,13 @@ fprintf(out_fp,"\n"); fprintf(out_fp,"\n"); if (flag) fprintf(out_fp,"\n", + "%s %lld %s %lld %s %s %s\n", GREY, msg_top_top,tot_num,msg_top_of, t_site,msg_top_s,msg_h_by,msg_h_xfer); else fprintf(out_fp,"\n", - GREY,msg_top_top, tot_num, msg_top_of, t_site, msg_top_s); + "%s %lld %s %lld %s %s %s\n", + GREY,msg_top_top, tot_num, msg_top_of, t_site, + msg_top_s,msg_h_by,msg_h_hits); fprintf(out_fp,"\n"); fprintf(out_fp,"\n",GREY); @@ -880,20 +955,20 @@ fprintf(out_fp, "\n" \ - "\n" \ - "\n" \ - "\n" \ - "\n" \ - "\n" \ - "\n" \ - "\n" \ - "\n" \ + "\n" \ + "\n" \ + "\n" \ + "\n" \ + "\n" \ + "\n" \ + "\n" \ + "\n" \ "\n", @@ -908,7 +983,7 @@ fprintf(out_fp,"\n"); if ((!flag) || (flag&&!ntop_sites)) { - if ( (all_sites) && ((h_reg+h_grp)>ntop_sites) ) + if ( (all_sites) && ((h_reg+h_grp)>(unsigned int)ntop_sites) ) { if (all_sites_page(h_reg, h_grp)) { @@ -930,7 +1005,7 @@ /* ALL_SITES_PAGE - HTML page of all sites */ /*********************************************/ -int all_sites_page(u_long h_reg, u_long h_grp) +int all_sites_page(u_int64_t h_reg, u_int64_t h_grp) { HNODEPTR hptr, *pointer; char site_fname[256], buffer[256]; @@ -961,12 +1036,12 @@ if (hptr->flag == OBJ_GRP) { fprintf(out_fp, - "%-8lu %6.02f%% %8lu 
%6.02f%% %8.0f %6.02f%% %8lu %6.02f%% %s\n", + "%-8llu %6.02Lf%% %8llu %6.02Lf%% %8llu %6.02Lf%% %8llu %6.02Lf%% %s\n", hptr->count, - (t_hit==0)?0:((float)hptr->count/t_hit)*100.0,hptr->files, - (t_file==0)?0:((float)hptr->files/t_file)*100.0,hptr->xfer/1024, - (t_xfer==0)?0:((float)hptr->xfer/t_xfer)*100.0,hptr->visit, - (t_visit==0)?0:((float)hptr->visit/t_visit)*100.0, + (t_hit==0)?0:((long double)hptr->count/t_hit)*100.0,hptr->files, + (t_file==0)?0:((long double)hptr->files/t_file)*100.0,hptr->xfer/1024, + (t_xfer==0)?0:((long double)hptr->xfer/t_xfer)*100.0,hptr->visit, + (t_visit==0)?0:((long double)hptr->visit/t_visit)*100.0, hptr->string); h_grp--; } @@ -982,12 +1057,12 @@ if (hptr->flag == OBJ_REG) { fprintf(out_fp, - "%-8lu %6.02f%% %8lu %6.02f%% %8.0f %6.02f%% %8lu %6.02f%% %s\n", + "%-8llu %6.02Lf%% %8llu %6.02Lf%% %8llu %6.02Lf%% %8llu %6.02Lf%% %s\n", hptr->count, - (t_hit==0)?0:((float)hptr->count/t_hit)*100.0,hptr->files, - (t_file==0)?0:((float)hptr->files/t_file)*100.0,hptr->xfer/1024, - (t_xfer==0)?0:((float)hptr->xfer/t_xfer)*100.0,hptr->visit, - (t_visit==0)?0:((float)hptr->visit/t_visit)*100.0, + (t_hit==0)?0:((long double)hptr->count/t_hit)*100.0,hptr->files, + (t_file==0)?0:((long double)hptr->files/t_file)*100.0,hptr->xfer/1024, + (t_xfer==0)?0:((long double)hptr->xfer/t_xfer)*100.0,hptr->visit, + (t_visit==0)?0:((long double)hptr->visit/t_visit)*100.0, hptr->string); h_reg--; } @@ -1005,7 +1080,10 @@ void top_urls_table(int flag) { - u_long cnt=0,u_reg=0,u_grp=0,u_hid=0, tot_num; + u_int64_t cnt=0,u_reg=0,u_grp=0,u_hid=0, tot_num; + u_int64_t tmp; + u_int64_t top_count=0; + u_int64_t top_xfer=0; int i; UNODEPTR uptr, *pointer; @@ -1023,26 +1101,27 @@ } if ( (tot_num=u_reg+u_grp)==0 ) return; /* split if none */ - i=(flag)?ntop_urlsK:ntop_urls; /* Hits or KBytes?? */ - if (tot_num > i) tot_num = i; /* get max to do... */ + tmp=(flag)?ntop_urlsK:ntop_urls; /* Hits or KBytes?? */ + if (tot_num > tmp) tot_num = tmp; /* get max to do... 
*/ if ((!flag) || (flag&&!ntop_urls)) /* now do tag */ fprintf(out_fp,"\n"); fprintf(out_fp,"
" \ - "%s %lu %s %lu %s %s %s
" \ - "%s %lu %s %lu %s
" \ "#%d%lu%3.02f%%%lu%3.02f%%%.0f%3.02f%%%lu%3.02f%%%lld%3.02Lf%%%lld%3.02Lf%%%lld%3.02Lf%%%lld%3.02Lf%%", i+1,hptr->count, - (t_hit==0)?0:((float)hptr->count/t_hit)*100.0,hptr->files, - (t_file==0)?0:((float)hptr->files/t_file)*100.0,hptr->xfer/1024, - (t_xfer==0)?0:((float)hptr->xfer/t_xfer)*100.0,hptr->visit, - (t_visit==0)?0:((float)hptr->visit/t_visit)*100.0); + (t_hit==0)?0:((long double)hptr->count/t_hit)*100.0,hptr->files, + (t_file==0)?0:((long double)hptr->files/t_file)*100.0,hptr->xfer/1024, + (t_xfer==0)?0:((long double)hptr->xfer/t_xfer)*100.0,hptr->visit, + (t_visit==0)?0:((long double)hptr->visit/t_visit)*100.0); if ((hptr->flag==OBJ_GRP)&&hlite_groups) fprintf(out_fp,"%s
\n"); fprintf(out_fp,"\n"); if (flag) fprintf(out_fp,"\n", + "%s %lld %s %lld %s %s %s\n", GREY,msg_top_top,tot_num,msg_top_of, t_url,msg_top_u,msg_h_by,msg_h_xfer); else fprintf(out_fp,"\n", - GREY,msg_top_top,tot_num,msg_top_of,t_url,msg_top_u); + "%s %lld %s %lld %s %s %s\n", + GREY,msg_top_top,tot_num,msg_top_of,t_url, + msg_top_u,msg_h_by,msg_h_nhits); fprintf(out_fp,"\n"); fprintf(out_fp,"\n",GREY); fprintf(out_fp,"\n", - DKGREEN,msg_h_hits); + DKGREEN,msg_h_nhits); fprintf(out_fp,"\n", RED,msg_h_xfer); @@ -1064,15 +1143,17 @@ fprintf(out_fp, "\n" \ - "\n" \ - "\n" \ - "\n"\ - "\n" \ + "\n" \ + "\n" \ + "\n"\ + "\n" \ "\n"); + fprintf(out_fp, + "\n" \ + "\n" \ + "\n" \ + "\n"\ + "\n" \ + "\n", + (t_code2xx304-top_count), + (t_code2xx304==0)?0:((long double)(t_code2xx304-top_count)/t_code2xx304)*100.0, + (t_xfer-top_xfer)/1024, + (t_xfer==0)?0:((long double)(t_xfer-top_xfer)/t_xfer)*100.0, + msg_other_u); + fprintf(out_fp,"\n"); + fprintf(out_fp,"\n"); + fprintf(out_fp, + "\n" \ + "\n" \ + "\n" \ + "\n"\ + "\n" \ + "\n", + t_code2xx304, + (long double)100.0, + t_xfer/1024, + (long double)100.0, + msg_all_u); + fprintf(out_fp,"\n"); if ((!flag) || (flag&&!ntop_urls)) { - if ( (all_urls) && ((u_reg+u_grp)>ntop_urls) ) + if ( (all_urls) && ((u_reg+u_grp)>(unsigned int)ntop_urls) ) { if (all_urls_page(u_reg, u_grp)) { @@ -1135,7 +1243,7 @@ /* ALL_URLS_PAGE - HTML page of all urls */ /*********************************************/ -int all_urls_page(u_long u_reg, u_long u_grp) +int all_urls_page(u_int64_t u_reg, u_int64_t u_grp) { UNODEPTR uptr, *pointer; char url_fname[256], buffer[256]; @@ -1165,11 +1273,11 @@ uptr=*pointer++; if (uptr->flag == OBJ_GRP) { - fprintf(out_fp,"%-8lu %6.02f%% %8.0f %6.02f%% %s\n", + fprintf(out_fp,"%-8llu %6.02Lf%% %-8llu %6.02Lf%% %s\n", uptr->count, - (t_hit==0)?0:((float)uptr->count/t_hit)*100.0, + (t_hit==0)?0:((long double)uptr->count/t_hit)*100.0, uptr->xfer/1024, - (t_xfer==0)?0:((float)uptr->xfer/t_xfer)*100.0, + 
(t_xfer==0)?0:((long double)uptr->xfer/t_xfer)*100.0, uptr->string); u_grp--; } @@ -1184,11 +1292,11 @@ uptr=*pointer++; if (uptr->flag == OBJ_REG) { - fprintf(out_fp,"%-8lu %6.02f%% %8.0f %6.02f%% %s\n", + fprintf(out_fp,"%-8llu %6.02Lf%% %-8llu %6.02Lf%% %s\n", uptr->count, - (t_hit==0)?0:((float)uptr->count/t_hit)*100.0, + (t_hit==0)?0:((long double)uptr->count/t_hit)*100.0, uptr->xfer/1024, - (t_xfer==0)?0:((float)uptr->xfer/t_xfer)*100.0, + (t_xfer==0)?0:((long double)uptr->xfer/t_xfer)*100.0, uptr->string); u_reg--; } @@ -1201,13 +1309,228 @@ } /*********************************************/ +/* TOP_DIRS_TABLE - generate top n dir table */ +/*********************************************/ + +void top_dirs_table(int flag) +{ + u_int64_t cnt=0,d_reg=0,d_grp=0,d_hid=0, tot_num; + u_int64_t tmp; + u_int64_t top_count=0; + u_int64_t top_xfer=0; + int i; + PNODEPTR pptr, *pointer; + + cnt=a_ctr; pointer=d_array; + while (cnt--) + { + /* calculate totals */ + switch ( (int)((PNODEPTR)(*pointer)->flag) ) + { + case OBJ_REG: d_reg++; break; + case OBJ_GRP: d_grp++; break; + case OBJ_HIDE: d_hid++; break; + } + pointer++; + } + + if ( (tot_num=d_reg+d_grp)==0 ) return; /* split if none */ + tmp=(flag)?ntop_dirsK:ntop_dirs; /* Hits or KBytes?? */ + if (tot_num > tmp) tot_num = tmp; /* get max to do... */ + if ((!flag) || (flag&&!ntop_dirs)) /* now do tag */ + fprintf(out_fp,"\n"); + + fprintf(out_fp,"
" \ - "%s %lu %s %lu %s %s %s
" \ - "%s %lu %s %lu %s
" \ "#" \ "%s" \ "%s%d%lu%3.02f%%%.0f%3.02f%%%lld%3.02Lf%%%lld%3.02Lf%%", i+1,uptr->count, - (t_hit==0)?0:((float)uptr->count/t_hit)*100.0, + (t_code2xx304==0)?0:((long double)uptr->count/t_code2xx304)*100.0, uptr->xfer/1024, - (t_xfer==0)?0:((float)uptr->xfer/t_xfer)*100.0); + (t_xfer==0)?0:((long double)uptr->xfer/t_xfer)*100.0); + top_count += uptr->count; + top_xfer += uptr->xfer; if (uptr->flag==OBJ_GRP) { @@ -1111,9 +1192,36 @@ } } fprintf(out_fp,"
%lld%3.02Lf%%%lld%3.02Lf%%%s
%lld%3.02Lf%%%lld%3.02Lf%%%s
\n"); + fprintf(out_fp,"\n"); + if (flag) fprintf(out_fp,"\n", + GREY,msg_top_top,tot_num,msg_top_of, + t_dir,msg_top_d,msg_h_by,msg_h_xfer); + else fprintf(out_fp,"\n", + GREY,msg_top_top,tot_num,msg_top_of,t_dir, + msg_top_d,msg_h_by,msg_h_nhits); + fprintf(out_fp,"\n"); + fprintf(out_fp,"\n",GREY); + fprintf(out_fp,"\n", + DKGREEN,msg_h_nhits); + fprintf(out_fp,"\n", + RED,msg_h_xfer); + fprintf(out_fp,"\n", + CYAN,msg_h_dir); + fprintf(out_fp,"\n"); + + pointer=d_array; i=0; + while (tot_num) + { + pptr=*pointer++; /* point to the URL node */ + if (pptr->flag != OBJ_HIDE) + { + /* shade grouping? */ + if (shade_groups && (pptr->flag==OBJ_GRP)) + fprintf(out_fp,"\n", GRPCOLOR); + else fprintf(out_fp,"\n"); + + fprintf(out_fp, + "\n" \ + "\n" \ + "\n" \ + "\n"\ + "\n" \ + "\n", + pptr->string); + else fprintf(out_fp,"%s\n",pptr->string); + } + else + { + fprintf(out_fp,"%s\n",pptr->string); + } + tot_num--; + i++; + } + } + fprintf(out_fp,"\n"); + fprintf(out_fp, + "\n" \ + "\n" \ + "\n" \ + "\n"\ + "\n" \ + "\n", + (t_code2xx304-top_count), + (t_code2xx304==0)?0:((long double)(t_code2xx304-top_count)/t_code2xx304)*100.0, + (t_xfer-top_xfer)/1024, + (t_xfer==0)?0:((long double)(t_xfer-top_xfer)/t_xfer)*100.0, + msg_other_u); + fprintf(out_fp,"\n"); + fprintf(out_fp,"\n"); + fprintf(out_fp, + "\n" \ + "\n" \ + "\n" \ + "\n"\ + "\n" \ + "\n", + t_code2xx304, + (long double)100.0, + t_xfer/1024, + (long double)100.0, + msg_all_u); + fprintf(out_fp,"\n"); + if ((!flag) || (flag&&!ntop_dirs)) + { + if ( (all_dirs) && ((d_reg+d_grp)>(unsigned int)ntop_dirs) ) + { + if (all_dirs_page(d_reg, d_grp)) + { + fprintf(out_fp,"",GRPCOLOR); + fprintf(out_fp,"\n",msg_v_dirs); + if (flag) /* do we need to sort first? */ + qsort(d_array,a_ctr,sizeof(PNODEPTR),qs_dir_cmph); + } + } + } + fprintf(out_fp,"
" \ + "%s %lld %s %lld %s %s %s
" \ + "%s %lld %s %lld %s %s %s
" \ + "#" \ + "%s" \ + "%s" \ + "%s
%d%lld%3.02Lf%%%lld%3.02Lf%%", + i+1,pptr->count, + (t_code2xx304==0)?0:((long double)pptr->count/t_code2xx304)*100.0, + pptr->xfer/1024, + (t_xfer==0)?0:((long double)pptr->xfer/t_xfer)*100.0); + top_count += pptr->count; + top_xfer += pptr->xfer; + + if (pptr->flag==OBJ_GRP) + { + if (hlite_groups) + fprintf(out_fp,"%s
%lld%3.02Lf%%%lld%3.02Lf%%%s
%lld%3.02Lf%%%lld%3.02Lf%%%s
\n"); + fprintf(out_fp,""); + fprintf(out_fp,"", + cur_year,cur_month,html_ext); + fprintf(out_fp,"%s
\n

\n"); +} + +/*********************************************/ +/* ALL_DIRS_PAGE - HTML page of all dirs */ +/*********************************************/ + +int all_dirs_page(u_int64_t d_reg, u_int64_t d_grp) +{ + PNODEPTR pptr, *pointer; + char dir_fname[256], buffer[256]; + FILE *out_fp; + int i=(d_grp)?1:0; + + /* generate file name */ + sprintf(dir_fname,"dir_%04d%02d.%s",cur_year,cur_month,html_ext); + + /* open file */ + if ( (out_fp=open_out_file(dir_fname))==NULL ) return 0; + + sprintf(buffer,"%s %d - %s",l_month[cur_month-1],cur_year,msg_h_dir); + write_html_head(buffer, out_fp); + + fprintf(out_fp,"

\n");
+
+   fprintf(out_fp," %12s      %12s      %s\n",
+           msg_h_hits,msg_h_xfer,msg_h_dir);
+   fprintf(out_fp,"----------------  ----------------  " \
+                  "--------------------\n\n");
+
+   /* do groups first (if any) */
+   pointer=d_array;
+   while (d_grp)
+   {
+      pptr=*pointer++;
+      if (pptr->flag == OBJ_GRP)
+      {
+         fprintf(out_fp,"%-8llu %6.02Lf%%  %8lld %6.02Lf%%  %s\n",
+            pptr->count,
+            (t_hit==0)?0:((long double)pptr->count/t_hit)*100.0,
+            pptr->xfer/1024,
+            (t_xfer==0)?0:((long double)pptr->xfer/t_xfer)*100.0,
+            pptr->string);
+         d_grp--;
+      }
+   }
+
+   if (i) fprintf(out_fp,"\n");
+
+   /* now do individual directories (if any) */
+   pointer=d_array;
+   while (d_reg)
+   {
+      pptr=*pointer++;
+      if (pptr->flag == OBJ_REG)
+      {
+         fprintf(out_fp,"%-8llu %6.02Lf%%  %8lld %6.02Lf%%  %s\n",
+            pptr->count,
+            (t_hit==0)?0:((long double)pptr->count/t_hit)*100.0,
+            pptr->xfer/1024,
+            (t_xfer==0)?0:((long double)pptr->xfer/t_xfer)*100.0,
+            pptr->string);
+         d_reg--;
+      }
+   }
+
+   fprintf(out_fp,"
\n"); + write_html_tail(out_fp); + fclose(out_fp); + return 1; +} + +/*********************************************/ /* TOP_ENTRY_TABLE - top n entry/exit urls */ /*********************************************/ void top_entry_table(int flag) { - u_long cnt=0, u_entry=0, u_exit=0, tot_num; - u_long t_entry=0, t_exit=0; + u_int64_t cnt=0, u_entry=0, u_exit=0, tot_num; + u_int64_t t_entry=0, t_exit=0; + u_int64_t top_count=0; + u_int64_t top_visit=0; int i; UNODEPTR uptr, *pointer; @@ -1216,18 +1539,18 @@ { if ( (int)((UNODEPTR)(*pointer)->flag) == OBJ_REG ) { - if ( (u_long)((UNODEPTR)(*pointer)->entry) ) - { u_entry++; t_entry+=(u_long)((UNODEPTR)(*pointer)->entry); } - if ( (u_long)((UNODEPTR)(*pointer)->exit) ) - { u_exit++; t_exit +=(u_long)((UNODEPTR)(*pointer)->exit); } + if ( (u_int64_t)(((UNODEPTR)(*pointer))->entry) ) + { u_entry++; t_entry+=(u_int64_t)(((UNODEPTR)(*pointer))->entry); } + if ( (u_int64_t)(((UNODEPTR)(*pointer))->exit) ) + { u_exit++; t_exit +=(u_int64_t)(((UNODEPTR)(*pointer))->exit); } } pointer++; } /* calculate how many we have */ tot_num=(flag)?u_exit:u_entry; - if (flag) { if (tot_num > ntop_exit ) tot_num=ntop_exit; } - else { if (tot_num > ntop_entry) tot_num=ntop_entry; } + if (flag) { if (tot_num > (unsigned int)ntop_exit ) tot_num=ntop_exit; } + else { if (tot_num > (unsigned int)ntop_entry) tot_num=ntop_entry; } /* return if none to do */ if (!tot_num) return; @@ -1238,16 +1561,17 @@ fprintf(out_fp,"\n"); fprintf(out_fp,"\n"); fprintf(out_fp,"\n", + "%s %lld %s %lld %s %s %s\n", GREY,msg_top_top,tot_num,msg_top_of, - (flag)?u_exit:u_entry,(flag)?msg_top_ex:msg_top_en); + (flag)?u_exit:u_entry, + (flag)?msg_top_ex:msg_top_en,msg_h_by,msg_h_visits); fprintf(out_fp,"\n"); fprintf(out_fp,"\n", GREY); fprintf(out_fp,"\n", - DKGREEN,msg_h_hits); + DKGREEN,msg_h_nhits); fprintf(out_fp,"\n", YELLOW,msg_h_visits); @@ -1265,16 +1589,18 @@ fprintf(out_fp,"\n"); fprintf(out_fp, "\n" \ - "\n" \ - "\n" \ - "\n" \ - "\n" \ + "\n" \ + "\n" \ + 
"\n" \ + "\n" \ "\n"); + fprintf(out_fp,"\n"); + fprintf(out_fp, + "\n" \ + "\n" \ + "\n" \ + "\n"\ + "\n" \ + "\n", + (t_code2xx304-top_count), + (t_code2xx304==0)?0:((long double)(t_code2xx304-top_count)/t_code2xx304)*100.0, + (u_int64_t)(t_visit-top_visit), + (t_visit==0)?0:((long double)(t_visit-top_visit)/t_visit)*100.0, + msg_other_u); + fprintf(out_fp,"\n"); + fprintf(out_fp,"\n"); + fprintf(out_fp, + "\n" \ + "\n" \ + "\n" \ + "\n"\ + "\n" \ + "\n", + t_code2xx304, + (long double)100.0, + t_visit, + (long double)100.0, + msg_all_u); fprintf(out_fp,"
" \ - "%s %lu %s %lu %s
" \ "#" \ "%s" \ "%s
%d%lu%3.02f%%%lu%3.02f%%%lld%3.02Lf%%%lld%3.02Lf%%", i+1,uptr->count, - (t_hit==0)?0:((float)uptr->count/t_hit)*100.0, + (t_code2xx304==0)?0:((long double)uptr->count/t_code2xx304)*100.0, (flag)?uptr->exit:uptr->entry, - (flag)?((t_exit==0)?0:((float)uptr->exit/t_exit)*100.0) - :((t_entry==0)?0:((float)uptr->entry/t_entry)*100.0)); + (flag)?((t_exit==0)?0:((long double)uptr->exit/t_exit)*100.0) + :((t_entry==0)?0:((long double)uptr->entry/t_entry)*100.0)); + top_count += uptr->count; + top_visit += ((flag) ? uptr->exit : uptr->entry); /* check for a service prefix (ie: http://) */ if (strstr(uptr->string,"://")!=NULL) @@ -1299,6 +1625,33 @@ } } fprintf(out_fp,"
%lld%3.02Lf%%%lld%3.02Lf%%%s
%lld%3.02Lf%%%lld%3.02Lf%%%s
\n

\n"); } @@ -1308,7 +1661,7 @@ void top_refs_table() { - u_long cnt=0, r_reg=0, r_grp=0, r_hid=0, tot_num; + u_int64_t cnt=0, r_reg=0, r_grp=0, r_hid=0, tot_num; int i; RNODEPTR rptr, *pointer; @@ -1328,21 +1681,25 @@ } if ( (tot_num=r_reg+r_grp)==0 ) return; /* split if none */ - if (tot_num > ntop_refs) tot_num=ntop_refs; /* get max to do... */ + if (tot_num > (unsigned int)ntop_refs) + { + tot_num=ntop_refs; /* get max to do... */ + } fprintf(out_fp,"\n"); fprintf(out_fp,"\n"); fprintf(out_fp,"\n"); fprintf(out_fp,"\n", - GREY, msg_top_top, tot_num, msg_top_of, t_ref, msg_top_r); + "%s %lld %s %lld %s %s %s\n", + GREY, msg_top_top, tot_num, msg_top_of, t_ref, + msg_top_r, msg_h_by, msg_h_nhits); fprintf(out_fp,"\n"); fprintf(out_fp,"\n", GREY); fprintf(out_fp,"\n", - DKGREEN,msg_h_hits); + DKGREEN,msg_h_nhits); fprintf(out_fp,"\n", CYAN,msg_h_ref); @@ -1361,11 +1718,11 @@ fprintf(out_fp, "\n" \ - "\n" \ - "\n" \ + "\n" \ + "\n" \ "\n"); tot_num--; @@ -1387,7 +1741,7 @@ } } fprintf(out_fp,"\n"); - if ( (all_refs) && ((r_reg+r_grp)>ntop_refs) ) + if ( (all_refs) && ((r_reg+r_grp)>(unsigned int)ntop_refs) ) { if (all_refs_page(r_reg, r_grp)) { @@ -1406,7 +1760,7 @@ /* ALL_REFS_PAGE - HTML page of all refs */ /*********************************************/ -int all_refs_page(u_long r_reg, u_long r_grp) +int all_refs_page(u_int64_t r_reg, u_int64_t r_grp) { RNODEPTR rptr, *pointer; char ref_fname[256], buffer[256]; @@ -1434,9 +1788,9 @@ rptr=*pointer++; if (rptr->flag == OBJ_GRP) { - fprintf(out_fp,"%-8lu %6.02f%% %s\n", + fprintf(out_fp,"%-8llu %6.02Lf%% %s\n", rptr->count, - (t_hit==0)?0:((float)rptr->count/t_hit)*100.0, + (t_hit==0)?0:((long double)rptr->count/t_hit)*100.0, rptr->string); r_grp--; } @@ -1450,9 +1804,9 @@ rptr=*pointer++; if (rptr->flag == OBJ_REG) { - fprintf(out_fp,"%-8lu %6.02f%% %s\n", + fprintf(out_fp,"%-8llu %6.02Lf%% %s\n", rptr->count, - (t_hit==0)?0:((float)rptr->count/t_hit)*100.0, + (t_hit==0)?0:((long double)rptr->count/t_hit)*100.0, 
rptr->string); r_reg--; } @@ -1470,7 +1824,7 @@ void top_agents_table() { - u_long cnt, a_reg=0, a_grp=0, a_hid=0, tot_num; + u_int64_t cnt, a_reg=0, a_grp=0, a_hid=0, tot_num; int i; ANODEPTR aptr, *pointer; @@ -1490,14 +1844,18 @@ } if ( (tot_num=a_reg+a_grp)==0 ) return; /* split if none */ - if (tot_num > ntop_agents) tot_num=ntop_agents; /* get max to do... */ + if (tot_num > (unsigned int)ntop_agents) + { + tot_num=ntop_agents; /* get max to do... */ + } fprintf(out_fp,"\n"); fprintf(out_fp,"
" \ - "%s %lu %s %lu %s
" \ "#" \ "%s" \ "%s
%d%lu%3.02f%%%lld%3.02Lf%%", i+1,rptr->count, - (t_hit==0)?0:((float)rptr->count/t_hit)*100.0); + (t_code2xx304==0)?0:((long double)rptr->count/t_code2xx304)*100.0); if (rptr->flag==OBJ_GRP) { @@ -1375,11 +1732,8 @@ } else { - if (rptr->string[0] != '-') - fprintf(out_fp,"%s", - rptr->string, rptr->string); - else - fprintf(out_fp,"%s", rptr->string); + /* do not print as anchor tags to avoid referrer spamming */ + fprintf(out_fp,"%s", skip_proto(rptr->string)); } fprintf(out_fp,"
\n"); fprintf(out_fp,"\n"); fprintf(out_fp,"\n", - GREY, msg_top_top, tot_num, msg_top_of, t_agent, msg_top_a); + "%s %lld %s %lld %s %s %s\n", + GREY, msg_top_top, tot_num, msg_top_of, t_agent, + msg_top_a, msg_h_by, msg_h_hits); fprintf(out_fp,"\n"); fprintf(out_fp,"\n", @@ -1523,11 +1881,11 @@ fprintf(out_fp, "\n" \ - "\n" \ - "\n" \ + "\n" \ + "\n" \ "\n", @@ -1539,7 +1897,7 @@ } } fprintf(out_fp,"\n"); - if ( (all_agents) && ((a_reg+a_grp)>ntop_agents) ) + if ( (all_agents) && ((a_reg+a_grp)>(unsigned int)ntop_agents) ) { if (all_agents_page(a_reg, a_grp)) { @@ -1558,7 +1916,7 @@ /* ALL_AGENTS_PAGE - HTML user agent page */ /*********************************************/ -int all_agents_page(u_long a_reg, u_long a_grp) +int all_agents_page(u_int64_t a_reg, u_int64_t a_grp) { ANODEPTR aptr, *pointer; char agent_fname[256], buffer[256]; @@ -1586,9 +1944,9 @@ aptr=*pointer++; if (aptr->flag == OBJ_GRP) { - fprintf(out_fp,"%-8lu %6.02f%% %s\n", + fprintf(out_fp,"%-8llu %6.02Lf%% %s\n", aptr->count, - (t_hit==0)?0:((float)aptr->count/t_hit)*100.0, + (t_hit==0)?0:((long double)aptr->count/t_hit)*100.0, aptr->string); a_grp--; } @@ -1602,9 +1960,9 @@ aptr=*pointer++; if (aptr->flag == OBJ_REG) { - fprintf(out_fp,"%-8lu %6.02f%% %s\n", + fprintf(out_fp,"%-8llu %6.02Lf%% %s\n", aptr->count, - (t_hit==0)?0:((float)aptr->count/t_hit)*100.0, + (t_hit==0)?0:((long double)aptr->count/t_hit)*100.0, aptr->string); a_reg--; } @@ -1622,7 +1980,7 @@ void top_search_table() { - u_long cnt,t_val=0, tot_num; + u_int64_t cnt,t_val=0, tot_num; int i; SNODEPTR sptr, *pointer; @@ -1631,25 +1989,26 @@ cnt=tot_num=a_ctr; pointer=s_array; while(cnt--) { - t_val+=(u_long)((SNODEPTR)(*pointer)->count); + t_val+=(u_int64_t)(((SNODEPTR)(*pointer))->count); pointer++; } - if ( tot_num > ntop_search) tot_num=ntop_search; + if ( tot_num > (unsigned int)ntop_search) tot_num=ntop_search; fprintf(out_fp,"\n"); fprintf(out_fp,"
" \ - "%s %lu %s %lu %s
" \ "#%d%lu%3.02f%%%lld%3.02Lf%%", i+1,aptr->count, - (t_hit==0)?0:((float)aptr->count/t_hit)*100.0); + (t_hit==0)?0:((long double)aptr->count/t_hit)*100.0); if ((aptr->flag==OBJ_GRP)&&hlite_groups) fprintf(out_fp,"%s
\n"); fprintf(out_fp,"\n"); fprintf(out_fp,"\n", - GREY, msg_top_top, tot_num, msg_top_of, a_ctr, msg_top_sr); + "%s %lld %s %lld %s %s %s\n", + GREY, msg_top_top, tot_num, msg_top_of, a_ctr, + msg_top_sr, msg_h_by, msg_h_nhits); fprintf(out_fp,"\n"); fprintf(out_fp,"\n", GREY); fprintf(out_fp,"\n", - DKGREEN,msg_h_hits); + DKGREEN,msg_h_nhits); fprintf(out_fp,"\n", CYAN,msg_h_search); @@ -1662,17 +2021,17 @@ fprintf(out_fp, "\n" \ "\n" \ - "\n" \ - "\n" \ + "\n" \ + "\n" \ "\n",sptr->string); tot_num--; i++; } fprintf(out_fp,"\n"); - if ( (all_search) && (a_ctr>ntop_search) ) + if ( (all_search) && (a_ctr>(unsigned int)ntop_search) ) { if (all_search_page(a_ctr, t_val)) { @@ -1691,7 +2050,7 @@ /* ALL_SEARCH_PAGE - HTML for search strings */ /*********************************************/ -int all_search_page(u_long tot_num, u_long t_val) +int all_search_page(u_int64_t tot_num, u_int64_t t_val) { SNODEPTR sptr, *pointer; char search_fname[256], buffer[256]; @@ -1710,16 +2069,16 @@ fprintf(out_fp,"
\n");
 
-   fprintf(out_fp," %12s      %s\n",msg_h_hits,msg_h_search);
+   fprintf(out_fp," %12s      %s\n",msg_h_nhits,msg_h_search);
    fprintf(out_fp,"----------------  ----------------------\n\n");
 
    pointer=s_array;
    while(tot_num)
    {
       sptr=*pointer++;
-      fprintf(out_fp,"%-8lu %6.02f%%  %s\n",
+      fprintf(out_fp,"%-8llu %6.02Lf%%  %s\n",
          sptr->count,
-         (t_val==0)?0:((float)sptr->count/t_val)*100.0,
+         (t_val==0)?0:((long double)sptr->count/t_val)*100.0,
          sptr->string);
       tot_num--;
    }
@@ -1735,7 +2094,7 @@
 
 void top_users_table()
 {
-   u_long cnt=0, i_reg=0, i_grp=0, i_hid=0, tot_num;
+   u_int64_t cnt=0, i_reg=0, i_grp=0, i_hid=0, tot_num;
    int i;
    INODEPTR iptr, *pointer;
 
@@ -1753,20 +2112,20 @@
    }
 
    if ( (tot_num=i_reg+i_grp)==0 ) return;              /* split if none    */
-   if (tot_num > ntop_users) tot_num = ntop_users;
+   if (tot_num > (unsigned int)ntop_users) tot_num = ntop_users;
 
    fprintf(out_fp,"\n");       /* now do  tag   */
 
    fprintf(out_fp,"
" \ - "%s %lu %s %lu %s
" \ "#" \ "%s" \ "%s
%d%lu%3.02f%%%lld%3.02Lf%%", i+1,sptr->count, - (t_val==0)?0:((float)sptr->count/t_val)*100.0); + (t_val==0)?0:((long double)sptr->count/t_val)*100.0); fprintf(out_fp,"%s
\n"); fprintf(out_fp,"\n"); fprintf(out_fp,"\n", + "%s %lld %s %lld %s\n", GREY,msg_top_top, tot_num, msg_top_of, t_user, msg_top_i); fprintf(out_fp,"\n"); fprintf(out_fp,"\n",GREY); fprintf(out_fp,"\n",DKGREEN,msg_h_hits); + "%s\n",DKGREEN,msg_h_nhits); fprintf(out_fp,"\n",LTBLUE,msg_h_files); fprintf(out_fp,"\n" \ - "\n" \ - "\n" \ - "\n" \ - "\n" \ - "\n" \ - "\n" \ - "\n" \ - "\n" \ + "\n" \ + "\n" \ + "\n" \ + "\n" \ + "\n" \ + "\n" \ + "\n" \ + "\n" \ "\n", @@ -1816,7 +2175,7 @@ } fprintf(out_fp,"\n"); - if ( (all_users) && ((i_reg+i_grp)>ntop_users) ) + if ( (all_users) && ((i_reg+i_grp)>(unsigned int)ntop_users) ) { if (all_users_page(i_reg, i_grp)) { @@ -1835,7 +2194,7 @@ /* ALL_USERS_PAGE - HTML of all usernames */ /*********************************************/ -int all_users_page(u_long i_reg, u_long i_grp) +int all_users_page(u_int64_t i_reg, u_int64_t i_grp) { INODEPTR iptr, *pointer; char user_fname[256], buffer[256]; @@ -1866,12 +2225,12 @@ if (iptr->flag == OBJ_GRP) { fprintf(out_fp, - "%-8lu %6.02f%% %8lu %6.02f%% %8.0f %6.02f%% %8lu %6.02f%% %s\n", + "%-8llu %6.02Lf%% %8llu %6.02Lf%% %8llu %6.02Lf%% %8llu %6.02Lf%% %s\n", iptr->count, - (t_hit==0)?0:((float)iptr->count/t_hit)*100.0,iptr->files, - (t_file==0)?0:((float)iptr->files/t_file)*100.0,iptr->xfer/1024, - (t_xfer==0)?0:((float)iptr->xfer/t_xfer)*100.0,iptr->visit, - (t_visit==0)?0:((float)iptr->visit/t_visit)*100.0, + (t_hit==0)?0:((long double)iptr->count/t_hit)*100.0,iptr->files, + (t_file==0)?0:((long double)iptr->files/t_file)*100.0,iptr->xfer/1024, + (t_xfer==0)?0:((long double)iptr->xfer/t_xfer)*100.0,iptr->visit, + (t_visit==0)?0:((long double)iptr->visit/t_visit)*100.0, iptr->string); i_grp--; } @@ -1887,12 +2246,12 @@ if (iptr->flag == OBJ_REG) { fprintf(out_fp, - "%-8lu %6.02f%% %8lu %6.02f%% %8.0f %6.02f%% %8lu %6.02f%% %s\n", + "%-8llu %6.02Lf%% %8llu %6.02Lf%% %8llu %6.02Lf%% %8llu %6.02Lf%% %s\n", iptr->count, - (t_hit==0)?0:((float)iptr->count/t_hit)*100.0,iptr->files, - 
(t_file==0)?0:((float)iptr->files/t_file)*100.0,iptr->xfer/1024, - (t_xfer==0)?0:((float)iptr->xfer/t_xfer)*100.0,iptr->visit, - (t_visit==0)?0:((float)iptr->visit/t_visit)*100.0, + (t_hit==0)?0:((long double)iptr->count/t_hit)*100.0,iptr->files, + (t_file==0)?0:((long double)iptr->files/t_file)*100.0,iptr->xfer/1024, + (t_xfer==0)?0:((long double)iptr->xfer/t_xfer)*100.0,iptr->visit, + (t_visit==0)?0:((long double)iptr->visit/t_visit)*100.0, iptr->string); i_reg--; } @@ -1912,10 +2271,10 @@ { int i,j,x,tot_num=0,tot_ctry=0; int ctry_fnd; - u_long idx; + u_int64_t idx; HNODEPTR hptr; char *domain; - u_long pie_data[10]; + u_int64_t pie_data[10]; char *pie_legend[10]; char pie_title[48]; char pie_fname[48]; @@ -2001,7 +2360,7 @@ sprintf(pie_title,"%s %s %d",msg_ctry_use,l_month[cur_month-1],cur_year); sprintf(pie_fname,"ctry_usage_%04d%02d.png",cur_year,cur_month); - pie_chart(pie_fname,pie_title,t_hit,pie_data,pie_legend); /* do it */ + pie_chart(pie_fname,pie_title,t_code2xx304,pie_data,pie_legend); /* do it */ /* put the image tag in the page */ fprintf(out_fp,"\"%s\"\n"); fprintf(out_fp,"\n"); fprintf(out_fp,"\n", - GREY,msg_top_top,tot_num,msg_top_of,tot_ctry,msg_top_c); + "%s %d %s %d %s %s %s\n", + GREY,msg_top_top,tot_num,msg_top_of,tot_ctry, + msg_top_c, msg_h_by, msg_h_nhits); fprintf(out_fp,"\n"); fprintf(out_fp,"\n",GREY); fprintf(out_fp,"\n",DKGREEN,msg_h_hits); + "%s\n",DKGREEN,msg_h_nhits); fprintf(out_fp,"\n",LTBLUE,msg_h_files); fprintf(out_fp,"" \ "\n" \ - "\n" \ - "\n" \ - "\n" \ - "\n" \ - "\n" \ - "\n" \ + "\n" \ + "\n" \ + "\n" \ + "\n" \ + "\n" \ + "\n" \ "\n", i+1,top_ctrys[i]->count, - (t_hit==0)?0:((float)top_ctrys[i]->count/t_hit)*100.0, + (t_code2xx304==0)?0:((long double)top_ctrys[i]->count/t_code2xx304)*100.0, top_ctrys[i]->files, - (t_file==0)?0:((float)top_ctrys[i]->files/t_file)*100.0, + (t_file==0)?0:((long double)top_ctrys[i]->files/t_file)*100.0, top_ctrys[i]->xfer/1024, - (t_xfer==0)?0:((float)top_ctrys[i]->xfer/t_xfer)*100.0, + 
(t_xfer==0)?0:((long double)top_ctrys[i]->xfer/t_xfer)*100.0, top_ctrys[i]->desc); } fprintf(out_fp,"\n"); @@ -2060,7 +2420,7 @@ HNODEPTR hptr, *pointer; FILE *out_fp; char filename[256]; - u_long cnt=a_ctr; + u_int64_t cnt=a_ctr; /* generate file name */ sprintf(filename,"%s/site_%04d%02d.%s", @@ -2084,7 +2444,7 @@ if (hptr->flag != OBJ_GRP) { fprintf(out_fp, - "%lu\t%lu\t%.0f\t%lu\t%s\n", + "%lld\t%lld\t%lld\t%lld\t%s\n", hptr->count,hptr->files,hptr->xfer/1024, hptr->visit,hptr->string); } @@ -2103,7 +2463,7 @@ UNODEPTR uptr, *pointer; FILE *out_fp; char filename[256]; - u_long cnt=a_ctr; + u_int64_t cnt=a_ctr; /* generate file name */ sprintf(filename,"%s/url_%04d%02d.%s", @@ -2125,7 +2485,7 @@ uptr=*pointer++; if (uptr->flag != OBJ_GRP) { - fprintf(out_fp,"%lu\t%.0f\t%s\n", + fprintf(out_fp,"%lld\t%lld\t%s\n", uptr->count,uptr->xfer/1024,uptr->string); } cnt--; @@ -2135,6 +2495,46 @@ } /*********************************************/ +/* DUMP_ALL_DIRS - dump all dirs to tab file */ +/*********************************************/ + +void dump_all_dirs() +{ + PNODEPTR pptr, *pointer; + FILE *out_fp; + char filename[256]; + u_int64_t cnt=a_ctr; + + /* generate file name */ + sprintf(filename,"%s/dir_%04d%02d.%s", + (dump_path)?dump_path:".",cur_year,cur_month,dump_ext); + + /* open file */ + if ( (out_fp=open_out_file(filename))==NULL ) return; + + /* need a header? 
*/ + if (dump_header) + { + fprintf(out_fp,"%s\t%s\t%s\n",msg_h_hits,msg_h_xfer,msg_h_dir); + } + + /* dump 'em */ + pointer=d_array; + while (cnt) + { + pptr=*pointer++; + if (pptr->flag != OBJ_GRP) + { + fprintf(out_fp,"%lld\t%lld\t%s\n", + pptr->count,pptr->xfer/1024,pptr->string); + } + cnt--; + } + fclose(out_fp); + return; +} + +/*********************************************/ /* DUMP_ALL_REFS - dump all refs to tab file */ /*********************************************/ @@ -2143,7 +2543,7 @@ RNODEPTR rptr, *pointer; FILE *out_fp; char filename[256]; - u_long cnt=a_ctr; + u_int64_t cnt=a_ctr; /* generate file name */ sprintf(filename,"%s/ref_%04d%02d.%s", @@ -2165,7 +2565,7 @@ rptr=*pointer++; if (rptr->flag != OBJ_GRP) { - fprintf(out_fp,"%lu\t%s\n",rptr->count, rptr->string); + fprintf(out_fp,"%lld\t%s\n",rptr->count, rptr->string); } cnt--; } @@ -2204,7 +2604,7 @@ aptr=*pointer++; if (aptr->flag != OBJ_GRP) { - fprintf(out_fp,"%lu\t%s\n",aptr->count,aptr->string); + fprintf(out_fp,"%lld\t%s\n",aptr->count,aptr->string); } cnt--; } @@ -2221,7 +2621,7 @@ INODEPTR iptr, *pointer; FILE *out_fp; char filename[256]; - u_long cnt=a_ctr; + u_int64_t cnt=a_ctr; /* generate file name */ sprintf(filename,"%s/user_%04d%02d.%s", @@ -2245,7 +2645,7 @@ if (iptr->flag != OBJ_GRP) { fprintf(out_fp, - "%lu\t%lu\t%.0f\t%lu\t%s\n", + "%lld\t%lld\t%lld\t%lld\t%s\n", iptr->count,iptr->files,iptr->xfer/1024, iptr->visit,iptr->string); } @@ -2284,7 +2684,7 @@ while(cnt) { sptr=*pointer++; - fprintf(out_fp,"%lu\t%s\n",sptr->count,sptr->string); + fprintf(out_fp,"%lld\t%s\n",sptr->count,sptr->string); cnt--; } fclose(out_fp); @@ -2302,19 +2702,21 @@ int i,days_in_month; int lyear=0; int s_mth=0; - double gt_hit=0.0; - double gt_files=0.0; - double gt_pages=0.0; - double gt_xfer=0.0; - double gt_visits=0.0; + u_int64_t gt_hit=0; + u_int64_t gt_files=0; + u_int64_t gt_pages=0; + u_int64_t gt_xfer=0; + u_int64_t gt_visits=0; char index_fname[256]; char buffer[BUFSIZE]; + int 
earliest_month = 0; /* earliest month in the 1st MH month summary */ + int earliest_year = 0; /* earliest month in the 1st MH year summary */ if (verbose>1) printf("%s\n",msg_gen_sum); sprintf(buffer,"%s %s",msg_main_us,hname); - for (i=0;i<12;i++) /* get last month in history */ + for (i=0;ilyear) { lyear=hist_year[i]; s_mth=hist_month[i]; } @@ -2383,7 +2785,9 @@ fprintf(out_fp,"\n",DKGREEN,msg_h_hits); fprintf(out_fp,"\n"); - for (i=0;i<12;i++) + + /* output up to the most recent months */ + for (i=0;i%s %d\n", hist_year[s_mth], hist_month[s_mth], html_ext, s_month[hist_month[s_mth]-1], hist_year[s_mth]); - fprintf(out_fp,"\n", + fprintf(out_fp,"\n", hist_hit[s_mth]/days_in_month); - fprintf(out_fp,"\n", + fprintf(out_fp,"\n", hist_files[s_mth]/days_in_month); - fprintf(out_fp,"\n", + fprintf(out_fp,"\n", hist_page[s_mth]/days_in_month); - fprintf(out_fp,"\n", + fprintf(out_fp,"\n", hist_visit[s_mth]/days_in_month); - fprintf(out_fp,"\n", + fprintf(out_fp,"\n", hist_site[s_mth]); - fprintf(out_fp,"\n", + fprintf(out_fp,"\n", hist_xfer[s_mth]); - fprintf(out_fp,"\n", + fprintf(out_fp,"\n", hist_visit[s_mth]); - fprintf(out_fp,"\n", + fprintf(out_fp,"\n", hist_page[s_mth]); - fprintf(out_fp,"\n", + fprintf(out_fp,"\n", hist_files[s_mth]); - fprintf(out_fp,"\n", + fprintf(out_fp,"\n", hist_hit[s_mth]); gt_hit += hist_hit[s_mth]; gt_files += hist_files[s_mth]; gt_pages += hist_page[s_mth]; gt_xfer += hist_xfer[s_mth]; gt_visits+= hist_visit[s_mth]; - } - fprintf(out_fp,"\n"); - fprintf(out_fp,"\n",GREY,msg_h_totals); - fprintf(out_fp,"\n",GREY,gt_xfer); - fprintf(out_fp,"\n",GREY,gt_visits); - fprintf(out_fp,"\n",GREY,gt_pages); - fprintf(out_fp,"\n",GREY,gt_files); - fprintf(out_fp,"\n",GREY,gt_hit); - fprintf(out_fp,"\n"); - fprintf(out_fp,"
" \ - "%s %lu %s %lu %s
" \ "#" \ - "%s" \ "%s" \ @@ -1790,20 +2149,20 @@ fprintf(out_fp, "%d%lu%3.02f%%%lu%3.02f%%%.0f%3.02f%%%lu%3.02f%%%lld%3.02Lf%%%lld%3.02Lf%%%lld%3.02Lf%%%lld%3.02Lf%%", i+1,iptr->count, - (t_hit==0)?0:((float)iptr->count/t_hit)*100.0,iptr->files, - (t_file==0)?0:((float)iptr->files/t_file)*100.0,iptr->xfer/1024, - (t_xfer==0)?0:((float)iptr->xfer/t_xfer)*100.0,iptr->visit, - (t_visit==0)?0:((float)iptr->visit/t_visit)*100.0); + (t_code2xx304==0)?0:((long double)iptr->count/t_code2xx304)*100.0,iptr->files, + (t_file==0)?0:((long double)iptr->files/t_file)*100.0,iptr->xfer/1024, + (t_xfer==0)?0:((long double)iptr->xfer/t_xfer)*100.0,iptr->visit, + (t_visit==0)?0:((long double)iptr->visit/t_visit)*100.0); if ((iptr->flag==OBJ_GRP)&&hlite_groups) fprintf(out_fp,"%s
" \ - "%s %d %s %d %s
" \ "#" \ - "%s" \ "%s" \ @@ -2032,19 +2392,19 @@ if (top_ctrys[i]->count!=0) fprintf(out_fp,"
%d%lu%3.02f%%%lu%3.02f%%%.0f%3.02f%%%lld%3.02Lf%%%lld%3.02Lf%%%llu%3.02Lf%%%s
" \ "%s
%lu%llu%lu%lld%lu%lld%lu%lld%lu%lld%.0f%lld%lu%lld%lu%lld%lu%lld%lu
%llu
" \ - "%s" \ - "%.0f" \ - "%.0f" \ - "%.0f" \ - "%.0f" \ - "%.0f
\n"); - write_html_tail(out_fp); - fclose(out_fp); - return 0; + /* remember the oldest month we processed */ + earliest_month = hist_month[s_mth]; + earliest_year = hist_year[s_mth]; + } + + /* output HTML for any older months in the summary file */ + { + FILE *sumfile; /* open history summary file */ + char inbuf[BUFSIZ+1]; /* input buffer */ + int summary_year; /* year of summary as yyyy */ + int summary_month; /* month of summary as 2 digit number */ + char summary_mname[BUFSIZ+1]; /* month of summary as 3 char name */ + char summary_path[BUFSIZ+1]; /* usage_yyyymm.html file path */ + long double summary_dhits; /* daily average Hits for this month */ + long double summary_dfiles; /* daily average Files for this month */ + long double summary_dpages; /* daily average Pages for this month */ + long double summary_dvisits; /* daily average Visits for month */ + u_int64_t summary_tsites; /* total Sites for this month */ + u_int64_t summary_tkbytes; /* total KBytes for this month */ + u_int64_t summary_tvisits; /* total Visits for this month */ + u_int64_t summary_tpages; /* total Pages for this month */ + u_int64_t summary_tfiles; /* total Files for this month */ + u_int64_t summary_thits; /* total Hits for this month */ + + /* open the summary which is found in the history sub-directory */ + sumfile = fopen("../history/summary","r"); + if (sumfile != NULL) { + + /* process lines in summary file */ + while (fgets(inbuf, BUFSIZ, sumfile) != NULL) { + + /* parse the summary line */ + inbuf[BUFSIZ] = '\0'; + if (sscanf(inbuf, + "%4d %2d %3s " + "%s %Lf %Lf " + "%Lf %Lf %llu " + "%llu %llu %llu " + "%llu %llu\n", + &summary_year, &summary_month, summary_mname, + summary_path, &summary_dhits, &summary_dfiles, + &summary_dpages, &summary_dvisits, &summary_tsites, + &summary_tkbytes, &summary_tvisits, &summary_tpages, + &summary_tfiles, &summary_thits) == 14) { + + /* + * ignore this month if it is as or more recent than + * the earliest month printed in the MH month 
recent + * months as procssed in the previous section of code + */ + if (summary_year > earliest_year || + (summary_year == earliest_year && + summary_month >= earliest_month)) { + continue; + } + + /* output the HTML for this summary month */ + fprintf(out_fp, + "" + "%s %d\n", + summary_path, summary_mname, summary_year); + + fprintf(out_fp, + "%lld\n", + (u_int64_t)(summary_dhits+0.5)); + fprintf(out_fp, + "%lld\n", + (u_int64_t)(summary_dfiles+0.5)); + fprintf(out_fp, + "%lld\n", + (u_int64_t)(summary_dpages+0.5)); + fprintf(out_fp, + "%lld\n", + (u_int64_t)(summary_dvisits+0.5)); + fprintf(out_fp, + "%lld\n", + summary_tsites); + fprintf(out_fp, + "%lld\n", + summary_tkbytes); + fprintf(out_fp, + "%lld\n", + summary_tvisits); + fprintf(out_fp, + "%lld\n", + summary_tpages); + fprintf(out_fp, + "%lld\n", + summary_tfiles); + fprintf(out_fp, + "%llu\n", + summary_thits); + gt_hit += summary_thits; + gt_files += summary_tfiles; + gt_pages += summary_tpages; + gt_xfer += summary_tkbytes; + gt_visits+= summary_tvisits; + } + } + fclose(sumfile); + } + } + + /* output the pre-history file */ + { + FILE *prehist; /* open pre-history file */ + char inbuf[BUFSIZ+1]; /* input buffer */ + int summary_year; /* year of summary as yyyy */ + int summary_month; /* month of summary as 2 digit number */ + char summary_mname[BUFSIZ+1]; /* month of summary as 3 char name */ + char summary_path[BUFSIZ+1]; /* usage_yyyymm.html file path */ + long double summary_dhits; /* daily average Hits for this month */ + long double summary_dfiles; /* daily average Files for this month */ + long double summary_dpages; /* daily average Pages for this month */ + long double summary_dvisits; /* daily average Visits for month */ + u_int64_t summary_tsites; /* total Sites for this month */ + u_int64_t summary_tkbytes; /* total KBytes for this month */ + u_int64_t summary_tvisits; /* total Visits for this month */ + u_int64_t summary_tpages; /* total Pages for this month */ + u_int64_t 
summary_tfiles; /* total Files for this month */ + u_int64_t summary_thits; /* total Hits for this month */ + + /* open the pre-history which is found in the history sub-directory */ + prehist = fopen("../history/prehistory","r"); + if (prehist != NULL) { + + /* there is just 1 line in the pre-history file - process it */ + if (fgets(inbuf, BUFSIZ, prehist) != NULL) { + + /* parse the summary line */ + inbuf[BUFSIZ] = '\0'; + if (sscanf(inbuf, + "%4d %2d %3s " + "%s %Lf %Lf " + "%Lf %Lf %llu " + "%llu %llu %llu " + "%llu %llu\n", + &summary_year, &summary_month, summary_mname, + summary_path, &summary_dhits, &summary_dfiles, + &summary_dpages, &summary_dvisits, &summary_tsites, + &summary_tkbytes, &summary_tvisits, &summary_tpages, + &summary_tfiles, &summary_thits) == 14) { + + fprintf(out_fp, "\n"); + fprintf(out_fp, + "" + "Before %s %d\n", + summary_mname, summary_year); + fprintf(out_fp, + "%lld\n", + summary_tkbytes); + fprintf(out_fp, + "%lld\n", + summary_tvisits); + fprintf(out_fp, + "%lld\n", + summary_tpages); + fprintf(out_fp, + "%lld\n", + summary_tfiles); + fprintf(out_fp, + "%lld\n" + "\n", + summary_thits); + + gt_hit += summary_thits; + gt_files += summary_tfiles; + gt_pages += summary_tpages; + gt_xfer += summary_tkbytes; + gt_visits+= summary_tvisits; + } + } + fclose(prehist); + } + } + + fprintf(out_fp,"\n"); + fprintf(out_fp,"" \ + "%s\n",GREY,msg_h_totals); + fprintf(out_fp,"" \ + "%lld\n",GREY,gt_xfer); + fprintf(out_fp,"" \ + "%lld\n",GREY,gt_visits); + fprintf(out_fp,"" \ + "%lld\n",GREY,gt_pages); + fprintf(out_fp,"" \ + "%lld\n",GREY,gt_files); + fprintf(out_fp,"" \ + "%lld\n",GREY,gt_hit); + fprintf(out_fp,"\n"); + fprintf(out_fp,"\n"); + write_html_tail(out_fp); + fclose(out_fp); + return 0; } /*********************************************/ @@ -2444,7 +3023,7 @@ int qs_site_cmph(const void *cp1, const void *cp2) { - u_long t1, t2; + u_int64_t t1, t2; t1=(*(HNODEPTR *)cp1)->count; t2=(*(HNODEPTR *)cp2)->count; if (t1!=t2) return 
(t2xfer; t2=(*(HNODEPTR *)cp2)->xfer; if (t1!=t2) return (t2count; t2=(*(UNODEPTR *)cp2)->count; if (t1!=t2) return (t2xfer; t2=(*(UNODEPTR *)cp2)->xfer; if (t1!=t2) return (t2count; + t2=(*(PNODEPTR *)cp2)->count; + if (t1!=t2) return (t2string, + (*(PNODEPTR *)cp2)->string ); +} + +/*********************************************/ +/* QS_DIR_CMPK - QSort compare dir by bytes */ +/*********************************************/ + +int qs_dir_cmpk(const void *cp1, const void *cp2) +{ + u_int64_t t1, t2; + t1=(*(PNODEPTR *)cp1)->xfer; + t2=(*(PNODEPTR *)cp2)->xfer; + if (t1!=t2) return (t2string, + (*(PNODEPTR *)cp2)->string ); +} + +/*********************************************/ /* QS_URL_CMPN - QSort compare URL by entry */ /*********************************************/ int qs_url_cmpn(const void *cp1, const void *cp2) { - double t1, t2; + u_int64_t t1, t2; t1=(*(UNODEPTR *)cp1)->entry; t2=(*(UNODEPTR *)cp2)->entry; if (t1!=t2) return (t2exit; t2=(*(UNODEPTR *)cp2)->exit; if (t1!=t2) return (t2count; t2=(*(RNODEPTR *)cp2)->count; if (t1!=t2) return (t2count; t2=(*(ANODEPTR *)cp2)->count; if (t1!=t2) return (t2count; t2=(*(SNODEPTR *)cp2)->count; if (t1!=t2) return (t2count; t2=(*(INODEPTR *)cp2)->count; if (t1!=t2) return (t2next; + } + } + return ctr; /* return number loaded */ +} + +/*********************************************/ /* LOAD_REF_ARRAY - load up the sort array */ /*********************************************/ -u_long load_ref_array(RNODEPTR *pointer) +u_int64_t load_ref_array(RNODEPTR *pointer) { RNODEPTR rptr; int i; - u_long ctr = 0; + u_int64_t ctr = 0; /* load the array */ for (i=0;i #include #include +#include +#include /* ensure getopt */ #ifdef HAVE_GETOPT_H @@ -65,9 +67,108 @@ /* internal function prototypes */ void fmt_logrec(char *); -int parse_record_web(char *); -int parse_record_ftp(char *); -int parse_record_squid(char *); +static int parse_record_web(char *); +static int parse_record_ftp(char *); +static int parse_record_squid(char *); 
+static void form_url_dirname(char *, char *); + +/* + * pre-compiled Perl-like regular expressions + */ +struct pcre_set { + char *pattern; /* regular expression, or NULL ==> end of list */ + pcre *re; /* compiled pattern */ + pcre_extra *pe; /* optimized compiled pattern */ +}; +/* keep the pc_re enum in the same order as the pc[] array */ +enum pc_re { + RE_GET_URL=0, /* get URL out of an HTTP command */ + RE_STRIP_ARGS, /* strip off HTTP args and HTTP command fields */ + RE_UN_SLASHLASH, /* convert //'s into / */ + RE_SLASHDOT, /* convert /./'s into / */ + RE_UN_DOTDOT, /* remove foo/../ where foo contains no . */ + RE_TRAIL_DOTDOT, /* strip the .. from a trailing /.. */ + RE_LEAD_DOTDOT, /* remove leading /../'s and /./'s */ + RE_UN_DOTDOT2, /* remove any remaining foo/../ (foo may contain .) */ +}; +/* keep the pc_re enum in the same order as the pc[] array */ +static struct pcre_set pc[] = { + + /* RE_GET_URL */ + {"^\"?\\S+\\s+(\\S+)(\\s.*)?\"?$", NULL, NULL}, + + /* RE_STRIP_ARGS */ + {"^([^?\\s]+)[?\\s]", NULL, NULL}, + + /* RE_UN_SLASHLASH */ + {"^(.*?)//+(.*)$", NULL, NULL}, + + /* RE_SLASHDOT */ + {"^(.*?/)(\\./)+(.*)$", NULL, NULL}, + + /* RE_UN_DOTDOT */ + {"^(.*/)?[^/.]+/\\.\\./(.*)$", NULL, NULL}, + + /* RE_TRAIL_DOTDOT */ + {"^(.*/)\\.\\.$", NULL, NULL}, + + /* RE_LEAD_DOTDOT */ + {"^(/\\.{1,2})+(/.*)$", NULL, NULL}, + + /* RE_UN_DOTDOT2 */ + {"^(.*/)?[^/]+/\\.\\./(.*)$", NULL, NULL}, + + {NULL, NULL, NULL} /* must be last */ +}; +static int pcre_ready = 0; /* 1 ==> regular expressions pre-compiled */ +#define OV_LEN (3*16) /* substring info vector len - must be multiple of 3 */ +static int pcredbg = 0; /* 1 ==> output debugging to stderr */ + + +/******************************************************/ +/* PCRE_SETUP - compile Perl-like regular expressions */ +/******************************************************/ + +static void pcre_setup(void) +{ + const char *comp_err; /* regular expression compile error */ + int comp_offset; /* offset in pattern where error occurred */ + int
i; + + /* firewall - no need to set up twice */ + if (pcre_ready != 0) { + return; + } + + /* compile each regular expression; the NULL pattern ends the pc[] list */ + for (i=0; pc[i].pattern != NULL; ++i) { + + /* compile the regular expression pattern; a bad pattern is fatal */ + pc[i].re = pcre_compile(pc[i].pattern, 0, &comp_err, + &comp_offset, NULL); + if (pc[i].re == NULL) { + fprintf(stderr, + "FATAL: regular expression %d error at offset %d: %s\n", + i, comp_offset, comp_err); + fprintf(stderr, "FATAL: bad reexp: %s\n", pc[i].pattern); + exit(1); + } + + /* optimize compiled regular expression (pe may be NULL without error) */ + pc[i].pe = pcre_study(pc[i].re, 0, &comp_err); + if (comp_err != NULL) { + fprintf(stderr, + "FATAL: regular expression %d optimize failed: %s\n", + i, comp_err); + exit(2); + } + } + + /* everything is set up */ + pcre_ready = 1; + return; +} + /*********************************************/ /* FMT_LOGREC - terminate log fields w/zeros */ @@ -100,12 +201,20 @@ int parse_record(char *buffer) { + int ret; /* parser status: 0 ==> OK, -1 ==> error, >0 ==> silent ignore */ + + /* set up regular expressions if needed (only the dir reports use them) */ + if ((ntop_dirs || ntop_dirsK || dump_dirs) && pcre_ready == 0) { + pcre_setup(); + } + /* clear out structure */ memset(&log_rec,0,sizeof(struct log_struct)); /* log_rec.hostname[0]=0; log_rec.datetime[0]=0; log_rec.url[0]=0; + log_rec.dir[0]=0; log_rec.resp_code=0; log_rec.xfer_size=0; log_rec.refer[0]=0; @@ -121,17 +230,25 @@ switch (log_type) { default: - case LOG_CLF: return parse_record_web(buffer); break; /* clf */ - case LOG_FTP: return parse_record_ftp(buffer); break; /* ftp */ - case LOG_SQUID: return parse_record_squid(buffer); break; /* squid */ + case LOG_CLF: ret = parse_record_web(buffer); break; /* clf */ + case LOG_FTP: ret = parse_record_ftp(buffer); break; /* ftp */ + case LOG_SQUID: ret = parse_record_squid(buffer); break; /* squid */ + } + + /* form the dir name of the URL path component (runs whatever ret is) */ + if (ntop_dirs || ntop_dirsK || dump_dirs) { + form_url_dirname(log_rec.url, log_rec.dir); } + + /* return the log-type parser's status */ + return ret; }
/*********************************************/ /* PARSE_RECORD_FTP - ftp log handler */ /*********************************************/ -int parse_record_ftp(char *buffer) +static int parse_record_ftp(char *buffer) { int size; int i,j; @@ -157,9 +274,9 @@ j=atoi(cp1); /* get year */ /* minimal sanity check */ - if (*(cpy+2)!=':' || *(cpy+5)!=':') return 0; - if (j<1990 || j>2100) return 0; - if (i<1 || i>31) return 0; + if (*(cpy+2)!=':' || *(cpy+5)!=':') return -1; + if (j<1990 || j>2100) return -1; + if (i<1 || i>31) return -1; /* format date/time field */ sprintf(log_rec.datetime,"[%02d/%s/%4d:%s -0000]",i,cpx,j,cpy); @@ -210,17 +327,20 @@ /* return appropriate response code */ log_rec.resp_code=(*(eob-2)=='i')?206:200; - return 1; + return 0; } /*********************************************/ /* PARSE_RECORD_WEB - web log handler */ /*********************************************/ -int parse_record_web(char *buffer) +static int parse_record_web(char *buffer) { int size; char *cp1, *cp2, *cpx, *eob, *eos; +#if defined(MODIFY_LOG_LINES) + char *p; +#endif /* MODIFY_LOG_LINES */ size = strlen(buffer); /* get length of buffer */ eob = buffer+size; /* calculate end of buffer */ @@ -235,11 +355,10 @@ *cp2 = '\0'; if (*cp1 != '\0') { - if (verbose) + if (debug_mode) { fprintf(stderr,"%s",msg_big_host); - if (debug_mode) fprintf(stderr,": %s\n",cpx); - else fprintf(stderr,"\n"); + fprintf(stderr,": %s\n",cpx); } while (*cp1 != '\0') cp1++; } @@ -262,16 +381,15 @@ } *cp2--='\0'; - if (cp1 >= eob) return 0; + if (cp1 >= eob) return -1; /* check if oversized username */ if (*cp1 != '[') { - if (verbose) + if (debug_mode) { fprintf(stderr,"%s",msg_big_user); - if (debug_mode) fprintf(stderr,": %s\n",cpx); - else fprintf(stderr,"\n"); + fprintf(stderr,": %s\n",cpx); } while ( (*cp1 != '[') && (cp1 < eob) ) cp1++; } @@ -289,11 +407,10 @@ *cp2 = '\0'; if (*cp1 != '\0') { - if (verbose) + if (debug_mode) { fprintf(stderr,"%s",msg_big_date); - if (debug_mode) fprintf(stderr,": 
%s\n",cpx); - else fprintf(stderr,"\n"); + fprintf(stderr,": %s\n",cpx); } while (*cp1 != '\0') cp1++; } @@ -302,7 +419,7 @@ /* minimal sanity check on timestamp */ if ( (log_rec.datetime[0] != '[') || (log_rec.datetime[3] != '/') || - (cp1 >= eob)) return 0; + (cp1 >= eob)) return -1; /* HTTP request */ cpx = cp1; @@ -314,18 +431,99 @@ *cp2 = '\0'; if (*cp1 != '\0') { - if (verbose) + if (debug_mode) { fprintf(stderr,"%s",msg_big_req); - if (debug_mode) fprintf(stderr,": %s\n",cpx); - else fprintf(stderr,"\n"); + fprintf(stderr,": %s\n",cpx); } while (*cp1 != '\0') cp1++; } if (cp1 < eob) cp1++; if ( (log_rec.url[0] != '"') || - (cp1 >= eob) ) return 0; + (cp1 >= eob) ) return -1; + + if ( (log_rec.url[1] == '"') || + (log_rec.url[1] == '-' && log_rec.url[2] == '"') ) { + return 1; /* silent ignore of an empty record */ + } + +#if defined(MODIFY_LOG_LINES) + /* -Z means to ignore any record that does not have an absolute URL */ + if (ignore_rel_url) { + + /* look past the "HTTP command */ + for (p = log_rec.url; *p && *p != ' '; ++p) { + } + if (*p == ' ') { + + /* look past the initial "HTTP command for the 1st non-alphanum */ + for (++p; *p && isascii(*p) && isalnum(*p); ++p) { + } + + /* if we are at a :/ then we have an absolute URL */ + if (*p != ':' || *(p+1) != '/') { + + /* URL is not of the form alphanum:/... 
*/ + return 1; /* silent ignore of non abs record */ + } + + } else { + /* no leading "HTTP command */ + return -1; + } + } + + /* -z means to strip off any method://host.name from the URL */ + if (strip_abs_url) { + char *beyond = log_rec.url; + char *start; + + /* look past the "HTTP command */ + for (start = beyond; *start && *start != ' '; ++start) { + } + if (*start == ' ') { + + /* look past the HTTP command for the first non-alphanumeric */ + for (p = ++start; *p && isascii(*p) && isalnum(*p); ++p) { + } + + /* if that character is a :, then we have an absolute URL to strip */ + if (*p == ':') { + + /* we will move beyond the method: at a minimum */ + beyond = p+1; + + /* must now strip off // after the method: */ + if (beyond[0] == '/' && beyond[1] == '/') { + + /* beyond through the method:// */ + beyond += 2; + + /* skip the host until the next / */ + for (p = beyond; *p && *p != '/'; ++p) { + } + if (*p == '/') { + /* beyond through the method://host.name */ + beyond = p; + } + } + + /* shift the URL down and thus remove the method://host.name */ + p = start; + while (*beyond) { + *p++ = *beyond++; + } + *p = '\0'; + } + + /* must have a space after the "HTTP command */ + } else { + /* no leading "HTTP command */ + return -1; + } + } +#endif /* MODIFY_LOG_LINES */ /* response code */ log_rec.resp_code = atoi(cp1); @@ -337,7 +535,7 @@ else log_rec.xfer_size = strtoul(cp1,NULL,10); /* done with CLF record */ - if (cp1>=eob) return 1; + if (cp1>=eob) return 0; while ( (*cp1 != '\0') && (*cp1 != '\n') && (cp1 < eob) ) cp1++; if (cp1 < eob) cp1++; @@ -351,11 +549,10 @@ *cp2 = '\0'; if (*cp1 != '\0') { - if (verbose) + if (debug_mode) { fprintf(stderr,"%s",msg_big_ref); - if (debug_mode) fprintf(stderr,": %s\n",cpx); - else fprintf(stderr,"\n"); + fprintf(stderr,": %s\n",cpx); } while (*cp1 != '\0') cp1++; } @@ -369,14 +566,14 @@ while ( (*cp1 != '\0') && (cp1 != eos) ) *cp2++ = *cp1++; *cp2 = '\0'; - return 1; + return 0;
/* maybe a valid record, return with TRUE */ } /*********************************************/ /* PARSE_RECORD_SQUID - squid log handler */ /*********************************************/ -int parse_record_squid(char *buffer) +static int parse_record_squid(char *buffer) { int size; time_t i; @@ -410,11 +607,10 @@ *cp2='\0'; if (*cp1 != '\0') { - if (verbose) + if (debug_mode) { fprintf(stderr,"%s",msg_big_host); - if (debug_mode) fprintf(stderr,": %s\n",cpx); - else fprintf(stderr,"\n"); + fprintf(stderr,": %s\n",cpx); } while (*cp1 != '\0') cp1++; } @@ -447,11 +643,10 @@ *cp2 = '\0'; if (*cp1 != '\0') { - if (verbose) + if (debug_mode) { fprintf(stderr,"%s",msg_big_req); - if (debug_mode) fprintf(stderr,": %s\n",cpx); - else fprintf(stderr,"\n"); + fprintf(stderr,": %s\n",cpx); } while (*cp1 != '\0') cp1++; } @@ -468,11 +663,10 @@ *cp2 = '\0'; if (*cp1 != '\0') { - if (verbose) + if (debug_mode) { fprintf(stderr,"%s",msg_big_req); - if (debug_mode) fprintf(stderr,": %s\n",cpx); - else fprintf(stderr,"\n"); + fprintf(stderr,": %s\n",cpx); } while (*cp1 != '\0') cp1++; } @@ -492,11 +686,386 @@ *cp2--='\0'; - if (cp1 >= eob) return 0; + if (cp1 >= eob) return -1; /* strip trailing space(s) */ while (*cp2==' ') *cp2--='\0'; /* we have no interest in the remaining fields */ - return 1; + return 0; +} + +/*****************************************************************/ +/* FORM_URL_DIRNAME - form the dirname of the URL path component */ +/*****************************************************************/ + +static void form_url_dirname(char *http_cmd, char *dir) +{ + int ovector[OV_LEN+1]; /* vector for substring information */ + int match_ret; /* pcre_exec return code */ + int code; /* 1st matched substring code */ + int code2; /* 2nd matched substring code */ + const char *substr; /* 1st matched substring or NULL */ + const char *substr2; /* 2nd matched substring or NULL */ + const char *url; /* the URL part of the log entry (heap string, freed before the normal return) */ + char *tmp; /* temp string */ + int
try_again; /* 1 ==> keep trying the same cycle */ + + /* firewall */ + if (http_cmd == NULL || dir == NULL) { + if (pcredbg) { + fprintf(stderr, "form_url_dirname: bail #0\n"); + } + return; + } + dir[0] = '\0'; /* in case we must bail */ + if (pcredbg) { + fprintf(stderr, "form_url_dirname: http_cmd: %s\n", http_cmd); + } + + /* + * extract the URL from the log entry + */ + memset(ovector, 0, sizeof(ovector)); + match_ret = pcre_exec(pc[RE_GET_URL].re, pc[RE_GET_URL].pe, + http_cmd, strlen(http_cmd), 0, 0, ovector, OV_LEN); + /* look for a match */ + code = 0; + substr = NULL; + if (match_ret >= 2) { + code = pcre_get_substring(http_cmd, ovector, match_ret, 1, &substr); + } + /* no match, use the entire http command */ + if (code <= 0 || match_ret < 2 || substr == NULL || substr[0] == '\0') { + substr = strdup(http_cmd); + if (substr == NULL) { + if (pcredbg) { + fprintf(stderr, "form_url_dirname: bail #1\n"); + } + return; /* bail */ + } + } + url = substr; + /* url now points at allocated memory (pcre_get_substring or strdup); NOTE(review): both kinds are later released with pcre_free() */ + if (pcredbg) { + fprintf(stderr, "form_url_dirname: url: %s\n", url); + } + + /* + * force a leading / if it does not have one + */ + if (url[0] != '/') { + tmp = malloc(1+strlen(url)+1+1); + if (tmp == NULL) { + if (pcredbg) { + fprintf(stderr, "form_url_dirname: bail #2\n"); + } + return; /* bail - NOTE(review): url is not freed on this path */ + } + strcpy(tmp, "/"); + strcat(tmp, url); + pcre_free((void *)url); + url = tmp; + if (pcredbg) { + fprintf(stderr, "form_url_dirname: forced leading /: %s\n", url); + } + } + + /* + * strip off ? args and fields after whitespace + */ + memset(ovector, 0, sizeof(ovector)); + match_ret = pcre_exec(pc[RE_STRIP_ARGS].re, pc[RE_STRIP_ARGS].pe, + url, strlen(url), 0, 0, ovector, OV_LEN); + /* look for a match */ + code = 0; + substr = NULL; + if (match_ret == 2) { + code = pcre_get_substring(url, ovector, match_ret, 1, &substr); + } + /* if match, just use URL path (before any ?
or whitespace) */ + if (code >= 0 && substr != NULL && substr[0] != '\0') { + pcre_free((void *)url); + url = substr; + if (pcredbg) { + fprintf(stderr, "form_url_dirname: stripped url: %s\n", url); + } + } + + /* + * convert //'s into / + */ + do { + /* // pattern match */ + memset(ovector, 0, sizeof(ovector)); + match_ret = pcre_exec(pc[RE_UN_SLASHLASH].re, pc[RE_UN_SLASHLASH].pe, + url, strlen(url), 0, 0, ovector, OV_LEN); + /* look for a match */ + try_again = 0; + code = 0; + code2 = 0; + substr = NULL; + substr2 = NULL; + if (match_ret == 3) { + /* before the // */ + code = pcre_get_substring(url, ovector, match_ret, 1, &substr); + /* after the // */ + code2 = pcre_get_substring(url, ovector, match_ret, 2, &substr2); + } + /* match, collapse the //'s into a single / */ + if (code >= 0 && code2 >= 0 && substr != NULL && substr2 != NULL) { + tmp = malloc(strlen(substr)+1+strlen(substr2)+1+1); + if (tmp == NULL) { + if (pcredbg) { + fprintf(stderr, "form_url_dirname: bail #3\n"); + } + return; /* bail - NOTE(review): url, substr and substr2 are not freed here */ + } + strcpy(tmp, substr); + strcat(tmp, "/"); + strcat(tmp, substr2); + pcre_free((void *)substr); + substr = NULL; + pcre_free((void *)substr2); + substr2 = NULL; + pcre_free((void *)url); + url = tmp; + try_again = 1; + if (pcredbg) { + fprintf(stderr, "form_url_dirname: //-collapse: %s\n", url); + } + } + } while (try_again != 0); + + /* + * replace /./ with / + */ + do { + /* /./ pattern match */ + memset(ovector, 0, sizeof(ovector)); + match_ret = pcre_exec(pc[RE_SLASHDOT].re, pc[RE_SLASHDOT].pe, + url, strlen(url), 0, 0, ovector, OV_LEN); + /* look for a match */ + try_again = 0; + code = 0; + code2 = 0; + substr = NULL; + substr2 = NULL; + if (match_ret == 4) { + /* before the /./ */ + code = pcre_get_substring(url, ovector, match_ret, 1, &substr); + /* after the /./ */ + code2 = pcre_get_substring(url, ovector, match_ret, 3, &substr2); + } + /* match, remove the /./ */ + if (code >= 0 && code2 >= 0 && substr != NULL && substr2 != NULL) { + tmp =
malloc(strlen(substr)+1+strlen(substr2)+1+1); + if (tmp == NULL) { + if (pcredbg) { + fprintf(stderr, "form_url_dirname: bail #4\n"); + } + return; /* bail - NOTE(review): url, substr and substr2 are not freed here */ + } + strcpy(tmp, substr); + strcat(tmp, substr2); + pcre_free((void *)substr); + substr = NULL; + pcre_free((void *)substr2); + substr2 = NULL; + pcre_free((void *)url); + url = tmp; + try_again = 1; + if (pcredbg) { + fprintf(stderr, "form_url_dirname: slashdot: %s\n", url); + } + } + } while (try_again != 0); + + /* + * remove foo/../, where foo contains no . (RE_UN_DOTDOT uses [^/.]+) + */ + do { + /* foo/../ pattern match */ + memset(ovector, 0, sizeof(ovector)); + match_ret = pcre_exec(pc[RE_UN_DOTDOT].re, pc[RE_UN_DOTDOT].pe, + url, strlen(url), 0, 0, ovector, OV_LEN); + /* look for a match */ + try_again = 0; + code = 0; + code2 = 0; + substr = NULL; + substr2 = NULL; + if (match_ret == 3) { + /* before the foo/../ */ + code = pcre_get_substring(url, ovector, match_ret, 1, &substr); + /* after the foo/../ */ + code2 = pcre_get_substring(url, ovector, match_ret, 2, &substr2); + } + /* match, remove the foo/../ */ + if (code >= 0 && code2 >= 0 && substr != NULL && substr2 != NULL) { + tmp = malloc(strlen(substr)+1+strlen(substr2)+1+1); + if (tmp == NULL) { + if (pcredbg) { + fprintf(stderr, "form_url_dirname: bail #4\n"); + } + return; /* bail - NOTE(review): nothing is freed here, and debug tag #4 is also used by the /./ loop */ + } + strcpy(tmp, substr); + strcat(tmp, substr2); + pcre_free((void *)substr); + pcre_free((void *)substr2); + pcre_free((void *)url); + url = tmp; + try_again = 1; + if (pcredbg) { + fprintf(stderr, "form_url_dirname: ..-reduced: %s\n", url); + } + } + } while (try_again != 0); + + /* + * remove trailing /.. from the path + */ + do { + /* trailing /.. pattern match */ + memset(ovector, 0, sizeof(ovector)); + match_ret = pcre_exec(pc[RE_TRAIL_DOTDOT].re, pc[RE_TRAIL_DOTDOT].pe, + url, strlen(url), 0, 0, ovector, OV_LEN); + /* look for a match */ + try_again = 0; + code = 0; + substr = NULL; + if (match_ret == 2) { + /* before the /..
*/ + code = pcre_get_substring(url, ovector, match_ret, 1, &substr); + } + /* match, drop the trailing .. (the / before it is kept) */ + if (code >= 0 && substr != NULL) { + pcre_free((void *)url); + url = substr; + try_again = 1; + if (pcredbg) { + fprintf(stderr, "form_url_dirname: trailing/..: %s\n", url); + } + } + } while (try_again != 0); + + /* + * remove leading /..'s + */ + do { + /* leading /../ pattern match */ + memset(ovector, 0, sizeof(ovector)); + match_ret = pcre_exec(pc[RE_LEAD_DOTDOT].re, pc[RE_LEAD_DOTDOT].pe, + url, strlen(url), 0, 0, ovector, OV_LEN); + /* look for a match */ + try_again = 0; + code2 = 0; + substr = NULL; + substr2 = NULL; + if (match_ret == 3) { + /* after the /../'s */ + code2 = pcre_get_substring(url, ovector, match_ret, 2, &substr2); + } + /* match, keep only what follows the leading /../'s and /./'s */ + if (code2 >= 0 && substr2 != NULL) { + pcre_free((void *)url); + url = substr2; + try_again = 1; + if (pcredbg) { + fprintf(stderr, + "form_url_dirname: leading/.. or /.: %s\n", url); + } + } + } while (try_again != 0); + + /* + * remove foo./../ + */ + do { + /* foo./../ pattern match */ + memset(ovector, 0, sizeof(ovector)); + match_ret = pcre_exec(pc[RE_UN_DOTDOT2].re, pc[RE_UN_DOTDOT2].pe, + url, strlen(url), 0, 0, ovector, OV_LEN); + /* look for a match */ + try_again = 0; + code = 0; + code2 = 0; + substr = NULL; + substr2 = NULL; + if (match_ret == 3) { + /* before the foo./../ */ + code = pcre_get_substring(url, ovector, match_ret, 1, &substr); + /* after the foo./../ */ + code2 = pcre_get_substring(url, ovector, match_ret, 2, &substr2); + } + /* match, remove the foo./../ */ + if (code >= 0 && code2 >= 0 && substr != NULL && substr2 != NULL) { + tmp = malloc(strlen(substr)+1+strlen(substr2)+1+1); + if (tmp == NULL) { + if (pcredbg) { + fprintf(stderr, "form_url_dirname: bail #5\n"); + } + return; /* bail - NOTE(review): url, substr and substr2 are not freed here */ + } + strcpy(tmp, substr); + strcat(tmp, substr2); + pcre_free((void *)substr); + substr = NULL; + pcre_free((void *)substr2); + substr2 = NULL; + pcre_free((void
*)url); + url = tmp; + try_again = 1; + if (pcredbg) { + fprintf(stderr, "form_url_dirname: ..-reduced2: %s\n", url); + } + } + } while (try_again != 0); + + /* + * a lone /. becomes just / + */ + if (strcmp(url, "/.") == 0) { + tmp = strdup("/"); + if (tmp == NULL) { + if (pcredbg) { + fprintf(stderr, "form_url_dirname: bail #6\n"); + } + return; /* bail - NOTE(review): url is not freed on this path */ + } + pcre_free((void *)url); + url = tmp; + if (pcredbg) { + fprintf(stderr, "form_url_dirname: lone /.: %s\n", url); + } + } + + /* + * load the modified url into the dir field (assumes dir holds at least MAXURL+1 bytes - TODO confirm) + */ + strncpy(dir, url, MAXURL); + dir[MAXURL] = '\0'; + pcre_free((void *)url); + url = NULL; + + /* + * strip off the final /element + * + * NOTE: We consider the dirname of /chongo/ to be /chongo because + * the URL path /chongo/ really refers to /chongo/index.html + * and the dirname of /chongo/index.html is /chongo + */ + tmp = strrchr(dir, '/'); + if (tmp != NULL) { + *tmp = '\0'; + } + /* unless it was only / */ + if (dir[0] == '\0') { + strcpy(dir, "/"); + } + if (pcredbg) { + fprintf(stderr, "form_url_dirname: dir: %s\n", dir); + } + return; } diff -r -u webalizer-2.01-10.init/preserve.c webalizer-2.01-10/preserve.c --- webalizer-2.01-10.init/preserve.c 2001-10-23 23:22:42.000000000 -0700 +++ webalizer-2.01-10/preserve.c 2007-01-23 23:02:19.000000000 -0800 @@ -66,15 +66,16 @@ #include "preserve.h" /* local variables */ -int hist_month[12], hist_year[12]; /* arrays for monthly total */ -u_long hist_hit[12]; /* calculations: used to */ -u_long hist_files[12]; /* produce index.html */ -u_long hist_site[12]; /* these are read and saved */ -double hist_xfer[12]; /* in the history file */ -u_long hist_page[12]; -u_long hist_visit[12]; +int hist_month[MH], hist_year[MH]; /* arrays for monthly total */ +u_int64_t hist_hit[MH]; /* calculations: used to */ +u_int64_t hist_files[MH]; /* produce index.html */ +u_int64_t hist_site[MH]; /* these are read and saved */ +u_int64_t hist_xfer[MH]; /* in the history file */ +u_int64_t
hist_page[MH]; +u_int64_t hist_visit[MH]; +u_int64_t hist_code2xx304[MH]; -int hist_fday[12], hist_lday[12]; /* first/last day arrays */ +int hist_fday[MH], hist_lday[MH]; /* first/last day arrays */ /*********************************************/ /* GET_HISTORY - load in history file */ @@ -87,11 +88,12 @@ char buffer[BUFSIZE]; /* first initalize internal array */ - for (i=0;i<12;i++) + for (i=0;i1) printf("%s\n",msg_put_hist); - for (i=0;i<12;i++) + for (i=0;istring, + sprintf(buffer,"%s\n%d %lld %lld %lld %lld %lld\n", uptr->string, uptr->flag, uptr->count, uptr->files, uptr->xfer, uptr->entry, uptr->exit); if (fputs(buffer,fp)==EOF) return 1; @@ -272,7 +282,7 @@ hptr=sm_htab[i]; while (hptr!=NULL) { - sprintf(buffer,"%s\n%d %lu %lu %.0f %lu %lu\n%s\n", + sprintf(buffer,"%s\n%d %lld %lld %lld %lld %lld\n%s\n", hptr->string, hptr->flag, hptr->count, @@ -294,7 +304,7 @@ hptr=sd_htab[i]; while (hptr!=NULL) { - sprintf(buffer,"%s\n%d %lu %lu %.0f %lu %lu\n%s\n", + sprintf(buffer,"%s\n%d %lld %lld %lld %lld %lld\n%s\n", hptr->string, hptr->flag, hptr->count, @@ -318,7 +328,7 @@ rptr=rm_htab[i]; while (rptr!=NULL) { - sprintf(buffer,"%s\n%d %lu\n", rptr->string, + sprintf(buffer,"%s\n%d %lld\n", rptr->string, rptr->flag, rptr->count); if (fputs(buffer,fp)==EOF) return 1; /* error exit */ rptr=rptr->next; @@ -336,7 +346,7 @@ aptr=am_htab[i]; while (aptr!=NULL) { - sprintf(buffer,"%s\n%d %lu\n", aptr->string, + sprintf(buffer,"%s\n%d %lld\n", aptr->string, aptr->flag, aptr->count); if (fputs(buffer,fp)==EOF) return 1; /* error exit */ aptr=aptr->next; @@ -352,7 +362,7 @@ sptr=sr_htab[i]; while (sptr!=NULL) { - sprintf(buffer,"%s\n%lu\n", sptr->string,sptr->count); + sprintf(buffer,"%s\n%lld\n", sptr->string,sptr->count); if (fputs(buffer,fp)==EOF) return 1; /* error exit */ sptr=sptr->next; } @@ -367,7 +377,7 @@ iptr=im_htab[i]; while (iptr!=NULL) { - sprintf(buffer,"%s\n%d %lu %lu %.0f %lu %lu\n", + sprintf(buffer,"%s\n%d %lld %lld %lld %lld %lld\n", iptr->string, 
iptr->flag, iptr->count, @@ -381,6 +391,21 @@ } if (fputs("# End Of Table - usernames\n",fp)==EOF) return 1; + /* dir list */ + if (fputs("# -dirs- \n",fp)==EOF) return 1; /* error exit */ + for (i=0;istring, + dptr->flag, dptr->count, dptr->xfer); + if (fputs(buffer,fp)==EOF) return 1; + dptr=dptr->next; + } + } + if (fputs("# End Of Table - dirs\n",fp)==EOF) return 1; + fclose(fp); /* close data file... */ return 0; /* successful, return with good return code */ } @@ -399,11 +424,13 @@ struct anode t_anode; struct snode t_snode; struct inode t_inode; + struct pnode t_dnode; + int numfields; char buffer[BUFSIZE]; char tmp_buf[BUFSIZE]; - u_long ul_bogus=0; + u_int64_t ul_bogus=0; fp=fopen(state_fname,"r"); if (fp==NULL) @@ -437,15 +464,25 @@ /* Get monthly totals */ if ((fgets(buffer,BUFSIZE,fp)) != NULL) { - sscanf(buffer,"%lu %lu %lu %lu %lu %lu %lf %lu %lu %lu", + numfields = + sscanf(buffer,"%lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld", &t_hit, &t_file, &t_site, &t_url, - &t_ref, &t_agent, &t_xfer, &t_page, &t_visit, &t_user); + &t_ref, &t_agent, &t_xfer, &t_page, &t_visit, &t_user, &t_code2xx304); + if (numfields<9) /* kludge for reading 1.20.xx history files */ + { + t_page = 0; + t_visit = 0; + } + if (numfields<11) /* kludge for reading <2.01.13 history files */ + { + t_code2xx304 = 0; + } } else return 3; /* error exit */ /* Get daily totals */ if ((fgets(buffer,BUFSIZE,fp)) != NULL) { - sscanf(buffer,"%lu %lu %lu %d %d", + sscanf(buffer,"%lld %lld %lld %d %d", &dt_site, &ht_hit, &mh_hit, &f_day, &l_day); } else return 4; /* error exit */ @@ -454,7 +491,7 @@ { if ((fgets(buffer,BUFSIZE,fp)) != NULL) { - sscanf(buffer,"%lu %lu %lf %lu %lu %lu", + sscanf(buffer,"%lld %lld %lld %lld %lld %lld", &tm_hit[i],&tm_file[i],&tm_xfer[i],&tm_site[i],&tm_page[i], &tm_visit[i]); } else return 5; /* error exit */ @@ -465,7 +502,7 @@ { if ((fgets(buffer,BUFSIZE,fp)) != NULL) { - sscanf(buffer,"%lu %lu %lf %lu", + sscanf(buffer,"%lld %lld %lld %lld", 
&th_hit[i],&th_file[i],&th_xfer[i],&th_page[i]); } else return 6; /* error exit */ } @@ -474,7 +511,7 @@ for (i=0;i /some/dir +.br +http://example.com:80/some/dir/index.html ==> /some/dir +.br +http://example.com/some/dir/ ==> /some/dir +.br +http://example.com/some/dir ==> /some +.br +http://example.com/ ==> / +.br +http://example.com ==> / +.in -0.5i +.sp 1 +Use a \fInum\fP of zero to disable. +.TP 8 .B TopKURLs \fInum\fP Display the top \fInum\fP URLs (by KByte) table. Use zero to disable. .TP 8 +.B TopKDirs \fInum\fP +Display the top \fInum\fP URL path directroies (by KByte) table. +See the \fITopDirs\fP configuration keyword for details. +.sp 1 +Use a \fInum\fP of zero to disable. +.TP 8 +.TP 8 .B AllURLs \fP( yes | \fBno\fP ) Create seperate HTML page with \fBAll\fP URLs. .TP 8 +.B AllDirs \fP( yes | \fBno\fP ) +Create seperate HTML page with \fBAll\fP URL path directroies. +.TP 8 .B TopCountries \fInum\fP Display the top \fInum\fP Countries in the table. Use zero to disable. .TP 8 @@ -536,6 +583,9 @@ .TP 8 .B HideUser \fIname\fP Hide Usernames that match \fIname\fP. +.B HideDir \fIname\fP +Hide URL path directroies from the \fITopDirs\fP list. +See the \fITopDirs\fP configuration keyword for details. .TP 8 .B IgnoreAgent \fIname\fP Ignore User Agents that match \fIname\fP. @@ -650,6 +700,9 @@ .B DumpURLs \fP( yes | \fBno\fP ) Dump the url data to a tab delimited file. .TP 8 +.B DumpDirs \fP( yes | \fBno\fP ) +Dump the dir data to a tab delimited file. +.TP 8 .B DumpReferrers \fP( yes | \fBno\fP ) Dump the referrer data to a tab delimitd file. 
This data is only available if using a log that contains referrer information diff -r -u webalizer-2.01-10.init/webalizer.c webalizer-2.01-10/webalizer.c --- webalizer-2.01-10.init/webalizer.c 2002-04-16 15:11:31.000000000 -0700 +++ webalizer-2.01-10/webalizer.c 2007-01-23 23:02:19.000000000 -0800 @@ -84,6 +84,7 @@ /* internal function prototypes */ +char *skip_proto(char *); /* skip protocol such as http:// */ void clear_month(); /* clear monthly stuff */ char *unescape(char *); /* unescape URL's */ char from_hex(char); /* convert hex to dec */ @@ -101,8 +102,8 @@ /*********************************************/ char *version = "2.01"; /* program version */ -char *editlvl = "10"; /* edit level */ -char *moddate = "16-Apr-2002"; /* modification date */ +char *editlvl = "13"; /* edit level */ +char *moddate = "30-Sep-2006"; /* modification date */ char *copyright = "Copyright 1997-2001 by Bradford L. Barrett"; int verbose = 2; /* 2=verbose,1=err, 0=none */ @@ -120,7 +121,7 @@ int mangle_agent = 0; /* mangle user agents */ int incremental = 0; /* incremental mode 1=yes */ int use_https = 0; /* use 'https://' on URL's */ -int visit_timeout= 1800; /* visit timeout (seconds) */ +unsigned int visit_timeout= 1800; /* visit timeout (seconds) */ int graph_legend = 1; /* graph legend (1=yes) */ int graph_lines = 2; /* graph lines (0=none) */ int fold_seq_err = 0; /* fold seq err (0=no) */ @@ -139,10 +140,17 @@ char *dns_cache = NULL; /* DNS cache file name */ int dns_children = 0; /* DNS children (0=don't do)*/ +#if defined(MODIFY_LOG_LINES) +int strip_abs_url = 0; /* 1 => remove host part of URLs */ +int ignore_rel_url = 0; /* 1 => ignore non-abs URLs */ +#endif /* MODIFY_LOG_LINES */ + int ntop_sites = 30; /* top n sites to display */ int ntop_sitesK = 10; /* top n sites (by kbytes) */ int ntop_urls = 30; /* top n url's to display */ int ntop_urlsK = 10; /* top n url's (by kbytes) */ +int ntop_dirs = 30; /* top n dirs to display */ +int ntop_dirsK = 10; /* top n dirs (by 
kbytes) */ int ntop_entry = 10; /* top n entry url's */ int ntop_exit = 10; /* top n exit url's */ int ntop_refs = 30; /* top n referrers "" */ @@ -153,6 +161,7 @@ int all_sites = 0; /* List All sites (0=no) */ int all_urls = 0; /* List All URL's (0=no) */ +int all_dirs = 0; /* List All dirs (0=no) */ int all_refs = 0; /* List All Referrers */ int all_agents = 0; /* List All User Agents */ int all_search = 0; /* List All Search Strings */ @@ -160,6 +169,7 @@ int dump_sites = 0; /* Dump tab delimited sites */ int dump_urls = 0; /* URL's */ +int dump_dirs = 0; /* dirs */ int dump_refs = 0; /* Referrers */ int dump_agents = 0; /* User Agents */ int dump_users = 0; /* Usernames */ @@ -171,39 +181,40 @@ cur_day=0, cur_hour=0, /* tracking variables */ cur_min=0, cur_sec=0; -u_long cur_tstamp=0; /* Timestamp... */ -u_long rec_tstamp=0; -u_long req_tstamp=0; -u_long epoch; /* used for timestamp adj. */ +u_int64_t cur_tstamp=0; /* Timestamp... */ +u_int64_t rec_tstamp=0; +u_int64_t req_tstamp=0; +u_int64_t epoch; /* used for timestamp adj. */ int check_dup=0; /* check for dup flag */ int gz_log=0; /* gziped log? 
(0=no) */ -double t_xfer=0.0; /* monthly total xfer value */ -u_long t_hit=0,t_file=0,t_site=0, /* monthly total vars */ - t_url=0,t_ref=0,t_agent=0, +u_int64_t t_xfer=0.0; /* monthly total xfer value */ +u_int64_t t_hit=0,t_file=0,t_site=0, /* monthly total vars */ + t_url=0,t_dir=0,t_ref=0,t_agent=0, t_page=0, t_visit=0, t_user=0; +u_int64_t t_code2xx304=0; -double tm_xfer[31]; /* daily transfer totals */ +u_int64_t tm_xfer[31]; /* daily transfer totals */ -u_long tm_hit[31], tm_file[31], /* daily total arrays */ +u_int64_t tm_hit[31], tm_file[31], /* daily total arrays */ tm_site[31], tm_page[31], tm_visit[31]; -u_long dt_site; /* daily 'sites' total */ +u_int64_t dt_site; /* daily 'sites' total */ -u_long ht_hit=0, mh_hit=0; /* hourly hits totals */ +u_int64_t ht_hit=0, mh_hit=0; /* hourly hits totals */ -u_long th_hit[24], th_file[24], /* hourly total arrays */ +u_int64_t th_hit[24], th_file[24], /* hourly total arrays */ th_page[24]; -double th_xfer[24]; +u_int64_t th_xfer[24]; int f_day,l_day; /* first/last day vars */ struct utsname system_info; /* system info structure */ -u_long ul_bogus =0; /* Dummy counter for groups */ +u_int64_t ul_bogus =0; /* Dummy counter for groups */ struct log_struct log_rec; /* expanded log storage */ @@ -245,9 +256,9 @@ int rec_year,rec_month=1,rec_day,rec_hour,rec_min,rec_sec; int good_rec =0; /* 1 if we had a good record */ - u_long total_rec =0; /* Total Records Processed */ - u_long total_ignore=0; /* Total Records Ignored */ - u_long total_bad =0; /* Total Bad Records */ + u_int64_t total_rec =0; /* Total Records Processed */ + u_int64_t total_ignore=0; /* Total Records Ignored */ + u_int64_t total_bad =0; /* Total Bad Records */ int max_ctry; /* max countries defined */ @@ -257,6 +268,9 @@ "jul", "aug", "sep", "oct", "nov", "dec"}; + int code2xx304; /* 1 if access code was 2xx */ + int parse_code; /* 0 => OK, -1 => error, >0 => silent error */ + /* initalize epoch */ epoch=jdate(1,1,1970); /* used for timestamp adj. 
*/ @@ -272,7 +286,7 @@ /* get command line options */ opterr = 0; /* disable parser errors */ - while ((i=getopt(argc,argv,"a:A:c:C:dD:e:E:fF:g:GhHiI:l:Lm:M:n:N:o:pP:qQr:R:s:S:t:Tu:U:vVx:XY"))!=EOF) + while ((i=getopt(argc,argv,"a:A:c:C:dD:e:E:fF:g:GhHiI:j:J:l:Lm:M:n:N:o:pP:qQr:R:s:S:t:Tu:U:vVx:XYzZ"))!=EOF) { switch (i) { @@ -294,6 +308,8 @@ case 'H': hourly_stats=0; break; /* no hourly stats */ case 'i': ignore_hist=1; break; /* Ignore history */ case 'I': add_nlist(optarg,&index_alias); break; /* Index alias */ + case 'j': add_nlist(optarg,&hidden_dirs); break; /* hide URL dir */ + case 'J': ntop_dirs=atoi(optarg); break; /* show topdirs used */ case 'l': graph_lines=atoi(optarg); break; /* Graph Lines */ case 'L': graph_legend=0; break; /* Graph Legends */ case 'm': visit_timeout=atoi(optarg); break; /* Visit Timeout */ @@ -318,6 +334,10 @@ case 'x': html_ext=optarg; break; /* HTML file extension */ case 'X': hide_sites=1; break; /* Hide ind. sites */ case 'Y': ctry_graph=0; break; /* Supress ctry graph */ +#if defined(MODIFY_LOG_LINES) + case 'z': strip_abs_url=1; break; /* remove host part of URLs */ + case 'Z': ignore_rel_url=1; break; /* ignore non-abs URLs */ +#endif /* MODIFY_LOG_LINES */ } } @@ -385,6 +405,7 @@ rm_htab[i]=NULL; am_htab[i]=NULL; sr_htab[i]=NULL; + di_htab[i]=NULL; } /* Be polite and announce yourself... */ @@ -536,11 +557,10 @@ total_rec++; if (strlen(buffer) == (BUFSIZE-1)) { - if (verbose) + if (debug_mode) { fprintf(stderr,"%s",msg_big_rec); - if (debug_mode) fprintf(stderr,":\n%s",buffer); - else fprintf(stderr,"\n"); + fprintf(stderr,":\n%s",buffer); } total_bad++; /* bump bad record counter */ @@ -561,8 +581,9 @@ /* got a record... 
*/ strcpy(tmp_buf, buffer); /* save buffer in case of error */ - if (parse_record(buffer)) /* parse the record */ - { + parse_code = parse_record(buffer); + switch (parse_code) { + case 0: /*********************************************/ /* PASSED MINIMAL CHECKS, DO A LITTLE MORE */ /*********************************************/ @@ -588,12 +609,12 @@ if (rec_hour>23) rec_hour=0; /* minimal sanity check on date */ - if ((i>=12)||(rec_min>59)||(rec_sec>59)||(rec_year<1990)) + if ((i>=12)||(rec_min>59)||(rec_sec>60)||(rec_year<1990)) { total_bad++; /* if a bad date, bump counter */ if (verbose) { - fprintf(stderr,"%s: %s [%lu]", + fprintf(stderr,"%s: %s [%lld]", msg_bad_date,log_rec.datetime,total_rec); if (debug_mode) fprintf(stderr,":\n%s\n",tmp_buf); else fprintf(stderr,"\n"); @@ -893,7 +914,7 @@ /* if necessary, shrink referrer to fit storage */ if (strlen(log_rec.refer)>=MAXREFH) { - if (verbose) fprintf(stderr,"%s [%lu]\n", + if (debug_mode) fprintf(stderr,"%s [%lld]\n", msg_big_ref,total_rec); log_rec.refer[MAXREFH-1]='\0'; } @@ -901,7 +922,7 @@ /* if necessary, shrink URL to fit storage */ if (strlen(log_rec.url)>=MAXURLH) { - if (verbose) fprintf(stderr,"%s [%lu]\n", + if (debug_mode) fprintf(stderr,"%s [%lld]\n", msg_big_req,total_rec); log_rec.url[MAXURLH-1]='\0'; } @@ -1012,7 +1033,7 @@ /* Ignore/Include check */ if ( (isinlist(include_sites,log_rec.hostname)==NULL) && (isinlist(include_urls,log_rec.url)==NULL) && - (isinlist(include_refs,log_rec.refer)==NULL) && + (isinlist(include_refs,skip_proto(log_rec.refer))==NULL) && (isinlist(include_agents,log_rec.agent)==NULL) && (isinlist(include_users,log_rec.ident)==NULL) ) { @@ -1022,28 +1043,29 @@ { total_ignore++; continue; } if (isinlist(ignored_agents,log_rec.agent)!=NULL) { total_ignore++; continue; } - if (isinlist(ignored_refs,log_rec.refer)!=NULL) + if (isinlist(ignored_refs,skip_proto(log_rec.refer))!=NULL) { total_ignore++; continue; } if (isinlist(ignored_users,log_rec.ident)!=NULL) { 
total_ignore++; continue; } } /* Bump response code totals */ + code2xx304 = 0; switch (log_rec.resp_code) { case RC_CONTINUE: i=IDX_CONTINUE; break; case RC_SWITCHPROTO: i=IDX_SWITCHPROTO; break; - case RC_OK: i=IDX_OK; break; - case RC_CREATED: i=IDX_CREATED; break; - case RC_ACCEPTED: i=IDX_ACCEPTED; break; - case RC_NONAUTHINFO: i=IDX_NONAUTHINFO; break; - case RC_NOCONTENT: i=IDX_NOCONTENT; break; - case RC_RESETCONTENT: i=IDX_RESETCONTENT; break; - case RC_PARTIALCONTENT: i=IDX_PARTIALCONTENT; break; + case RC_OK: i=IDX_OK; code2xx304=1; break; + case RC_CREATED: i=IDX_CREATED; code2xx304=1; break; + case RC_ACCEPTED: i=IDX_ACCEPTED; code2xx304=1; break; + case RC_NONAUTHINFO: i=IDX_NONAUTHINFO; code2xx304=1; break; + case RC_NOCONTENT: i=IDX_NOCONTENT; code2xx304=1; break; + case RC_RESETCONTENT: i=IDX_RESETCONTENT; code2xx304=1; break; + case RC_PARTIALCONTENT: i=IDX_PARTIALCONTENT; code2xx304=1; break; case RC_MULTIPLECHOICES: i=IDX_MULTIPLECHOICES; break; case RC_MOVEDPERM: i=IDX_MOVEDPERM; break; case RC_MOVEDTEMP: i=IDX_MOVEDTEMP; break; case RC_SEEOTHER: i=IDX_SEEOTHER; break; - case RC_NOMOD: i=IDX_NOMOD; break; + case RC_NOMOD: i=IDX_NOMOD; code2xx304=1; break; case RC_USEPROXY: i=IDX_USEPROXY; break; case RC_MOVEDTEMPORARILY: i=IDX_MOVEDTEMPORARILY; break; case RC_BAD: i=IDX_BAD; break; @@ -1079,21 +1101,29 @@ i=1; else i=0; /* URL/ident hash table (only if valid response code) */ - if ((log_rec.resp_code==RC_OK)||(log_rec.resp_code==RC_NOMOD)|| - (log_rec.resp_code==RC_PARTIALCONTENT)) + if (code2xx304) { /* URL hash table */ - if (put_unode(log_rec.url,OBJ_REG,(u_long)1, - log_rec.xfer_size,&t_url,(u_long)0,(u_long)0,um_htab)) + if (put_unode(log_rec.url,OBJ_REG,(u_int64_t)1, + log_rec.xfer_size,&t_url,(u_int64_t)0,(u_int64_t)0,um_htab)) { if (verbose) /* Error adding URL node, skipping ... 
*/ fprintf(stderr,"%s %s\n", msg_nomem_u, log_rec.url); } + /* dir hash table */ + if (put_pnode(log_rec.dir,OBJ_REG,(u_int64_t)1, + log_rec.xfer_size,&t_dir,di_htab)) + { + if (verbose) + /* Error adding URL node, skipping ... */ + fprintf(stderr,"%s %s\n", msg_nomem_d, log_rec.dir); + } + /* ident (username) hash table */ if (put_inode(log_rec.ident,OBJ_REG, - 1,(u_long)i,log_rec.xfer_size,&t_user, + 1,(u_int64_t)i,log_rec.xfer_size,&t_user, 0,rec_tstamp,im_htab)) { if (verbose) @@ -1102,20 +1132,20 @@ } } - /* referrer hash table */ - if (ntop_refs) + /* 2xx access - referrer hash table */ + if (code2xx304 && ntop_refs) { if (log_rec.refer[0]!='\0') - if (put_rnode(log_rec.refer,OBJ_REG,(u_long)1,&t_ref,rm_htab)) + if (put_rnode(log_rec.refer,OBJ_REG,(u_int64_t)1,&t_ref,rm_htab)) { if (verbose) fprintf(stderr,"%s %s\n", msg_nomem_r, log_rec.refer); } } - /* hostname (site) hash table - daily */ - if (put_hnode(log_rec.hostname,OBJ_REG, - 1,(u_long)i,log_rec.xfer_size,&dt_site, + /* 2xx access - hostname (site) hash table - daily */ + if (code2xx304 && put_hnode(log_rec.hostname,OBJ_REG, + 1,(u_int64_t)i,log_rec.xfer_size,&dt_site, 0,rec_tstamp,"",sd_htab)) { if (verbose) @@ -1123,9 +1153,9 @@ fprintf(stderr,"%s %s\n",msg_nomem_dh, log_rec.hostname); } - /* hostname (site) hash table - monthly */ - if (put_hnode(log_rec.hostname,OBJ_REG, - 1,(u_long)i,log_rec.xfer_size,&t_site, + /* 2xx access - hostname (site) hash table - monthly */ + if (code2xx304 && put_hnode(log_rec.hostname,OBJ_REG, + 1,(u_int64_t)i,log_rec.xfer_size,&t_site, 0,rec_tstamp,"",sm_htab)) { if (verbose) @@ -1137,7 +1167,7 @@ if (ntop_agents) { if (log_rec.agent[0]!='\0') - if (put_anode(log_rec.agent,OBJ_REG,(u_long)1,&t_agent,am_htab)) + if (put_anode(log_rec.agent,OBJ_REG,(u_int64_t)1,&t_agent,am_htab)) { if (verbose) fprintf(stderr,"%s %s\n", msg_nomem_a, log_rec.agent); @@ -1146,6 +1176,9 @@ /* bump monthly/daily/hourly totals */ t_hit++; ht_hit++; /* daily/hourly hits */ + if (code2xx304) 
{ + t_code2xx304++; /* 2xx and 304 code hits */ + } t_xfer += log_rec.xfer_size; /* total xfer size */ tm_xfer[rec_day-1] += log_rec.xfer_size; /* daily xfer total */ tm_hit[rec_day-1]++; /* daily hits total */ @@ -1160,8 +1193,8 @@ th_file[rec_hour]++; } - /* Pages (pageview) calculation */ - if (ispage(log_rec.url)) + /* 2xx access - (pageview) calculation */ + if (code2xx304 && ispage(log_rec.url)) { t_page++; tm_page[rec_day-1]++; @@ -1175,11 +1208,11 @@ /* RECORD PROCESSED - DO GROUPS HERE */ /*********************************************/ - /* URL Grouping */ - if ( (cp1=isinglist(group_urls,log_rec.url))!=NULL) + /* 2xx access - URL Grouping */ + if (code2xx304 && (cp1=isinglist(group_urls,log_rec.url))!=NULL) { - if (put_unode(cp1,OBJ_GRP,(u_long)1,log_rec.xfer_size, - &ul_bogus,(u_long)0,(u_long)0,um_htab)) + if (put_unode(cp1,OBJ_GRP,(u_int64_t)1,log_rec.xfer_size, + &ul_bogus,(u_int64_t)0,(u_int64_t)0,um_htab)) { if (verbose) /* Error adding URL node, skipping ... */ @@ -1187,10 +1220,10 @@ } } - /* Site Grouping */ - if ( (cp1=isinglist(group_sites,log_rec.hostname))!=NULL) + /* 2xx access - Site Grouping */ + if (code2xx304 && (cp1=isinglist(group_sites,log_rec.hostname))!=NULL) { - if (put_hnode(cp1,OBJ_GRP,1,(u_long)(log_rec.resp_code==RC_OK)?1:0, + if (put_hnode(cp1,OBJ_GRP,1,(u_int64_t)(log_rec.resp_code==RC_OK)?1:0, log_rec.xfer_size,&ul_bogus, 0,rec_tstamp,"",sm_htab)) { @@ -1199,7 +1232,7 @@ fprintf(stderr,"%s %s\n", msg_nomem_mh, cp1); } } - else + else if (code2xx304) { /* Domain Grouping */ if (group_domains) @@ -1208,7 +1241,7 @@ if (cp1 != NULL) { if (put_hnode(cp1,OBJ_GRP,1, - (u_long)(log_rec.resp_code==RC_OK)?1:0, + (u_int64_t)(log_rec.resp_code==RC_OK)?1:0, log_rec.xfer_size,&ul_bogus, 0,rec_tstamp,"",sm_htab)) { @@ -1220,10 +1253,10 @@ } } - /* Referrer Grouping */ - if ( (cp1=isinglist(group_refs,log_rec.refer))!=NULL) + /* 2xx access - Referrer Grouping */ + if (code2xx304 && 
(cp1=isinglist(group_refs,skip_proto(log_rec.refer)))!=NULL) { - if (put_rnode(cp1,OBJ_GRP,(u_long)1,&ul_bogus,rm_htab)) + if (put_rnode(cp1,OBJ_GRP,(u_int64_t)1,&ul_bogus,rm_htab)) { if (verbose) /* Error adding Referrer node, skipping ... */ @@ -1234,7 +1267,7 @@ /* User Agent Grouping */ if ( (cp1=isinglist(group_agents,log_rec.agent))!=NULL) { - if (put_anode(cp1,OBJ_GRP,(u_long)1,&ul_bogus,am_htab)) + if (put_anode(cp1,OBJ_GRP,(u_int64_t)1,&ul_bogus,am_htab)) { if (verbose) /* Error adding User Agent node, skipping ... */ @@ -1245,7 +1278,7 @@ /* Ident (username) Grouping */ if ( (cp1=isinglist(group_users,log_rec.ident))!=NULL) { - if (put_inode(cp1,OBJ_GRP,1,(u_long)(log_rec.resp_code==RC_OK)?1:0, + if (put_inode(cp1,OBJ_GRP,1,(u_int64_t)(log_rec.resp_code==RC_OK)?1:0, log_rec.xfer_size,&ul_bogus, 0,rec_tstamp,im_htab)) { @@ -1254,14 +1287,13 @@ fprintf(stderr,"%s %s\n", msg_nomem_i, cp1); } } - } + break; /*********************************************/ /* BAD RECORD */ /*********************************************/ - else - { + case -1: /* If first record, check if stupid Netscape header stuff */ if ( (total_rec==1) && (strncmp(buffer,"format=",7)==0) ) { @@ -1276,11 +1308,23 @@ total_bad++; if (verbose) { - fprintf(stderr,"%s (%lu)",msg_bad_rec,total_rec); + fprintf(stderr,"%s (%lld)",msg_bad_rec,total_rec); if (debug_mode) fprintf(stderr,":\n%s\n",tmp_buf); else fprintf(stderr,"\n"); } } + break; + + /*********************************************/ + /* SILENT BAD RECORD */ + /*********************************************/ + + default: + if (debug_mode) { + fprintf(stderr,"%s (%lld)",msg_bad_rec,total_rec); + fprintf(stderr,"parse_code: %d:\n%s\n",parse_code,tmp_buf); + } + break; } } @@ -1319,14 +1363,16 @@ end_time = times(&mytms); /* display timing totals? 
*/ if (time_me || (verbose>1)) { - printf("%lu %s ",total_rec, msg_records); + u_int64_t tmp; + + printf("%lld %s ",total_rec, msg_records); if (total_ignore) { - printf("(%lu %s",total_ignore,msg_ignored); - if (total_bad) printf(", %lu %s) ",total_bad,msg_bad); + printf("(%lld %s",total_ignore,msg_ignored); + if (total_bad) printf(", %lld %s) ",total_bad,msg_bad); else printf(") "); } - else if (total_bad) printf("(%lu %s) ",total_bad,msg_bad); + else if (total_bad) printf("(%lld %s) ",total_bad,msg_bad); /* get processing time (end-start) */ temp_time = (float)(end_time-start_time)/CLK_TCK; @@ -1334,10 +1380,10 @@ /* calculate records per second */ if (temp_time) - i=( (int)( (float)total_rec/temp_time ) ); - else i=0; + tmp=( (u_int64_t)( (double)total_rec/temp_time ) ); + else tmp=0; - if ( (i>0) && (i<=total_rec) ) printf(", %d/sec\n", i); + if ( (tmp>0) && (tmp<=total_rec) ) printf(", %lld/sec\n", tmp); else printf("\n"); } @@ -1356,6 +1402,68 @@ } } +/************************************************************************/ +/* SKIP_PROTO - skip any leading protocol:// or protocol:/ or protocol: */ +/************************************************************************/ + +char * +skip_proto(char *str) +{ + char *p; /* test pointer */ + + /* + * firewall - NULL string returns NULL + */ + if (str == NULL) { + return NULL; + } else { + p = str; + } + + /* + * first try to skip the leading protocol (such as http or https) + */ + while (*p && isascii(*p) && isalnum(*p)) { + ++p; + } + + /* + * protocol must be followed by : + */ + if (*p == ':') { + + /* skipping protcol: */ + if (*++p == '/') { + + /* skipping protcol:/ */ + if (*++p == '/') { + + /* skipping protcol:// */ + if (*++p == '\0') { + + /* referer only foo:// so use the whole referer */ + p = str; + } + + /* referer only foo:/ so use the whole referer */ + } else if (*p == '\0') { + p = str; + } + + /* referer only foo: so use the whole referer */ + } else if (*p == '\0') { + p = str; + } + + /* 
no leading protocol: so use the whole referer */ + } else { + p = str; + } + + /* return original string or string past skipped part */ + return p; +} + /*********************************************/ /* GET_CONFIG - get configuration file info */ /*********************************************/ @@ -1449,7 +1557,12 @@ "DNSCache", /* DNS Cache file name 84 */ "DNSChildren", /* DNS Children (0=no DNS) 85 */ "DailyGraph", /* Daily Graph (0=no) 86 */ - "DailyStats" /* Daily Stats (0=no) 87 */ + "DailyStats", /* Daily Stats (0=no) 87 */ + "HideDir", /* URL path dirs to hide 90 */ + "DumpDirs", /* Dump dirs tab file 91 */ + "TopDirs", /* Top dirs 92 */ + "TopKDirs", /* Top dirs (by KBytes) 93 */ + "AllDirs", /* List all Dirs? 94 */ }; FILE *fp; @@ -1516,7 +1629,7 @@ case 15: ntop_ctrys = atoi(value); break; /* TopCountries */ case 16: add_nlist(value,&hidden_sites); break; /* HideSite */ case 17: add_nlist(value,&hidden_urls); break; /* HideURL */ - case 18: add_nlist(value,&hidden_refs); break; /* HideReferrer */ + case 18: add_nlist(skip_proto(value),&hidden_refs); break; /* HideReferrer */ case 19: add_nlist(value,&hidden_agents); break; /* HideAgent */ case 20: add_nlist(value,&index_alias); break; /* IndexAlias */ case 21: add_nlist(value,&html_head); break; /* HTMLHead */ @@ -1531,7 +1644,7 @@ case 30: local_time=(value[0]=='y')?0:1; break; /* GMTTime */ case 31: add_glist(value,&group_urls); break; /* GroupURL */ case 32: add_glist(value,&group_sites); break; /* GroupSite */ - case 33: add_glist(value,&group_refs); break; /* GroupReferrer */ + case 33: add_glist(skip_proto(value),&group_refs); break; /* GroupReferrer */ case 34: add_glist(value,&group_agents); break; /* GroupAgent */ case 35: shade_groups=(value[0]=='y')?1:0; break; /* GroupShading */ case 36: hlite_groups=(value[0]=='y')?1:0; break; /* GroupHighlight */ @@ -1545,7 +1658,7 @@ case 44: use_https=(value[0]=='y')?1:0; break; /* Use https:// */ case 45: add_nlist(value,&include_sites); break; /* 
IncludeSite */ case 46: add_nlist(value,&include_urls); break; /* IncludeURL */ - case 47: add_nlist(value,&include_refs); break; /* IncludeReferrer*/ + case 47: add_nlist(skip_proto(value),&include_refs); break; /* IncludeReferrer*/ case 48: add_nlist(value,&include_agents); break; /* IncludeAgent */ case 49: add_nlist(value,&page_type); break; /* PageType */ case 50: visit_timeout=atoi(value); break; /* VisitTimeout */ @@ -1593,6 +1706,13 @@ #endif /* USE_DNS */ case 86: daily_graph=(value[0]=='n')?0:1; break; /* HourlyGraph */ case 87: daily_stats=(value[0]=='n')?0:1; break; /* HourlyStats */ + case 88: /* Disable use of GeoIP if not enabled */ + case 89: printf("%s '%s' (%s)\n",msg_bad_key,keyword,fname); break; + case 90: add_nlist(value,&hidden_dirs); break; /* HideDir */ + case 91: dump_dirs=(value[0]=='y')?1:0; break; /* DumpDirs? */ + case 92: ntop_dirs = atoi(value); break; /* TopDirs */ + case 93: ntop_dirsK = atoi(value); break; /* TopKDirs (KB) */ + case 94: all_dirs=(value[0]=='y')?1:0; break; /* All dirs? */ } } fclose(fp); @@ -1649,7 +1769,8 @@ ctry[i].files=0; ctry[i].xfer=0; } - t_hit=t_file=t_site=t_url=t_ref=t_agent=t_page=t_visit=t_user=0; + t_hit=t_file=t_site=t_url=t_dir=t_ref=t_agent=t_page=t_visit=t_user=0; + t_code2xx304=0; t_xfer=0.0; mh_hit = dt_site = 0; f_day=l_day=1; @@ -1741,10 +1862,10 @@ /* CTRY_IDX - create unique # from domain */ /*********************************************/ -u_long ctry_idx(char *str) +u_int64_t ctry_idx(char *str) { int i=strlen(str),j=0; - u_long idx=0; + u_int64_t idx=0; char *cp1=str+i; for (;i>0;i--) { idx+=((*--cp1-'a'+1)< (b) ? 
(a) : (b)) #endif -#define MAXHASH 2048 /* Size of our hash tables */ +#define MAXHASH 3217 /* Size of our hash tables */ #define BUFSIZE 4096 /* Max buffer size for log record */ #define MAXHOST 128 /* Max hostname buffer size */ #define MAXURL 1024 /* Max HTTP request/URL field size */ -#define MAXURLH 128 /* Max URL field size in htab */ +#define MAXURLH 256 /* Max URL field size in htab */ #define MAXREF 1024 /* Max referrer field size */ -#define MAXREFH 128 /* Max referrer field size in htab */ +#define MAXREFH 256 /* Max referrer field size in htab */ #define MAXAGENT 64 /* Max user agent field size */ #define MAXCTRY 48 /* Max country name size */ #define MAXSRCH 256 /* Max size of search string buffer */ -#define MAXSRCHH 64 /* Max size of search str in htab */ +#define MAXSRCHH 256 /* Max size of search str in htab */ #define MAXIDENT 64 /* Max size of ident string (user) */ +/* NM must be at least 12 */ +/* XXX - right now MH != 12 does not work, keep this at 12 */ +#define MH (1*12) /* months of history to keep */ + #define SLOP_VAL 3600 /* out of sequence slop (seconds) */ /* Log types */ @@ -125,30 +129,31 @@ /* Response code structure */ struct response_code { char *desc; /* response code struct */ - u_long count; }; + u_int64_t count; }; /* Country code structure */ -struct country_code { u_long idx; +struct country_code { u_int64_t idx; char *desc; - u_long count; - u_long files; - double xfer; }; + u_int64_t count; + u_int64_t files; + u_int64_t xfer; }; typedef struct country_code *CLISTPTR; /* log record structure */ -struct log_struct { char hostname[MAXHOST]; /* hostname */ +struct log_struct { char hostname[MAXHOST+1]; /* hostname */ char datetime[29]; /* raw timestamp */ - char url[MAXURL]; /* raw request field */ + char url[MAXURL+1]; /* raw request field */ + char dir[MAXURL+1]; /* dirname of URL path */ int resp_code; /* response code */ - u_long xfer_size; /* xfer size in bytes */ + u_int64_t xfer_size; /* xfer size in bytes */ #ifdef 
USE_DNS struct in_addr addr; /* IP address structure */ #endif /* USE_DNS */ - char refer[MAXREF]; /* referrer */ - char agent[MAXAGENT]; /* user agent (browser) */ - char srchstr[MAXSRCH]; /* search string */ - char ident[MAXIDENT]; }; /* ident string (user) */ + char refer[MAXREF+1]; /* referrer */ + char agent[MAXAGENT+1]; /* user agent (browser) */ + char srchstr[MAXSRCH+1]; /* search string */ + char ident[MAXIDENT+1]; }; /* ident string (user) */ extern struct log_struct log_rec; @@ -172,7 +177,7 @@ extern int mangle_agent ; /* mangle user agents */ extern int incremental ; /* incremental mode 1=yes */ extern int use_https ; /* use 'https://' on URL's */ -extern int visit_timeout; /* visit timeout (30 min) */ +extern unsigned int visit_timeout; /* visit timeout (30 min) */ extern int graph_legend ; /* graph legend (1=yes) */ extern int graph_lines ; /* graph lines (0=none) */ extern int fold_seq_err ; /* fold seq err (0=no) */ @@ -191,10 +196,17 @@ extern char *dns_cache ; /* DNS cache file name */ extern int dns_children ; /* # of DNS children */ +#if defined(MODIFY_LOG_LINES) +extern int strip_abs_url; /* 1 => remove host part of URLs */ +extern int ignore_rel_url; /* 1 => ignore non-abs URLs */ +#endif /* MODIFY_LOG_LINES */ + extern int ntop_sites ; /* top n sites to display */ extern int ntop_sitesK ; /* top n sites (by kbytes) */ extern int ntop_urls ; /* top n url's to display */ extern int ntop_urlsK ; /* top n url's (by kbytes) */ +extern int ntop_dirs ; /* top n dirs to display */ +extern int ntop_dirsK ; /* top n dirs (by kbytes) */ extern int ntop_entry ; /* top n entry url's */ extern int ntop_exit ; /* top n exit url's */ extern int ntop_refs ; /* top n referrers "" */ @@ -205,6 +217,7 @@ extern int all_sites ; /* List All sites (0=no) */ extern int all_urls ; /* List All URL's (0=no) */ +extern int all_dirs ; /* List All dirs (0=no) */ extern int all_refs ; /* List All Referrers */ extern int all_agents ; /* List All User Agents */ extern int 
all_search ; /* List All Search Strings */ @@ -212,6 +225,7 @@ extern int dump_sites ; /* Dump tab delimited sites */ extern int dump_urls ; /* URL's */ +extern int dump_dirs ; /* dirs */ extern int dump_refs ; /* Referrers */ extern int dump_agents ; /* User Agents */ extern int dump_users ; /* Usernames */ @@ -219,33 +233,34 @@ extern int dump_header ; /* Dump header as first rec */ extern char *dump_path ; /* Path for dump files */ -extern u_long cur_tstamp; /* Current timestamp */ -extern u_long epoch; /* used for timestamp adj. */ +extern u_int64_t cur_tstamp; /* Current timestamp */ +extern u_int64_t epoch; /* used for timestamp adj. */ extern int check_dup; /* check for dups flag */ extern int cur_year,cur_month, /* year/month/day/hour */ cur_day, cur_hour, /* tracking variables */ cur_min, cur_sec; -extern double t_xfer; /* monthly total xfer value */ -extern u_long t_hit, t_file, t_site, /* monthly total vars */ - t_url, t_ref, t_agent, +extern u_int64_t t_xfer; /* monthly total xfer value */ +extern u_int64_t t_hit, t_file, t_site, /* monthly total vars */ + t_url, t_dir, t_ref, t_agent, t_page,t_visit,t_user; +extern u_int64_t t_code2xx304; -extern double tm_xfer[31]; /* daily transfer totals */ +extern u_int64_t tm_xfer[31]; /* daily transfer totals */ -extern u_long tm_hit[31], tm_file[31], /* daily total arrays */ +extern u_int64_t tm_hit[31], tm_file[31], /* daily total arrays */ tm_site[31],tm_page[31], tm_visit[31]; -extern u_long dt_site; /* daily 'sites' total */ +extern u_int64_t dt_site; /* daily 'sites' total */ -extern u_long ht_hit,mh_hit; /* hourly hits totals */ +extern u_int64_t ht_hit,mh_hit; /* hourly hits totals */ -extern u_long th_hit[24], th_file[24], /* hourly total arrays */ +extern u_int64_t th_hit[24], th_file[24], /* hourly total arrays */ th_page[24]; -extern double th_xfer[24]; +extern u_int64_t th_xfer[24]; extern int f_day,l_day; /* first/last day vars */ extern int gz_log; /* flag for zipped log */ @@ -254,10 +269,11 @@ /* 
define our externally visable functions */ +extern char *skip_proto(char *); /* skip protocol such as http:// */ extern char *cur_time(); -extern u_long ctry_idx(char *); +extern u_int64_t ctry_idx(char *); extern void init_counters(); extern int ispage(char *); -extern u_long jdate(int,int,int); +extern u_int64_t jdate(int,int,int); #endif /* _WEBALIZER_H */