The following patch fixes a number of issues related to multisort v1.1: * Fixed sort bug where 01/Feb/2001:03:26:15 was incorrectly sorted before 31/Jan/2001:23:25:08 * Fixed bugs related to very old dates and dates far in the future. * Fixed bugs related to processing empty input files. * Correctly distinguishes between file EOF and read errors. * Allows multisort to just process a single file. * Added slightly better sanity checks on timestamp string formats. * Fixed a bug where multisort could hang on an I/O error. * Correctly computes POSIX "Seconds since the Epoch" values with full leapyear rules. * Speedup as per Bertrand Demiddelaer's patch of: http://bert.tuxfamily.org/patches/multisort.patch * Added -m maxage which will output only lines <= maxage seconds old instead of all lines. * Updated the usage message. * Compile with LFS support to be able to process log files >2GB The result is what I call multisort v1.1.3. Enjoy. chongo (Landon Curt Noll) /\oo/\ http://www.isthe.com/chongo/index.html http://www.isthe.com/chongo/src/multisort-patch/index.html p.s. I did not write multisort, Zachary Beane did. See the above URL for details. =-= --- multisort.c.init 2001-11-23 02:14:06.000000000 -0800 +++ multisort.c 2004-01-06 06:22:04.000000000 -0800 @@ -3,7 +3,7 @@ * multisort - sort multiple Common Log Format files into a single, * date-ordered file * - * $Id: my-1.1.3.ptch,v 1.4 2004/01/06 14:32:43 root Exp $ + * $Id: my-1.1.3.ptch,v 1.4 2004/01/06 14:32:43 root Exp $ * * Version 1.0 - 14 Jan 1999 * @@ -12,6 +12,29 @@ * - Fixed some potential segfaults by checking the return values * of various functions. * + * Version 1.1.3 - 04 Jan 2004 (unofficial by chongo) + * + * + Fixed sort bug where 01/Feb/2001:03:26:15 was incorrectly sorted + * before 31/Jan/2001:23:25:08 + * + Fixed bugs related to very old dates and dates far in the future. + * + Fixed bugs related to processing empty input files. + * + Correctly distinguishes between file EOF and read errors. 
+ * + Allows multisort to just process a single file. + * + Added slightly better sanity checks on timestamp string formats. + * + Fixed a bug where multisort could hang on an I/O error. + * + Correctly computes POSIX "Seconds since the Epoch" values + * with full leapyear rules. + * + Speedup as per Bertrand Demiddelaer's patch of: + * http://bert.tuxfamily.org/patches/multisort.patch + * + Added -m maxage which will output only lines <= maxage seconds old + * instead of all lines. + * + Updated the usage message. + * + Compile with LFS support to be able to process log files >2GB + * + * NOTE: For more information, unofficial multisort v1.1.3 patch URL: + * + * http://www.isthe.com/chongo/src/multisort-patch/index.html + * * Zachary Beane * * Copyright (C) 1999 Zachary Beane @@ -32,17 +55,27 @@ * ***********************************************************************/ +/* + * force Large File Usage (LFS) under Linux - + * see: http://www.suse.de/~aj/linux_lfs.html + */ +#define _FILE_OFFSET_BITS=64 +#define _LARGEFILE_SOURCE + #include #include #include #include +#include +#include +#include struct _input_file { int enabled; char *name; FILE *in_fh; - long atime; + long long atime; char buf[BUFSIZ + 1]; }; @@ -53,6 +86,21 @@ /* Command-line: gperf -t -k* -L ANSI-C */ struct month { char *name; int pos; }; +long long month_offset[12] = { + -1, /* Jan */ + 30, /* Feb */ + 58, /* Mar */ + 89, /* Apr */ + 119, /* May */ + 150, /* Jun */ + 180, /* Jul */ + 211, /* Aug */ + 242, /* Sep */ + 272, /* Oct */ + 303, /* Nov */ + 333 /* Dec */ +}; + #define TOTAL_KEYWORDS 12 #define MIN_WORD_LENGTH 3 #define MAX_WORD_LENGTH 3 @@ -181,9 +229,12 @@ * Ok, for this new update, don't be so bloody slack about not checking * return values and string lengths and such. Lazy bastard. * + * Return POSIX "Seconds since the Epoch" with the extension that a + * long long (64 bit) value is returned and the 100/400 year leapyear + * rule. 
*/ -long +long long conv_time(char *s) { char *ptr; @@ -208,6 +259,15 @@ return 0; orig_ptr = ptr; + if (ptr[2] != '/' || + ptr[6] != '/' || + ptr[11] != ':' || + ptr[14] != ':' || + ptr[17] != ':' || + ptr[20] != ' ') { + /* malformed date string */ + return 0; + } ptr[2] = '\0'; ptr[6] = '\0'; ptr[11] = '\0'; @@ -227,7 +287,7 @@ ptr += 4; - year = atoi(ptr) - 1990; + year = atoi(ptr) - 1900; ptr += 5; hour = atoi(ptr); @@ -249,24 +309,35 @@ ptr[17] = ':'; ptr[20] = ' '; - return((year * 31104000) - + (mon * 2592000) - + (mday * 86400) - + (hour * 3600) - + (min * 60) - + sec); + return (sec + (min * 60LL) + (hour * 3600LL) + /* sec of day */ + ((month_offset[mon]+mday) * 86400LL) + /* day of year */ + ((year-70) * 31536000LL) + /* Epoch year */ + (((year-69)/4) * 86400LL) - /* leap days */ + (((year-100)/100) * 86400LL) + /* 100yr rule */ + (((year-100)/400) * 86400LL)); /* 400yr rule */ } void usage(void) { - fprintf(stderr, "usage: multisort LOGFILE1 LOGFILE2 [LOGFILEn ...]\n"); - fprintf(stderr, "\n"); - fprintf(stderr, "multisort 1.1 Copyright (C) 1999 Zachary Beane\n"); - fprintf(stderr, "This program has NO WARRANTY and is licensed " - "under the terms of the\nGNU General Public License.\n" - "http://www.xach.com/multisort/ - bugs to xach@mint.net\n"); + fprintf(stderr, + "usage: multisort [-m maxage] LOGFILE1 [LOGFILEn ...]\n\n"); + fprintf(stderr, "\t-m maxage output only lines <= maxage secs old\n"); + fprintf(stderr, "\t\t without -m, it will output all lines\n\n"); + fprintf(stderr, "\tLOGFILE name of - means read from stdin\n\n"); + fprintf(stderr, "multisort 1.1.3 Copyright (C) 1999 Zachary Beane\n\n" + "\tSee http://www.xach.com/multisort/index.html for more info\n" + "\tas well as an EMail address for multisort bug reports.\n\n"); + fprintf(stderr, + "\tThis program has NO WARRANTY and is licensed under the\n" + "\tterms of the GNU General Public License.\n\n"); + fprintf(stderr, + "This code has bug fixes and other improvements by\n" + "chongo 
(Landon Curt Noll) -- Share and Enjoy! :-)\n\n"); + fprintf(stderr, "\thttp://www.isthe.com/chongo/index.html\n"); + fprintf(stderr, + "\thttp://www.isthe.com/chongo/src/multisort-patch/index.html\n"); exit(1); } @@ -279,11 +350,44 @@ int if_count = 0; /* number of total input files */ int if_nr = 0; /* number of active input files */ char *ret = NULL; - long min_time = 0; + long long min_time = 0LL; + long long oldest_time = 0LL; /* timestamp of old record to output */ + long long now; /* prog start time */ + long long max_age; /* -m arg, ignore this many secs old */ int min_index = 0; + struct timeval utc_now; /* prog start time in UTC */ + struct timezone ignored; /* ignored timezone arg */ int i, j; - - if (argc < 3) { + + /* determine the time, now, in UTC */ + if (gettimeofday(&utc_now, &ignored) < 0) { + perror("gettimeofday"); + exit(1); + } + now = (long long)utc_now.tv_sec; + + /* parse -m max_age if found */ + max_age = now; + if (argc > 1 && strcmp(argv[1], "-m") == 0) { + /* convert max_age to a numeric value if we can */ + errno = 0; + max_age = strtoll(argv[2], NULL, 0); + if (errno != 0) { + perror("strtoll"); + exit(1); + } + /* cover up the -m max_age arg pair */ + argv[2] = argv[0]; + argc -= 2; + argv += 2; + } + + /* compute time oldest record to output - cannot be before epoch */ + if (now > max_age) { + oldest_time = now - max_age; + } + + if (argc < 2) { usage(); } @@ -302,7 +406,11 @@ exit(1); } if_list[j]->name = strdup(argv[i]); - if_list[j]->in_fh = fopen(argv[i], "r"); + if (strcmp(if_list[j]->name, "-") == 0) { + if_list[j]->in_fh = stdin; + } else { + if_list[j]->in_fh = fopen(argv[i], "r"); + } if (if_list[j]->in_fh == NULL) { fprintf(stderr, "multisort: %s: %s\n", argv[i], @@ -314,41 +422,56 @@ /* Read the first line for each open file */ ret = fgets(if_list[j]->buf, BUFSIZ, if_list[j]->in_fh); - if (ret == NULL) { - fprintf(stderr, "multisort: empty input file `%s'", - if_list[j]->name); - exit(1); - } - + if (ret == NULL) { + if 
(ferror(if_list[j]->in_fh)) { + fprintf(stderr, + "multisort: read error, file `%s'", + if_list[j]->name); + } + fclose(if_list[j]->in_fh); + if_list[j]->enabled = 0; + } else { + if_list[j]->atime = conv_time(if_list[j]->buf); + ++if_nr; + } + ++if_count; } - if_count = if_nr = j; - - while (if_nr) { + while (if_nr > 0) { min_index = 0; - min_time = 900000000L; + min_time = 0x7fffffffffffffffLL; /* 2^63-1 */ for (i = 0; i < if_count; i++) { if (!if_list[i]->enabled) continue; - if_list[i]->atime = conv_time(if_list[i]->buf); if (if_list[i]->atime < min_time) { min_time = if_list[i]->atime; min_index = i; } } - /* output the lowest */ + /* output the lowest - silently ignore if too old */ /* printf("%s ", if_list[min_index]->name); */ - fputs(if_list[min_index]->buf, stdout); + if (min_time >= oldest_time) { + fputs(if_list[min_index]->buf, stdout); + } /* refill the buffer */ - ret = fgets(if_list[min_index]->buf, BUFSIZ, - if_list[min_index]->in_fh); - if (ret == NULL) { - if_list[min_index]->enabled = 0; - fclose(if_list[min_index]->in_fh); - if_nr--; + if (if_list[min_index]->enabled) { + ret = fgets(if_list[min_index]->buf, BUFSIZ, + if_list[min_index]->in_fh); + if (ret == NULL) { + if (ferror(if_list[min_index]->in_fh)) { + fprintf(stderr, + "multisort: fgets error, file `%s'", + if_list[min_index]->name); + } + if_list[min_index]->enabled = 0; + fclose(if_list[min_index]->in_fh); + if_nr--; + } else { + if_list[min_index]->atime = conv_time(if_list[min_index]->buf); + } } }