1 /* sort - sort lines of text (with all kinds of options).
2 Copyright (C) 1988, 1991-2008 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
17 Written December 1988 by Mike Haertel.
18 The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
19 or (US mail) as Mike Haertel c/o Free Software Foundation.
21 Ørn E. Hansen added NLS support in 1997. */
26 #include <sys/types.h>
32 #include "filevercmp.h"
40 #include "readtokens0.h"
43 #include "strnumcmp.h"
48 #if HAVE_SYS_RESOURCE_H
49 # include <sys/resource.h>
52 struct rlimit { size_t rlim_cur; };
53 # define getrlimit(Resource, Rlp) (-1)
56 /* The official name of this program (e.g., no `g' prefix). */
57 #define PROGRAM_NAME "sort"
60 proper_name ("Mike Haertel"), \
61 proper_name ("Paul Eggert")
63 #if HAVE_LANGINFO_CODESET
64 # include <langinfo.h>
67 /* Use SA_NOCLDSTOP as a proxy for whether the sigaction machinery is
70 # define SA_NOCLDSTOP 0
71 /* No sigprocmask. Always 'return' zero. */
72 # define sigprocmask(How, Set, Oset) (0)
74 # if ! HAVE_SIGINTERRUPT
75 # define siginterrupt(sig, flag) /* empty */
79 #if !defined OPEN_MAX && defined NR_OPEN
80 # define OPEN_MAX NR_OPEN
86 #define UCHAR_LIM (UCHAR_MAX + 1)
88 #ifndef DEFAULT_TMPDIR
89 # define DEFAULT_TMPDIR "/tmp"
95 /* POSIX says to exit with status 1 if invoked with -c and the
96 input is not properly sorted. */
97 SORT_OUT_OF_ORDER = 1,
99 /* POSIX says any other irregular exit must exit with a status
100 code greater than 1. */
106 /* The number of times we should try to fork a compression process
107 (we retry if the fork call fails). We don't _need_ to compress
108 temp files, this is just to reduce disk access, so this number
110 MAX_FORK_TRIES_COMPRESS = 2,
112 /* The number of times we should try to fork a decompression process.
113 If we can't fork a decompression process, we can't sort, so this
114 number should be big. */
115 MAX_FORK_TRIES_DECOMPRESS = 8
118 /* The representation of the decimal point in the current locale. */
119 static int decimal_point;
121 /* Thousands separator; if -1, then there isn't one. */
122 static int thousands_sep;
124 /* Nonzero if the corresponding locales are hard. */
125 static bool hard_LC_COLLATE;
127 static bool hard_LC_TIME;
130 #define NONZERO(x) ((x) != 0)
132 /* The kind of blanks for '-b' to skip in various options. */
133 enum blanktype { bl_start, bl_end, bl_both };
135 /* The character marking end of line. Default to \n. */
136 static char eolchar = '\n';
138 /* Lines are held in core as counted strings. */
141 char *text; /* Text of the line. */
142 size_t length; /* Length including final newline. */
143 char *keybeg; /* Start of first key. */
144 char *keylim; /* Limit of first key. */
150 char *buf; /* Dynamically allocated buffer,
151 partitioned into 3 regions:
154 - an array of lines, in reverse order. */
155 size_t used; /* Number of bytes used for input data. */
156 size_t nlines; /* Number of lines in the line array. */
157 size_t alloc; /* Number of bytes allocated. */
158 size_t left; /* Number of bytes left from previous reads. */
159 size_t line_bytes; /* Number of bytes to reserve for each line. */
160 bool eof; /* An EOF has been read. */
165 size_t sword; /* Zero-origin 'word' to start at. */
166 size_t schar; /* Additional characters to skip. */
167 size_t eword; /* Zero-origin first word after field. */
168 size_t echar; /* Additional characters in field. */
169 bool const *ignore; /* Boolean array of characters to ignore. */
170 char const *translate; /* Translation applied to characters. */
171 bool skipsblanks; /* Skip leading blanks when finding start. */
172 bool skipeblanks; /* Skip leading blanks when finding end. */
173 bool numeric; /* Flag for numeric comparison. Handle
174 strings of digits with optional decimal
175 point, but no exponential notation. */
176 bool random; /* Sort by random hash of key. */
177 bool general_numeric; /* Flag for general, numeric comparison.
178 Handle numbers in exponential notation. */
179 bool month; /* Flag for comparison by month name. */
180 bool reverse; /* Reverse the sense of comparison. */
181 bool version; /* sort by version number */
182 struct keyfield *next; /* Next keyfield to try. */
191 /* FIXME: None of these tables work with multibyte character sets.
192 Also, there are many other bugs when handling multibyte characters.
193 One way to fix this is to rewrite `sort' to use wide characters
194 internally, but doing this with good performance is a bit
197 /* Table of blanks. */
198 static bool blanks[UCHAR_LIM];
200 /* Table of non-printing characters. */
201 static bool nonprinting[UCHAR_LIM];
203 /* Table of non-dictionary characters (not letters, digits, or blanks). */
204 static bool nondictionary[UCHAR_LIM];
206 /* Translation table folding lower case to upper. */
207 static char fold_toupper[UCHAR_LIM];
209 #define MONTHS_PER_YEAR 12
211 /* Table mapping month names to integers.
212 Alphabetic order allows binary search. */
213 static struct month monthtab[] =
229 /* During the merge phase, the number of files to merge at once. */
230 #define NMERGE_DEFAULT 16
232 /* Minimum size for a merge or check buffer. */
233 #define MIN_MERGE_BUFFER_SIZE (2 + sizeof (struct line))
235 /* Minimum sort size; the code might not work with smaller sizes. */
236 #define MIN_SORT_SIZE (nmerge * MIN_MERGE_BUFFER_SIZE)
238 /* The number of bytes needed for a merge or check buffer, which can
239 function relatively efficiently even if it holds only one line. If
240 a longer line is seen, this value is increased. */
241 static size_t merge_buffer_size = MAX (MIN_MERGE_BUFFER_SIZE, 256 * 1024);
243 /* The approximate maximum number of bytes of main memory to use, as
244 specified by the user. Zero if the user has not specified a size. */
245 static size_t sort_size;
247 /* The guessed size for non-regular files. */
248 #define INPUT_FILE_SIZE_GUESS (1024 * 1024)
250 /* Array of directory names in which any temporary files are to be created. */
251 static char const **temp_dirs;
253 /* Number of temporary directory names used. */
254 static size_t temp_dir_count;
256 /* Number of allocated slots in temp_dirs. */
257 static size_t temp_dir_alloc;
259 /* Flag to reverse the order of all comparisons. */
262 /* Flag for stable sort. This turns off the last ditch bytewise
263 comparison of lines, and instead leaves lines in the same order
264 they were read if all keys compare equal. */
267 /* If TAB has this value, blanks separate fields. */
268 enum { TAB_DEFAULT = CHAR_MAX + 1 };
270 /* Tab character separating fields. If TAB_DEFAULT, then fields are
271 separated by the empty string between a non-blank character and a blank
273 static int tab = TAB_DEFAULT;
275 /* Flag to remove consecutive duplicate lines from the output.
276 Only the last of a sequence of equal lines will be output. */
279 /* Nonzero if any of the input files are the standard input. */
280 static bool have_read_stdin;
282 /* List of key field comparisons to be tried. */
283 static struct keyfield *keylist;
285 /* Program used to (de)compress temp files. Must accept -d. */
286 static char const *compress_program;
288 /* Maximum number of files to merge in one go. If more than this
289 number are present, temp files will be used. */
290 static unsigned int nmerge = NMERGE_DEFAULT;
292 static void sortlines_temp (struct line *, size_t, struct line *);
294 /* Report MESSAGE for FILE, then clean up and exit.
295 If FILE is null, it represents standard output. */
297 static void die (char const *, char const *) ATTRIBUTE_NORETURN;
299 die (char const *message, char const *file)
301 error (0, errno, "%s: %s", message, file ? file : _("standard output"));
308 if (status != EXIT_SUCCESS)
309 fprintf (stderr, _("Try `%s --help' for more information.\n"),
314 Usage: %s [OPTION]... [FILE]...\n\
315 or: %s [OPTION]... --files0-from=F\n\
317 program_name, program_name);
319 Write sorted concatenation of all FILE(s) to standard output.\n\
323 Mandatory arguments to long options are mandatory for short options too.\n\
330 -b, --ignore-leading-blanks ignore leading blanks\n\
331 -d, --dictionary-order consider only blanks and alphanumeric characters\n\
332 -f, --ignore-case fold lower case to upper case characters\n\
335 -g, --general-numeric-sort compare according to general numerical value\n\
336 -i, --ignore-nonprinting consider only printable characters\n\
337 -M, --month-sort compare (unknown) < `JAN' < ... < `DEC'\n\
340 -n, --numeric-sort compare according to string numerical value\n\
341 -R, --random-sort sort by random hash of keys\n\
342 --random-source=FILE get random bytes from FILE (default /dev/urandom)\n\
343 -r, --reverse reverse the result of comparisons\n\
346 --sort=WORD sort according to WORD:\n\
347 general-numeric -g, month -M, numeric -n,\n\
348 random -R, version -V\n\
349 -V, --version-sort sort by numeric version\n\
357 --batch-size=NMERGE merge at most NMERGE inputs at once;\n\
358 for more use temp files\n\
361 -c, --check, --check=diagnose-first check for sorted input; do not sort\n\
362 -C, --check=quiet, --check=silent like -c, but do not report first bad line\n\
363 --compress-program=PROG compress temporaries with PROG;\n\
364 decompress them with PROG -d\n\
365 --files0-from=F read input from the files specified by\n\
366 NUL-terminated names in file F;\n\
367 If F is - then read names from standard input\n\
370 -k, --key=POS1[,POS2] start a key at POS1 (origin 1), end it at POS2\n\
371 (default end of line)\n\
372 -m, --merge merge already sorted files; do not sort\n\
375 -o, --output=FILE write result to FILE instead of standard output\n\
376 -s, --stable stabilize sort by disabling last-resort comparison\n\
377 -S, --buffer-size=SIZE use SIZE for main memory buffer\n\
380 -t, --field-separator=SEP use SEP instead of non-blank to blank transition\n\
381 -T, --temporary-directory=DIR use DIR for temporaries, not $TMPDIR or %s;\n\
382 multiple options specify multiple directories\n\
383 -u, --unique with -c, check for strict ordering;\n\
384 without -c, output only the first of an equal run\n\
387 -z, --zero-terminated end lines with 0 byte, not newline\n\
389 fputs (HELP_OPTION_DESCRIPTION, stdout);
390 fputs (VERSION_OPTION_DESCRIPTION, stdout);
393 POS is F[.C][OPTS], where F is the field number and C the character position\n\
394 in the field; both are origin 1. If neither -t nor -b is in effect, characters\n\
395 in a field are counted from the beginning of the preceding whitespace. OPTS is\n\
396 one or more single-letter ordering options, which override global ordering\n\
397 options for that key. If no key is given, use the entire line as the key.\n\
399 SIZE may be followed by the following multiplicative suffixes:\n\
402 % 1% of memory, b 1, K 1024 (default), and so on for M, G, T, P, E, Z, Y.\n\
404 With no FILE, or when FILE is -, read standard input.\n\
407 The locale specified by the environment affects sort order.\n\
408 Set LC_ALL=C to get the traditional sort order that uses\n\
409 native byte values.\n\
411 emit_bug_reporting_address ();
417 /* For long options that have no equivalent short option, use a
418 non-character as a pseudo short option, starting with CHAR_MAX + 1. */
421 CHECK_OPTION = CHAR_MAX + 1,
422 COMPRESS_PROGRAM_OPTION,
425 RANDOM_SOURCE_OPTION,
429 static char const short_options[] = "-bcCdfgik:mMno:rRsS:t:T:uVy:z";
431 static struct option const long_options[] =
433 {"ignore-leading-blanks", no_argument, NULL, 'b'},
434 {"check", optional_argument, NULL, CHECK_OPTION},
435 {"compress-program", required_argument, NULL, COMPRESS_PROGRAM_OPTION},
436 {"dictionary-order", no_argument, NULL, 'd'},
437 {"ignore-case", no_argument, NULL, 'f'},
438 {"files0-from", required_argument, NULL, FILES0_FROM_OPTION},
439 {"general-numeric-sort", no_argument, NULL, 'g'},
440 {"ignore-nonprinting", no_argument, NULL, 'i'},
441 {"key", required_argument, NULL, 'k'},
442 {"merge", no_argument, NULL, 'm'},
443 {"month-sort", no_argument, NULL, 'M'},
444 {"numeric-sort", no_argument, NULL, 'n'},
445 {"version-sort", no_argument, NULL, 'V'},
446 {"random-sort", no_argument, NULL, 'R'},
447 {"random-source", required_argument, NULL, RANDOM_SOURCE_OPTION},
448 {"sort", required_argument, NULL, SORT_OPTION},
449 {"output", required_argument, NULL, 'o'},
450 {"reverse", no_argument, NULL, 'r'},
451 {"stable", no_argument, NULL, 's'},
452 {"batch-size", required_argument, NULL, NMERGE_OPTION},
453 {"buffer-size", required_argument, NULL, 'S'},
454 {"field-separator", required_argument, NULL, 't'},
455 {"temporary-directory", required_argument, NULL, 'T'},
456 {"unique", no_argument, NULL, 'u'},
457 {"zero-terminated", no_argument, NULL, 'z'},
458 {GETOPT_HELP_OPTION_DECL},
459 {GETOPT_VERSION_OPTION_DECL},
463 #define CHECK_TABLE \
465 _ct_("silent", 'C') \
466 _ct_("diagnose-first", 'c')
468 static char const *const check_args[] =
470 #define _ct_(_s, _c) _s,
474 static char const check_types[] =
476 #define _ct_(_s, _c) _c,
482 _st_("general-numeric", 'g') \
484 _st_("numeric", 'n') \
485 _st_("random", 'R') \
488 static char const *const sort_args[] =
490 #define _st_(_s, _c) _s,
494 static char const sort_types[] =
496 #define _st_(_s, _c) _c,
501 /* The set of signals that are caught. */
502 static sigset_t caught_signals;
504 /* Critical section status. */
511 /* Enter a critical section. */
512 static struct cs_status
515 struct cs_status status;
516 status.valid = (sigprocmask (SIG_BLOCK, &caught_signals, &status.sigs) == 0);
520 /* Leave a critical section. */
522 cs_leave (struct cs_status status)
526 /* Ignore failure when restoring the signal mask. */
527 sigprocmask (SIG_SETMASK, &status.sigs, NULL);
531 /* The list of temporary files. */
534 struct tempnode *volatile next;
535 pid_t pid; /* If compressed, the pid of compressor, else zero */
536 char name[1]; /* Actual size is 1 + file name length. */
538 static struct tempnode *volatile temphead;
539 static struct tempnode *volatile *temptail = &temphead;
544 pid_t pid; /* If compressed, the pid of compressor, else zero */
547 /* A table where we store compression process states. We clean up all
548 processes in a timely manner so as not to exhaust system resources,
549 so we store the info on whether the process is still running, or has
551 static Hash_table *proctab;
553 enum { INIT_PROCTAB_SIZE = 47 };
555 enum procstate { ALIVE, ZOMBIE };
557 /* A proctab entry. The COUNT field is there in case we fork a new
558 compression process that has the same PID as an old zombie process
559 that is still in the table (because the process to decompress the
560 temp file it was associated with hasn't started yet). */
564 enum procstate state;
569 proctab_hasher (const void *entry, size_t tabsize)
571 const struct procnode *node = entry;
572 return node->pid % tabsize;
576 proctab_comparator (const void *e1, const void *e2)
578 const struct procnode *n1 = e1, *n2 = e2;
579 return n1->pid == n2->pid;
582 /* The total number of forked processes (compressors and decompressors)
583 that have not been reaped yet. */
584 static size_t nprocs;
586 /* The number of child processes we'll allow before we try to reap some. */
587 enum { MAX_PROCS_BEFORE_REAP = 2 };
589 /* If 0 < PID, wait for the child process with that PID to exit.
590 If PID is -1, clean up a random child process which has finished and
591 return the process ID of that child. If PID is -1 and no processes
592 have quit yet, return 0 without waiting. */
598 pid_t cpid = waitpid (pid, &status, pid < 0 ? WNOHANG : 0);
601 error (SORT_FAILURE, errno, _("waiting for %s [-d]"),
605 if (! WIFEXITED (status) || WEXITSTATUS (status))
606 error (SORT_FAILURE, 0, _("%s [-d] terminated abnormally"),
614 /* Add the PID of a running compression process to proctab, or update
615 the entry COUNT and STATE fields if it's already there. This also
616 creates the table for us the first time it's called. */
619 register_proc (pid_t pid)
621 struct procnode test, *node;
625 proctab = hash_initialize (INIT_PROCTAB_SIZE, NULL,
634 node = hash_lookup (proctab, &test);
642 node = xmalloc (sizeof *node);
646 hash_insert (proctab, node);
650 /* This is called when we reap a random process. We don't know
651 whether we have reaped a compression process or a decompression
652 process until we look in the table. If there's an ALIVE entry for
653 it, then we have reaped a compression process, so change the state
654 to ZOMBIE. Otherwise, it's a decompression processes, so ignore it. */
657 update_proc (pid_t pid)
659 struct procnode test, *node;
662 node = hash_lookup (proctab, &test);
664 node->state = ZOMBIE;
667 /* This is for when we need to wait for a compression process to exit.
668 If it has a ZOMBIE entry in the table then it's already dead and has
669 been reaped. Note that if there's an ALIVE entry for it, it still may
670 already have died and been reaped if a second process was created with
671 the same PID. This is probably exceedingly rare, but to be on the safe
672 side we will have to wait for any compression process with this PID. */
675 wait_proc (pid_t pid)
677 struct procnode test, *node;
680 node = hash_lookup (proctab, &test);
681 if (node->state == ALIVE)
684 node->state = ZOMBIE;
687 hash_delete (proctab, node);
692 /* Keep reaping finished children as long as there are more to reap.
693 This doesn't block waiting for any of them, it only reaps those
694 that are already dead. */
701 while (0 < nprocs && (pid = reap (-1)))
705 /* Clean up any remaining temporary files. */
710 struct tempnode const *node;
712 for (node = temphead; node; node = node->next)
717 /* Cleanup actions to take when exiting. */
724 /* Clean up any remaining temporary files in a critical section so
725 that a signal handler does not try to clean them too. */
726 struct cs_status cs = cs_enter ();
734 /* Create a new temporary file, returning its newly allocated tempnode.
735 Store into *PFD the file descriptor open for writing. */
737 static struct tempnode *
738 create_temp_file (int *pfd)
740 static char const slashbase[] = "/sortXXXXXX";
741 static size_t temp_dir_index;
744 char const *temp_dir = temp_dirs[temp_dir_index];
745 size_t len = strlen (temp_dir);
746 struct tempnode *node =
747 xmalloc (offsetof (struct tempnode, name) + len + sizeof slashbase);
748 char *file = node->name;
751 memcpy (file, temp_dir, len);
752 memcpy (file + len, slashbase, sizeof slashbase);
755 if (++temp_dir_index == temp_dir_count)
758 /* Create the temporary file in a critical section, to avoid races. */
764 temptail = &node->next;
771 error (SORT_FAILURE, errno, _("cannot create temporary file in %s"),
778 /* Return a stream for FILE, opened with mode HOW. A null FILE means
779 standard output; HOW should be "w". When opening for input, "-"
780 means standard input. To avoid confusion, do not return file
781 descriptors STDIN_FILENO, STDOUT_FILENO, or STDERR_FILENO when
782 opening an ordinary FILE. */
785 xfopen (const char *file, const char *how)
791 else if (STREQ (file, "-") && *how == 'r')
793 have_read_stdin = true;
798 fp = fopen (file, how);
800 die (_("open failed"), file);
806 /* Close FP, whose name is FILE, and report any errors. */
809 xfclose (FILE *fp, char const *file)
814 /* Allow reading stdin from tty more than once. */
820 /* Don't close stdout just yet. close_stdout does that. */
821 if (fflush (fp) != 0)
822 die (_("fflush failed"), file);
826 if (fclose (fp) != 0)
827 die (_("close failed"), file);
833 dup2_or_die (int oldfd, int newfd)
835 if (dup2 (oldfd, newfd) < 0)
836 error (SORT_FAILURE, errno, _("dup2 failed"));
839 /* Fork a child process for piping to and do common cleanup. The
840 TRIES parameter tells us how many times to try to fork before
841 giving up. Return the PID of the child or -1 if fork failed. */
844 pipe_fork (int pipefds[2], size_t tries)
846 #if HAVE_WORKING_FORK
847 struct tempnode *saved_temphead;
849 unsigned int wait_retry = 1;
850 pid_t pid IF_LINT (= -1);
853 if (pipe (pipefds) < 0)
858 /* This is so the child process won't delete our temp files
859 if it receives a signal before exec-ing. */
861 saved_temphead = temphead;
867 temphead = saved_temphead;
872 if (0 <= pid || errno != EAGAIN)
889 close (STDIN_FILENO);
890 close (STDOUT_FILENO);
897 #else /* ! HAVE_WORKING_FORK */
902 /* Create a temporary file and start a compression program to filter output
903 to that file. Set *PFP to the file handle and if *PPID is non-NULL,
904 set it to the PID of the newly-created process. */
907 create_temp (FILE **pfp, pid_t *ppid)
910 struct tempnode *node = create_temp_file (&tempfd);
911 char *name = node->name;
913 if (compress_program)
917 node->pid = pipe_fork (pipefds, MAX_FORK_TRIES_COMPRESS);
924 register_proc (node->pid);
926 else if (node->pid == 0)
929 dup2_or_die (tempfd, STDOUT_FILENO);
931 dup2_or_die (pipefds[0], STDIN_FILENO);
934 if (execlp (compress_program, compress_program, (char *) NULL) < 0)
935 error (SORT_FAILURE, errno, _("couldn't execute %s"),
942 *pfp = fdopen (tempfd, "w");
944 die (_("couldn't create temporary file"), name);
952 /* Open a compressed temp file and start a decompression process through
953 which to filter the input. PID must be the valid processes ID of the
954 process used to compress the file. */
957 open_temp (const char *name, pid_t pid)
959 int tempfd, pipefds[2];
965 tempfd = open (name, O_RDONLY);
967 die (_("couldn't open temporary file"), name);
969 child_pid = pipe_fork (pipefds, MAX_FORK_TRIES_DECOMPRESS);
975 else if (child_pid == 0)
978 dup2_or_die (tempfd, STDIN_FILENO);
980 dup2_or_die (pipefds[1], STDOUT_FILENO);
983 if (execlp (compress_program, compress_program, "-d", (char *) NULL) < 0)
984 error (SORT_FAILURE, errno, _("couldn't execute %s -d"),
988 error (SORT_FAILURE, errno, _("couldn't create process for %s -d"),
991 fp = fdopen (pipefds[0], "r");
993 die (_("couldn't create temporary file"), name);
999 write_bytes (const char *buf, size_t n_bytes, FILE *fp, const char *output_file)
1001 if (fwrite (buf, 1, n_bytes, fp) != n_bytes)
1002 die (_("write failed"), output_file);
1005 /* Append DIR to the array of temporary directory names. */
1007 add_temp_dir (char const *dir)
1009 if (temp_dir_count == temp_dir_alloc)
1010 temp_dirs = X2NREALLOC (temp_dirs, &temp_dir_alloc);
1012 temp_dirs[temp_dir_count++] = dir;
1015 /* Remove NAME from the list of temporary files. */
1018 zaptemp (const char *name)
1020 struct tempnode *volatile *pnode;
1021 struct tempnode *node;
1022 struct tempnode *next;
1024 int unlink_errno = 0;
1025 struct cs_status cs;
1027 for (pnode = &temphead; (node = *pnode)->name != name; pnode = &node->next)
1030 /* Unlink the temporary file in a critical section to avoid races. */
1033 unlink_status = unlink (name);
1034 unlink_errno = errno;
1038 if (unlink_status != 0)
1039 error (0, unlink_errno, _("warning: cannot remove: %s"), name);
1045 #if HAVE_NL_LANGINFO
1048 struct_month_cmp (const void *m1, const void *m2)
1050 struct month const *month1 = m1;
1051 struct month const *month2 = m2;
1052 return strcmp (month1->name, month2->name);
1057 /* Initialize the character class tables. */
1064 for (i = 0; i < UCHAR_LIM; ++i)
1066 blanks[i] = !! isblank (i);
1067 nonprinting[i] = ! isprint (i);
1068 nondictionary[i] = ! isalnum (i) && ! isblank (i);
1069 fold_toupper[i] = toupper (i);
1072 #if HAVE_NL_LANGINFO
1073 /* If we're not in the "C" locale, read different names for months. */
1076 for (i = 0; i < MONTHS_PER_YEAR; i++)
1083 s = (char *) nl_langinfo (ABMON_1 + i);
1085 monthtab[i].name = name = xmalloc (s_len + 1);
1086 monthtab[i].val = i + 1;
1088 for (j = 0; j < s_len; j++)
1089 name[j] = fold_toupper[to_uchar (s[j])];
1092 qsort ((void *) monthtab, MONTHS_PER_YEAR,
1093 sizeof *monthtab, struct_month_cmp);
1098 /* Specify how many inputs may be merged at once.
1099 This may be set on the command-line with the
1100 --batch-size option. */
1102 specify_nmerge (int oi, char c, char const *s)
1105 struct rlimit rlimit;
1106 enum strtol_error e = xstrtoumax (s, NULL, 10, &n, NULL);
1108 /* Try to find out how many file descriptors we'll be able
1109 to open. We need at least nmerge + 3 (STDIN_FILENO,
1110 STDOUT_FILENO and STDERR_FILENO). */
1111 unsigned int max_nmerge = ((getrlimit (RLIMIT_NOFILE, &rlimit) == 0
1116 if (e == LONGINT_OK)
1120 e = LONGINT_OVERFLOW;
1125 error (0, 0, _("invalid --%s argument %s"),
1126 long_options[oi].name, quote(s));
1127 error (SORT_FAILURE, 0,
1128 _("minimum --%s argument is %s"),
1129 long_options[oi].name, quote("2"));
1131 else if (max_nmerge < nmerge)
1133 e = LONGINT_OVERFLOW;
1140 if (e == LONGINT_OVERFLOW)
1142 char max_nmerge_buf[INT_BUFSIZE_BOUND (unsigned int)];
1143 error (0, 0, _("--%s argument %s too large"),
1144 long_options[oi].name, quote(s));
1145 error (SORT_FAILURE, 0,
1146 _("maximum --%s argument with current rlimit is %s"),
1147 long_options[oi].name,
1148 uinttostr (max_nmerge, max_nmerge_buf));
1151 xstrtol_fatal (e, oi, c, long_options, s);
1154 /* Specify the amount of main memory to use when sorting. */
1156 specify_sort_size (int oi, char c, char const *s)
1160 enum strtol_error e = xstrtoumax (s, &suffix, 10, &n, "EgGkKmMPtTYZ");
1162 /* The default unit is KiB. */
1163 if (e == LONGINT_OK && ISDIGIT (suffix[-1]))
1165 if (n <= UINTMAX_MAX / 1024)
1168 e = LONGINT_OVERFLOW;
1171 /* A 'b' suffix means bytes; a '%' suffix means percent of memory. */
1172 if (e == LONGINT_INVALID_SUFFIX_CHAR && ISDIGIT (suffix[-1]) && ! suffix[1])
1181 double mem = physmem_total () * n / 100;
1183 /* Use "<", not "<=", to avoid problems with rounding. */
1184 if (mem < UINTMAX_MAX)
1190 e = LONGINT_OVERFLOW;
1195 if (e == LONGINT_OK)
1197 /* If multiple sort sizes are specified, take the maximum, so
1198 that option order does not matter. */
1205 sort_size = MAX (sort_size, MIN_SORT_SIZE);
1209 e = LONGINT_OVERFLOW;
1212 xstrtol_fatal (e, oi, c, long_options, s);
1215 /* Return the default sort size. */
1217 default_sort_size (void)
1219 /* Let MEM be available memory or 1/8 of total memory, whichever
1221 double avail = physmem_available ();
1222 double total = physmem_total ();
1223 double mem = MAX (avail, total / 8);
1224 struct rlimit rlimit;
1226 /* Let SIZE be MEM, but no more than the maximum object size or
1227 system resource limits. Avoid the MIN macro here, as it is not
1228 quite right when only one argument is floating point. Don't
1229 bother to check for values like RLIM_INFINITY since in practice
1230 they are not much less than SIZE_MAX. */
1231 size_t size = SIZE_MAX;
1234 if (getrlimit (RLIMIT_DATA, &rlimit) == 0 && rlimit.rlim_cur < size)
1235 size = rlimit.rlim_cur;
1237 if (getrlimit (RLIMIT_AS, &rlimit) == 0 && rlimit.rlim_cur < size)
1238 size = rlimit.rlim_cur;
1241 /* Leave a large safety margin for the above limits, as failure can
1242 occur when they are exceeded. */
1246 /* Leave a 1/16 margin for RSS to leave room for code, stack, etc.
1247 Exceeding RSS is not fatal, but can be quite slow. */
1248 if (getrlimit (RLIMIT_RSS, &rlimit) == 0 && rlimit.rlim_cur / 16 * 15 < size)
1249 size = rlimit.rlim_cur / 16 * 15;
1252 /* Use no less than the minimum. */
1253 return MAX (size, MIN_SORT_SIZE);
1256 /* Return the sort buffer size to use with the input files identified
1257 by FPS and FILES, which are alternate names of the same files.
1258 NFILES gives the number of input files; NFPS may be less. Assume
1259 that each input line requires LINE_BYTES extra bytes' worth of line
1260 information. Do not exceed the size bound specified by the user
1261 (or a default size bound, if the user does not specify one). */
1264 sort_buffer_size (FILE *const *fps, size_t nfps,
1265 char *const *files, size_t nfiles,
1268 /* A bound on the input size. If zero, the bound hasn't been
1270 static size_t size_bound;
1272 /* In the worst case, each input byte is a newline. */
1273 size_t worst_case_per_input_byte = line_bytes + 1;
1275 /* Keep enough room for one extra input line and an extra byte.
1276 This extra room might be needed when preparing to read EOF. */
1277 size_t size = worst_case_per_input_byte + 1;
1281 for (i = 0; i < nfiles; i++)
1287 if ((i < nfps ? fstat (fileno (fps[i]), &st)
1288 : STREQ (files[i], "-") ? fstat (STDIN_FILENO, &st)
1289 : stat (files[i], &st))
1291 die (_("stat failed"), files[i]);
1293 if (S_ISREG (st.st_mode))
1294 file_size = st.st_size;
1297 /* The file has unknown size. If the user specified a sort
1298 buffer size, use that; otherwise, guess the size. */
1301 file_size = INPUT_FILE_SIZE_GUESS;
1306 size_bound = sort_size;
1308 size_bound = default_sort_size ();
1311 /* Add the amount of memory needed to represent the worst case
1312 where the input consists entirely of newlines followed by a
1313 single non-newline. Check for overflow. */
1314 worst_case = file_size * worst_case_per_input_byte + 1;
1315 if (file_size != worst_case / worst_case_per_input_byte
1316 || size_bound - size <= worst_case)
1324 /* Initialize BUF. Reserve LINE_BYTES bytes for each line; LINE_BYTES
1325 must be at least sizeof (struct line). Allocate ALLOC bytes
1329 initbuf (struct buffer *buf, size_t line_bytes, size_t alloc)
1331 /* Ensure that the line array is properly aligned. If the desired
1332 size cannot be allocated, repeatedly halve it until allocation
1333 succeeds. The smaller allocation may hurt overall performance,
1334 but that's better than failing. */
1337 alloc += sizeof (struct line) - alloc % sizeof (struct line);
1338 buf->buf = malloc (alloc);
1342 if (alloc <= line_bytes + 1)
1346 buf->line_bytes = line_bytes;
1348 buf->used = buf->left = buf->nlines = 0;
1352 /* Return one past the limit of the line array. */
1354 static inline struct line *
1355 buffer_linelim (struct buffer const *buf)
1357 return (struct line *) (buf->buf + buf->alloc);
1360 /* Return a pointer to the first character of the field specified
1364 begfield (const struct line *line, const struct keyfield *key)
1366 char *ptr = line->text, *lim = ptr + line->length - 1;
1367 size_t sword = key->sword;
1368 size_t schar = key->schar;
1369 size_t remaining_bytes;
1371 /* The leading field separator itself is included in a field when -t
1374 if (tab != TAB_DEFAULT)
1375 while (ptr < lim && sword--)
1377 while (ptr < lim && *ptr != tab)
1383 while (ptr < lim && sword--)
1385 while (ptr < lim && blanks[to_uchar (*ptr)])
1387 while (ptr < lim && !blanks[to_uchar (*ptr)])
1391 if (key->skipsblanks)
1392 while (ptr < lim && blanks[to_uchar (*ptr)])
1395 /* Advance PTR by SCHAR (if possible), but no further than LIM. */
1396 remaining_bytes = lim - ptr;
1397 if (schar < remaining_bytes)
1405 /* Return the limit of (a pointer to the first character after) the field
1406 in LINE specified by KEY. */
1409 limfield (const struct line *line, const struct keyfield *key)
1411 char *ptr = line->text, *lim = ptr + line->length - 1;
1412 size_t eword = key->eword, echar = key->echar;
1413 size_t remaining_bytes;
1415 /* Move PTR past EWORD fields or to one past the last byte on LINE,
1416 whichever comes first. If there are more than EWORD fields, leave
1417 PTR pointing at the beginning of the field having zero-based index,
1418 EWORD. If a delimiter character was specified (via -t), then that
1419 `beginning' is the first character following the delimiting TAB.
1420 Otherwise, leave PTR pointing at the first `blank' character after
1421 the preceding field. */
1422 if (tab != TAB_DEFAULT)
1423 while (ptr < lim && eword--)
1425 while (ptr < lim && *ptr != tab)
1427 if (ptr < lim && (eword | echar))
1431 while (ptr < lim && eword--)
1433 while (ptr < lim && blanks[to_uchar (*ptr)])
1435 while (ptr < lim && !blanks[to_uchar (*ptr)])
1439 #ifdef POSIX_UNSPECIFIED
1440 /* The following block of code makes GNU sort incompatible with
1441 standard Unix sort, so it's ifdef'd out for now.
1442 The POSIX spec isn't clear on how to interpret this.
1443 FIXME: request clarification.
1445 From: kwzh@gnu.ai.mit.edu (Karl Heuer)
1446 Date: Thu, 30 May 96 12:20:41 -0400
1447 [Translated to POSIX 1003.1-2001 terminology by Paul Eggert.]
1449 [...]I believe I've found another bug in `sort'.
1454 $ textutils-1.15/src/sort -k1.7,1.7 </tmp/sort.in
1457 $ /bin/sort -k1.7,1.7 </tmp/sort.in
1461 Unix sort produced the answer I expected: sort on the single character
1462 in column 7. GNU sort produced different results, because it disagrees
1463 on the interpretation of the key-end spec "M.N". Unix sort reads this
1464 as "skip M-1 fields, then N-1 characters"; but GNU sort wants it to mean
1465 "skip M-1 fields, then either N-1 characters or the rest of the current
1466 field, whichever comes first". This extra clause applies only to
1467 key-ends, not key-starts.
1470 /* Make LIM point to the end of (one byte past) the current field. */
1471 if (tab != TAB_DEFAULT)
1474 newlim = memchr (ptr, tab, lim - ptr);
1482 while (newlim < lim && blanks[to_uchar (*newlim)])
1484 while (newlim < lim && !blanks[to_uchar (*newlim)])
1490 /* If we're ignoring leading blanks when computing the End
1491 of the field, don't start counting bytes until after skipping
1492 past any leading blanks. */
1493 if (key->skipeblanks)
1494 while (ptr < lim && blanks[to_uchar (*ptr)])
1497 /* Advance PTR by ECHAR (if possible), but no further than LIM. */
1498 remaining_bytes = lim - ptr;
1499 if (echar < remaining_bytes)
1507 /* Fill BUF reading from FP, moving buf->left bytes from the end
1508 of buf->buf to the beginning first. If EOF is reached and the
1509 file wasn't terminated by a newline, supply one. Set up BUF's line
1510 table too. FILE is the name of the file corresponding to FP.
1511 Return true if some input was read. */
1514 fillbuf (struct buffer *buf, FILE *fp, char const *file)
1516 struct keyfield const *key = keylist;
1518 size_t line_bytes = buf->line_bytes;
1519 size_t mergesize = merge_buffer_size - MIN_MERGE_BUFFER_SIZE;
1524 if (buf->used != buf->left)
1526 memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left);
1527 buf->used = buf->left;
1533 char *ptr = buf->buf + buf->used;
1534 struct line *linelim = buffer_linelim (buf);
1535 struct line *line = linelim - buf->nlines;
1536 size_t avail = (char *) linelim - buf->nlines * line_bytes - ptr;
1537 char *line_start = buf->nlines ? line->text + line->length : buf->buf;
1539 while (line_bytes + 1 < avail)
1541 /* Read as many bytes as possible, but do not read so many
1542 bytes that there might not be enough room for the
1543 corresponding line array. The worst case is when the
1544 rest of the input file consists entirely of newlines,
1545 except that the last byte is not a newline. */
1546 size_t readsize = (avail - 1) / (line_bytes + 1);
1547 size_t bytes_read = fread (ptr, 1, readsize, fp);
1548 char *ptrlim = ptr + bytes_read;
1550 avail -= bytes_read;
1552 if (bytes_read != readsize)
1555 die (_("read failed"), file);
1559 if (buf->buf == ptrlim)
1561 if (ptrlim[-1] != eol)
1566 /* Find and record each line in the just-read input. */
1567 while ((p = memchr (ptr, eol, ptrlim - ptr)))
1571 line->text = line_start;
1572 line->length = ptr - line_start;
1573 mergesize = MAX (mergesize, line->length);
1574 avail -= line_bytes;
1578 /* Precompute the position of the first key for
1580 line->keylim = (key->eword == SIZE_MAX
1582 : limfield (line, key));
1584 if (key->sword != SIZE_MAX)
1585 line->keybeg = begfield (line, key);
1588 if (key->skipsblanks)
1589 while (blanks[to_uchar (*line_start)])
1591 line->keybeg = line_start;
1603 buf->used = ptr - buf->buf;
1604 buf->nlines = buffer_linelim (buf) - line;
1605 if (buf->nlines != 0)
1607 buf->left = ptr - line_start;
1608 merge_buffer_size = mergesize + MIN_MERGE_BUFFER_SIZE;
1613 /* The current input line is too long to fit in the buffer.
1614 Double the buffer size and try again, keeping it properly
1616 size_t line_alloc = buf->alloc / sizeof (struct line);
1617 buf->buf = x2nrealloc (buf->buf, &line_alloc, sizeof (struct line));
1618 buf->alloc = line_alloc * sizeof (struct line);
1623 /* Compare strings A and B as numbers without explicitly converting them to
1624 machine numbers. Comparatively slow for short strings, but asymptotically
1628 numcompare (const char *a, const char *b)
1630 while (blanks[to_uchar (*a)])
1632 while (blanks[to_uchar (*b)])
1635 return strnumcmp (a, b, decimal_point, thousands_sep);
1639 general_numcompare (const char *sa, const char *sb)
1641 /* FIXME: add option to warn about failed conversions. */
1642 /* FIXME: maybe add option to try expensive FP conversion
1643 only if A and B can't be compared more cheaply/accurately. */
1647 double a = strtod (sa, &ea);
1648 double b = strtod (sb, &eb);
1650 /* Put conversion errors at the start of the collating sequence. */
1652 return sb == eb ? 0 : -1;
1656 /* Sort numbers in the usual way, where -0 == +0. Put NaNs after
1657 conversion errors but before numbers; sort them by internal
1658 bit-pattern, for lack of a more portable alternative. */
1664 : memcmp ((char *) &a, (char *) &b, sizeof a));
1667 /* Return an integer in 1..12 of the month name MONTH with length LEN.
1668 Return 0 if the name in S is not recognized. */
1671 getmonth (char const *month, size_t len)
1674 size_t hi = MONTHS_PER_YEAR;
1675 char const *monthlim = month + len;
1679 if (month == monthlim)
1681 if (!blanks[to_uchar (*month)])
1688 size_t ix = (lo + hi) / 2;
1689 char const *m = month;
1690 char const *n = monthtab[ix].name;
1695 return monthtab[ix].val;
1696 if (m == monthlim || fold_toupper[to_uchar (*m)] < to_uchar (*n))
1701 else if (fold_toupper[to_uchar (*m)] > to_uchar (*n))
1713 /* A source of random data. */
1714 static struct randread_source *randread_source;
1716 /* Return the Ith randomly-generated state. The caller must invoke
1717 random_state (H) for all H less than I before invoking random_state
1720 static struct md5_ctx
1721 random_state (size_t i)
1723 /* An array of states resulting from the random data, and counts of
1724 its used and allocated members. */
1725 static struct md5_ctx *state;
1727 static size_t allocated;
1729 struct md5_ctx *s = &state[i];
1733 unsigned char buf[MD5_DIGEST_SIZE];
1739 state = X2NREALLOC (state, &allocated);
1743 randread (randread_source, buf, sizeof buf);
1745 md5_process_bytes (buf, sizeof buf, s);
1751 /* Compare the hashes of TEXTA with length LENGTHA to those of TEXTB
1752 with length LENGTHB. Return negative if less, zero if equal,
1753 positive if greater. */
1756 cmp_hashes (char const *texta, size_t lena,
1757 char const *textb, size_t lenb)
1759 /* Try random hashes until a pair of hashes disagree. But if the
1760 first pair of random hashes agree, check whether the keys are
1761 identical and if so report no difference. */
1766 uint32_t dig[2][MD5_DIGEST_SIZE / sizeof (uint32_t)];
1767 struct md5_ctx s[2];
1768 s[0] = s[1] = random_state (i);
1769 md5_process_bytes (texta, lena, &s[0]); md5_finish_ctx (&s[0], dig[0]);
1770 md5_process_bytes (textb, lenb, &s[1]); md5_finish_ctx (&s[1], dig[1]);
1771 diff = memcmp (dig[0], dig[1], sizeof dig[0]);
1774 if (i == 0 && lena == lenb && memcmp (texta, textb, lena) == 0)
1781 /* Compare the keys TEXTA (of length LENA) and TEXTB (of length LENB)
1782 using one or more random hash functions. */
1785 compare_random (char *restrict texta, size_t lena,
1786 char *restrict textb, size_t lenb)
1790 if (! hard_LC_COLLATE)
1791 diff = cmp_hashes (texta, lena, textb, lenb);
1794 /* Transform the text into the basis of comparison, so that byte
1795 strings that would otherwise considered to be equal are
1796 considered equal here even if their bytes differ. */
1799 char stackbuf[4000];
1800 size_t tlena = xmemxfrm (stackbuf, sizeof stackbuf, texta, lena);
1801 bool a_fits = tlena <= sizeof stackbuf;
1802 size_t tlenb = xmemxfrm ((a_fits ? stackbuf + tlena : NULL),
1803 (a_fits ? sizeof stackbuf - tlena : 0),
1806 if (a_fits && tlena + tlenb <= sizeof stackbuf)
1810 /* Adding 1 to the buffer size lets xmemxfrm run a bit
1811 faster by avoiding the need for an extra buffer copy. */
1812 buf = xmalloc (tlena + tlenb + 1);
1813 xmemxfrm (buf, tlena + 1, texta, lena);
1814 xmemxfrm (buf + tlena, tlenb + 1, textb, lenb);
1817 diff = cmp_hashes (buf, tlena, buf + tlena, tlenb);
1819 if (buf != stackbuf)
1826 /* Compare the keys TEXTA (of length LENA) and TEXTB (of length LENB)
1827 using filevercmp. See lib/filevercmp.h for function description. */
1830 compare_version (char *restrict texta, size_t lena,
1831 char *restrict textb, size_t lenb)
1835 /* It is necessary to save the character after the end of the field.
1836 "filevercmp" works with NUL terminated strings. Our blocks of
1837 text are not necessarily terminated with a NUL byte. */
1838 char sv_a = texta[lena];
1839 char sv_b = textb[lenb];
1844 diff = filevercmp (texta, textb);
1852 /* Compare two lines A and B trying every key in sequence until there
1853 are no more keys or a difference is found. */
1856 keycompare (const struct line *a, const struct line *b)
1858 struct keyfield const *key = keylist;
1860 /* For the first iteration only, the key positions have been
1861 precomputed for us. */
1862 char *texta = a->keybeg;
1863 char *textb = b->keybeg;
1864 char *lima = a->keylim;
1865 char *limb = b->keylim;
1871 char const *translate = key->translate;
1872 bool const *ignore = key->ignore;
1874 /* Find the lengths. */
1875 size_t lena = lima <= texta ? 0 : lima - texta;
1876 size_t lenb = limb <= textb ? 0 : limb - textb;
1878 /* Actually compare the fields. */
1881 diff = compare_random (texta, lena, textb, lenb);
1882 else if (key->numeric | key->general_numeric)
1884 char savea = *lima, saveb = *limb;
1886 *lima = *limb = '\0';
1887 diff = ((key->numeric ? numcompare : general_numcompare)
1889 *lima = savea, *limb = saveb;
1891 else if (key->version)
1892 diff = compare_version (texta, lena, textb, lenb);
1893 else if (key->month)
1894 diff = getmonth (texta, lena) - getmonth (textb, lenb);
1895 /* Sorting like this may become slow, so in a simple locale the user
1896 can select a faster sort that is similar to ascii sort. */
1897 else if (hard_LC_COLLATE)
1899 if (ignore || translate)
1902 size_t size = lena + 1 + lenb + 1;
1903 char *copy_a = (size <= sizeof buf ? buf : xmalloc (size));
1904 char *copy_b = copy_a + lena + 1;
1905 size_t new_len_a, new_len_b, i;
1907 /* Ignore and/or translate chars before comparing. */
1908 for (new_len_a = new_len_b = i = 0; i < MAX (lena, lenb); i++)
1912 copy_a[new_len_a] = (translate
1913 ? translate[to_uchar (texta[i])]
1915 if (!ignore || !ignore[to_uchar (texta[i])])
1920 copy_b[new_len_b] = (translate
1921 ? translate[to_uchar (textb[i])]
1923 if (!ignore || !ignore[to_uchar (textb[i])])
1928 diff = xmemcoll (copy_a, new_len_a, copy_b, new_len_b);
1930 if (sizeof buf < size)
1934 diff = - NONZERO (lenb);
1938 diff = xmemcoll (texta, lena, textb, lenb);
1942 #define CMP_WITH_IGNORE(A, B) \
1947 while (texta < lima && ignore[to_uchar (*texta)]) \
1949 while (textb < limb && ignore[to_uchar (*textb)]) \
1951 if (! (texta < lima && textb < limb)) \
1953 diff = to_uchar (A) - to_uchar (B); \
1960 diff = (texta < lima) - (textb < limb); \
1965 CMP_WITH_IGNORE (translate[to_uchar (*texta)],
1966 translate[to_uchar (*textb)]);
1968 CMP_WITH_IGNORE (*texta, *textb);
1971 diff = - NONZERO (lenb);
1978 while (texta < lima && textb < limb)
1980 diff = (to_uchar (translate[to_uchar (*texta++)])
1981 - to_uchar (translate[to_uchar (*textb++)]));
1988 diff = memcmp (texta, textb, MIN (lena, lenb));
1992 diff = lena < lenb ? -1 : lena != lenb;
2002 /* Find the beginning and limit of the next field. */
2003 if (key->eword != SIZE_MAX)
2004 lima = limfield (a, key), limb = limfield (b, key);
2006 lima = a->text + a->length - 1, limb = b->text + b->length - 1;
2008 if (key->sword != SIZE_MAX)
2009 texta = begfield (a, key), textb = begfield (b, key);
2012 texta = a->text, textb = b->text;
2013 if (key->skipsblanks)
2015 while (texta < lima && blanks[to_uchar (*texta)])
2017 while (textb < limb && blanks[to_uchar (*textb)])
2028 return key->reverse ? -diff : diff;
2031 /* Compare two lines A and B, returning negative, zero, or positive
2032 depending on whether A compares less than, equal to, or greater than B. */
2035 compare (const struct line *a, const struct line *b)
2040 /* First try to compare on the specified keys (if any).
2041 The only two cases with no key at all are unadorned sort,
2042 and unadorned sort -r. */
2045 diff = keycompare (a, b);
2046 if (diff | unique | stable)
2050 /* If the keys all compare equal (or no keys were specified)
2051 fall through to the default comparison. */
2052 alen = a->length - 1, blen = b->length - 1;
2055 diff = - NONZERO (blen);
2058 else if (hard_LC_COLLATE)
2059 diff = xmemcoll (a->text, alen, b->text, blen);
2060 else if (! (diff = memcmp (a->text, b->text, MIN (alen, blen))))
2061 diff = alen < blen ? -1 : alen != blen;
2063 return reverse ? -diff : diff;
2066 /* Check that the lines read from FILE_NAME come in order. Return
2067 true if they are in order. If CHECKONLY == 'c', also print a
2068 diagnostic (FILE_NAME, line number, contents of line) to stderr if
2069 they are not in order. */
2072 check (char const *file_name, char checkonly)
2074 FILE *fp = xfopen (file_name, "r");
2075 struct buffer buf; /* Input buffer. */
2076 struct line temp; /* Copy of previous line. */
2078 uintmax_t line_number = 0;
2079 struct keyfield const *key = keylist;
2080 bool nonunique = ! unique;
2081 bool ordered = true;
2083 initbuf (&buf, sizeof (struct line),
2084 MAX (merge_buffer_size, sort_size));
2087 while (fillbuf (&buf, fp, file_name))
2089 struct line const *line = buffer_linelim (&buf);
2090 struct line const *linebase = line - buf.nlines;
2092 /* Make sure the line saved from the old buffer contents is
2093 less than or equal to the first line of the new buffer. */
2094 if (alloc && nonunique <= compare (&temp, line - 1))
2098 if (checkonly == 'c')
2100 struct line const *disorder_line = line - 1;
2101 uintmax_t disorder_line_number =
2102 buffer_linelim (&buf) - disorder_line + line_number;
2103 char hr_buf[INT_BUFSIZE_BOUND (uintmax_t)];
2104 fprintf (stderr, _("%s: %s:%s: disorder: "),
2105 program_name, file_name,
2106 umaxtostr (disorder_line_number, hr_buf));
2107 write_bytes (disorder_line->text, disorder_line->length,
2108 stderr, _("standard error"));
2116 /* Compare each line in the buffer with its successor. */
2117 while (linebase < --line)
2118 if (nonunique <= compare (line, line - 1))
2119 goto found_disorder;
2121 line_number += buf.nlines;
2123 /* Save the last line of the buffer. */
2124 if (alloc < line->length)
2131 alloc = line->length;
2135 while (alloc < line->length);
2137 temp.text = xrealloc (temp.text, alloc);
2139 memcpy (temp.text, line->text, line->length);
2140 temp.length = line->length;
2143 temp.keybeg = temp.text + (line->keybeg - line->text);
2144 temp.keylim = temp.text + (line->keylim - line->text);
2148 xfclose (fp, file_name);
2154 /* Merge lines from FILES onto OFP. NTEMPS is the number of temporary
2155 files (all of which are at the start of the FILES array), and
2156 NFILES is the number of files; 0 <= NTEMPS <= NFILES <= NMERGE.
2157 Close input and output files before returning.
2158 OUTPUT_FILE gives the name of the output file. If it is NULL,
2159 the output file is standard output. If OFP is NULL, the output
2160 file has not been opened yet (or written to, if standard output). */
2163 mergefps (struct sortfile *files, size_t ntemps, size_t nfiles,
2164 FILE *ofp, char const *output_file)
2166 FILE **fps = xnmalloc (nmerge, sizeof *fps);
2167 /* Input streams for each file. */
2168 struct buffer *buffer = xnmalloc (nmerge, sizeof *buffer);
2169 /* Input buffers for each file. */
2170 struct line saved; /* Saved line storage for unique check. */
2171 struct line const *savedline = NULL;
2172 /* &saved if there is a saved line. */
2173 size_t savealloc = 0; /* Size allocated for the saved line. */
2174 struct line const **cur = xnmalloc (nmerge, sizeof *cur);
2175 /* Current line in each line table. */
2176 struct line const **base = xnmalloc (nmerge, sizeof *base);
2177 /* Base of each line table. */
2178 size_t *ord = xnmalloc (nmerge, sizeof *ord);
2179 /* Table representing a permutation of fps,
2180 such that cur[ord[0]] is the smallest line
2181 and will be next output. */
2185 struct keyfield const *key = keylist;
2188 /* Read initial lines from each input file. */
2189 for (i = 0; i < nfiles; )
2191 fps[i] = (files[i].pid
2192 ? open_temp (files[i].name, files[i].pid)
2193 : xfopen (files[i].name, "r"));
2194 initbuf (&buffer[i], sizeof (struct line),
2195 MAX (merge_buffer_size, sort_size / nfiles));
2196 if (fillbuf (&buffer[i], fps[i], files[i].name))
2198 struct line const *linelim = buffer_linelim (&buffer[i]);
2199 cur[i] = linelim - 1;
2200 base[i] = linelim - buffer[i].nlines;
2205 /* fps[i] is empty; eliminate it from future consideration. */
2206 xfclose (fps[i], files[i].name);
2210 zaptemp (files[i].name);
2212 free (buffer[i].buf);
2214 for (j = i; j < nfiles; ++j)
2215 files[j] = files[j + 1];
2220 ofp = xfopen (output_file, "w");
2222 /* Set up the ord table according to comparisons among input lines.
2223 Since this only reorders two items if one is strictly greater than
2224 the other, it is stable. */
2225 for (i = 0; i < nfiles; ++i)
2227 for (i = 1; i < nfiles; ++i)
2228 if (0 < compare (cur[ord[i - 1]], cur[ord[i]]))
2229 t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0;
2231 /* Repeatedly output the smallest line until no input remains. */
2234 struct line const *smallest = cur[ord[0]];
2236 /* If uniquified output is turned on, output only the first of
2237 an identical series of lines. */
2240 if (savedline && compare (savedline, smallest))
2243 write_bytes (saved.text, saved.length, ofp, output_file);
2248 if (savealloc < smallest->length)
2253 savealloc = smallest->length;
2256 while ((savealloc *= 2) < smallest->length);
2258 saved.text = xrealloc (saved.text, savealloc);
2260 saved.length = smallest->length;
2261 memcpy (saved.text, smallest->text, saved.length);
2265 saved.text + (smallest->keybeg - smallest->text);
2267 saved.text + (smallest->keylim - smallest->text);
2272 write_bytes (smallest->text, smallest->length, ofp, output_file);
2274 /* Check if we need to read more lines into core. */
2275 if (base[ord[0]] < smallest)
2276 cur[ord[0]] = smallest - 1;
2279 if (fillbuf (&buffer[ord[0]], fps[ord[0]], files[ord[0]].name))
2281 struct line const *linelim = buffer_linelim (&buffer[ord[0]]);
2282 cur[ord[0]] = linelim - 1;
2283 base[ord[0]] = linelim - buffer[ord[0]].nlines;
2287 /* We reached EOF on fps[ord[0]]. */
2288 for (i = 1; i < nfiles; ++i)
2289 if (ord[i] > ord[0])
2292 xfclose (fps[ord[0]], files[ord[0]].name);
2293 if (ord[0] < ntemps)
2296 zaptemp (files[ord[0]].name);
2298 free (buffer[ord[0]].buf);
2299 for (i = ord[0]; i < nfiles; ++i)
2301 fps[i] = fps[i + 1];
2302 files[i] = files[i + 1];
2303 buffer[i] = buffer[i + 1];
2304 cur[i] = cur[i + 1];
2305 base[i] = base[i + 1];
2307 for (i = 0; i < nfiles; ++i)
2308 ord[i] = ord[i + 1];
2313 /* The new line just read in may be larger than other lines
2314 already in main memory; push it back in the queue until we
2315 encounter a line larger than it. Optimize for the common
2316 case where the new line is smallest. */
2321 size_t ord0 = ord[0];
2322 size_t count_of_smaller_lines;
2326 int cmp = compare (cur[ord0], cur[ord[probe]]);
2327 if (cmp < 0 || (cmp == 0 && ord0 < ord[probe]))
2331 probe = (lo + hi) / 2;
2334 count_of_smaller_lines = lo - 1;
2335 for (j = 0; j < count_of_smaller_lines; j++)
2336 ord[j] = ord[j + 1];
2337 ord[count_of_smaller_lines] = ord0;
2340 /* Free up some resources every once in a while. */
2341 if (MAX_PROCS_BEFORE_REAP < nprocs)
2345 if (unique && savedline)
2347 write_bytes (saved.text, saved.length, ofp, output_file);
2351 xfclose (ofp, output_file);
2359 /* Merge into T the two sorted arrays of lines LO (with NLO members)
2360 and HI (with NHI members). T, LO, and HI point just past their
2361 respective arrays, and the arrays are in reverse order. NLO and
2362 NHI must be positive, and HI - NHI must equal T - (NLO + NHI). */
2365 mergelines (struct line *t,
2366 struct line const *lo, size_t nlo,
2367 struct line const *hi, size_t nhi)
2370 if (compare (lo - 1, hi - 1) <= 0)
2375 /* HI - NHI equalled T - (NLO + NHI) when this function
2376 began. Therefore HI must equal T now, and there is no
2377 need to copy from HI to T. */
2395 /* Sort the array LINES with NLINES members, using TEMP for temporary space.
2396 NLINES must be at least 2.
2397 The input and output arrays are in reverse order, and LINES and
2398 TEMP point just past the end of their respective arrays.
2400 Use a recursive divide-and-conquer algorithm, in the style
2401 suggested by Knuth volume 3 (2nd edition), exercise 5.2.4-23. Use
2402 the optimization suggested by exercise 5.2.4-10; this requires room
2403 for only 1.5*N lines, rather than the usual 2*N lines. Knuth
2404 writes that this memory optimization was originally published by
2405 D. A. Bell, Comp J. 1 (1958), 75. */
2408 sortlines (struct line *lines, size_t nlines, struct line *temp)
2412 if (0 < compare (&lines[-1], &lines[-2]))
2414 struct line tmp = lines[-1];
2415 lines[-1] = lines[-2];
2421 size_t nlo = nlines / 2;
2422 size_t nhi = nlines - nlo;
2423 struct line *lo = lines;
2424 struct line *hi = lines - nlo;
2425 struct line *sorted_lo = temp;
2427 sortlines (hi, nhi, temp);
2429 sortlines_temp (lo, nlo, sorted_lo);
2431 sorted_lo[-1] = lo[-1];
2433 mergelines (lines, sorted_lo, nlo, hi, nhi);
2437 /* Like sortlines (LINES, NLINES, TEMP), except output into TEMP
2438 rather than sorting in place. */
2441 sortlines_temp (struct line *lines, size_t nlines, struct line *temp)
2445 /* Declare `swap' as int, not bool, to work around a bug
2446 <http://lists.gnu.org/archive/html/bug-coreutils/2005-10/msg00086.html>
2447 in the IBM xlc 6.0.0.0 compiler in 64-bit mode. */
2448 int swap = (0 < compare (&lines[-1], &lines[-2]));
2449 temp[-1] = lines[-1 - swap];
2450 temp[-2] = lines[-2 + swap];
2454 size_t nlo = nlines / 2;
2455 size_t nhi = nlines - nlo;
2456 struct line *lo = lines;
2457 struct line *hi = lines - nlo;
2458 struct line *sorted_hi = temp - nlo;
2460 sortlines_temp (hi, nhi, sorted_hi);
2462 sortlines (lo, nlo, temp);
2464 mergelines (temp, lo, nlo, sorted_hi, nhi);
2468 /* Scan through FILES[NTEMPS .. NFILES-1] looking for a file that is
2469 the same as OUTFILE. If found, merge the found instances (and perhaps
2470 some other files) into a temporary file so that it can in turn be
2471 merged into OUTFILE without destroying OUTFILE before it is completely
2472 read. Return the new value of NFILES, which differs from the old if
2473 some merging occurred.
2475 This test ensures that an otherwise-erroneous use like
2476 "sort -m -o FILE ... FILE ..." copies FILE before writing to it.
2477 It's not clear that POSIX requires this nicety.
2478 Detect common error cases, but don't try to catch obscure cases like
2479 "cat ... FILE ... | sort -m -o FILE"
2480 where traditional "sort" doesn't copy the input and where
2481 people should know that they're getting into trouble anyway.
2482 Catching these obscure cases would slow down performance in
2486 avoid_trashing_input (struct sortfile *files, size_t ntemps,
2487 size_t nfiles, char const *outfile)
2490 bool got_outstat = false;
2491 struct stat outstat;
2493 for (i = ntemps; i < nfiles; i++)
2495 bool is_stdin = STREQ (files[i].name, "-");
2499 if (outfile && STREQ (outfile, files[i].name) && !is_stdin)
2506 ? stat (outfile, &outstat)
2507 : fstat (STDOUT_FILENO, &outstat))
2514 ? fstat (STDIN_FILENO, &instat)
2515 : stat (files[i].name, &instat))
2517 && SAME_INODE (instat, outstat));
2524 char *temp = create_temp (&tftp, &pid);
2525 mergefps (&files[i],0, nfiles - i, tftp, temp);
2526 files[i].name = temp;
2535 /* Merge the input FILES. NTEMPS is the number of files at the
2536 start of FILES that are temporary; it is zero at the top level.
2537 NFILES is the total number of files. Put the output in
2538 OUTPUT_FILE; a null OUTPUT_FILE stands for standard output. */
2541 merge (struct sortfile *files, size_t ntemps, size_t nfiles,
2542 char const *output_file)
2544 while (nmerge < nfiles)
2546 /* Number of input files processed so far. */
2549 /* Number of output files generated so far. */
2552 /* nfiles % NMERGE; this counts input files that are left over
2553 after all full-sized merges have been done. */
2556 /* Number of easily-available slots at the next loop iteration. */
2559 /* Do as many NMERGE-size merges as possible. */
2560 for (out = in = 0; out < nfiles / nmerge; out++, in += nmerge)
2564 char *temp = create_temp (&tfp, &pid);
2565 size_t nt = MIN (ntemps, nmerge);
2567 mergefps (&files[in], nt, nmerge, tfp, temp);
2568 files[out].name = temp;
2569 files[out].pid = pid;
2572 remainder = nfiles - in;
2573 cheap_slots = nmerge - out % nmerge;
2575 if (cheap_slots < remainder)
2577 /* So many files remain that they can't all be put into the last
2578 NMERGE-sized output window. Do one more merge. Merge as few
2579 files as possible, to avoid needless I/O. */
2580 size_t nshortmerge = remainder - cheap_slots + 1;
2583 char *temp = create_temp (&tfp, &pid);
2584 size_t nt = MIN (ntemps, nshortmerge);
2586 mergefps (&files[in], nt, nshortmerge, tfp, temp);
2587 files[out].name = temp;
2588 files[out++].pid = pid;
2592 /* Put the remaining input files into the last NMERGE-sized output
2593 window, so they will be merged in the next pass. */
2594 memmove(&files[out], &files[in], (nfiles - in) * sizeof *files);
2599 nfiles = avoid_trashing_input (files, ntemps, nfiles, output_file);
2600 mergefps (files, ntemps, nfiles, NULL, output_file);
2603 /* Sort NFILES FILES onto OUTPUT_FILE. */
2606 sort (char * const *files, size_t nfiles, char const *output_file)
2610 bool output_file_created = false;
2616 char const *temp_output;
2617 char const *file = *files;
2618 FILE *fp = xfopen (file, "r");
2620 size_t bytes_per_line = (2 * sizeof (struct line)
2621 - sizeof (struct line) / 2);
2624 initbuf (&buf, bytes_per_line,
2625 sort_buffer_size (&fp, 1, files, nfiles, bytes_per_line));
2630 while (fillbuf (&buf, fp, file))
2633 struct line *linebase;
2635 if (buf.eof && nfiles
2636 && (bytes_per_line + 1
2637 < (buf.alloc - buf.used - bytes_per_line * buf.nlines)))
2639 /* End of file, but there is more input and buffer room.
2640 Concatenate the next input file; this is faster in
2642 buf.left = buf.used;
2646 line = buffer_linelim (&buf);
2647 linebase = line - buf.nlines;
2649 sortlines (line, buf.nlines, linebase);
2650 if (buf.eof && !nfiles && !ntemps && !buf.left)
2653 tfp = xfopen (output_file, "w");
2654 temp_output = output_file;
2655 output_file_created = true;
2660 temp_output = create_temp (&tfp, NULL);
2666 write_bytes (line->text, line->length, tfp, temp_output);
2668 while (linebase < line && compare (line, line - 1) == 0)
2671 while (linebase < line);
2673 xfclose (tfp, temp_output);
2675 /* Free up some resources every once in a while. */
2676 if (MAX_PROCS_BEFORE_REAP < nprocs)
2679 if (output_file_created)
2688 if (! output_file_created)
2691 struct tempnode *node = temphead;
2692 struct sortfile *tempfiles = xnmalloc (ntemps, sizeof *tempfiles);
2693 for (i = 0; node; i++)
2695 tempfiles[i].name = node->name;
2696 tempfiles[i].pid = node->pid;
2699 merge (tempfiles, ntemps, ntemps, output_file);
2704 /* Insert a malloc'd copy of key KEY_ARG at the end of the key list. */
2707 insertkey (struct keyfield *key_arg)
2709 struct keyfield **p;
2710 struct keyfield *key = xmemdup (key_arg, sizeof *key);
2712 for (p = &keylist; *p; p = &(*p)->next)
2718 /* Report a bad field specification SPEC, with extra info MSGID. */
2720 static void badfieldspec (char const *, char const *)
2723 badfieldspec (char const *spec, char const *msgid)
2725 error (SORT_FAILURE, 0, _("%s: invalid field specification %s"),
2726 _(msgid), quote (spec));
2730 /* Report incompatible options. */
2732 static void incompatible_options (char const *) ATTRIBUTE_NORETURN;
2734 incompatible_options (char const *opts)
2736 error (SORT_FAILURE, 0, _("options `-%s' are incompatible"), opts);
2740 /* Check compatibility of ordering options. */
2743 check_ordering_compatibility (void)
2745 struct keyfield const *key;
2747 for (key = keylist; key; key = key->next)
2748 if ((1 < (key->random + key->numeric + key->general_numeric + key->month
2749 + key->version + !!key->ignore))
2750 || (key->random && key->translate))
2752 /* The following is too big, but guaranteed to be "big enough". */
2753 char opts[sizeof short_options];
2755 if (key->ignore == nondictionary)
2759 if (key->general_numeric)
2761 if (key->ignore == nonprinting)
2772 incompatible_options (opts);
2776 /* Parse the leading integer in STRING and store the resulting value
2777 (which must fit into size_t) into *VAL. Return the address of the
2778 suffix after the integer. If the value is too large, silently
2779 substitute SIZE_MAX. If MSGID is NULL, return NULL after
2780 failure; otherwise, report MSGID and exit on failure. */
2783 parse_field_count (char const *string, size_t *val, char const *msgid)
2788 switch (xstrtoumax (string, &suffix, 10, &n, ""))
2791 case LONGINT_INVALID_SUFFIX_CHAR:
2796 case LONGINT_OVERFLOW:
2797 case LONGINT_OVERFLOW | LONGINT_INVALID_SUFFIX_CHAR:
2801 case LONGINT_INVALID:
2803 error (SORT_FAILURE, 0, _("%s: invalid count at start of %s"),
2804 _(msgid), quote (string));
2811 /* Handle interrupts and hangups. */
2814 sighandler (int sig)
2817 signal (sig, SIG_IGN);
2821 signal (sig, SIG_DFL);
2825 /* Set the ordering options for KEY specified in S.
2826 Return the address of the first character in S that
2827 is not a valid ordering option.
2828 BLANKTYPE is the kind of blanks that 'b' should skip. */
2831 set_ordering (const char *s, struct keyfield *key, enum blanktype blanktype)
2838 if (blanktype == bl_start || blanktype == bl_both)
2839 key->skipsblanks = true;
2840 if (blanktype == bl_end || blanktype == bl_both)
2841 key->skipeblanks = true;
2844 key->ignore = nondictionary;
2847 key->translate = fold_toupper;
2850 key->general_numeric = true;
2853 /* Option order should not matter, so don't let -i override
2854 -d. -d implies -i, but -i does not imply -d. */
2856 key->ignore = nonprinting;
2862 key->numeric = true;
2868 key->reverse = true;
2871 key->version = true;
2881 static struct keyfield *
2882 key_init (struct keyfield *key)
2884 memset (key, 0, sizeof *key);
2885 key->eword = SIZE_MAX;
2890 main (int argc, char **argv)
2892 struct keyfield *key;
2893 struct keyfield key_buf;
2894 struct keyfield gkey;
2898 bool mergeonly = false;
2899 char *random_source = NULL;
2900 bool need_random = false;
2902 bool posixly_correct = (getenv ("POSIXLY_CORRECT") != NULL);
2903 bool obsolete_usage = (posix2_version () < 200112);
2905 char *files_from = NULL;
2907 char const *outfile = NULL;
2909 initialize_main (&argc, &argv);
2910 set_program_name (argv[0]);
2911 setlocale (LC_ALL, "");
2912 bindtextdomain (PACKAGE, LOCALEDIR);
2913 textdomain (PACKAGE);
2915 initialize_exit_failure (SORT_FAILURE);
2917 hard_LC_COLLATE = hard_locale (LC_COLLATE);
2918 #if HAVE_NL_LANGINFO
2919 hard_LC_TIME = hard_locale (LC_TIME);
2922 /* Get locale's representation of the decimal point. */
2924 struct lconv const *locale = localeconv ();
2926 /* If the locale doesn't define a decimal point, or if the decimal
2927 point is multibyte, use the C locale's decimal point. FIXME:
2928 add support for multibyte decimal points. */
2929 decimal_point = to_uchar (locale->decimal_point[0]);
2930 if (! decimal_point || locale->decimal_point[1])
2931 decimal_point = '.';
2933 /* FIXME: add support for multibyte thousands separators. */
2934 thousands_sep = to_uchar (*locale->thousands_sep);
2935 if (! thousands_sep || locale->thousands_sep[1])
2939 have_read_stdin = false;
2944 static int const sig[] =
2946 /* The usual suspects. */
2947 SIGALRM, SIGHUP, SIGINT, SIGPIPE, SIGQUIT, SIGTERM,
2964 enum { nsigs = sizeof sig / sizeof sig[0] };
2967 struct sigaction act;
2969 sigemptyset (&caught_signals);
2970 for (i = 0; i < nsigs; i++)
2972 sigaction (sig[i], NULL, &act);
2973 if (act.sa_handler != SIG_IGN)
2974 sigaddset (&caught_signals, sig[i]);
2977 act.sa_handler = sighandler;
2978 act.sa_mask = caught_signals;
2981 for (i = 0; i < nsigs; i++)
2982 if (sigismember (&caught_signals, sig[i]))
2983 sigaction (sig[i], &act, NULL);
2985 for (i = 0; i < nsigs; i++)
2986 if (signal (sig[i], SIG_IGN) != SIG_IGN)
2988 signal (sig[i], sighandler);
2989 siginterrupt (sig[i], 1);
2994 /* The signal mask is known, so it is safe to invoke exit_cleanup. */
2995 atexit (exit_cleanup);
2997 gkey.sword = gkey.eword = SIZE_MAX;
2999 gkey.translate = NULL;
3000 gkey.numeric = gkey.general_numeric = gkey.random = gkey.version = false;
3001 gkey.month = gkey.reverse = false;
3002 gkey.skipsblanks = gkey.skipeblanks = false;
3004 files = xnmalloc (argc, sizeof *files);
3008 /* Parse an operand as a file after "--" was seen; or if
3009 pedantic and a file was seen, unless the POSIX version
3010 predates 1003.1-2001 and -c was not seen and the operand is
3011 "-o FILE" or "-oFILE". */
3015 || (posixly_correct && nfiles != 0
3016 && ! (obsolete_usage
3019 && argv[optind][0] == '-' && argv[optind][1] == 'o'
3020 && (argv[optind][2] || optind + 1 != argc)))
3021 || ((c = getopt_long (argc, argv, short_options,
3027 files[nfiles++] = argv[optind++];
3033 if (optarg[0] == '+')
3035 bool minus_pos_usage = (optind != argc && argv[optind][0] == '-'
3036 && ISDIGIT (argv[optind][1]));
3037 obsolete_usage |= minus_pos_usage & ~posixly_correct;
3040 /* Treat +POS1 [-POS2] as a key if possible; but silently
3041 treat an operand as a file if it is not a valid +POS1. */
3042 key = key_init (&key_buf);
3043 s = parse_field_count (optarg + 1, &key->sword, NULL);
3045 s = parse_field_count (s + 1, &key->schar, NULL);
3046 if (! (key->sword | key->schar))
3047 key->sword = SIZE_MAX;
3048 if (! s || *set_ordering (s, key, bl_start))
3052 if (minus_pos_usage)
3054 char const *optarg1 = argv[optind++];
3055 s = parse_field_count (optarg1 + 1, &key->eword,
3056 N_("invalid number after `-'"));
3058 s = parse_field_count (s + 1, &key->echar,
3059 N_("invalid number after `.'"));
3060 if (*set_ordering (s, key, bl_end))
3061 badfieldspec (optarg1,
3062 N_("stray character in field spec"));
3069 files[nfiles++] = optarg;
3073 c = XARGMATCH ("--sort", optarg, sort_args, sort_types);
3089 set_ordering (str, &gkey, bl_both);
3095 ? XARGMATCH ("--check", optarg, check_args, check_types)
3100 if (checkonly && checkonly != c)
3101 incompatible_options ("cC");
3105 case COMPRESS_PROGRAM_OPTION:
3106 if (compress_program && !STREQ (compress_program, optarg))
3107 error (SORT_FAILURE, 0, _("multiple compress programs specified"));
3108 compress_program = optarg;
3111 case FILES0_FROM_OPTION:
3112 files_from = optarg;
3116 key = key_init (&key_buf);
3119 s = parse_field_count (optarg, &key->sword,
3120 N_("invalid number at field start"));
3123 /* Provoke with `sort -k0' */
3124 badfieldspec (optarg, N_("field number is zero"));
3128 s = parse_field_count (s + 1, &key->schar,
3129 N_("invalid number after `.'"));
3132 /* Provoke with `sort -k1.0' */
3133 badfieldspec (optarg, N_("character offset is zero"));
3136 if (! (key->sword | key->schar))
3137 key->sword = SIZE_MAX;
3138 s = set_ordering (s, key, bl_start);
3141 key->eword = SIZE_MAX;
3147 s = parse_field_count (s + 1, &key->eword,
3148 N_("invalid number after `,'"));
3151 /* Provoke with `sort -k1,0' */
3152 badfieldspec (optarg, N_("field number is zero"));
3155 s = parse_field_count (s + 1, &key->echar,
3156 N_("invalid number after `.'"));
3159 /* `-k 2,3' is equivalent to `+1 -3'. */
3162 s = set_ordering (s, key, bl_end);
3165 badfieldspec (optarg, N_("stray character in field spec"));
3174 specify_nmerge (oi, c, optarg);
3178 if (outfile && !STREQ (outfile, optarg))
3179 error (SORT_FAILURE, 0, _("multiple output files specified"));
3183 case RANDOM_SOURCE_OPTION:
3184 if (random_source && !STREQ (random_source, optarg))
3185 error (SORT_FAILURE, 0, _("multiple random sources specified"));
3186 random_source = optarg;
3194 specify_sort_size (oi, c, optarg);
3199 char newtab = optarg[0];
3201 error (SORT_FAILURE, 0, _("empty tab"));
3204 if (STREQ (optarg, "\\0"))
3208 /* Provoke with `sort -txx'. Complain about
3209 "multi-character tab" instead of "multibyte tab", so
3210 that the diagnostic's wording does not need to be
3211 changed once multibyte characters are supported. */
3212 error (SORT_FAILURE, 0, _("multi-character tab %s"),
3216 if (tab != TAB_DEFAULT && tab != newtab)
3217 error (SORT_FAILURE, 0, _("incompatible tabs"));
3223 add_temp_dir (optarg);
3231 /* Accept and ignore e.g. -y0 for compatibility with Solaris 2.x
3232 through Solaris 7. It is also accepted by many non-Solaris
3233 "sort" implementations, e.g., AIX 5.2, HP-UX 11i v2, IRIX 6.5.
3234 -y is marked as obsolete starting with Solaris 8 (1999), but is
3235 still accepted as of Solaris 10 prerelease (2004).
3237 Solaris 2.5.1 "sort -y 100" reads the input file "100", but
3238 emulate Solaris 8 and 9 "sort -y 100" which ignores the "100",
3239 and which in general ignores the argument after "-y" if it
3240 consists entirely of digits (it can even be empty). */
3241 if (optarg == argv[optind - 1])
3244 for (p = optarg; ISDIGIT (*p); p++)
3246 optind -= (*p != '\0');
3254 case_GETOPT_HELP_CHAR;
3256 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
3259 usage (SORT_FAILURE);
3267 /* When using --files0-from=F, you may not specify any files
3268 on the command-line. */
3271 error (0, 0, _("extra operand %s"), quote (files[0]));
3272 fprintf (stderr, "%s\n",
3273 _("file operands cannot be combined with --files0-from"));
3274 usage (SORT_FAILURE);
3277 if (STREQ (files_from, "-"))
3281 stream = fopen (files_from, "r");
3283 error (SORT_FAILURE, errno, _("cannot open %s for reading"),
3284 quote (files_from));
3287 readtokens0_init (&tok);
3289 if (! readtokens0 (stream, &tok) || fclose (stream) != 0)
3290 error (SORT_FAILURE, 0, _("cannot read file names from %s"),
3291 quote (files_from));
3299 for (i = 0; i < nfiles; i++)
3301 if (STREQ (files[i], "-"))
3302 error (SORT_FAILURE, 0, _("when reading file names from stdin, "
3303 "no file name of %s allowed"),
3305 else if (files[i][0] == '\0')
3307 /* Using the standard `filename:line-number:' prefix here is
3308 not totally appropriate, since NUL is the separator, not NL,
3309 but it might be better than nothing. */
3310 unsigned long int file_number = i + 1;
3311 error (SORT_FAILURE, 0,
3312 _("%s:%lu: invalid zero-length file name"),
3313 quotearg_colon (files_from), file_number);
3318 error (SORT_FAILURE, 0, _("no input from %s"),
3319 quote (files_from));
3322 /* Inheritance of global options to individual keys. */
3323 for (key = keylist; key; key = key->next)
3327 || (key->skipsblanks
3333 | key->general_numeric
3336 key->ignore = gkey.ignore;
3337 key->translate = gkey.translate;
3338 key->skipsblanks = gkey.skipsblanks;
3339 key->skipeblanks = gkey.skipeblanks;
3340 key->month = gkey.month;
3341 key->numeric = gkey.numeric;
3342 key->general_numeric = gkey.general_numeric;
3343 key->random = gkey.random;
3344 key->reverse = gkey.reverse;
3345 key->version = gkey.version;
3348 need_random |= key->random;
3351 if (!keylist && (gkey.ignore
3353 || (gkey.skipsblanks
3357 | gkey.general_numeric
3362 need_random |= gkey.random;
3365 check_ordering_compatibility ();
3367 reverse = gkey.reverse;
3371 randread_source = randread_new (random_source, MD5_DIGEST_SIZE);
3372 if (! randread_source)
3373 die (_("open failed"), random_source);
3376 if (temp_dir_count == 0)
3378 char const *tmp_dir = getenv ("TMPDIR");
3379 add_temp_dir (tmp_dir ? tmp_dir : DEFAULT_TMPDIR);
3384 static char *minus = (char *) "-";
3390 /* Need to re-check that we meet the minimum requirement for memory
3391 usage with the final value for NMERGE. */
3393 sort_size = MAX (sort_size, MIN_SORT_SIZE);
3398 error (SORT_FAILURE, 0, _("extra operand %s not allowed with -%c"),
3399 quote (files[1]), checkonly);
3403 static char opts[] = {0, 'o', 0};
3404 opts[0] = checkonly;
3405 incompatible_options (opts);
3408 /* POSIX requires that sort return 1 IFF invoked with -c or -C and the
3409 input is not properly sorted. */
3410 exit (check (files[0], checkonly) ? EXIT_SUCCESS : SORT_OUT_OF_ORDER);
3415 struct sortfile *sortfiles = xcalloc (nfiles, sizeof *sortfiles);
3418 for (i = 0; i < nfiles; ++i)
3419 sortfiles[i].name = files[i];
3421 merge (sortfiles, 0, nfiles, outfile);
3422 IF_LINT (free (sortfiles));
3425 sort (files, nfiles, outfile);
3427 if (have_read_stdin && fclose (stdin) == EOF)
3428 die (_("close failed"), "-");
3430 exit (EXIT_SUCCESS);