IMPLEMENTATION MODULE Sort;
(*==============================================================
    Version  : 2.0 16 Sep 1990 C. Lins
    Compiler : Generic pc Modula-2
    Component: Tools - Sorting Utilities

    REVISION HISTORY
    v1.00  13-21 May 1989  C. Lins:
        Initial implementation

   v2.00  16 Sep 1990  C. Lins
     Created generic pc version
   (C) Copyright 1990 Charles A. Lins
==============================================================*)

FROM Relations IMPORT
   (*--type*)  Relation;

(*
This version of Insertion Sort is from Jon Bentley, "Writing
Efficient Programs", pg 64. The use of assignment to break the
swap operation into its components may not be applicable here
as our assignments involve the overhead of procedure calls.
*)

PROCEDURE InsertionSort  (    numItems : CARDINAL    (*--in   *);
                              compare  : CompareProc (*--in   *);
                              swap     : SwapProc    (*--in   *));

VAR        index : CARDINAL;
           jndex : CARDINAL;

BEGIN
   FOR index := 1 TO numItems-1 DO
       jndex := index;
       WHILE (jndex > 0) & (compare(jndex, jndex-1) = less) DO
           swap(jndex, jndex-1);
           DEC(jndex);
       END (*--while*);
   END (*--for*);
END InsertionSort;
(*--------------------*)

(*
A variation on Insertion Sort using a binary (instead of linear)
search of the array. This particular implementation is based on the one
given in Wirth []. While the algorithmic complexity is no better than
linear insertion sort, the actual execution time tends to be less due to
a reduction in the number of comparisons. This improvement does not hold
when the array is already initially sorted or when the array contains
a large number of identical values.
*)

PROCEDURE BinaryInsertionSort (    numItems : CARDINAL    (*--in   *);
                                   compare  : CompareProc (*--in   *);
                                   assign   : AssignProc  (*--in   *));

VAR        index  : CARDINAL;      (*--loop index over items of the array *)
           jndex  : CARDINAL;
           middle : CARDINAL;      (*--middle of the subarray *)
           left   : CARDINAL;      (*--left of of the subarray *)
           right  : CARDINAL;      (*--right of of the subarray *)

BEGIN
   FOR index := 1 TO numItems-1 DO
       assign(numItems, index);
       left := 0;
       right:= index;
       WHILE (left < right) DO
           middle := (left+right) DIV 2;
           IF (compare(middle, numItems) # greater) THEN
               left := middle + 1;
           ELSE
               right := middle;
           END (*--if*);
       END (*--while*);
       FOR jndex := index TO right+1 BY -1 DO
           assign(jndex, jndex-1);
       END (*--for*);
       assign(right, numItems);
   END (*--for*);
END BinaryInsertionSort;
(*--------------------*)

(*
The following is a delayed selection sort algorithm derived from Sedgewick
[pg. 95]. This implementation, while its performance is on the order of
O(n**2), in practice performs better than straight selection sort by
reducing the number of exchanges. The actual running time of the algorithm
varies little regardless of the input.
*)

PROCEDURE SelectionSort  (    numItems : CARDINAL    (*--in   *);
                              compare  : CompareProc (*--in   *);
                              swap     : SwapProc    (*--in   *));

VAR        index   : CARDINAL;     (*--loop index over items of the array *)
           jndex   : CARDINAL;     (*--inner loop index *)
           min     : CARDINAL;     (*--smallest item so far *)

BEGIN
   FOR index := 0 TO numItems-1 DO
       min := index;
       FOR jndex := index+1 TO numItems-1 DO
           IF (compare(jndex, min) = less) THEN
               min := jndex;
           END (*--if*);
       END (*--for*);
       swap(min, index);
   END (*--for*);
END SelectionSort;
(*--------------------*)

(*
ShellSort is a sort algorithm with a long history going back to 1959.
Sedgewick's version which dynamically calculates the offset based
on the equation: h(k-1) = 3*h(k)+1, giving the sequence 1, 4, 13, 40, 121,
364, 1093, 3280, 9841, .... This version has a minor enhancement avoiding
assignment of the temporary back into the same array location from which
it was extracted. Such an optimization may be acceptable here as the
assignment is accomplished using a procedure call ╤ a relatively expensive
operation. For an excellent discussion of this particular implementation
the reader is referred to [, pp. 97-99].
*)

PROCEDURE ShellSort      (    numItems : CARDINAL    (*--in   *);
                              compare  : CompareProc (*--in   *);
                              assign   : AssignProc  (*--in   *));

VAR   index : CARDINAL;
      jndex : INTEGER;
      h     : INTEGER;

BEGIN
   h := 1;
   REPEAT
       h := 3 * h + 1;
   UNTIL VAL(CARDINAL, h) > numItems;
   REPEAT
       h := h DIV 3;
       FOR index := h TO numItems-1 DO
           assign(numItems, index);
           jndex := index;
           LOOP
               IF (compare(jndex-h, numItems) = greater) THEN
                   assign(jndex, jndex-h);
                   DEC(jndex, h);
                   IF (jndex < h) THEN
                       EXIT (*--loop*);
                   END (*--if*);
               ELSE
                   EXIT (*--loop*);
               END (*--if*);
           END (*--loop*);
           IF (jndex # VAL(INTEGER, index)) THEN
               assign(jndex, numItems);
           END (*--if*);
       END (*--for*);
   UNTIL (h = 1);
END ShellSort;
(*--------------------*)

(*
HeapSort is an unusual sort algorithm. It initially constructs a priority
queue followed by repeated extraction of the maximum element from the
queue until the queue is empty. The heap data structure is used for the
priority queue and this shares the space with the array being sorted.
HeapSort has been proven to guarantee sorting N elements in time
proportional to N log N, regardless of the input. This is significant in
that there is no worst case, and therefore, no input sequence causing
HeapSort to run slower.

Both Bentley [] and Sedgewick [] devote considerable attention to the
priority queue data structure and heapsort.
*)

PROCEDURE HeapSort       (    numItems : CARDINAL    (*--in   *);
                              compare  : CompareProc (*--in   *);
                              swap     : SwapProc    (*--in   *));

VAR        index  : CARDINAL;

   PROCEDURE SiftUp (    index : CARDINAL (*--in   *);
                         upper : CARDINAL (*--in   *));

   VAR     jndex  : CARDINAL;

   BEGIN
       WHILE (2 * index) <= upper DO
           jndex := 2 * index;
           IF (jndex < upper) THEN
               IF (compare(jndex, jndex+1) = less) THEN
                   INC(jndex);
               END (*--if*);
           END (*--if*);
           IF (compare(index, jndex) = less) THEN
               swap(index, jndex);
               index := jndex;
           ELSE
               index := upper+1;
           END (*--if*);
       END (*--while*);
   END SiftUp;

BEGIN
  IF (numItems > 1) THEN
       (*-- construct heap --*)
       FOR index := numItems DIV 2 TO 1 BY -1 DO
           SiftUp(index, numItems);
       END (*--for*);

       (*-- repeatedly extract minimum from priority queue heap --*)
       FOR index := numItems-1 TO 1 BY -1 DO
           SiftUp(0, index);
           swap(0, index);
       END (*--for*);
   END (*--if*);
END HeapSort;
(*--------------------*)

(*
Quicksort ╤ the preeminent sort algorithm. Since its introduction in
1961 by C.A.R. Hoare [, ] this algorithm has been subjected to rigorous
mathematical analysis and empirical experiment []. Numerous suggestions
to Hoare's original implementation have been made over the years towards
improving Quicksort especially with regard to the worst case scenarios.
These are discussed in greater detail below and in the references.

This particular implementation has all the optimizations thrown in
(other than using an non-recursive implementation):
   Ñ use an outer while loop to switch to a faster algorithm for
       nearly sorted small arrays (in this case, Insertion sort). Cutoff
       can be any value between 5 and 30, though on our system a value
       of 9 or 10 gives the best results. This is confirmed by empirical
       studies [].
   Ñ call the simplier sort only once (this is slightly more efficient
       when the elements have repeated keys or with large random arrays).
   Ñ reduce the growth of the stack by processing the smaller of the
       two subarrays first ("tail-recursion removal"). This ensures that
       the stack grows to at most about lg N entries.
   Ñ reduce the time in the inner loops by using two indices moving
       towards one another.
   Ñ code the 'split' algorithm in-line instead of using a procedure
       call.
   Ñ instead of selecting a partitioning element at random (or simply
       choosing the first element in a subarray as the partitioning
       element) this variant uses the median-of-three optimization.
       Here we take the median of a small section of the subarray (three
       elements). These are chosen from the first element, the last element,
       and the middle element of the subarray. This should improve the
       running time in the worst-case scenarios.

The only data set on which this algorithm does very poorly is when all
the values in the array are the same (a very unlikely occurance).
*)

PROCEDURE QuickSort      (    numItems : CARDINAL    (*--in   *);
                              compare  : CompareProc (*--in   *);
                              assign   : AssignProc  (*--in   *);
                              swap     : SwapProc    (*--in   *);
                              cutoff   : INTEGER     (*--in   *));

   PROCEDURE InsertionSort (    lower : INTEGER (*--in   *);
                                upper : INTEGER (*--in   *));

   VAR     index : INTEGER;
           jndex : INTEGER;

   BEGIN
       FOR index := lower+1 TO upper DO
           jndex := index;
           WHILE (jndex > lower) & (compare(jndex, jndex-1) = less) DO
               swap(jndex, jndex-1);
               DEC(jndex);
           END (*--while*);
       END (*--for*);
   END InsertionSort;

   PROCEDURE quicksort (    lower : INTEGER (*--in   *);
                            upper : INTEGER (*--in   *));

   VAR     index : INTEGER;
           jndex : INTEGER;
           middle: INTEGER;

   BEGIN
       WHILE (upper - lower > cutoff ) DO
       
           (*--
               Choose a partitioning element by taking the median of the
             lower, middle, and upper elements of the subarray.
           --*)
           middle := (lower+upper) DIV 2;
           IF (compare(middle, upper) = greater) THEN
               swap(middle, upper);
           END (*--if*);
           IF (compare(middle, lower) = greater) THEN
               swap(middle, lower);
           END (*--if*);
           IF (compare(lower, upper) = greater) THEN
               swap(lower, upper);
           END (*--if*);
           index := lower;
           jndex := upper;
           assign(numItems, lower);
           
           (*-- split array in two --*)
           WHILE (index < jndex) DO
               WHILE (compare(jndex, numItems) = greater) DO
                   DEC(jndex);
               END (*--while*);
               assign(index, jndex);
               WHILE (index < jndex) & (compare(index, numItems) # greater) DO
                   INC(index);
               END (*--while*);
               assign(jndex, index);
           END (*--while*);
           assign(index, numItems);

           (*-- sort recursively, the smallest first --*)
           IF (index-lower < upper-index) THEN
               quicksort(lower, index-1);
               lower := index + 1;
           ELSE
               quicksort(index+1, upper);
               upper := index - 1;
           END (*--if*);
       END (*--while*);
   END quicksort;

BEGIN
  IF (numItems > 1) THEN
    quicksort(0, numItems-1);
       InsertionSort(0, numItems-1);
  END (*--if*);
END QuickSort;
(*--------------------*)

(*
BSort from CACM April 1985, pp 400-402 by Roger L. Wainwright. Far
superior to standard QuickSort on arrays that are already sorted or
nearly so. The idea is a combination of Bubble Sort (of all things!)
and Quicksort taking advantage of partially sorted subarrays. The
implementation below is a direct translation into Modula-2 from 
the Pascal version as given in the aforementioned article. 

The reader is referred to Wainwright's original article for a complete
discussion of the operation of this algorithm with a few caveats.
First, the performance results for Quicksort are unfair in that a
non-optimized implementation was obviously used (based on the number
of comparisons). Second, there was no comparative measurement to other
traditional sort algorithms such as Shellsort and Heapsort. The experimental
results shown earlier demonstrate that there are many cases where these
algorithms outperform BSort considerably. The only area where we found
BSort to be an improvement over an optimized implementation of Quicksort
(or other sort algorithms) was on small (<= 200 elements) arrays with a
sortedness ratio <= 2%.
*)

PROCEDURE BSort          (    numItems : CARDINAL    (*--in   *);
                              compare  : CompareProc (*--in   *);
                              assign   : AssignProc  (*--in   *);
                              swap     : SwapProc    (*--in   *));

   PROCEDURE bsort (    m : INTEGER      (*--in   *);
                        n : INTEGER      (*--in   *);
                        midKey : INTEGER (*--in   *));

   VAR     continue : BOOLEAN; (*-- false when partitioning process are completed *)
           leftFlag : BOOLEAN; (*-- true whenever left subfile is not in sorted order *)
           rightFlag: BOOLEAN; (*-- true whenever right subfile is not in sorted order *)
           index    : INTEGER; (*-- used to partition the subfiles so that at any time *)
           jndex    : INTEGER; (*-- k[index] < k[midKey] and k[jndex] >= k[midKey] *)
           size     : INTEGER; (*-- number of items in a subfile *)

   BEGIN
       IF (m < n) THEN
           assign(numItems, midKey);
           leftFlag    := FALSE;
           rightFlag   := FALSE;
           index       := m;
           jndex       := n;
           continue    := TRUE;
           WHILE continue DO
               WHILE (index # jndex) & (compare(index, numItems) = less) DO
                   (*--
                       build the left subfile ensuring that the
                       rightmost key is always the largest of the subfile
                   --*)
                   IF (index # m) & (compare(index-1, index) = greater) THEN
                       swap(index-1, index);
                       leftFlag := TRUE;
                   END (*--if*);
                   INC(index);
               END (*--while*);
           
               WHILE (index # jndex) & (compare(jndex, numItems) # less) DO
                   (*-- 
                       build the right subfile ensuring that the
                       leftmost key is always the smallest of the subfile
                   --*)
                   IF (jndex # n) & (compare(jndex, jndex+1) = greater) THEN
                       swap(jndex+1, jndex);
                       rightFlag := TRUE;
                   END (*--if*);
                   DEC(jndex);
               END (*--while*);
               
               IF (index # jndex) THEN
                   (*--
                       interchange k[index] from the left subfile with
                       k[jndex] from the right subfile
                    --*)
                   swap(jndex, index);
               ELSE
                   (*-- partitioning into left and right subfiles has been completed --*)
                   IF (compare(jndex, numItems) # less) THEN
                       (*-- check the right subfile to ensure that the first element, k[jndex] is the smallest --*)
                       IF (compare(jndex, jndex+1) = greater) THEN
                           swap(jndex+1, jndex);
                           rightFlag := TRUE;
                       END (*--if*);
                   ELSE
                       (*-- check the left subfile to ensure that the last element, k[index-1] is the largest --*)
                       IF (compare(index-1, index) = greater) THEN
                           swap(index-1, index);
                           leftFlag := TRUE;
                       END (*--if*);
                       IF (compare(index-2, index-1) = greater) THEN
                           swap(index-1, index-2);
                       END (*--if*);
                   END (*--if*);
                   continue := FALSE;
               END (*--if*);
           END (*--while*);
           
           (*-- process the left subfile --*)
           size := index - m;
           IF (size > 2) & leftFlag THEN (*-- subfile must have at least 3 elements and not be already sorted *)
               IF (size = 3) THEN                  (*-- special case of 3 elements --*)
                   (*-- place k[m] and k[m+1] in sorted order --*)
                   IF (compare(m, m+1) = greater) THEN
                       swap(m, m+1);
                   END (*--if*);
               ELSE
                   bsort(m, index-2, (m+index-2) DIV 2);
               END (*--if*);
           END (*--if*);
           
           (*-- process the left subfile --*)
           size := n - jndex + 1;
           IF (size > 2) & rightFlag THEN (*-- subfile must have at least 3 elements and not be already sorted *)
               IF (size = 3) THEN                   (*-- special case of 3 elements --*)
                   (*-- place k[j+1] and k[j+2] in sorted order --*)
                   IF (compare(jndex+1, jndex+2) = greater) THEN
                       swap(jndex+1, jndex+2);
                   END (*--if*);
               ELSE
                   bsort(jndex+1, n, (jndex+1+n) DIV 2);
               END (*--if*);
           END (*--if*);
       END (*--if*);
   END bsort;

BEGIN
  IF (numItems > 1) THEN
    bsort(0, numItems-1, numItems DIV 2);
  END (*--if*);
END BSort;
(*--------------------*)

(*
MeanSort derived from algorithm by Dalia Motzkin, MeanSort, Communications of the ACM 26 (4),
April 1983, 250-251. The author believes this algorithm is better than QuickSort, but
Wainwright contends that this isn't so. Obviously, this algorithm is only works for arrays
containing keys whose values have arithmetic operators applicable on them. In this version,
we assume that the array is of INTEGERs and that the sum of any given subarray does not
exceed MAX(LONGINT). We found that on large (2000 elements) random arrays with a sortedness
ratio >= 8% the running time for MeanSort outperformed BSort. In all other cases, BSort was
superior.
*)

PROCEDURE MeanSort       (    numItems : CARDINAL       (*--in   *);
                              compare  : CompareProc    (*--in   *);
                              swap     : SwapProc       (*--in   *);
                              value    : ValueProc      (*--in   *);
                              assign   : AssignItemProc (*--in   *));

   PROCEDURE meansort (    lower : INTEGER (*--in   *);
                           upper : INTEGER (*--in   *);
                           mean  : INTEGER (*--in   *));

   VAR     index    : INTEGER;
           jndex    : INTEGER;
           leftSum  : LONGINT;
           rightSum : LONGINT;

   BEGIN
       IF (lower < upper) THEN
           leftSum := 0;
           rightSum:= 0;
           index   := lower;
           jndex   := upper;
           assign(numItems, mean);
           LOOP
               WHILE (index # jndex) & (compare(index, numItems) = less) DO
                   INC(leftSum, value(index));
                   INC(index);
               END (*--while*);
               WHILE (index # jndex) & (compare(jndex, numItems) # less) DO
                   INC(rightSum, value(jndex));
                   DEC(jndex);
               END (*--while*);
               IF (index # jndex) THEN
                   swap(index, jndex);
               ELSE
                   EXIT (*--loop*);
               END (*--if*);
           END (*--loop*);
           IF (index = lower) THEN
               RETURN;
           END (*--if*);
           INC(rightSum, value(jndex));
           meansort(lower, index-1, VAL(INTEGER, leftSum DIV VAL(LONGINT, index - lower)));
           meansort(jndex, upper, VAL(INTEGER, rightSum DIV VAL(LONGINT, upper - jndex + 1)));
       END (*--if*);
   END meansort;

VAR        sum   : LONGINT;      (*-- sum of array values *)
           index : CARDINAL;       (*-- loop index over the array for computing initial mean *)

BEGIN
  IF (numItems > 1) THEN
       sum := 0;
       FOR index := 0 TO numItems-1 DO
           INC(sum, value(index));
       END (*--for*);
    meansort(0, numItems-1, VAL(INTEGER, sum DIV VAL(LONGINT, numItems)));
  END (*--if*);
END MeanSort;
(*--------------------*)

(*
From Wirth, Algorithms & Data Structures pg 104.
*)

PROCEDURE StraightMerge  (    numItems : CARDINAL    (*--in   *);
                              compare  : CompareProc (*--in   *);
                              assign   : AssignProc  (*--in   *));

VAR      index : CARDINAL;
         jndex : CARDINAL;
         kndex : CARDINAL;
         limit : CARDINAL;
         t     : CARDINAL;
         h, m, p, q, r : INTEGER;
         up : BOOLEAN;

BEGIN
  up := TRUE;
  p  := 1;
  REPEAT
   h := 1;
       m := numItems;
       IF up THEN
           index := 1;
           jndex := numItems;
           kndex := numItems + 1;
           limit := 2 * numItems;
       ELSE
           kndex := 1;
           limit := numItems;
           index := numItems + 1;
           jndex := 2 * numItems;
       END (*--if*);
       REPEAT
           (*-- merge a run from i- and j- sources into k-destination *)
           IF (m >= p) THEN
               q := p;
           ELSE
               q := m;
           END (*--if*);
           DEC(m, q);
           IF (m >= p) THEN
               r := p;
           ELSE
               r := m;
           END (*--if*);
           DEC(m, r);
           WHILE (q > 0) & (r > 0) DO
               IF (compare(index, jndex) = less) THEN
                   assign(kndex, index);
                   kndex := VAL(INTEGER, kndex) + h;
                   INC(index);
                   DEC(q);
               ELSE
                   assign(kndex, jndex);
                   kndex := VAL(INTEGER, kndex) + h;
                   DEC(jndex);
                   DEC(r);
               END (*--if*);
           END (*--while*);
   
           WHILE (r > 0) DO
               assign(kndex, jndex);
               kndex := VAL(INTEGER, kndex) + h;
               DEC(jndex);
               DEC(r);
           END (*--while*);
   
           WHILE (q > 0) DO
               assign(kndex, index);
               kndex := VAL(INTEGER, kndex) + h;
               INC(index);
               DEC(q);
           END (*--while*);
           h := -h;
           t := kndex;
           kndex := limit;
           limit := t;
       UNTIL m = 0;
       up := ~up;
       p  := 2 * p;
   UNTIL VAL(CARDINAL, p) >= numItems;
   IF ~up THEN
       FOR index := 1 TO numItems DO
           assign(index, index + numItems);
       END (*--for*);
   END (*--if*);
END StraightMerge;
(*--------------------*)

(*
Gonnet's version of Bubble Sort. More efficient than the usual version
when the array is in random order or ordered in reverse. When the
array is ordered, or contains the same value in every position of the
array, it uses more time than other implementations. The author does not
recommend this algorithm for any purpose.
*)

PROCEDURE BubbleSort     (    numItems : CARDINAL    (*--in   *);
                              compare  : CompareProc (*--in   *);
                              swap     : SwapProc    (*--in   *));

VAR        index   : CARDINAL;     (*--loop index over items of the array *)
           jndex   : CARDINAL;     (*--lower bound at start of each FOR loop *)
           lower   : CARDINAL;     (*--lower bound of the array to sort *)
           upper   : CARDINAL;     (*--upper bound of the array to sort *)

BEGIN
   lower := 0;
   upper := numItems-1;
   WHILE (upper > lower) DO
       jndex := lower;
       FOR index := lower TO upper-1 DO
           IF (compare(index, index+1) = greater) THEN
               swap(index, index+1);
           END (*--if*);
           jndex := index;
       END (*--for*);
       upper := jndex;
   END (*--while*);
END BubbleSort;
(*--------------------*)

(*
Gonnet's version of Shaker Sort (double-direction bubble sort). This algorithm
performs reasonably well in comparison with the standard Bubblesort. For small,
nearly ordered arrays (<= 200 elements with a sortedness ratio <= 2%) the performance
was better than Quicksort but not as good as Insertion sort.
*)

PROCEDURE ShakerSort     (    numItems : CARDINAL    (*--in   *);
                              compare  : CompareProc (*--in   *);
                              swap     : SwapProc    (*--in   *));

VAR        index   : CARDINAL;     (*--loop index over items of the array *)
           jndex   : CARDINAL;     (*--lower bound at start of each FOR loop *)
           lower   : CARDINAL;     (*--lower bound of the array to sort *)
           upper   : CARDINAL;     (*--upper bound of the array to sort *)

BEGIN
   lower := 0;
   upper := numItems-1;
   WHILE (upper > lower) DO
       jndex := lower;
       FOR index := lower TO upper-1 DO
           IF (compare(index, index+1) = greater) THEN
               swap(index, index+1);
               jndex := index;
           END (*--if*);
       END (*--for*);
       upper := jndex;
       FOR index := upper TO lower+1 BY -1 DO
           IF (compare(index, index-1) = less) THEN
               swap(index, index-1);
               jndex := index;
           END (*--if*);
       END (*--for*);
       lower := jndex;
   END (*--while*);
END ShakerSort;
(*--------------------*)

END Sort.