주어진 배열을 정렬하는 Shell Sort 함수와 배열에서 n번째로 큰 값을 찾는 Selection 함수를 C언어로 구현했다. 그리고 Main에서는 이 둘을 각각 이용해 배열의 중간값을 찾도록 테스트했다.
Shell Sort는 Gap을 어떻게 설정하느냐에 따라 성능이 달라진다. 여기서는 절반씩 줄여나가는 방법과 OEIS A036562 수열을 따르는 방법으로 구현했다.
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#define N 100000
#define SWAP(a, b, temp) (temp = a, a = b, b = temp)
// (For Test) Print all elements in the list.
void printList(int list[], int length)
{
for (int i = 0; i < length; i++)
printf("%8d %8d\n", i, list[i]);
}
// Generate a random list.
void getRandomList(int list[], int length)
{
while (length--)
list[length] = rand(); // 0 ~ 0x7fff(=32767)
}
// If the list is sorted, print 'Success'. If not, print 'Failure'.
void printListSorted(int list[], int length)
{
for (int i = 1; i < length; i++)
if (list[i] < list[i - 1])
{
printf("Failure\n");
printList(list, length);
return;
}
printf("Success\n");
}
// Shell Sort
void shellSort(int list[], int length)
{
int i, j, temp, gap;
gap = length / 2;
while (gap > 0)
{
for (i = gap; i < length; i++)
{
// insertion sort
temp = list[i];
for (j = i; j >= gap && list[j - gap] > temp; j -= gap)
list[j] = list[j - gap];
list[j] = temp;
}
gap /= 2;
}
}
// Shell Sort
void shellSort_sedgewick(int list[], int length)
{
int gap_sedgewick[] = { 1, 8, 23, 77, 281, 1073, 4193, 16577, 65921, 262913, 1050113, 4197377 };
int i, j, temp, gap, g = 11;
while (g--)
{
gap = gap_sedgewick[g];
for (i = gap; i < length; i++)
{
// insertion sort
temp = list[i];
for (j = i; j >= gap && list[j - gap] > temp; j -= gap)
list[j] = list[j - gap];
list[j] = temp;
}
}
}
// sort by median
void set_median(int list[5])
{
int temp;
// set to: list[1] > list[2]
if (list[1] < list[2])
SWAP(list[1], list[2], temp);
// set to: list[3] > list[4]
if (list[3] < list[4])
SWAP(list[3], list[4], temp);
// set to list[0] is bigger than median
if (list[1] > list[3])
{
SWAP(list[0], list[1], temp);
if (list[1] < list[2])
SWAP(list[1], list[2], temp);
}
else
{
SWAP(list[0], list[3], temp);
if (list[3] < list[4])
SWAP(list[3], list[4], temp);
}
if (list[1] > list[3])
{
if (list[2] < list[3])
SWAP(list[2], list[3], temp);
}
else
{
SWAP(list[1], list[3], temp);
if (list[3] > list[4])
SWAP(list[2], list[3], temp);
else
SWAP(list[2], list[4], temp);
}
}
// find n-th element
int selection(int list[], int length, int n)
{
int i, j, temp, median, length_temp = 5 * ((length + 4) / 5);
int *list_temp, *small_list, *big_list;
int small_length = 0, big_length = 0;
// init list_temp by copying elements and put dummies
list_temp = (int*)malloc(sizeof(int) * length_temp);
for (i = 0; i < length_temp; i++)
list_temp[i] = (i < length) ? list[i] : INT_MAX;
// base case
if (length < 6)
{
if (length < 1) return 0;
shellSort(list_temp, length);
median = list_temp[n];
free(list_temp);
return median;
}
// set median
for (i = 0; i < length_temp; i+= 5)
set_median(list_temp + i);
// sort with medians
for (i = 1; i < length_temp / 5; i++)
for (j = i * 5; j > 0; j -= 5)
{
// compare median
if (list_temp[j - 3] > list_temp[j + 2])
{
SWAP(list_temp[j - 5], list_temp[j + 0], temp);
SWAP(list_temp[j - 4], list_temp[j + 1], temp);
SWAP(list_temp[j - 3], list_temp[j + 2], temp);
SWAP(list_temp[j - 2], list_temp[j + 3], temp);
SWAP(list_temp[j - 1], list_temp[j + 4], temp);
}
else break;
}
small_list = (int*)malloc(sizeof(int) * (length_temp - length_temp / 10 * 3));
big_list = (int*)malloc(sizeof(int) * (length_temp - length_temp / 10 * 3));
// set median and its index
temp = ((length_temp - 1) / 10) * 5 + 2;
median = list_temp[temp];
// add elements that smaller than median
for (i = 2; i < temp; i += 5)
{
small_list[small_length++] = list_temp[i];
small_list[small_length++] = list_temp[i + 1];
small_list[small_length++] = list_temp[i + 2];
}
small_list[small_length++] = list_temp[temp + 1];
small_list[small_length++] = list_temp[temp + 2];
// add elements that bigger than median
for (i = temp + 5; i < length_temp; i += 5)
{
big_list[big_length++] = list_temp[i];
big_list[big_length++] = list_temp[i - 1];
big_list[big_length++] = list_temp[i - 2];
}
big_list[big_length++] = list_temp[temp - 1];
big_list[big_length++] = list_temp[temp - 2];
// add remaining elements by comparing with median
for (i = 0; i < temp - 2; i += 5)
{
if (list_temp[i] < median)
small_list[small_length++] = list_temp[i];
else
big_list[big_length++] = list_temp[i];
if (list_temp[i + 1] < median)
small_list[small_length++] = list_temp[i + 1];
else
big_list[big_length++] = list_temp[i + 1];
}
for (i = temp + 6; i < length_temp; i += 5)
{
if (list_temp[i] < median)
small_list[small_length++] = list_temp[i];
else
big_list[big_length++] = list_temp[i];
if (list_temp[i + 1] < median)
small_list[small_length++] = list_temp[i + 1];
else
big_list[big_length++] = list_temp[i + 1];
}
free(list_temp);
// find median recursively
if (small_length > n)
median = selection(small_list, small_length, n);
else if (small_length < n)
median = selection(big_list, big_length, n - small_length - 1);
free(small_list);
free(big_list);
return median;
}
int main(void)
{
int i, median;
int *list = (int*)malloc(sizeof(int) * N);
clock_t time;
void(*fp[2])(int[], int) = { shellSort, shellSort_sedgewick };
char fn[2][20] = { "shellSort", "shellSort_sedgewick" };
// Sort and record the time required.
for (i = 0; i < 2; i++)
{
getRandomList(list, N);
time = clock();
fp[i](list, N);
printf("%s: %.4lfs, ", fn[i], (clock() - time) / 1000.0);
printListSorted(list, N);
}
// find the median and check it
getRandomList(list, N);
median = selection(list, N, N / 2);
shellSort_sedgewick(list, N);
printf("median:\n\texpected: %d, result: %d", median, list[N / 2]);
free(list);
return 0;
}