본문 바로가기

학업 정리

Shell Sort & Selection

주어진 배열을 정렬하는 Shell Sort 함수와 배열에서 n번째로 큰 값을 찾는 Selection 함수를 C언어로 구현했다. 그리고 Main에서는 이 둘을 각각 이용해 배열의 중간값을 찾도록 테스트했다.

 

Shell Sort는 Gap을 어떻게 설정하느냐에 따라 성능이 달라진다. 여기서는 절반씩 줄여나가는 방법과 OEIS A036562 수열을 따르는 방법으로 구현했다.

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#define N 100000
#define SWAP(a, b, temp) (temp = a, a = b, b = temp)

// (For Test) Print all elements in the list.
void printList(int list[], int length)
{
	for (int i = 0; i < length; i++)
		printf("%8d %8d\n", i, list[i]);
}

// Generate a random list.
void getRandomList(int list[], int length)
{
	while (length--)
		list[length] = rand(); // 0 ~ 0x7fff(=32767)
}

// If the list is sorted, print 'Success'. If not, print 'Failure'.
void printListSorted(int list[], int length)
{
	for (int i = 1; i < length; i++)
		if (list[i] < list[i - 1])
		{
			printf("Failure\n");
			printList(list, length);
			return;
		}
	printf("Success\n");
}

// Shell Sort
void shellSort(int list[], int length)
{
	int i, j, temp, gap;

	gap = length / 2;
	while (gap > 0)
	{
		for (i = gap; i < length; i++)
		{
			// insertion sort
			temp = list[i];
			for (j = i; j >= gap && list[j - gap] > temp; j -= gap)
				list[j] = list[j - gap];
			list[j] = temp;
		}
		gap /= 2;
	}
}

// Shell Sort
void shellSort_sedgewick(int list[], int length)
{
	int gap_sedgewick[] = { 1, 8, 23, 77, 281, 1073, 4193, 16577, 65921, 262913, 1050113, 4197377 };
	int i, j, temp, gap, g = 11;
	
	while (g--)
	{
		gap = gap_sedgewick[g];
		for (i = gap; i < length; i++)
		{
			// insertion sort
			temp = list[i];
			for (j = i; j >= gap && list[j - gap] > temp; j -= gap)
				list[j] = list[j - gap];
			list[j] = temp;
		}
	}
}

// sort by median
void set_median(int list[5])
{
	int temp;

	// set to: list[1] > list[2]
	if (list[1] < list[2])
		SWAP(list[1], list[2], temp);

	// set to: list[3] > list[4]
	if (list[3] < list[4])
		SWAP(list[3], list[4], temp);

	// set to list[0] is bigger than median
	if (list[1] > list[3])
	{
		SWAP(list[0], list[1], temp);
		if (list[1] < list[2])
			SWAP(list[1], list[2], temp);
	}
	else
	{
		SWAP(list[0], list[3], temp);
		if (list[3] < list[4])
			SWAP(list[3], list[4], temp);
	}

	if (list[1] > list[3])
	{
		if (list[2] < list[3])
			SWAP(list[2], list[3], temp);
	}
	else
	{
		SWAP(list[1], list[3], temp);
		if (list[3] > list[4])
			SWAP(list[2], list[3], temp);
		else
			SWAP(list[2], list[4], temp);
	}
}

// find n-th element
int selection(int list[], int length, int n)
{
	int i, j, temp, median, length_temp = 5 * ((length + 4) / 5);
	int *list_temp, *small_list, *big_list;
	int small_length = 0, big_length = 0;

	// init list_temp by copying elements and put dummies
	list_temp = (int*)malloc(sizeof(int) * length_temp);
	for (i = 0; i < length_temp; i++)
		list_temp[i] = (i < length) ? list[i] : INT_MAX;

	// base case
	if (length < 6)
	{
		if (length < 1) return 0;

		shellSort(list_temp, length);
		median = list_temp[n];
		free(list_temp);
		return median;
	}

	// set median
	for (i = 0; i < length_temp; i+= 5)
		set_median(list_temp + i);
	
	// sort with medians
	for (i = 1; i < length_temp / 5; i++)
		for (j = i * 5; j > 0; j -= 5)
		{
			// compare median
			if (list_temp[j - 3] > list_temp[j + 2])
			{
				SWAP(list_temp[j - 5], list_temp[j + 0], temp);
				SWAP(list_temp[j - 4], list_temp[j + 1], temp);
				SWAP(list_temp[j - 3], list_temp[j + 2], temp);
				SWAP(list_temp[j - 2], list_temp[j + 3], temp);
				SWAP(list_temp[j - 1], list_temp[j + 4], temp);
			}
			else break;
		}

	small_list = (int*)malloc(sizeof(int) * (length_temp - length_temp / 10 * 3));
	big_list   = (int*)malloc(sizeof(int) * (length_temp - length_temp / 10 * 3));

	// set median and its index
	temp = ((length_temp - 1) / 10) * 5 + 2;
	median = list_temp[temp];

	// add elements that smaller than median
	for (i = 2; i < temp; i += 5)
	{
		small_list[small_length++] = list_temp[i];
		small_list[small_length++] = list_temp[i + 1];
		small_list[small_length++] = list_temp[i + 2];
	}
	small_list[small_length++] = list_temp[temp + 1];
	small_list[small_length++] = list_temp[temp + 2];

	// add elements that bigger than median
	for (i = temp + 5; i < length_temp; i += 5)
	{
		big_list[big_length++] = list_temp[i];
		big_list[big_length++] = list_temp[i - 1];
		big_list[big_length++] = list_temp[i - 2];
	}
	big_list[big_length++] = list_temp[temp - 1];
	big_list[big_length++] = list_temp[temp - 2];


	// add remaining elements by comparing with median
	for (i = 0; i < temp - 2; i += 5)
	{
		if (list_temp[i] < median)
			small_list[small_length++] = list_temp[i];
		else
			big_list[big_length++] = list_temp[i];

		if (list_temp[i + 1] < median)
			small_list[small_length++] = list_temp[i + 1];
		else
			big_list[big_length++] = list_temp[i + 1];
	}
	for (i = temp + 6; i < length_temp; i += 5)
	{
		if (list_temp[i] < median)
			small_list[small_length++] = list_temp[i];
		else
			big_list[big_length++] = list_temp[i];
		if (list_temp[i + 1] < median)
			small_list[small_length++] = list_temp[i + 1];
		else
			big_list[big_length++] = list_temp[i + 1];
	}

	free(list_temp);

	// find median recursively
	if (small_length > n)
		median = selection(small_list, small_length, n);
	else if (small_length < n)
		median = selection(big_list, big_length, n - small_length - 1);

	free(small_list);
	free(big_list);

	return median;
}

int main(void)
{
	int i, median;
	int *list = (int*)malloc(sizeof(int) * N);
	clock_t time;
	void(*fp[2])(int[], int) = { shellSort, shellSort_sedgewick };
	char fn[2][20] = { "shellSort", "shellSort_sedgewick" };

	// Sort and record the time required.
	for (i = 0; i < 2; i++)
	{
		getRandomList(list, N);
		time = clock();
		fp[i](list, N);
		printf("%s: %.4lfs, ", fn[i], (clock() - time) / 1000.0);
		printListSorted(list, N);
	}

	// find the median and check it
	getRandomList(list, N);
	median = selection(list, N, N / 2);
	shellSort_sedgewick(list, N);
	printf("median:\n\texpected: %d, result: %d", median, list[N / 2]);

	free(list);
	return 0;
}