본문 바로가기

학업 정리

인공지능 과제 - 단일 퍼셉트론 구현

코드 설명

​ 3개의 perceptron을 만들어 각각 AND-gate, OR-gate, XOR-gate의 동작을 하도록 학습시킨다.

​ main함수에서는 perceptron의 학습에 사용될 데이터를 정의하고 perceptron을 만들어 학습시킨다.

​ Perceptron.h 헤더파일에서 Perceptron을 class로 정의했으며 Calculate, Train 등 여러 메소드가 구현되어있다. Perceptron의 weight와 threshold는 constructor에서 아래 범위의 랜덤한 실수(float)로 초기화된다.

  • weight: -1이상 1이하
  • threshold: 0초과 1이하

코드

src.c

/*

    2019-2 인공지능 과제2 코드
    2015920003 컴퓨터과학부 김건호

*/
#include<stdio.h>
#include<stdlib.h>
#include<time.h>
#include "Perceptron.h"

int main(void)
{
    // Dataset for training: all four 2-bit input combinations.
    // Automatic arrays are used instead of new[] so nothing leaks
    // (the original new int[2] allocations were never deleted).
    int data[4][2] = {
        { 0, 0 },
        { 0, 1 },
        { 1, 0 },
        { 1, 1 }
    };
    int *training_input[4] = { data[0], data[1], data[2], data[3] };

    // Expected outputs for each gate.
    int training_output_AND[4] = { 0, 0, 0, 1 };
    int training_output_OR[4]  = { 0, 1, 1, 1 };
    // BUGFIX: the XOR truth table is {0, 1, 1, 0};
    // the previous { 1, 0, 0, 1 } was XNOR.
    int training_output_XOR[4] = { 0, 1, 1, 0 };

    // To set perceptrons's weights and threshold randomly, perform srand() on main()
    srand((unsigned int)time(NULL));
    rand(); // discard first value (low entropy on some implementations)

    // Initialize 2-input perceptrons
    Perceptron perceptron_AND(2);
    Perceptron perceptron_OR(2);
    Perceptron perceptron_XOR(2);

    // Train AND-gate
    printf("Train AND\n");
    perceptron_AND.Train(4, training_input, training_output_AND);

    // Train OR-gate
    printf("Train OR\n");
    perceptron_OR.Train(4, training_input, training_output_OR);

    // Train XOR-gate (XOR is not linearly separable, so a single
    // perceptron is expected to end training unsuccessfully)
    printf("Train XOR\n");
    perceptron_XOR.Train(4, training_input, training_output_XOR);

    // Wait for end
    printf("Enter to exit\n");
    getchar();
    return 0;
}

Perceptron.h

#define INITIAL_LEARNING_RATE 0.26f
// A single perceptron (one neuron) with a binary-step activation.
// Weights and threshold are initialized randomly by the constructor:
// weights in [-1, 1], threshold in (0, 1].
class Perceptron
{
private:
    /* Member Variables */
    int n;                           // dimension of perceptron (number of inputs)
    float *weights;                  // weight values, one per input
    float *before_weights;           // snapshot of weights taken before each training round
    float threshold;                 // activation threshold (acts as a negative bias)
    float C = INITIAL_LEARNING_RATE; // learning rate (halved when a training round stalls)

    /* Private Methods */
    float *MallocForWeights();       // allocate an n-element float array
    void SaveWeights();              // copy weights into before_weights
    int CalculateTrainingDataset(int length, int **inputs, int *outputs, bool feedback);
                                     // count misclassified samples; optionally adjust weights
    void Feedback(int *input, int output, int expect);
                                     // apply the perceptron learning rule to the weights

public:
    /* Public Methods */
    Perceptron(int n);               // initialize weights/threshold randomly
    int Calculate(int *input);       // binary-step output (0 or 1) for one input vector
    int Train(int length, int **input, int *output);  // 0 if trained, 1 otherwise
    void PrintWeights();             // print all weight values to stdout
};

/*
    Description:
        Allocate storage for the n weight values,
        retrying until the allocation succeeds.

    Parameters:
        (none)

    Return:
        (address of an n-element float array)
*/
float *Perceptron::MallocForWeights()
{
    float *buffer;
    do {
        buffer = (float *)malloc(sizeof(float) * n);
    } while (buffer == NULL);
    return buffer;
}

/*
    Description:
        Snapshot the current weight values into before_weights
        so that Train() can detect a stalled round.

    Parameters:
        (none)

    Return:
        (none)
*/
void Perceptron::SaveWeights()
{
    int idx = 0;
    while (idx < n)
    {
        before_weights[idx] = weights[idx];
        ++idx;
    }
}

/*
    Description:
        Run the perceptron over the whole training dataset and count
        misclassified samples. If 'feedback' is true, the weights are
        readjusted after each misclassification.

    Parameters:
        length: length of training-dataset
        inputs: [length][n]
        outputs: [length]
        feedback: if true, readjust the weight values on error

    Return:
        (the number of errors)
*/
int Perceptron::CalculateTrainingDataset(int length, int **inputs, int *outputs, bool feedback)
{
    int errors = 0;

    // evaluate every training sample in order
    for (int idx = 0; idx < length; idx++)
    {
        int *sample   = inputs[idx];
        int predicted = Calculate(sample);
        int desired   = outputs[idx];

        // correctly classified: nothing to do
        if (predicted == desired)
            continue;

        // misclassified: count it, and optionally learn from it
        errors++;
        if (feedback)
            Feedback(sample, predicted, desired);
    }

    return errors;
}

/*
    Description:
        Apply the perceptron learning rule:
            w[i] += C * (expect - output) * input[i]

    Parameters:
        input: input of training-data
        output: calculated output
        expect: expected output

    Return:
        (none)
*/
void Perceptron::Feedback(int *input, int output, int expect)
{
    // (expect - output) is +1 or -1 for a misclassified sample
    float delta = C * (expect - output);
    for (int i = 0; i < n; i++)
        weights[i] += delta * input[i];
}

/*
    Description:
        Constructor.
        Allocates the weight buffers and initializes the weights
        and threshold randomly:
            weights: [-1, 1]
            threshold: (0, 1]

    Parameters:
        N: dimension of perceptron

    Return:
        (none)
*/
Perceptron::Perceptron(int N)
{
    n = N;
    weights = MallocForWeights();
    before_weights = MallocForWeights();

    // rand() % 1001 yields 0..1000; /500 maps to 0..2; -1 shifts to [-1, 1]
    int i = 0;
    while (i < n)
    {
        weights[i] = (rand() % 1001) / 500.0f - 1;
        i++;
    }
    // rand() % 1000 + 1 yields 1..1000; /1000 maps to (0, 1]
    threshold = (rand() % 1000 + 1) / 1000.0f;
}

/*
    Description:
        Compute the binary-step output for the given input vector:
        1 if the weighted sum exceeds the threshold, else 0.

    Parameters:
        input: [n]

    Return:
        0
        1
*/
int Perceptron::Calculate(int *input)
{
    // start at -threshold so the comparison below is against zero
    float activation = -threshold;
    for (int i = 0; i < n; ++i)
        activation += weights[i] * input[i];

    if (activation > 0)
        return 1;
    return 0;
}

/*
    Description:
        Train perceptron with training dataset.
        Every repeated round:
            1. Save current weight values.
            2. Calculate and feedback (readjust the weights).
            3. Count the number of errors.
            4. If error is 0, end.
            5. If not, repeat above process (and reduce the learning
               rate when the round produced no weight change).

    Parameters:
        length: length of training-dataset
        inputs: [length][n]
        outputs: [length]

    Return:
        0: trained (all samples classified correctly)
        1: not trained (no data, or training stalled)
*/
int Perceptron::Train(int length, int **inputs, int *outputs)
{
    bool trainable;
    int i, error, round;

    // handle exception: nothing to train on
    if (length < 1)
    {
        printf("\nNo data to train.\n\n");
        return 1;
    }

    // print threshold, weights, and errors before training
    printf("threshold: %8f\n", threshold);
    PrintWeights();
    error = CalculateTrainingDataset(length, inputs, outputs, false);
    printf("\nError: %d\n\n", error);

    // train
    round = 1;
    trainable = true;
    while (trainable)
    {
        printf("[Round %03d]\t", round++);

        // save current weight values (used below to detect a stalled round)
        SaveWeights();

        // one pass over the dataset with feedback (weights may change)
        CalculateTrainingDataset(length, inputs, outputs, true);

        // print readjusted weight values
        PrintWeights();

        // second pass without feedback, just to count remaining errors
        error = CalculateTrainingDataset(length, inputs, outputs, false);

        // print the number of errors
        printf("\tError: %d\n", error);

        // end training when all is correct :)
        if (error == 0)
        {
            printf("\nTrained successfully.\n\n");
            return 0;
        }
        else
        {
            // check if any weight value changed during this round
            trainable = false;
            for (i = 0; i < n; i++)
                if (weights[i] != before_weights[i])
                    trainable = true;

            /*
                Note:
                    if you want to apply 'Constant Learning Rate',
                    just perform 'continue' right here.
            */
            // continue;

            // or apply 'Dynamic Learning Rate'
            if (!trainable)
            {
                // if no weight was changed,
                // halve the learning rate and retry
                C /= 2;
                /*
                    If the value of C is small enough for
                    the computer to recognize it as zero,
                    training fails (loop exits with trainable == false).
                */
                if (C == 0) C = INITIAL_LEARNING_RATE; // training may end... :(
                else
                {
                    printf("\n[Reduce Learning Rate: %g]\n\n", C);
                    trainable = true;
                }
            }
        }
    }

    printf("\nNot trained.\n\n");
    return 1;
}

/*
    Description:
        Print every weight value to stdout on one line
        (no trailing newline).

    Parameters:
        (none)

    Return:
        (none)
*/
void Perceptron::PrintWeights()
{
    printf("weight:");
    int idx = 0;
    while (idx < n)
    {
        printf(" %+8f", weights[idx]);
        ++idx;
    }
}

참고

  • Perceptron.h에서 Train함수 내용 중 // continue; 를 주석해제하면 고정된 learning rate를 사용한다. 이 경우 learning rate에 비해 threshold가 낮을 때 AND-gate 학습에 실패할 수 있다.
  • 위 문제를 해결하면서 동시에 학습 속도를 높이기 위해, 학습 과정에서 learning rate가 감소할 수 있게 했다. 하지만, 감소한 learning rate를 리셋하는 코드는 누락되어 있다.
  • 현재 learning rate는 항상 반으로 줄어든다. 줄어드는 폭이 너무 크거나 작으면 학습횟수가 늘어날 수 있다.
  • XOR-gate는 2차원 단일 퍼셉트론으로 구현할 수 없다.
  • Activation Function은 Binary Step을 썼다. 이것은 과제에서 정해진 사항이다.