#include #include "ImC/imc_kernel.h" #include "ImC/imc_extension.h" #define INPUT_SIZE 20 #define KERNEL_SIZE 6 #define OUTPUT_SIZE (INPUT_SIZE - KERNEL_SIZE + 1) static int input[INPUT_SIZE][INPUT_SIZE] = { [0 ... INPUT_SIZE - 1] = {[0 ... INPUT_SIZE - 1] = 2}}; static int kernel[KERNEL_SIZE][KERNEL_SIZE] = { [0 ... KERNEL_SIZE - 1] = {[0 ... KERNEL_SIZE - 1] = 2}}; void vConv2d() { // int IMC_REPEAT = 100; int IMC_REPEAT = imcBENCH_REPEAT_COUNT; int output[OUTPUT_SIZE * OUTPUT_SIZE]; IMC_UNROLL(8) for (int imc_repeat = 0; imc_repeat < IMC_REPEAT; imc_repeat++) { for (int _i = 0; _i < OUTPUT_SIZE * OUTPUT_SIZE; _i++) { int i = _i / OUTPUT_SIZE; int j = _i % OUTPUT_SIZE; int sum = 0; IMC_UNROLL(KERNEL_SIZE) for (int ii = 0; ii < KERNEL_SIZE; ii++) { IMC_UNROLL(KERNEL_SIZE) for (int jj = 0; jj < KERNEL_SIZE; jj++) { int input_i = i + ii; int input_j = j + jj; sum += input[input_i][input_j] * kernel[ii][jj]; __asm(" nop"); } __asm(" nop"); } // output[i * OUTPUT_SIZE + j] = sum; output[_i] = sum; __asm(" nop"); } // } } int sum = 0; // #pragma clang loop unroll(disable) for (int _i = 0; _i < OUTPUT_SIZE * OUTPUT_SIZE; _i++) { printf("%d ", output[_i]); if (_i % OUTPUT_SIZE == OUTPUT_SIZE - 1) { printf("\r\n"); } sum += output[_i]; } printf("\r\n"); printf("(OUT) sum: %d\r\n", sum); }