Browse Source

Add support for loop splitting

Youngbin Kim 1 year ago
parent
commit
8203ee4330

+ 10 - 1
CMakeLists.txt

@@ -13,6 +13,7 @@ set(IMC_ENABLE_EXTENSION 1 CACHE BOOL "")
 set(IMC_USE_CHECKPOINT_PASS_COUNTER 1 CACHE BOOL "")
 set(IMC_USE_CHECKPOINT_PASS_COUNTER 1 CACHE BOOL "")
 set(IMC_USE_CHECKPOINT_VOLTAGE_CHECK 0 CACHE BOOL "")
 set(IMC_USE_CHECKPOINT_VOLTAGE_CHECK 0 CACHE BOOL "")
 set(IMC_CHECKPOINT_PASS_COUNT 100 CACHE STRING "")
 set(IMC_CHECKPOINT_PASS_COUNT 100 CACHE STRING "")
+set(IMC_SPLIT_LOOP 0 CACHE BOOL "")
 
 
 # set(CMAKE_C_COMPILER arm-none-eabi-gcc) #tested with: clang, arm-none-eabi-gcc
 # set(CMAKE_C_COMPILER arm-none-eabi-gcc) #tested with: clang, arm-none-eabi-gcc
 # set(CMAKE_C_COMPILER /home/ybkim/workspace/imc/llvm/llvm-17/build/bin/clang)
 # set(CMAKE_C_COMPILER /home/ybkim/workspace/imc/llvm/llvm-17/build/bin/clang)
@@ -177,6 +178,12 @@ set(CMAKE_EXE_LINKER_FLAGS "${LD_ADDITIONAL_FILES} -T ${LINKER_SCRIPT}")
 project(imc_freeRTOS C ASM)
 project(imc_freeRTOS C ASM)
 add_executable(imc_freeRTOS ${SRC_FILES})
 add_executable(imc_freeRTOS ${SRC_FILES})
 
 
+# Run the loop-splitting pipeline only when application sources were supplied
+# and the IMC_SPLIT_LOOP switch is on. The list is passed by variable name.
+if(IMC_APP_FILES AND IMC_SPLIT_LOOP)
+    split_loop(IMC_APP_FILES)
+endif()
+
 if(IMC_APP_FILES)
 if(IMC_APP_FILES)
     if(IMC_INSERT_COMPILER_CHECKPOINTS)
     if(IMC_INSERT_COMPILER_CHECKPOINTS)
         if(NOT CMAKE_C_COMPILER MATCHES "clang")
         if(NOT CMAKE_C_COMPILER MATCHES "clang")
@@ -208,7 +215,9 @@ if(IMC_APP_FILES)
         endforeach()
         endforeach()
         add_library(imc_apps "${APP_OPTS}")
         add_library(imc_apps "${APP_OPTS}")
     else()
     else()
-        add_library(imc_apps "${IMC_APP_FILES}")
+        if(NOT IMC_SPLIT_LOOP)
+            add_library(imc_apps "${IMC_APP_FILES}")
+        endif()
     endif()
     endif()
 endif()
 endif()
 
 

+ 2 - 0
Core/Inc/ImC/imc_extension.h

@@ -13,4 +13,6 @@
     #define imcPRINT_RECOVERY_MESSAGE 0
     #define imcPRINT_RECOVERY_MESSAGE 0
 #endif
 #endif
 
 
+int __imc_get_loop_pass_count();
+
 #endif
 #endif

+ 8 - 0
Core/Src/ImC/imc_extension.c

@@ -0,0 +1,8 @@
+#include <stdio.h>
+
+#include "ImC/imc_extension.h"
+
+/*
+ * Returns the constant 10.
+ * NOTE(review): judging by the name, this is the loop pass count consulted by
+ * the loop-splitting instrumentation -- confirm against the code that calls it.
+ */
+int __imc_get_loop_pass_count() {
+    // printf("__imc_get_loop_pass_count() called\r\n");
+    return 10;
+}

+ 1 - 0
Core/Src/benchmarks/conv2d/conv2d.c

@@ -1,5 +1,6 @@
 #include <stdio.h>
 #include <stdio.h>
 #include "ImC/imc_kernel.h"
 #include "ImC/imc_kernel.h"
+#include "ImC/imc_extension.h"
 
 
 #define INPUT_SIZE 20
 #define INPUT_SIZE 20
 #define KERNEL_SIZE 6
 #define KERNEL_SIZE 6

+ 4 - 1
Core/Src/benchmarks/fft/fft.c

@@ -8,6 +8,8 @@
 #include "fft.h"
 #include "fft.h"
 #include "cmsis_os.h"
 #include "cmsis_os.h"
 
 
+#include "ImC/imc_extension.h"
+
 #define CHECKPOINTER(p) CheckPointer(p, #p)
 #define CHECKPOINTER(p) CheckPointer(p, #p)
 
 
 static void CheckPointer(void *p, char *name)
 static void CheckPointer(void *p, char *name)
@@ -97,7 +99,8 @@ void vFFT()
                 "Error in fft():  NumSamples=%u is not power of two\n",
                 "Error in fft():  NumSamples=%u is not power of two\n",
                 NumSamples);
                 NumSamples);
 
 
-            exit(1);
+            // exit(1);
+            // return;
         }
         }
 
 
         if (InverseTransform)
         if (InverseTransform)

+ 2 - 0
Core/Src/benchmarks/matmul/matmul.c

@@ -1,5 +1,7 @@
 #include <stdio.h>
 #include <stdio.h>
 
 
+#include "ImC/imc_extension.h"
+
 #define MAT_SIZE 10
 #define MAT_SIZE 10
 
 
 int A[MAT_SIZE][MAT_SIZE] = {
 int A[MAT_SIZE][MAT_SIZE] = {

+ 6 - 85
imc/exprs/validation/validation.py

@@ -23,11 +23,12 @@ def main():
 
 
     for benchmark in benchmarks:
     for benchmark in benchmarks:
         config.bench_name = benchmark
         config.bench_name = benchmark
-        config.insert_compiler_checkpoints = True
-        config.use_checkpoint_pass_counter = True
+        config.insert_compiler_checkpoints = False
+        config.use_checkpoint_pass_counter = False
         config.use_checkpoint_voltage_check = False
         config.use_checkpoint_voltage_check = False
         config.checkpoint_pass_count = 1000
         config.checkpoint_pass_count = 1000
         config.bench_infinite_loop = True
         config.bench_infinite_loop = True
+        config.split_loop = True
 
 
         pps.set_voltage(3.3, 1)
         pps.set_voltage(3.3, 1)
         pps.set_current(0.1, 1)
         pps.set_current(0.1, 1)
@@ -39,11 +40,11 @@ def main():
             binary = env.build_binary(config, build_dir)
             binary = env.build_binary(config, build_dir)
             env.clear_nvm_and_load_binary(binary, resume=False)
             env.clear_nvm_and_load_binary(binary, resume=False)
         
         
-        pps.set_current(0.015, 1)
-        time.sleep(1)
+        # pps.set_current(0.015, 1)
+        # time.sleep(1)
 
 
         env.resume_board(terminate=True)
         env.resume_board(terminate=True)
-        total_iterations = 5
+        total_iterations = 2
         records = SerialWatcher(benchmark, total_iterations).run()
         records = SerialWatcher(benchmark, total_iterations).run()
         # records = measure_execution_time(benchmark, total_iterations)
         # records = measure_execution_time(benchmark, total_iterations)
         df = pd.DataFrame(records)
         df = pd.DataFrame(records)
@@ -51,71 +52,6 @@ def main():
         print(df)
         print(df)
 
 
 
 
-def measure_execution_time(bench_nanme, total_iterations):
-    """Time benchmark runs by watching marker lines on the serial port.
-
-    Reads lines from the device returned by
-    ``imc_utils.serial_device.get_serial()`` until ``total_iterations``
-    "End benchmark" markers have been seen after a matching
-    "Start benchmark" marker.
-
-    Args:
-        bench_nanme: Benchmark name; stored in each record and passed to
-            ``check_output_is_correct``.
-        total_iterations: Number of completed runs to collect.
-
-    Returns:
-        A list with one dict per completed run, with keys ``bench_name``,
-        ``start``, ``end``, ``time_taken``, ``recovery``, ``outputs`` and
-        ``is_correct``.
-    """
-    ser = imc_utils.serial_device.get_serial()
-
-    num_finished = 0
-    start_detected = False
-    # NOTE(review): time_takens is appended to but never read in this function.
-    time_takens = []
-    time_total = 0
-    outputs = []
-    records = []
-    num_recovery = 0
-
-    # Discard anything already buffered so only fresh lines are parsed.
-    ser.reset_input_buffer()
-
-    while num_finished < total_iterations:
-        # NOTE(review): `line` is only assigned inside this branch. If
-        # ser.readable() is False, the code below reuses the previous line,
-        # or raises NameError on the first pass -- confirm this is acceptable.
-        if ser.readable():
-            res = ser.readline()
-            try:
-                # Drops the last character of the decoded text (presumably the
-                # line terminator -- confirm for multi-byte input).
-                line = res.decode()[: len(res) - 1]
-            except:
-                # NOTE(review): the bare except also catches KeyboardInterrupt.
-                print("readline() exception")
-                continue
-
-        if line.startswith("hardfault"):
-            print("\nHARD FAULT")
-
-        if not start_detected:
-            # Waiting for a run to begin; other lines are ignored in this state.
-            if line.startswith("Start benchmark"):
-                print("\nbenchmark start detected")
-                t_start = time.time()
-                start_detected = True
-        else:
-            if line.startswith("End benchmark"):
-                t_end = time.time()
-                t_diff = t_end - t_start
-                time_takens.append(t_diff)
-                time_total += t_diff
-                num_finished += 1
-                print(
-                    f"\nbenchmark end detected, time: {t_diff:.2f} secs, finished: {num_finished}, average: {time_total/num_finished:.2f} secs"
-                )
-                start_detected = False
-                record = {
-                    "bench_name": bench_nanme,
-                    "start": t_start,
-                    "end": t_end,
-                    "time_taken": t_diff,
-                    "recovery": num_recovery,
-                    "outputs": outputs,
-                    "is_correct": check_output_is_correct(bench_nanme, outputs)
-                }
-                records.append(record)
-                # Reset the per-run accumulators for the next iteration.
-                outputs = []
-                num_recovery = 0
-
-            elif line.startswith("(OUT)"):
-                print(line)
-                # Keep the text after the first six characters, stripped.
-                outputs.append(line[6:].strip())
-            
-            elif line.startswith("Start recovery"):
-                num_recovery += 1
-                print(f"recovery: #{num_recovery}", end="\r")
-
-    return records
-
-
 def get_default_build_config():
 def get_default_build_config():
     config = BuildConfigM33()
     config = BuildConfigM33()
     config.bench_name = "vBasicMath"
     config.bench_name = "vBasicMath"
@@ -129,21 +65,6 @@ def get_default_build_config():
     return config
     return config
 
 
 
 
-correct_outputs = {
-    "vBasicMath": "Sum: -9313",
-    "vCrc": "210692533",
-    "vFFT": "2807, 915",
-    "vSha": "4926a88d 0ca714f4 a9ebc1eb def37b8e 3911ee0f",
-}
-
-def check_output_is_correct(bench_name, outputs):
-    """Check one run's captured outputs against ``correct_outputs``.
-
-    Returns False when ``outputs`` is empty or ``bench_name`` has no entry in
-    ``correct_outputs``. Otherwise returns True only if exactly one output
-    was captured and it equals the expected string for ``bench_name``.
-    """
-    if len(outputs) == 0 or bench_name not in correct_outputs:
-        return False
-    is_single_output = len(outputs) == 1
-    # Safe to index: the guard above ensures the key exists and outputs is non-empty.
-    is_output_correct = correct_outputs[bench_name] == outputs[0]
-    return is_single_output and is_output_correct
-
-
 def save_records(bench_name, df):
 def save_records(bench_name, df):
     with open(f"output/{bench_name}.pickle", "wb") as f:
     with open(f"output/{bench_name}.pickle", "wb") as f:
         pickle.dump(df, f)
         pickle.dump(df, f)

+ 33 - 1
imc_extension.cmake

@@ -129,4 +129,36 @@ endif()
 
 
 foreach(BENCH_SRC ${BENCHMARK_SRC_FILES})
 foreach(BENCH_SRC ${BENCHMARK_SRC_FILES})
     list(APPEND SRC_FILES ${BENCH_SRC})
     list(APPEND SRC_FILES ${BENCH_SRC})
-endforeach()
+endforeach()
+
+# split_loop(<imc_files>)
+#
+# Defines the `imc_apps` library from application sources that are run through
+# the ImC loop-splitting LLVM plugin.
+#
+#   imc_files - NAME of the list variable holding the application sources,
+#               relative to PROJECT_SOURCE_DIR (e.g. IMC_APP_FILES).
+#
+# Each listed source, plus Core/Src/ImC/imc_extension.c, is processed as:
+#   1. ${CMAKE_C_COMPILER} -O0 -emit-llvm            -> <src>.bc
+#   2. ${OPT_BIN} with the plugin and pass pipeline  -> <src>_imc.bc
+#   3. ${OPT_BIN} ${OPT}                             -> <src>.opt.bc
+#   4. ${LLC_BIN} ${IMC_FLAGS} ${CPU} ${OPT}         -> <src>.s
+# The generated .s files become the sources of `imc_apps`.
+#
+# Reads OPT_BIN, LLC_BIN, OPT, CPU, IMC_BASE and CMAKE_C_FLAGS from the calling
+# scope and requires the imc_freeRTOS target to exist. Entries already present
+# in the caller's INCLUDE_DIRS / APP_OPTS variables are inherited. The plugin
+# location can be overridden by defining IMC_LOOP_SPLIT_PLUGIN (for example
+# with -DIMC_LOOP_SPLIT_PLUGIN=<path>); otherwise the original default is used.
+function(split_loop imc_files)
+    # Use the list named by the argument rather than a hard-wired variable.
+    set(app_sources ${${imc_files}})
+    list(APPEND app_sources
+        Core/Src/ImC/imc_extension.c
+    )
+
+    # Mirror the executable's include directories as -I flags. The property
+    # lookup yields <var>-NOTFOUND when unset, which must not become a flag.
+    get_target_property(SRC_INCLUDES imc_freeRTOS INCLUDE_DIRECTORIES)
+    if(SRC_INCLUDES)
+        foreach(dir ${SRC_INCLUDES})
+            list(APPEND INCLUDE_DIRS "-I${dir}")
+        endforeach()
+    endif()
+
+    # Quoted so that an empty CMAKE_C_FLAGS does not leave string(REPLACE)
+    # with too few arguments.
+    string(REPLACE " " ";" C_FLAGS "${CMAKE_C_FLAGS}")
+
+    set(BASIC_OPT_PASSES "mem2reg,indvars,instnamer,unify-loop-exits,lcssa,simplifycfg,loop-simplify")
+    set(IMC_OPT_PASSES "imc-loop-opt")
+    set(IMC_FLAGS ${IMC_BASE})
+
+    # Developer-machine default kept for backward compatibility; override it
+    # on other hosts.
+    if(NOT IMC_LOOP_SPLIT_PLUGIN)
+        set(IMC_LOOP_SPLIT_PLUGIN /home/ybkim/workspace/imc/loop_duplication/build/lib/libImcLoopOpt.so)
+    endif()
+
+    foreach(APP_SRC ${app_sources})
+        set(src_path ${PROJECT_SOURCE_DIR}/${APP_SRC})
+        set(out_base ${PROJECT_BINARY_DIR}/${APP_SRC})
+        # Sources live in sub-directories; the matching build-tree directory
+        # has to exist before the tools can write into it.
+        get_filename_component(out_dir ${out_base}.s DIRECTORY)
+
+        add_custom_command(
+            OUTPUT ${out_base}.s
+            DEPENDS ${src_path}
+            COMMAND ${CMAKE_COMMAND} -E make_directory ${out_dir}
+            COMMAND ${CMAKE_C_COMPILER} -g ${C_FLAGS} -O0 -Xclang -disable-O0-optnone ${INCLUDE_DIRS} -D USE_HAL_DRIVER -D STM32L552xx -c -emit-llvm ${src_path} -o ${out_base}.bc
+            COMMAND ${OPT_BIN} -o ${out_base}_imc.bc -load-pass-plugin=${IMC_LOOP_SPLIT_PLUGIN} -passes=${BASIC_OPT_PASSES},${IMC_OPT_PASSES} ${out_base}.bc
+            COMMAND ${OPT_BIN} ${OPT} -o ${out_base}.opt.bc ${out_base}_imc.bc
+            COMMAND ${LLC_BIN} ${IMC_FLAGS} ${CPU} ${OPT} -o ${out_base}.s ${out_base}.opt.bc
+        )
+        list(APPEND APP_OPTS ${out_base}.s)
+    endforeach()
+    add_library(imc_apps "${APP_OPTS}")
+endfunction()

+ 3 - 2
setup_build_dir.py

@@ -10,12 +10,13 @@ OPENOCD_SCRIPT = f"{WORKSPACE_ROOT}/imc_freertos_app_m33.cfg"
 BUILD_DIR = f"{WORKSPACE_ROOT}/build"
 BUILD_DIR = f"{WORKSPACE_ROOT}/build"
 
 
 config = BuildConfigM33()
 config = BuildConfigM33()
-config.bench_name = "vStringSearch"
+config.bench_name = "vFFT"
 config.insert_compiler_checkpoints = False
 config.insert_compiler_checkpoints = False
 config.enable_extension = True
 config.enable_extension = True
 config.use_checkpoint_pass_counter = False
 config.use_checkpoint_pass_counter = False
-config.use_checkpoint_voltage_check = True
+config.use_checkpoint_voltage_check = False
 config.bench_infinite_loop = True
 config.bench_infinite_loop = True
+config.split_loop = True
 
 
 env = TestEnv(WORKSPACE_ROOT, NVM_RESET_BIN, OPENOCD_SCRIPT)
 env = TestEnv(WORKSPACE_ROOT, NVM_RESET_BIN, OPENOCD_SCRIPT)