Explorar el Código

Add loop unrolling experiments

Youngbin Kim hace 1 año
padre
commit
d12b84dcf5

+ 2 - 1
.gitignore

@@ -8,4 +8,5 @@ imc_freertos_app_m33.launch
 build/
 build.sh
 imc/exprs/**/output**
-__pycache__
+__pycache__
+imc/exprs/**/*.pdf

+ 1 - 0
CMakeLists.txt

@@ -225,6 +225,7 @@ else()
     endif()
 endif()
 
+target_link_options(imc_freeRTOS PRIVATE "LINKER:-gc-sections")
 if(IMC_APP_FILES)
     target_link_libraries(imc_freeRTOS PRIVATE imc_apps c m nosys gcc)
 else()

+ 14 - 0
Core/Inc/ImC/imc_extension.h

@@ -37,12 +37,26 @@
     #define imcPRINT_STATS 0
 #endif
 
+#ifndef imcCUSTOM_UNROLL
+    #define imcCUSTOM_UNROLL 0 
+#endif
+
 #define IMC_KERNEL_NVM __attribute__((section(".kernel_nvm")))
 
 #define imcCAP_VOL_HIGH 4800
 #define imcCAP_VOL_LOW 3500
 #define imcENERGY_TOTAL (imcCAP_VOL_HIGH*imcCAP_VOL_HIGH - imcCAP_VOL_LOW*imcCAP_VOL_LOW)
 
+#if (imcCUSTOM_UNROLL)
+// #define STRINGIFY(x) ("clang loop unroll_count(" ##str ")")
+#define DO_PRAGMA(x) _Pragma ( #x )
+#define IMC_UNROLL(x) DO_PRAGMA( clang loop unroll_count(x+1) )
+#define IMC_UNROLL_FULL() DO_PRAGMA( clang loop unroll(full) )
+#else
+#define IMC_UNROLL(x) 
+#define IMC_UNROLL_FULL()
+#endif
+
 int __imc_get_loop_pass_count(int loop_id);
 void __imc_finish_loop_skipping();
 

+ 4 - 0
Core/Src/benchmarks/aes/aes.c

@@ -262,6 +262,7 @@ void AES_CBC_encrypt_buffer(struct AES_ctx *ctx, uint8_t *buf, size_t length)
 void vAes() {
     // int IMC_REPEAT = 1000;
     int IMC_REPEAT = imcBENCH_REPEAT_COUNT;
+    IMC_UNROLL(1)
     for(int imc=0; imc < IMC_REPEAT; imc++) {
         uint8_t key[] = {0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c};
         uint8_t out[] = {0x76, 0x49, 0xab, 0xac, 0x81, 0x19, 0xb2, 0x46, 0xce, 0xe9, 0x8e, 0x9b, 0x12, 0xe9, 0x19, 0x7d,
@@ -288,6 +289,7 @@ void vAes() {
             uint8_t tempa[4]; // Used for the column/row operations
 
             // The first round key is the key itself.
+            IMC_UNROLL(Nk)
             for (i = 0; i < Nk; ++i)
             {
                 RoundKey[(i * 4) + 0] = Key[(i * 4) + 0];
@@ -297,6 +299,7 @@ void vAes() {
             }
 
             // All other round keys are found from the previous round keys.
+            IMC_UNROLL(Nb * (Nr + 1) - Nk)
             for (i = Nk; i < Nb * (Nr + 1); ++i)
             {
                 {
@@ -354,6 +357,7 @@ void vAes() {
                 // XorWithIv(buf, Iv);
                 {
                     uint8_t j;
+                    IMC_UNROLL(AES_BLOCKLEN)
                     for (j = 0; j < AES_BLOCKLEN; ++j) // The block in AES is always 128bit no matter the key size
                     {
                         // buf[j] ^= Iv[j];

+ 2 - 0
Core/Src/benchmarks/basicmath/basicmath.c

@@ -38,8 +38,10 @@ void vBasicMath()
       // imcREQUEST_CHECKPOINT();
       for (b1 = 10; b1 > 0; b1--)
       {
+        IMC_UNROLL(2)
         for (c1 = 5; c1 < 15; c1 += 0.5)
         {
+          IMC_UNROLL(10)
           for (d1 = -1; d1 > -11; d1--)
           {
             SolveCubic(a1, b1, c1, d1, &solutions, x);

+ 4 - 10
Core/Src/benchmarks/conv2d/conv2d.c

@@ -19,25 +19,19 @@ void vConv2d()
 
     int output[OUTPUT_SIZE * OUTPUT_SIZE];
 
-// #pragma clang loop unroll(disable)
+    IMC_UNROLL(8)
     for (int imc_repeat = 0; imc_repeat < IMC_REPEAT; imc_repeat++)
     {
 
-// #pragma clang loop unroll(disable)
-// for(int i=0; i<OUTPUT_SIZE; i++) {
-//     #pragma clang loop unroll(disable)
-//     for(int j=0; j<OUTPUT_SIZE; j++) {
-
-// #pragma clang loop unroll(disable)
         for (int _i = 0; _i < OUTPUT_SIZE * OUTPUT_SIZE; _i++)
         {
             int i = _i / OUTPUT_SIZE;
             int j = _i % OUTPUT_SIZE;
             int sum = 0;
-            // #pragma clang loop unroll(disable)
+            IMC_UNROLL(KERNEL_SIZE)
             for (int ii = 0; ii < KERNEL_SIZE; ii++)
             {
-                // #pragma clang loop unroll(disable)
+                IMC_UNROLL(KERNEL_SIZE)
                 for (int jj = 0; jj < KERNEL_SIZE; jj++)
                 {
                     int input_i = i + ii;
@@ -53,7 +47,7 @@ void vConv2d()
 
     int sum = 0;
 
-#pragma clang loop unroll(disable)
+// #pragma clang loop unroll(disable)
     for (int _i = 0; _i < OUTPUT_SIZE * OUTPUT_SIZE; _i++)
     {
         printf("%d ", output[_i]);

+ 1 - 0
Core/Src/benchmarks/crc/crc_32.c

@@ -199,6 +199,7 @@ void vCrc()
             // printf("%d\r\n", &_binary_small_modified_pcm_start);
             // printf("%d\r\n", &_binary_small_modified_pcm_end);
 
+            IMC_UNROLL(9)
             for (int i = 0; i < size; i += 1)
             {
                   c = input[i];

+ 6 - 0
Core/Src/benchmarks/fft/fft.c

@@ -44,22 +44,28 @@ void vFFT()
     float coeff[MAXWAVES];
     float amp[MAXWAVES];
 
+    // IMC_UNROLL(1)
     for (int imc_repeat=0; imc_repeat < IMC_REPEAT; imc_repeat++) {
 
     /* Makes MAXWAVES waves of random amplitude and period */
+    IMC_UNROLL(4)
     for (i = 0; i < MAXWAVES; i++)
     {
         // coeff[i] = rand() % 1000;
         // amp[i] = rand() % 1000;
         coeff[i] = 123;
         amp[i] = 456;
+        // coeff[i] = i;
+        // amp[i] = i+1;
     }
+    IMC_UNROLL(4)
     for (i = 0; i < MAXSIZE; i++)
     {
         /*   RealIn[i]=rand();*/
         RealIn[i] = 0;
         ImagIn[i] = 0;
         float sum = 0;
+        IMC_UNROLL(4)
         for (j = 0; j < MAXWAVES; j++)
         {
             /* randomly select sin or cos */

+ 5 - 1
Core/Src/benchmarks/matmul/matmul.c

@@ -13,12 +13,16 @@ void vMatMul() {
     // int IMC_REPEAT = 2000;
     int IMC_REPEAT = imcBENCH_REPEAT_COUNT;
     int C[MAT_SIZE * MAT_SIZE] = { 0 };
+
     for(int imc=0; imc<IMC_REPEAT; imc++) {
 
-        for(int _i=0; _i<MAT_SIZE*MAT_SIZE; _i++) {
+        IMC_UNROLL(3)
+        for (int _i = 0; _i < MAT_SIZE * MAT_SIZE; _i++)
+        {
             int sum = 0;
             int i = _i / MAT_SIZE;
             int j = _i % MAT_SIZE;
+            IMC_UNROLL(MAT_SIZE)
             for(int k=0; k<MAT_SIZE; k++) {
                 sum += A[i][k] * B[k][j];
             }

+ 2 - 0
Core/Src/benchmarks/sha/sha.c

@@ -49,9 +49,11 @@ static void sha_transform(SHA_INFO *sha_info)
     int i;
     LONG temp, A, B, C, D, E, W[80];
 
+    IMC_UNROLL(16)
     for (i = 0; i < 16; ++i) {
 	W[i] = sha_info->data[i];
     }
+    IMC_UNROLL(64)
     for (i = 16; i < 80; ++i) {
 	W[i] = W[i-3] ^ W[i-8] ^ W[i-14] ^ W[i-16];
 #ifdef USE_MODIFIED_SHA

+ 1 - 0
Core/Src/benchmarks/stringsearch/stringsearch_local.c

@@ -109,6 +109,7 @@ void vStringSearch()
 	  
 	//   int IMC_REPEAT = 5;
 	  int IMC_REPEAT = imcBENCH_REPEAT_COUNT;
+	  IMC_UNROLL(3)
 	  for(int imc_repeat=0; imc_repeat < IMC_REPEAT; imc_repeat++) {
 
       for (i = 0; find_strings[i]; i++)

La diferencia del archivo ha sido suprimida porque es demasiado grande
+ 66 - 206
imc/exprs/date2025/3_adaptive/draw_graph_expr_3.ipynb


+ 237 - 67
imc/exprs/date2025/3_adaptive/draw_graph_expr_3.py

@@ -3,38 +3,38 @@ import os
 import pandas as pd
 import seaborn as sns
 import matplotlib.pyplot as plt
+import matplotlib.ticker
+from scipy import stats
+import numpy as np
 
 import plot_utils
 
-def validate_and_cleanup_df(df):
-    # rename_dict = {
-    #     "checkpoint_enabled": "ckpt",
-    #     "voltage_check_enabled": "VC",
-    #     "pass_counter_enabled": "PC"
-    # }
-    # df = df.rename(columns=rename_dict)
+config_order = ["counter", "unroll", "adaptive"]  
+bench_order = ["fft", "basicmath", "crc", "sha", "stringsearch", "aes", "conv2d", "matmul"]
 
-    mask = df["is_correct"] == False
-    if mask.any():
-        print("drop rows with incorrect output")
-        print(df[mask])
+def validate_and_cleanup_df(df, config_name):
 
-    df = df[~mask]
-    assert(df["is_correct"].all())
-
-    # def category_mapper(row):
-    #     if row["ckpt"] == False:
-    #         return "Baseline"
-    #     else:
-    #         if row["VC"] == False:
-    #             assert(row["PC"] == True)
-    #             return "PC"
-    #         else:
-    #             assert(row["PC"] == False)
-    #             return "VC"
-
-    # category_column_name = "config"
-    # df[category_column_name] = df.apply(category_mapper, axis=1)
+    # mask = df["is_correct"] == False
+    # if mask.any():
+    #     print(f"({config_name}) drop {mask.sum()} rows with incorrect output")
+    # df = df[~mask]
+    # assert(df["is_correct"].all())
+    
+    bench_name = df.iloc[0,0]
+    z_score = stats.zscore(df["time_taken"])
+    mask2 = np.abs(z_score) > 4.35
+    if mask2.any():
+        print(f"({bench_name}, {config_name}) remove {mask2.sum()} outliers")
+        time = float(df[mask2]["time_taken"].iloc[0])
+        average = df["time_taken"].mean()
+        print(f"time: {time:.2f}, average: {average:.2f}")
+
+    df = df[~mask2].copy()
+
+    def rename_bench_name(row):
+        return row["bench_name"].lower()[1:]
+
+    df["bench_name"] = df.apply(rename_bench_name, axis=1)
 
     def process_stats(row):
         lines = row["stats"]
@@ -47,7 +47,7 @@ def validate_and_cleanup_df(df):
             if key.startswith("checkpoint executed"):
                 result["ckpt_exec"] = val
         return result
-    
+
     df_stats = df.apply(process_stats, axis=1, result_type="expand")
     df = pd.concat([df, df_stats], axis=1)
 
@@ -80,8 +80,8 @@ def get_base_df():
     output_dir = "/home/ybkim/workspace/imc/imc_freertos_app_m33/imc/exprs/date2025/3_adaptive/output"
 
     all_dfs = []
-    configs = ["pass_count", "adaptive"]
-    drop_index = list(range(0, 3))
+    configs = ["pass_count", "adaptive", "unroll"]
+    drop_index = list(range(0, 1))
     drop_index = None
 
     for benchmark in benchmarks:
@@ -94,73 +94,243 @@ def get_base_df():
                 orig_df = pickle.load(f)
                 if drop_index and config_name == "adaptive":
                     orig_df = orig_df.drop(drop_index)
-                df = validate_and_cleanup_df(orig_df)
+                df = validate_and_cleanup_df(orig_df, config_name)
                 df["config"] = config_name
                 all_dfs.append(df)
 
     orig_df = pd.concat(all_dfs)
+
+    def rename_config(row):
+        name = row["config"]
+        d = {
+            "adaptive": "adaptive",
+            "pass_count": "counter",
+            "unroll": "unroll"
+        }
+        return d[name]
+    orig_df["config"] = orig_df.apply(rename_config, axis=1)
+
     return orig_df
 
 
-def draw_checkpoint_count():
-    plot_utils.set_theme_seaborn(kind="line")
-    orig_df = get_base_df()
+def remove_error_stats(orig_df):
     mask = orig_df["ckpt_trig"].abs() > 1e7 
     df = orig_df[~mask]
-    mask = orig_df["ckpt_exec"].abs() > 1e6
-    df = orig_df[~mask]
-    benchmarks = list(df["bench_name"].unique())
-    print(benchmarks)
-    fig_size = (12, 3.5 * len(benchmarks))
-    n_rows = len(benchmarks)
-    n_cols = 1
-    hspace = 0.08
+    mask = df["ckpt_exec"].abs() > 1e6
+    df = df[~mask]
+    mask = df["ckpt_trig"] < 0
+    df = df[~mask]
+    mask = df["ckpt_exec"] < 0
+    df = df[~mask]
+    return df
+
+
+def draw_checkpoint_count():
+    rc = {
+        "lines.linewidth": 1.5,
+        "axes.titlepad": 10,
+        "ytick.major.pad": -4,
+        "ytick.labelsize": 20,
+        "legend.fontsize": 23,
+        "axes.labelsize": 25,
+    }
+    plot_utils.set_theme_seaborn(kind="line", rc_custom=rc)
+
+    orig_df = get_base_df()
+    # mask = orig_df["ckpt_trig"].abs() > 1e7 
+    # df = orig_df[~mask]
+    # mask = orig_df["ckpt_exec"].abs() > 1e6
+    # df = orig_df[~mask]
+    # mask = orig_df["ckpt_trig"] < 0
+    # df = orig_df[~mask]
+    # mask = orig_df["ckpt_exec"] < 0
+    # df = orig_df[~mask]
+    df = remove_error_stats(orig_df)
+
+    benchmarks = bench_order
+    fig_size = (13, 5.5)
+    n_rows = 2
+    n_cols = 4
+    hspace = 0.05
     fig = plt.figure(figsize=fig_size)
-    axes = fig.subplots(n_rows, n_cols, sharex=False, gridspec_kw={"hspace": hspace})
-    for (i, (key, d_orig)) in enumerate(df.groupby(["bench_name"])):
-        d = d_orig.reset_index()
-        # mask = d["config"] == "adaptive"
-        # d = d[mask]
-        y = "ckpt_exec"
-        # y = "ckpt_trig"
+    axes = fig.subplots(n_rows, n_cols, sharex=True, gridspec_kw={"hspace": hspace})
+    df = df[df["config"] == "adaptive"]
+    df = df.rename(columns={
+        "ckpt_trig": "trig",
+        "ckpt_exec": "exec",
+    })
+    df = df.drop(columns=["config", "time_taken"])
+    for i, benchmark in enumerate(benchmarks):
+        d = df[df["bench_name"] == benchmark].reset_index()
+        print(d)
+
+        id_vars = ["index", "bench_name"]
+        value_vars = ["trig", "exec"]
+        d = d.melt(id_vars=id_vars, value_vars=value_vars, value_name="count", var_name="type")
+        # d = d.reset_index().melt(id_vars=["bench_name"], var_name="config", value_name="normalized")
         print(d)
 
         ax = axes.reshape(-1)[i]
         sns.lineplot(
             data=d,
             x="index",
-            y=y,
-            hue="config",
-            ax=ax
+            y="count",
+            hue="type",
+            ax=ax,
         )
-        ax.set_title(key[0])
+        ax.set_title(benchmark)
+        ax.set_xlabel("")
+        ax.set_ylabel("")
+        if i / 4 >= 1:
+            ax.set_xlabel("Execution #")
+        if i in [0, 4]:
+            ax.set_ylabel("Count (k)")
+        ax.yaxis.set_major_formatter(matplotlib.ticker.FuncFormatter(lambda x, pos: f"{x/1000:.0f}"))
+        ax.yaxis.grid(visible=True, which="both")
+        ax.yaxis.set_major_locator(matplotlib.ticker.MaxNLocator(3))
+        ax.yaxis.set_minor_locator(matplotlib.ticker.AutoMinorLocator(3))
+
+        ax.xaxis.set_minor_locator(matplotlib.ticker.AutoMinorLocator(2))
+        ax.xaxis.grid(visible=True, which="both")
+
+        if i == 0:
+            ax.legend(
+                ncol=1, loc="upper left", bbox_to_anchor=(0.33, 0.995), labelspacing=0.3,
+                handlelength=1,
+            )
+            # ax.get_legend().remove()
+        else:
+            ax.get_legend().remove()
+
+    return fig
 
 
 def draw_graph():
     orig_df = get_base_df()
-    df = orig_df.groupby(["bench_name", "config"]).mean()
-    df = df.reset_index()
+    # df = orig_df.groupby(["bench_name", "config"]).mean()
+    df = orig_df.reset_index()
 
-    # df = df.pivot(index="bench_name", columns="config", values="time_taken")
 
-    # df2 = pd.DataFrame()
-    # df2["PC"] = df["PC"] / df["Baseline"]
-    # df2["VC"] = df["VC"] / df["Baseline"]
+    err_kws = {
+        "linewidth": 1.2,
+        "color": "black",
+    }
 
-    # df2 = df2.reset_index().melt(id_vars=["bench_name"], var_name="config", value_name="normalized")
-    # df2 = df2.sort_values(by="bench_name")
-    # df2 = df2[df2["config"] == "PC"]
-    # df2 = df2[df2["config"] == "VC"]
-    # df2 = df2[df2["bench_name"] == "vFFT"]
+    rc = {
+        "legend.fontsize": 24,
+    }
+
+    plot_utils.set_theme_seaborn(rc)
 
-    plot_utils.set_theme_seaborn()
     g = sns.catplot(
         data=df,
         kind="bar",
         x="bench_name",
-        y = "time_taken",
+        y="time_taken",
         hue="config",
-        aspect=2.3
+        legend="brief",
+        legend_out=False,
+        aspect=2.5,
+        errorbar=lambda x: (x.min(), x.max()),
+        hue_order=config_order,
+        order=bench_order,
+        err_kws=err_kws,
+        capsize=0.2,
     )
+    ax = g.ax
+    ax.set_ylim([0, 35])
+    ax.set_xlabel("Benchmark")
+    ax.set_ylabel("Time (s)")
+    g.ax.yaxis.set_major_locator(matplotlib.ticker.MaxNLocator(6))
+
+    ax = g.facet_axis(0, 0)
+
+    for i, c in enumerate(ax.containers):
+        labels = [f"{(v.get_height()):.0f}" if v.get_height() > 50 else "" for v in c]
+        padding = 125
+        ax.bar_label(c, labels=labels, label_type="center", padding=padding, size=16)
+
+    num_categories = len(df.bench_name.unique())
+    num_hues = len(df.config.unique())
+    plot_utils.draw_hatch(ax, num_categories, num_hues)
+    
+    ax.legend(ncol=1, loc="upper right", bbox_to_anchor=(0.265, 0.995), labelspacing=0.3)
+
+    sns.despine(g.fig, right=False, top=False)
 
     return g
+
+def draw_checkpoint_frequency_comparison():
+    orig_df = get_base_df()
+    df = remove_error_stats(orig_df)
+    df = df.drop(columns=["time_taken"])
+    df = df.groupby(["bench_name", "config"]).mean()
+    df = df.reset_index()
+
+    print(df)
+    base_df = df
+    adaptive_df = base_df[base_df["config"] == "adaptive"]
+    adaptive_df = adaptive_df.set_index("bench_name")
+
+    rc = {
+        "legend.fontsize": 24,
+    }
+    plot_utils.set_theme_seaborn(rc)
+
+    benchmarks = bench_order
+    fig_size = (10, 6.2)
+    n_rows = 2
+    n_cols = 1
+    hspace = 0.05
+    fig = plt.figure(figsize=fig_size)
+    axes = fig.subplots(n_rows, n_cols, squeeze=True, sharex=False, gridspec_kw={"hspace": hspace})
+
+    configs = ["counter", "unroll"]
+    for i, config in enumerate(configs):
+        d = base_df[base_df["config"] == config]
+        d = d.set_index("bench_name")
+
+        df2 = pd.DataFrame()
+        df2["trig"] = adaptive_df["ckpt_trig"] / d["ckpt_trig"]
+        df2["exec"] = adaptive_df["ckpt_exec"] / d["ckpt_exec"]
+        df2 = df2.reset_index()
+        df2 = df2.melt(id_vars=["bench_name"], value_vars=["trig", "exec"], var_name="type", value_name="normalized")
+
+        print(axes)
+        ax = axes[i]
+
+        g = sns.barplot(
+            data=df2,
+            x="bench_name",
+            y="normalized",
+            hue="type",
+            legend="brief",
+            order=bench_order,
+            ax = ax,
+        )
+        ax.set_title(f"vs. {config}", fontsize=22)
+        ax.set_ylim([0, 0.8])
+        ax.set_xlabel("Benchmark")
+        ax.set_ylabel("Ratio")
+        # ax.yaxis.set_major_locator(matplotlib.ticker.MaxNLocator(6))
+        ax.yaxis.set_minor_locator(matplotlib.ticker.AutoMinorLocator(5))
+        ax.yaxis.grid(visible=True, which="both")
+
+        for c in ax.containers:
+            labels = [f"{(v.get_height()):.2f}" if v.get_height() > 0.8 else "" for v in c]
+            padding = 80
+            ax.bar_label(c, labels=labels, label_type="center", padding=padding, size=16)
+
+        num_categories = len(df.bench_name.unique())
+        num_hues = len(df.config.unique())
+        plot_utils.draw_hatch(ax, num_categories, num_hues)
+
+        if i == 0:
+            ax.set_xlabel("")
+            ax.legend(ncol=1, loc="upper right", bbox_to_anchor=(1, 1), labelspacing=0.3)
+        else:
+            ax.get_legend().remove()
+
+        sns.despine(fig, right=False, top=False)
+
+    return fig

+ 8 - 3
imc/exprs/date2025/3_adaptive/run_expr_3.py

@@ -29,6 +29,10 @@ def get_build_config(benchmark, config_name):
         # config.enable_static_loop_pass_count = True
         # config.loop_pass_count = 50
         config.max_loop_ids = 30
+    
+    if config_name == "unroll":
+        config.use_checkpoint_voltage_check = True
+        config.custom_unroll = True
 
     return config
 
@@ -47,11 +51,11 @@ def main():
         "vAes",
     ]
     # benchmarks = [
-    #     "vBasicMath"
+    #     "vAes"
     # ]
 
-    configs = ["pass_count", "adaptive"]
-    # configs = ["adaptive"]
+    configs = ["pass_count", "adaptive", "unroll"]
+    # configs = ["adaptive", "unroll"]
 
     total_iterations = 20
 
@@ -105,6 +109,7 @@ def get_default_build_config():
     config.enable_static_loop_pass_count = False
     config.enable_adaptive_loop_pass_count = False
     config.print_stats = True
+    config.custom_unroll = False
     return config
 
 

La diferencia del archivo ha sido suprimida porque es demasiado grande
+ 29 - 0
imc/exprs/date2025/5_size_overhead/draw_graph_expr_5.ipynb


+ 115 - 0
imc/exprs/date2025/5_size_overhead/draw_graph_expr_5.py

@@ -0,0 +1,115 @@
+import pickle
+import os
+import pandas as pd
+import seaborn as sns
+import matplotlib.pyplot as plt
+
+import plot_utils
+
+
+def validate_and_cleanup_df(df):
+    # rename_dict = {
+    #     "checkpoint_enabled": "ckpt",
+    #     "voltage_check_enabled": "VC",
+    #     "pass_counter_enabled": "PC"
+    # }
+    # df = df.rename(columns=rename_dict)
+
+    mask = df["is_correct"] == False
+    if mask.any():
+        print("drop rows with incorrect output")
+        print(df[mask])
+
+    df = df[~mask]
+    assert df["is_correct"].all()
+
+    # def category_mapper(row):
+    #     if row["ckpt"] == False:
+    #         return "Baseline"
+    #     else:
+    #         if row["VC"] == False:
+    #             assert(row["PC"] == True)
+    #             return "PC"
+    #         else:
+    #             assert(row["PC"] == False)
+    #             return "VC"
+
+    # category_column_name = "config"
+    # df[category_column_name] = df.apply(category_mapper, axis=1)
+
+    def process_stats(row):
+        lines = row["stats"]
+        result = {}
+        for line in lines:
+            key, val = line.split(":")
+            val = int(val.strip())
+            if key.startswith("checkpoint triggered"):
+                result["ckpt_trig"] = val
+            if key.startswith("checkpoint executed"):
+                result["ckpt_exec"] = val
+        return result
+
+    df_stats = df.apply(process_stats, axis=1, result_type="expand")
+    df = pd.concat([df, df_stats], axis=1)
+
+    drop_columns = [
+        "start",
+        "end",
+        "recovery",
+        "outputs",
+        "stats",
+        # "ckpt",
+        # "VC",
+        # "PC",
+        "is_correct",
+    ]
+    df = df.drop(columns=drop_columns)
+    return df
+
+
+def get_base_df():
+    benchmarks = [
+        "vBasicMath",
+        "vCrc",
+        "vFFT",
+        "vSha",
+        "vStringSearch",
+        "vMatMul",
+        "vConv2d",
+        "vAes",
+    ]
+    benchmarks = [
+        "vStringSearch"
+    ]
+    output_dir = "/home/ybkim/workspace/imc/imc_freertos_app_m33/imc/exprs/date2025/5_size_overhead/output"
+
+    all_dfs = []
+
+    for benchmark in benchmarks:
+        pickle_filename = f"{output_dir}/{benchmark}.pickle"
+        if not os.path.exists(pickle_filename):
+            print(f"pass loading {benchmark}")
+            continue
+        with open(pickle_filename, "rb") as f:
+            df = pickle.load(f)
+            mask = df["config"] == "original"
+            base = int(df[mask]["text"].iloc[0])
+            df["normalized_text"] = df["text"] / base
+            df = df[~mask]
+            all_dfs.append(df)
+
+    orig_df = pd.concat(all_dfs)
+    return orig_df
+
+
+def draw_graph():
+    orig_df = get_base_df()
+    mask = orig_df["config"] != "original"
+    df = orig_df[mask]
+
+    plot_utils.set_theme_seaborn()
+    g = sns.catplot(
+        data=df, kind="bar", x="benchmark", y="normalized_text", hue="config", aspect=2.3
+    )
+
+    return g

+ 145 - 0
imc/exprs/date2025/5_size_overhead/run_expr_5.py

@@ -0,0 +1,145 @@
+import tempfile
+import time
+import pickle
+import pandas as pd
+import os
+import subprocess
+import pprint
+
+from imc_utils.pps_e36311a import PPS_E36311A
+from imc_utils.build_config.cortex_m33 import BuildConfigM33
+from imc_utils.build_config.test_env import TestEnv
+from imc_utils.serial_watch import SerialWatcher
+
+WORKSPACE_ROOT = "/home/ybkim/workspace/imc/imc_freertos_app_m33"
+NVM_RESET_BIN = f"{WORKSPACE_ROOT}/imc/utils/nvm_reset.elf"
+OPENOCD_SCRIPT = f"{WORKSPACE_ROOT}/imc_freertos_app_m33.cfg"
+
+
+def get_build_config(benchmark, config_name):
+    config = get_default_build_config()
+    config.bench_name = benchmark
+    bench_repeat_count = config.bench_repeat_count_small[benchmark]
+    config.bench_repeat_count = bench_repeat_count
+
+    if config_name == "original":
+        config.insert_compiler_checkpoints = False
+
+    if config_name == "pass_count":
+        config.use_checkpoint_pass_counter = True
+        config.checkpoint_pass_count = config.pass_count_10ms[benchmark]
+
+    if config_name == "adaptive":
+        config.use_checkpoint_voltage_check = True
+        config.split_loop = True
+        config.enable_adaptive_loop_pass_count = True
+        config.max_loop_ids = 30
+    
+    if config_name == "unroll":
+        config.custom_unroll = True
+
+    return config
+
+
+def get_flash_size(binary):
+    flash_sections = [
+        ".isr_vector",
+        ".text",
+        ".rodata",
+        ".ARM.extab",
+        ".ARM",
+        ".preinit_array",
+        ".init_array",
+        ".fini_array",
+        ".data"
+    ]
+    total = 0
+    text = 0
+    inst = f"objdump -h {binary}".split()
+    output = subprocess.run(inst, capture_output=True)
+    lines = output.stdout.decode().split("\n")
+    for line in lines:
+        tokens = line.strip().split()
+        if len(tokens) < 1:
+            continue
+        if tokens[0].isdigit():
+            section, size_in_hex = tokens[1], tokens[2]
+            size = int(size_in_hex, base=16)
+            if section in flash_sections:
+                print(section, size_in_hex, size)
+                total += size
+                if section == ".text":
+                    text = size
+    return total, text
+    
+
+def main():
+    pps = PPS_E36311A()
+    config = get_default_build_config()
+
+    benchmarks = [
+        "vBasicMath",
+        "vCrc",
+        "vFFT",
+        "vSha",
+        "vStringSearch",
+        "vMatMul",
+        "vConv2d",
+        "vAes",
+    ]
+    benchmarks = [
+        "vAes"
+    ]
+
+    configs = ["original", "pass_count", "adaptive", "unroll"]
+    # configs = ["pass_count"]
+
+    for benchmark in benchmarks:
+        all_records = []
+        for config_name in configs:
+            config = get_build_config(benchmark, config_name)
+
+            env = TestEnv(WORKSPACE_ROOT, NVM_RESET_BIN, OPENOCD_SCRIPT)
+
+            with tempfile.TemporaryDirectory() as build_dir:
+                binary = env.build_binary(config, build_dir)
+                size, text_size = get_flash_size(binary)
+                # size = os.path.getsize(binary)
+            
+            record = {
+                "benchmark": benchmark,
+                "config": config_name,
+                "flash": size,
+                "text": text_size
+            }
+
+            all_records.append(record)
+
+        df = pd.DataFrame(all_records)
+        print(df)
+        save_records(benchmark, config_name, df)
+
+
+def get_default_build_config():
+    config = BuildConfigM33()
+    config.insert_compiler_checkpoints = True
+    config.enable_extension = True
+    config.use_checkpoint_pass_counter = False
+    config.use_checkpoint_voltage_check = False
+    config.bench_infinite_loop = True
+    config.print_recovery_message = True
+    config.split_loop = False
+    config.enable_static_loop_pass_count = False
+    config.enable_adaptive_loop_pass_count = False
+    config.print_stats = True
+    config.custom_unroll = False
+    return config
+
+
+def save_records(bench_name, config_name, df):
+    with open(f"output/{bench_name}.pickle", "wb") as f:
+        pickle.dump(df, f)
+
+
+if __name__ == "__main__":
+    main()

+ 4 - 0
imc/exprs/date2025/5_size_overhead/temp.c

@@ -0,0 +1,4 @@
+#define DO_PRAGMA(x) _Pragma ( #x )
+#define IMC_UNROLL(x) DO_PRAGMA(clang loop unroll_count(x))
+
+IMC_UNROLL(10)

+ 8 - 0
imc_extension.cmake

@@ -7,6 +7,7 @@ set(IMC_MAX_LOOP_IDS 1 CACHE STRING "")
 set(IMC_LOOP_PASS_COUNT 10 CACHE BOOL "")
 set(IMC_BENCH_REPEAT_COUNT 1 CACHE BOOL "")
 set(IMC_PRINT_STATS 0 CACHE BOOL "")
+set(IMC_CUSTOM_UNROLL 0 CACHE BOOL "")
 
 set(AVAILABLE_BENCHES "vBasicMath" "vStringSearch" "vFFT" "vSha" "vCrc" "vMatMul" "vConv2d" "adc_demo" "vAes")
 if(NOT IMC_BENCH_NAME IN_LIST AVAILABLE_BENCHES)
@@ -19,6 +20,7 @@ endif()
 
 add_compile_definitions(imcBENCH_NAME=${IMC_BENCH_NAME})
 add_compile_definitions(imcBENCH_REPEAT_COUNT=${IMC_BENCH_REPEAT_COUNT})
+add_compile_definitions(imcCUSTOM_UNROLL=${IMC_CUSTOM_UNROLL})
 
 if(IMC_BENCH_INFINITE_LOOP)
     add_compile_definitions(imcBENCH_INFINITE_LOOP=1)
@@ -161,6 +163,11 @@ function(compile_imc_files)
     set(IMC_OPT_PASSES "")
     set(PLUGINS "")
 
+    if(IMC_CUSTOM_UNROLL)
+        list(APPEND IMC_OPT_PASSES "imc-loop-unroll")
+        list(APPEND PLUGINS /home/ybkim/workspace/imc/loop_duplication/build/lib/libImcLoopUnroll.so)
+    endif()
+
     if(IMC_SPLIT_LOOP)
         list(APPEND IMC_OPT_PASSES "imc-loop-opt")
         list(APPEND PLUGINS /home/ybkim/workspace/imc/loop_duplication/build/lib/libImcLoopOpt.so)
@@ -183,6 +190,7 @@ function(compile_imc_files)
 
     set(ADDITIONAL_DEFS
         "-D imcBENCH_REPEAT_COUNT=${IMC_BENCH_REPEAT_COUNT}"
+        "-D imcCUSTOM_UNROLL=${IMC_CUSTOM_UNROLL}"
     )
 
     foreach(APP_SRC ${IMC_APP_FILES})

+ 4 - 3
setup_build_dir.py

@@ -10,19 +10,20 @@ OPENOCD_SCRIPT = f"{WORKSPACE_ROOT}/imc_freertos_app_m33.cfg"
 BUILD_DIR = f"{WORKSPACE_ROOT}/build"
 
 config = BuildConfigM33()
-config.bench_name = "vSha"
+config.bench_name = "vFFT"
 config.insert_compiler_checkpoints = True
 config.enable_extension = True
 config.use_checkpoint_pass_counter = False
 config.use_checkpoint_voltage_check = True
 config.bench_infinite_loop = True
-config.split_loop = True
+config.split_loop = False
 config.loop_pass_count = 30
 config.enable_static_loop_pass_count = False
-config.enable_adaptive_loop_pass_count = True
+config.enable_adaptive_loop_pass_count = False
 config.max_loop_ids = 30
 config.bench_repeat_count = config.bench_repeat_count_small[config.bench_name]
 config.print_stats = True
+config.custom_unroll = True
 
 env = TestEnv(WORKSPACE_ROOT, NVM_RESET_BIN, OPENOCD_SCRIPT)
 

Algunos archivos no se muestran porque este cambio modifica demasiados archivos