Explorar el Código

Add loop unrolling experiments

Youngbin Kim hace 1 año
padre
commit
d12b84dcf5

+ 2 - 1
.gitignore

@@ -8,4 +8,5 @@ imc_freertos_app_m33.launch
 build/
 build.sh
 imc/exprs/**/output**
-__pycache__
+__pycache__
+imc/exprs/**/*.pdf

+ 1 - 0
CMakeLists.txt

@@ -225,6 +225,7 @@ else()
     endif()
 endif()
 
+target_link_options(imc_freeRTOS PRIVATE "LINKER:-gc-sections")
 if(IMC_APP_FILES)
     target_link_libraries(imc_freeRTOS PRIVATE imc_apps c m nosys gcc)
 else()

+ 14 - 0
Core/Inc/ImC/imc_extension.h

@@ -37,12 +37,26 @@
     #define imcPRINT_STATS 0
 #endif
 
+#ifndef imcCUSTOM_UNROLL
+    #define imcCUSTOM_UNROLL 0 
+#endif
+
 #define IMC_KERNEL_NVM __attribute__((section(".kernel_nvm")))
 
 #define imcCAP_VOL_HIGH 4800
 #define imcCAP_VOL_LOW 3500
 #define imcENERGY_TOTAL (imcCAP_VOL_HIGH*imcCAP_VOL_HIGH - imcCAP_VOL_LOW*imcCAP_VOL_LOW)
 
+#if (imcCUSTOM_UNROLL)
+// #define STRINGIFY(x) ("clang loop unroll_count(" ##str ")")
+#define DO_PRAGMA(x) _Pragma ( #x )
+#define IMC_UNROLL(x) DO_PRAGMA( clang loop unroll_count(x+1) )
+#define IMC_UNROLL_FULL() DO_PRAGMA( clang loop unroll(full) )
+#else
+#define IMC_UNROLL(x) 
+#define IMC_UNROLL_FULL()
+#endif
+
 int __imc_get_loop_pass_count(int loop_id);
 void __imc_finish_loop_skipping();
 

+ 4 - 0
Core/Src/benchmarks/aes/aes.c

@@ -262,6 +262,7 @@ void AES_CBC_encrypt_buffer(struct AES_ctx *ctx, uint8_t *buf, size_t length)
 void vAes() {
     // int IMC_REPEAT = 1000;
     int IMC_REPEAT = imcBENCH_REPEAT_COUNT;
+    IMC_UNROLL(1)
     for(int imc=0; imc < IMC_REPEAT; imc++) {
         uint8_t key[] = {0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c};
         uint8_t out[] = {0x76, 0x49, 0xab, 0xac, 0x81, 0x19, 0xb2, 0x46, 0xce, 0xe9, 0x8e, 0x9b, 0x12, 0xe9, 0x19, 0x7d,
@@ -288,6 +289,7 @@ void vAes() {
             uint8_t tempa[4]; // Used for the column/row operations
 
             // The first round key is the key itself.
+            IMC_UNROLL(Nk)
             for (i = 0; i < Nk; ++i)
             {
                 RoundKey[(i * 4) + 0] = Key[(i * 4) + 0];
@@ -297,6 +299,7 @@ void vAes() {
             }
 
             // All other round keys are found from the previous round keys.
+            IMC_UNROLL(Nb * (Nr + 1) - Nk)
             for (i = Nk; i < Nb * (Nr + 1); ++i)
             {
                 {
@@ -354,6 +357,7 @@ void vAes() {
                 // XorWithIv(buf, Iv);
                 {
                     uint8_t j;
+                    IMC_UNROLL(AES_BLOCKLEN)
                     for (j = 0; j < AES_BLOCKLEN; ++j) // The block in AES is always 128bit no matter the key size
                     {
                         // buf[j] ^= Iv[j];

+ 2 - 0
Core/Src/benchmarks/basicmath/basicmath.c

@@ -38,8 +38,10 @@ void vBasicMath()
       // imcREQUEST_CHECKPOINT();
       for (b1 = 10; b1 > 0; b1--)
       {
+        IMC_UNROLL(2)
         for (c1 = 5; c1 < 15; c1 += 0.5)
         {
+          IMC_UNROLL(10)
           for (d1 = -1; d1 > -11; d1--)
           {
             SolveCubic(a1, b1, c1, d1, &solutions, x);

+ 4 - 10
Core/Src/benchmarks/conv2d/conv2d.c

@@ -19,25 +19,19 @@ void vConv2d()
 
     int output[OUTPUT_SIZE * OUTPUT_SIZE];
 
-// #pragma clang loop unroll(disable)
+    IMC_UNROLL(8)
     for (int imc_repeat = 0; imc_repeat < IMC_REPEAT; imc_repeat++)
     {
 
-// #pragma clang loop unroll(disable)
-// for(int i=0; i<OUTPUT_SIZE; i++) {
-//     #pragma clang loop unroll(disable)
-//     for(int j=0; j<OUTPUT_SIZE; j++) {
-
-// #pragma clang loop unroll(disable)
         for (int _i = 0; _i < OUTPUT_SIZE * OUTPUT_SIZE; _i++)
         {
             int i = _i / OUTPUT_SIZE;
             int j = _i % OUTPUT_SIZE;
             int sum = 0;
-            // #pragma clang loop unroll(disable)
+            IMC_UNROLL(KERNEL_SIZE)
             for (int ii = 0; ii < KERNEL_SIZE; ii++)
             {
-                // #pragma clang loop unroll(disable)
+                IMC_UNROLL(KERNEL_SIZE)
                 for (int jj = 0; jj < KERNEL_SIZE; jj++)
                 {
                     int input_i = i + ii;
@@ -53,7 +47,7 @@ void vConv2d()
 
     int sum = 0;
 
-#pragma clang loop unroll(disable)
+// #pragma clang loop unroll(disable)
     for (int _i = 0; _i < OUTPUT_SIZE * OUTPUT_SIZE; _i++)
     {
         printf("%d ", output[_i]);

+ 1 - 0
Core/Src/benchmarks/crc/crc_32.c

@@ -199,6 +199,7 @@ void vCrc()
             // printf("%d\r\n", &_binary_small_modified_pcm_start);
             // printf("%d\r\n", &_binary_small_modified_pcm_end);
 
+            IMC_UNROLL(9)
             for (int i = 0; i < size; i += 1)
             {
                   c = input[i];

+ 6 - 0
Core/Src/benchmarks/fft/fft.c

@@ -44,22 +44,28 @@ void vFFT()
     float coeff[MAXWAVES];
     float amp[MAXWAVES];
 
+    // IMC_UNROLL(1)
     for (int imc_repeat=0; imc_repeat < IMC_REPEAT; imc_repeat++) {
 
     /* Makes MAXWAVES waves of random amplitude and period */
+    IMC_UNROLL(4)
     for (i = 0; i < MAXWAVES; i++)
     {
         // coeff[i] = rand() % 1000;
         // amp[i] = rand() % 1000;
         coeff[i] = 123;
         amp[i] = 456;
+        // coeff[i] = i;
+        // amp[i] = i+1;
     }
+    IMC_UNROLL(4)
     for (i = 0; i < MAXSIZE; i++)
     {
         /*   RealIn[i]=rand();*/
         RealIn[i] = 0;
         ImagIn[i] = 0;
         float sum = 0;
+        IMC_UNROLL(4)
         for (j = 0; j < MAXWAVES; j++)
         {
             /* randomly select sin or cos */

+ 5 - 1
Core/Src/benchmarks/matmul/matmul.c

@@ -13,12 +13,16 @@ void vMatMul() {
     // int IMC_REPEAT = 2000;
     int IMC_REPEAT = imcBENCH_REPEAT_COUNT;
     int C[MAT_SIZE * MAT_SIZE] = { 0 };
+
     for(int imc=0; imc<IMC_REPEAT; imc++) {
 
-        for(int _i=0; _i<MAT_SIZE*MAT_SIZE; _i++) {
+        IMC_UNROLL(3)
+        for (int _i = 0; _i < MAT_SIZE * MAT_SIZE; _i++)
+        {
             int sum = 0;
             int i = _i / MAT_SIZE;
             int j = _i % MAT_SIZE;
+            IMC_UNROLL(MAT_SIZE)
             for(int k=0; k<MAT_SIZE; k++) {
                 sum += A[i][k] * B[k][j];
             }

+ 2 - 0
Core/Src/benchmarks/sha/sha.c

@@ -49,9 +49,11 @@ static void sha_transform(SHA_INFO *sha_info)
     int i;
     LONG temp, A, B, C, D, E, W[80];
 
+    IMC_UNROLL(16)
     for (i = 0; i < 16; ++i) {
 	W[i] = sha_info->data[i];
     }
+    IMC_UNROLL(64)
     for (i = 16; i < 80; ++i) {
 	W[i] = W[i-3] ^ W[i-8] ^ W[i-14] ^ W[i-16];
 #ifdef USE_MODIFIED_SHA

+ 1 - 0
Core/Src/benchmarks/stringsearch/stringsearch_local.c

@@ -109,6 +109,7 @@ void vStringSearch()
 	  
 	//   int IMC_REPEAT = 5;
 	  int IMC_REPEAT = imcBENCH_REPEAT_COUNT;
+	  IMC_UNROLL(3)
 	  for(int imc_repeat=0; imc_repeat < IMC_REPEAT; imc_repeat++) {
 
       for (i = 0; find_strings[i]; i++)

La diferencia del archivo ha sido suprimida porque es demasiado grande
+ 66 - 206
imc/exprs/date2025/3_adaptive/draw_graph_expr_3.ipynb


+ 237 - 67
imc/exprs/date2025/3_adaptive/draw_graph_expr_3.py

@@ -3,38 +3,38 @@ import os
 import pandas as pd
 import seaborn as sns
 import matplotlib.pyplot as plt
+import matplotlib.ticker
+from scipy import stats
+import numpy as np
 
 import plot_utils
 
-def validate_and_cleanup_df(df):
-    # rename_dict = {
-    #     "checkpoint_enabled": "ckpt",
-    #     "voltage_check_enabled": "VC",
-    #     "pass_counter_enabled": "PC"
-    # }
-    # df = df.rename(columns=rename_dict)
+config_order = ["counter", "unroll", "adaptive"]  
+bench_order = ["fft", "basicmath", "crc", "sha", "stringsearch", "aes", "conv2d", "matmul"]
 
-    mask = df["is_correct"] == False
-    if mask.any():
-        print("drop rows with incorrect output")
-        print(df[mask])
+def validate_and_cleanup_df(df, config_name):
 
-    df = df[~mask]
-    assert(df["is_correct"].all())
-
-    # def category_mapper(row):
-    #     if row["ckpt"] == False:
-    #         return "Baseline"
-    #     else:
-    #         if row["VC"] == False:
-    #             assert(row["PC"] == True)
-    #             return "PC"
-    #         else:
-    #             assert(row["PC"] == False)
-    #             return "VC"
-
-    # category_column_name = "config"
-    # df[category_column_name] = df.apply(category_mapper, axis=1)
+    # mask = df["is_correct"] == False
+    # if mask.any():
+    #     print(f"({config_name}) drop {mask.sum()} rows with incorrect output")
+    # df = df[~mask]
+    # assert(df["is_correct"].all())
+    
+    bench_name = df.iloc[0,0]
+    z_score = stats.zscore(df["time_taken"])
+    mask2 = np.abs(z_score) > 4.35
+    if mask2.any():
+        print(f"({bench_name}, {config_name}) remove {mask2.sum()} outliers")
+        time = float(df[mask2]["time_taken"].iloc[0])
+        average = df["time_taken"].mean()
+        print(f"time: {time:.2f}, average: {average:.2f}")
+
+    df = df[~mask2].copy()
+
+    def rename_bench_name(row):
+        return row["bench_name"].lower()[1:]
+
+    df["bench_name"] = df.apply(rename_bench_name, axis=1)
 
     def process_stats(row):
         lines = row["stats"]
@@ -47,7 +47,7 @@ def validate_and_cleanup_df(df):
             if key.startswith("checkpoint executed"):
                 result["ckpt_exec"] = val
         return result
-    
+
     df_stats = df.apply(process_stats, axis=1, result_type="expand")
     df = pd.concat([df, df_stats], axis=1)
 
@@ -80,8 +80,8 @@ def get_base_df():
     output_dir = "/home/ybkim/workspace/imc/imc_freertos_app_m33/imc/exprs/date2025/3_adaptive/output"
 
     all_dfs = []
-    configs = ["pass_count", "adaptive"]
-    drop_index = list(range(0, 3))
+    configs = ["pass_count", "adaptive", "unroll"]
+    drop_index = list(range(0, 1))
     drop_index = None
 
     for benchmark in benchmarks:
@@ -94,73 +94,243 @@ def get_base_df():
                 orig_df = pickle.load(f)
                 if drop_index and config_name == "adaptive":
                     orig_df = orig_df.drop(drop_index)
-                df = validate_and_cleanup_df(orig_df)
+                df = validate_and_cleanup_df(orig_df, config_name)
                 df["config"] = config_name
                 all_dfs.append(df)
 
     orig_df = pd.concat(all_dfs)
+
+    def rename_config(row):
+        name = row["config"]
+        d = {
+            "adaptive": "adaptive",
+            "pass_count": "counter",
+            "unroll": "unroll"
+        }
+        return d[name]
+    orig_df["config"] = orig_df.apply(rename_config, axis=1)
+
     return orig_df
 
 
-def draw_checkpoint_count():
-    plot_utils.set_theme_seaborn(kind="line")
-    orig_df = get_base_df()
+def remove_error_stats(orig_df):
     mask = orig_df["ckpt_trig"].abs() > 1e7 
     df = orig_df[~mask]
-    mask = orig_df["ckpt_exec"].abs() > 1e6
-    df = orig_df[~mask]
-    benchmarks = list(df["bench_name"].unique())
-    print(benchmarks)
-    fig_size = (12, 3.5 * len(benchmarks))
-    n_rows = len(benchmarks)
-    n_cols = 1
-    hspace = 0.08
+    mask = df["ckpt_exec"].abs() > 1e6
+    df = df[~mask]
+    mask = df["ckpt_trig"] < 0
+    df = df[~mask]
+    mask = df["ckpt_exec"] < 0
+    df = df[~mask]
+    return df
+
+
+def draw_checkpoint_count():
+    rc = {
+        "lines.linewidth": 1.5,
+        "axes.titlepad": 10,
+        "ytick.major.pad": -4,
+        "ytick.labelsize": 20,
+        "legend.fontsize": 23,
+        "axes.labelsize": 25,
+    }
+    plot_utils.set_theme_seaborn(kind="line", rc_custom=rc)
+
+    orig_df = get_base_df()
+    # mask = orig_df["ckpt_trig"].abs() > 1e7 
+    # df = orig_df[~mask]
+    # mask = orig_df["ckpt_exec"].abs() > 1e6
+    # df = orig_df[~mask]
+    # mask = orig_df["ckpt_trig"] < 0
+    # df = orig_df[~mask]
+    # mask = orig_df["ckpt_exec"] < 0
+    # df = orig_df[~mask]
+    df = remove_error_stats(orig_df)
+
+    benchmarks = bench_order
+    fig_size = (13, 5.5)
+    n_rows = 2
+    n_cols = 4
+    hspace = 0.05
     fig = plt.figure(figsize=fig_size)
-    axes = fig.subplots(n_rows, n_cols, sharex=False, gridspec_kw={"hspace": hspace})
-    for (i, (key, d_orig)) in enumerate(df.groupby(["bench_name"])):
-        d = d_orig.reset_index()
-        # mask = d["config"] == "adaptive"
-        # d = d[mask]
-        y = "ckpt_exec"
-        # y = "ckpt_trig"
+    axes = fig.subplots(n_rows, n_cols, sharex=True, gridspec_kw={"hspace": hspace})
+    df = df[df["config"] == "adaptive"]
+    df = df.rename(columns={
+        "ckpt_trig": "trig",
+        "ckpt_exec": "exec",
+    })
+    df = df.drop(columns=["config", "time_taken"])
+    for i, benchmark in enumerate(benchmarks):
+        d = df[df["bench_name"] == benchmark].reset_index()
+        print(d)
+
+        id_vars = ["index", "bench_name"]
+        value_vars = ["trig", "exec"]
+        d = d.melt(id_vars=id_vars, value_vars=value_vars, value_name="count", var_name="type")
+        # d = d.reset_index().melt(id_vars=["bench_name"], var_name="config", value_name="normalized")
         print(d)
 
         ax = axes.reshape(-1)[i]
         sns.lineplot(
             data=d,
             x="index",
-            y=y,
-            hue="config",
-            ax=ax
+            y="count",
+            hue="type",
+            ax=ax,
         )
-        ax.set_title(key[0])
+        ax.set_title(benchmark)
+        ax.set_xlabel("")
+        ax.set_ylabel("")
+        if i / 4 >= 1:
+            ax.set_xlabel("Execution #")
+        if i in [0, 4]:
+            ax.set_ylabel("Count (k)")
+        ax.yaxis.set_major_formatter(matplotlib.ticker.FuncFormatter(lambda x, pos: f"{x/1000:.0f}"))
+        ax.yaxis.grid(visible=True, which="both")
+        ax.yaxis.set_major_locator(matplotlib.ticker.MaxNLocator(3))
+        ax.yaxis.set_minor_locator(matplotlib.ticker.AutoMinorLocator(3))
+
+        ax.xaxis.set_minor_locator(matplotlib.ticker.AutoMinorLocator(2))
+        ax.xaxis.grid(visible=True, which="both")
+
+        if i == 0:
+            ax.legend(
+                ncol=1, loc="upper left", bbox_to_anchor=(0.33, 0.995), labelspacing=0.3,
+                handlelength=1,
+            )
+            # ax.get_legend().remove()
+        else:
+            ax.get_legend().remove()
+
+    return fig
 
 
 def draw_graph():
     orig_df = get_base_df()
-    df = orig_df.groupby(["bench_name", "config"]).mean()
-    df = df.reset_index()
+    # df = orig_df.groupby(["bench_name", "config"]).mean()
+    df = orig_df.reset_index()
 
-    # df = df.pivot(index="bench_name", columns="config", values="time_taken")
 
-    # df2 = pd.DataFrame()
-    # df2["PC"] = df["PC"] / df["Baseline"]
-    # df2["VC"] = df["VC"] / df["Baseline"]
+    err_kws = {
+        "linewidth": 1.2,
+        "color": "black",
+    }
 
-    # df2 = df2.reset_index().melt(id_vars=["bench_name"], var_name="config", value_name="normalized")
-    # df2 = df2.sort_values(by="bench_name")
-    # df2 = df2[df2["config"] == "PC"]
-    # df2 = df2[df2["config"] == "VC"]
-    # df2 = df2[df2["bench_name"] == "vFFT"]
+    rc = {
+        "legend.fontsize": 24,
+    }
+
+    plot_utils.set_theme_seaborn(rc)
 
-    plot_utils.set_theme_seaborn()
     g = sns.catplot(
         data=df,
         kind="bar",
         x="bench_name",
-        y = "time_taken",
+        y="time_taken",
         hue="config",
-        aspect=2.3
+        legend="brief",
+        legend_out=False,
+        aspect=2.5,
+        errorbar=lambda x: (x.min(), x.max()),
+        hue_order=config_order,
+        order=bench_order,
+        err_kws=err_kws,
+        capsize=0.2,
     )
+    ax = g.ax
+    ax.set_ylim([0, 35])
+    ax.set_xlabel("Benchmark")
+    ax.set_ylabel("Time (s)")
+    g.ax.yaxis.set_major_locator(matplotlib.ticker.MaxNLocator(6))
+
+    ax = g.facet_axis(0, 0)
+
+    for i, c in enumerate(ax.containers):
+        labels = [f"{(v.get_height()):.0f}" if v.get_height() > 50 else "" for v in c]
+        padding = 125
+        ax.bar_label(c, labels=labels, label_type="center", padding=padding, size=16)
+
+    num_categories = len(df.bench_name.unique())
+    num_hues = len(df.config.unique())
+    plot_utils.draw_hatch(ax, num_categories, num_hues)
+    
+    ax.legend(ncol=1, loc="upper right", bbox_to_anchor=(0.265, 0.995), labelspacing=0.3)
+
+    sns.despine(g.fig, right=False, top=False)
 
     return g
+
+def draw_checkpoint_frequency_comparison():
+    orig_df = get_base_df()
+    df = remove_error_stats(orig_df)
+    df = df.drop(columns=["time_taken"])
+    df = df.groupby(["bench_name", "config"]).mean()
+    df = df.reset_index()
+
+    print(df)
+    base_df = df
+    adaptive_df = base_df[base_df["config"] == "adaptive"]
+    adaptive_df = adaptive_df.set_index("bench_name")
+
+    rc = {
+        "legend.fontsize": 24,
+    }
+    plot_utils.set_theme_seaborn(rc)
+
+    benchmarks = bench_order
+    fig_size = (10, 6.2)
+    n_rows = 2
+    n_cols = 1
+    hspace = 0.05
+    fig = plt.figure(figsize=fig_size)
+    axes = fig.subplots(n_rows, n_cols, squeeze=True, sharex=False, gridspec_kw={"hspace": hspace})
+
+    configs = ["counter", "unroll"]
+    for i, config in enumerate(configs):
+        d = base_df[base_df["config"] == config]
+        d = d.set_index("bench_name")
+
+        df2 = pd.DataFrame()
+        df2["trig"] = adaptive_df["ckpt_trig"] / d["ckpt_trig"]
+        df2["exec"] = adaptive_df["ckpt_exec"] / d["ckpt_exec"]
+        df2 = df2.reset_index()
+        df2 = df2.melt(id_vars=["bench_name"], value_vars=["trig", "exec"], var_name="type", value_name="normalized")
+
+        print(axes)
+        ax = axes[i]
+
+        g = sns.barplot(
+            data=df2,
+            x="bench_name",
+            y="normalized",
+            hue="type",
+            legend="brief",
+            order=bench_order,
+            ax = ax,
+        )
+        ax.set_title(f"vs. {config}", fontsize=22)
+        ax.set_ylim([0, 0.8])
+        ax.set_xlabel("Benchmark")
+        ax.set_ylabel("Ratio")
+        # ax.yaxis.set_major_locator(matplotlib.ticker.MaxNLocator(6))
+        ax.yaxis.set_minor_locator(matplotlib.ticker.AutoMinorLocator(5))
+        ax.yaxis.grid(visible=True, which="both")
+
+        for c in ax.containers:
+            labels = [f"{(v.get_height()):.2f}" if v.get_height() > 0.8 else "" for v in c]
+            padding = 80
+            ax.bar_label(c, labels=labels, label_type="center", padding=padding, size=16)
+
+        num_categories = len(df.bench_name.unique())
+        num_hues = len(df.config.unique())
+        plot_utils.draw_hatch(ax, num_categories, num_hues)
+
+        if i == 0:
+            ax.set_xlabel("")
+            ax.legend(ncol=1, loc="upper right", bbox_to_anchor=(1, 1), labelspacing=0.3)
+        else:
+            ax.get_legend().remove()
+
+        sns.despine(fig, right=False, top=False)
+
+    return fig

+ 8 - 3
imc/exprs/date2025/3_adaptive/run_expr_3.py

@@ -29,6 +29,10 @@ def get_build_config(benchmark, config_name):
         # config.enable_static_loop_pass_count = True
         # config.loop_pass_count = 50
         config.max_loop_ids = 30
+    
+    if config_name == "unroll":
+        config.use_checkpoint_voltage_check = True
+        config.custom_unroll = True
 
     return config
 
@@ -47,11 +51,11 @@ def main():
         "vAes",
     ]
     # benchmarks = [
-    #     "vBasicMath"
+    #     "vAes"
     # ]
 
-    configs = ["pass_count", "adaptive"]
-    # configs = ["adaptive"]
+    configs = ["pass_count", "adaptive", "unroll"]
+    # configs = ["adaptive", "unroll"]
 
     total_iterations = 20
 
@@ -105,6 +109,7 @@ def get_default_build_config():
     config.enable_static_loop_pass_count = False
     config.enable_adaptive_loop_pass_count = False
     config.print_stats = True
+    config.custom_unroll = False
     return config
 
 

La diferencia del archivo ha sido suprimida porque es demasiado grande
+ 29 - 0
imc/exprs/date2025/5_size_overhead/draw_graph_expr_5.ipynb


+ 115 - 0
imc/exprs/date2025/5_size_overhead/draw_graph_expr_5.py

@@ -0,0 +1,115 @@
+import pickle
+import os
+import pandas as pd
+import seaborn as sns
+import matplotlib.pyplot as plt
+
+import plot_utils
+
+
+def validate_and_cleanup_df(df):
+    # rename_dict = {
+    #     "checkpoint_enabled": "ckpt",
+    #     "voltage_check_enabled": "VC",
+    #     "pass_counter_enabled": "PC"
+    # }
+    # df = df.rename(columns=rename_dict)
+
+    mask = df["is_correct"] == False
+    if mask.any():
+        print("drop rows with incorrect output")
+        print(df[mask])
+
+    df = df[~mask]
+    assert df["is_correct"].all()
+
+    # def category_mapper(row):
+    #     if row["ckpt"] == False:
+    #         return "Baseline"
+    #     else:
+    #         if row["VC"] == False:
+    #             assert(row["PC"] == True)
+    #             return "PC"
+    #         else:
+    #             assert(row["PC"] == False)
+    #             return "VC"
+
+    # category_column_name = "config"
+    # df[category_column_name] = df.apply(category_mapper, axis=1)
+
+    def process_stats(row):
+        lines = row["stats"]
+        result = {}
+        for line in lines:
+            key, val = line.split(":")
+            val = int(val.strip())
+            if key.startswith("checkpoint triggered"):
+                result["ckpt_trig"] = val
+            if key.startswith("checkpoint executed"):
+                result["ckpt_exec"] = val
+        return result
+
+    df_stats = df.apply(process_stats, axis=1, result_type="expand")
+    df = pd.concat([df, df_stats], axis=1)
+
+    drop_columns = [
+        "start",
+        "end",
+        "recovery",
+        "outputs",
+        "stats",
+        # "ckpt",
+        # "VC",
+        # "PC",
+        "is_correct",
+    ]
+    df = df.drop(columns=drop_columns)
+    return df
+
+
+def get_base_df():
+    benchmarks = [
+        "vBasicMath",
+        "vCrc",
+        "vFFT",
+        "vSha",
+        "vStringSearch",
+        "vMatMul",
+        "vConv2d",
+        "vAes",
+    ]
+    benchmarks = [
+        "vStringSearch"
+    ]
+    output_dir = "/home/ybkim/workspace/imc/imc_freertos_app_m33/imc/exprs/date2025/5_size_overhead/output"
+
+    all_dfs = []
+
+    for benchmark in benchmarks:
+        pickle_filename = f"{output_dir}/{benchmark}.pickle"
+        if not os.path.exists(pickle_filename):
+            print(f"pass loading {benchmark}")
+            continue
+        with open(pickle_filename, "rb") as f:
+            df = pickle.load(f)
+            mask = df["config"] == "original"
+            base = int(df[mask]["text"].iloc[0])
+            df["normalized_text"] = df["text"] / base
+            df = df[~mask]
+            all_dfs.append(df)
+
+    orig_df = pd.concat(all_dfs)
+    return orig_df
+
+
+def draw_graph():
+    orig_df = get_base_df()
+    mask = orig_df["config"] != "original"
+    df = orig_df[mask]
+
+    plot_utils.set_theme_seaborn()
+    g = sns.catplot(
+        data=df, kind="bar", x="benchmark", y="normalized_text", hue="config", aspect=2.3
+    )
+
+    return g

+ 145 - 0
imc/exprs/date2025/5_size_overhead/run_expr_5.py

@@ -0,0 +1,145 @@
+import tempfile
+import time
+import pickle
+import pandas as pd
+import os
+import subprocess
+import pprint
+
+from imc_utils.pps_e36311a import PPS_E36311A
+from imc_utils.build_config.cortex_m33 import BuildConfigM33
+from imc_utils.build_config.test_env import TestEnv
+from imc_utils.serial_watch import SerialWatcher
+
+WORKSPACE_ROOT = "/home/ybkim/workspace/imc/imc_freertos_app_m33"
+NVM_RESET_BIN = f"{WORKSPACE_ROOT}/imc/utils/nvm_reset.elf"
+OPENOCD_SCRIPT = f"{WORKSPACE_ROOT}/imc_freertos_app_m33.cfg"
+
+
+def get_build_config(benchmark, config_name):
+    config = get_default_build_config()
+    config.bench_name = benchmark
+    bench_repeat_count = config.bench_repeat_count_small[benchmark]
+    config.bench_repeat_count = bench_repeat_count
+
+    if config_name == "original":
+        config.insert_compiler_checkpoints = False
+
+    if config_name == "pass_count":
+        config.use_checkpoint_pass_counter = True
+        config.checkpoint_pass_count = config.pass_count_10ms[benchmark]
+
+    if config_name == "adaptive":
+        config.use_checkpoint_voltage_check = True
+        config.split_loop = True
+        config.enable_adaptive_loop_pass_count = True
+        config.max_loop_ids = 30
+    
+    if config_name == "unroll":
+        config.custom_unroll = True
+
+    return config
+
+
+def get_flash_size(binary):
+    flash_sections = [
+        ".isr_vector",
+        ".text",
+        ".rodata",
+        ".ARM.extab",
+        ".ARM",
+        ".preinit_array",
+        ".init_array",
+        ".fini_array",
+        ".data"
+    ]
+    total = 0
+    text = 0
+    inst = f"objdump -h {binary}".split()
+    output = subprocess.run(inst, capture_output=True)
+    lines = output.stdout.decode().split("\n")
+    for line in lines:
+        tokens = line.strip().split()
+        if len(tokens) < 1:
+            continue
+        if tokens[0].isdigit():
+            section, size_in_hex = tokens[1], tokens[2]
+            size = int(size_in_hex, base=16)
+            if section in flash_sections:
+                print(section, size_in_hex, size)
+                total += size
+                if section == ".text":
+                    text = size
+    return total, text
+    
+
+def main():
+    pps = PPS_E36311A()
+    config = get_default_build_config()
+
+    benchmarks = [
+        "vBasicMath",
+        "vCrc",
+        "vFFT",
+        "vSha",
+        "vStringSearch",
+        "vMatMul",
+        "vConv2d",
+        "vAes",
+    ]
+    benchmarks = [
+        "vAes"
+    ]
+
+    configs = ["original", "pass_count", "adaptive", "unroll"]
+    # configs = ["pass_count"]
+
+    for benchmark in benchmarks:
+        all_records = []
+        for config_name in configs:
+            config = get_build_config(benchmark, config_name)
+
+            env = TestEnv(WORKSPACE_ROOT, NVM_RESET_BIN, OPENOCD_SCRIPT)
+
+            with tempfile.TemporaryDirectory() as build_dir:
+                binary = env.build_binary(config, build_dir)
+                size, text_size = get_flash_size(binary)
+                # size = os.path.getsize(binary)
+            
+            record = {
+                "benchmark": benchmark,
+                "config": config_name,
+                "flash": size,
+                "text": text_size
+            }
+
+            all_records.append(record)
+
+        df = pd.DataFrame(all_records)
+        print(df)
+        save_records(benchmark, config_name, df)
+
+
+def get_default_build_config():
+    config = BuildConfigM33()
+    config.insert_compiler_checkpoints = True
+    config.enable_extension = True
+    config.use_checkpoint_pass_counter = False
+    config.use_checkpoint_voltage_check = False
+    config.bench_infinite_loop = True
+    config.print_recovery_message = True
+    config.split_loop = False
+    config.enable_static_loop_pass_count = False
+    config.enable_adaptive_loop_pass_count = False
+    config.print_stats = True
+    config.custom_unroll = False
+    return config
+
+
+def save_records(bench_name, config_name, df):
+    with open(f"output/{bench_name}.pickle", "wb") as f:
+        pickle.dump(df, f)
+
+
+if __name__ == "__main__":
+    main()

+ 4 - 0
imc/exprs/date2025/5_size_overhead/temp.c

@@ -0,0 +1,4 @@
+#define DO_PRAGMA(x) _Pragma ( #x )
+#define IMC_UNROLL(x) DO_PRAGMA(clang loop unroll_count(x))
+
+IMC_UNROLL(10)

+ 8 - 0
imc_extension.cmake

@@ -7,6 +7,7 @@ set(IMC_MAX_LOOP_IDS 1 CACHE STRING "")
 set(IMC_LOOP_PASS_COUNT 10 CACHE BOOL "")
 set(IMC_BENCH_REPEAT_COUNT 1 CACHE BOOL "")
 set(IMC_PRINT_STATS 0 CACHE BOOL "")
+set(IMC_CUSTOM_UNROLL 0 CACHE BOOL "")
 
 set(AVAILABLE_BENCHES "vBasicMath" "vStringSearch" "vFFT" "vSha" "vCrc" "vMatMul" "vConv2d" "adc_demo" "vAes")
 if(NOT IMC_BENCH_NAME IN_LIST AVAILABLE_BENCHES)
@@ -19,6 +20,7 @@ endif()
 
 add_compile_definitions(imcBENCH_NAME=${IMC_BENCH_NAME})
 add_compile_definitions(imcBENCH_REPEAT_COUNT=${IMC_BENCH_REPEAT_COUNT})
+add_compile_definitions(imcCUSTOM_UNROLL=${IMC_CUSTOM_UNROLL})
 
 if(IMC_BENCH_INFINITE_LOOP)
     add_compile_definitions(imcBENCH_INFINITE_LOOP=1)
@@ -161,6 +163,11 @@ function(compile_imc_files)
     set(IMC_OPT_PASSES "")
     set(PLUGINS "")
 
+    if(IMC_CUSTOM_UNROLL)
+        list(APPEND IMC_OPT_PASSES "imc-loop-unroll")
+        list(APPEND PLUGINS /home/ybkim/workspace/imc/loop_duplication/build/lib/libImcLoopUnroll.so)
+    endif()
+
     if(IMC_SPLIT_LOOP)
         list(APPEND IMC_OPT_PASSES "imc-loop-opt")
         list(APPEND PLUGINS /home/ybkim/workspace/imc/loop_duplication/build/lib/libImcLoopOpt.so)
@@ -183,6 +190,7 @@ function(compile_imc_files)
 
     set(ADDITIONAL_DEFS
         "-D imcBENCH_REPEAT_COUNT=${IMC_BENCH_REPEAT_COUNT}"
+        "-D imcCUSTOM_UNROLL=${IMC_CUSTOM_UNROLL}"
     )
 
     foreach(APP_SRC ${IMC_APP_FILES})

+ 4 - 3
setup_build_dir.py

@@ -10,19 +10,20 @@ OPENOCD_SCRIPT = f"{WORKSPACE_ROOT}/imc_freertos_app_m33.cfg"
 BUILD_DIR = f"{WORKSPACE_ROOT}/build"
 
 config = BuildConfigM33()
-config.bench_name = "vSha"
+config.bench_name = "vFFT"
 config.insert_compiler_checkpoints = True
 config.enable_extension = True
 config.use_checkpoint_pass_counter = False
 config.use_checkpoint_voltage_check = True
 config.bench_infinite_loop = True
-config.split_loop = True
+config.split_loop = False
 config.loop_pass_count = 30
 config.enable_static_loop_pass_count = False
-config.enable_adaptive_loop_pass_count = True
+config.enable_adaptive_loop_pass_count = False
 config.max_loop_ids = 30
 config.bench_repeat_count = config.bench_repeat_count_small[config.bench_name]
 config.print_stats = True
+config.custom_unroll = True
 
 env = TestEnv(WORKSPACE_ROOT, NVM_RESET_BIN, OPENOCD_SCRIPT)
 

Algunos archivos no se muestran porque este cambio modifica demasiados archivos