瀏覽代碼

Make normal imc_apps take the same path with opt pass, update expr 8

Youngbin Kim 3 月之前
父節點
當前提交
b616956bd8

+ 6 - 0
.envrc

@@ -0,0 +1,6 @@
+if [ -f "$HOME/miniconda3/etc/profile.d/conda.sh" ]; then
+  source "$HOME/miniconda3/etc/profile.d/conda.sh"
+  conda activate embedded
+fi
+# ${VAR:+...} avoids a leading ":" (an empty entry, i.e. the current directory) when PYTHONPATH is unset.
+export PYTHONPATH="${PYTHONPATH:+$PYTHONPATH:}/home/ybkim/workspace/imc/python/imc_utils"

+ 2 - 1
.gitignore

@@ -10,4 +10,5 @@ build.sh
 imc/exprs/**/output**
 imc/reports/**/output**
 __pycache__
-imc/exprs/**/*.pdf
+imc/exprs/**/*.pdf
+.cache/

+ 3 - 5
CMakeLists.txt

@@ -2,6 +2,8 @@ cmake_minimum_required(VERSION 3.16...3.24)
 
 # include(static_kernel.cmake)
 
+set(CMAKE_EXPORT_COMPILE_COMMANDS TRUE)
+
 if(true)
 set(CMAKE_BUILD_TYPE Debug)
 set(CMAKE_VERBOSE_MAKEFILE 0)
@@ -181,11 +183,7 @@ add_executable(imc_freeRTOS ${SRC_FILES})
 
 if(IMC_ENABLE_EXTENSION)
     if(IMC_APP_FILES)
-        if(IMC_INSERT_COMPILER_CHECKPOINTS OR IMC_SPLIT_LOOP)
-            compile_imc_files()
-        else()
-            add_library(imc_apps "${IMC_APP_FILES}")
-        endif()
+        compile_imc_files()
     endif()
 else()
     if(IMC_APP_FILES)

+ 59 - 47
Core/Src/ImC/imc_extension.c

@@ -1,5 +1,7 @@
 #include <stdio.h>
 #include <string.h>
+#include <stdint.h>
+#include <math.h>
 
 #include "ImC/imc_kernel.h"
 #include "ImC/imc_extension.h"
@@ -85,7 +87,9 @@ int pass_due_to_low_energy;
     void imc_recover_estimations()
     {
     #if(imcLOOP_OPT_DEBUG && imcLOOP_OPT_DEBUG_VERBOSE)
+        portDISABLE_INTERRUPTS();
         printf("(DEBUG) recover estimation table from buffer %d\r\n", latest_buffer_index);
+        portENABLE_INTERRUPTS();
     #endif
     memcpy(loop_pass_estimation, estimation_backup[latest_buffer_index], sizeof(float) * imcMAX_LOOP_IDS);
     __DSB();
@@ -94,7 +98,9 @@ int pass_due_to_low_energy;
 void imc_backup_estimations() {
     int nextBufferIndex = latest_buffer_index == 0 ? 1 : 0;
     #if(imcLOOP_OPT_DEBUG && imcLOOP_OPT_DEBUG_VERBOSE)
+        portDISABLE_INTERRUPTS();
         printf("(DEBUG) backup estimation table to buffer %d\r\n", nextBufferIndex);
+        portENABLE_INTERRUPTS();
     #endif
     memcpy(estimation_backup[nextBufferIndex], loop_pass_estimation, sizeof(float) * imcMAX_LOOP_IDS);
     latest_buffer_index = nextBufferIndex;
@@ -116,10 +122,12 @@ void imc_init_energy_estimation() {
 
     if(!imc_energy_estimation_initialized) {
         #if(imcLOOP_OPT_DEBUG)
+            portDISABLE_INTERRUPTS();
             printf("(DEBUG) initialize estimation table\r\n");
+            portENABLE_INTERRUPTS();
         #endif
         for(int i=0; i<imcMAX_LOOP_IDS; i++) {
-            loop_pass_estimation[i] = 2;
+            loop_pass_estimation[i] = 3;
         }
         latest_buffer_index = 0;
         #if(imcPRINT_STATS)
@@ -170,11 +178,16 @@ void update_estimation(int loop_id, int counter, int current_energy, int energy_
     float current_estimation = (float)loop_pass_estimation[loop_id];
     if(current_estimation > 1e6) return;
     const float error_margin = imcENERGY_TOTAL * 0.03;
+    // const float error_margin = imcENERGY_TOTAL * 0;
     float max_consumed = energy_at_start + error_margin - (current_energy - error_margin);
     float min_consumed = energy_at_start - error_margin - (current_energy + error_margin);
+    // if(max_consumed <= 0) max_consumed = 1e-3;
+    // if(min_consumed <= 0) {
+    //     min_consumed = 1e-3;
+    // }
     if(max_consumed <= 0) max_consumed = 1e-3;
     if(min_consumed <= 0) {
-        min_consumed = 1e-3;
+        min_consumed = 1;
     }
 
     float measured_min = ((float)(imcENERGY_TOTAL)) / max_consumed * counter;
@@ -207,12 +220,27 @@ void update_estimation(int loop_id, int counter, int current_energy, int energy_
         new_estimation = current_estimation * factor;
         loop_pass_estimation[loop_id] = new_estimation;
     }
-    else return;
+    else {
+        #if(imcLOOP_OPT_DEBUG)
+            portDISABLE_INTERRUPTS();
+            printf("(DEBUG) Estimation not updated; loop_id:%d, counter:%d, measured_min: %f, measured_max: %f, current_estimation: %f\r\n", 
+                loop_id, counter, measured_min, measured_max, current_estimation);
+            portENABLE_INTERRUPTS();
+        #endif
+        return;
+    }
+
+    const int max_estimation = 1000000;
+    if(new_estimation > max_estimation) {
+        new_estimation = max_estimation;
+        loop_pass_estimation[loop_id] = max_estimation;
+    }
 
     #if(imcLOOP_OPT_DEBUG)
     portDISABLE_INTERRUPTS();
     printf("(DEBUG) estimation for loop %d is updated to %f\r\n", loop_id, new_estimation);
     portENABLE_INTERRUPTS();
+
     #endif
 
     imc_backup_estimations();
@@ -263,7 +291,9 @@ void __imc_finish_loop_skipping() {
         // int volt = measure_voltage(ADC_HANDLER_SBC, EPS_CAP_ID_SBC);
         // int energy = volt*volt - imcCAP_VOL_LOW*imcCAP_VOL_LOW;
         #if(imcLOOP_OPT_DEBUG)
-            printf("(DEBUG) loop %d is not perfectly skipped\r\n", loop_id);
+            portDISABLE_INTERRUPTS();
+            // printf("(DEBUG) loop %d is not perfectly skipped\r\n", loop_id);
+            portENABLE_INTERRUPTS();
         #endif
         int energy = get_current_energy();
         int energy_consumed = imc_energy_at_start - energy;
@@ -272,46 +302,6 @@ void __imc_finish_loop_skipping() {
         if(!last_skip_is_zero) {
             update_estimation(loop_id, last_loop_skip_count, energy, imc_energy_at_start);
         }
-
-        /*
-        float possible_iterations_estimated = 0;
-        int underestimated = 0;
-        if(!last_skip_is_zero) {
-            float energy_per_iteration = (float)energy_consumed / last_loop_skip_count;
-            possible_iterations_estimated = (float)energy / energy_per_iteration;
-            underestimated = possible_iterations_estimated > 3;
-        }
-        if(!last_skip_is_zero && underestimated) {
-            int current_estimation = loop_pass_estimation[loop_id];
-            // double inc_factor = 2.5;
-            double inc_factor = possible_iterations_estimated > 3 ? 3 : possible_iterations_estimated;
-            if (current_estimation * inc_factor > (double)INT32_MAX)
-            {
-                current_estimation = INT32_MAX/2-1;
-            }
-            else
-            {
-                current_estimation = (int)(current_estimation * inc_factor);
-            }
-            
-            if (current_estimation < 1) {
-                current_estimation = 1;
-            }
-            loop_pass_estimation[loop_id] = current_estimation;
-            #if (imcLOOP_OPT_DEBUG)
-                portDISABLE_INTERRUPTS();
-                printf("(DEBUG) estimation for loop %d is updated to %d\r\n", loop_id, current_estimation);
-                portENABLE_INTERRUPTS();
-            #endif
-        imc_backup_estimations();
-        } else {
-            #if (imcLOOP_OPT_DEBUG)
-                portDISABLE_INTERRUPTS();
-                printf("(DEBUG) estimation for loop %d is not updated (%d)\r\n", loop_id, loop_pass_estimation[loop_id]);
-                portENABLE_INTERRUPTS();
-            #endif
-        }
-        */
     }
     imc_is_passing_loops_backup = -1;
     imc_is_passing_loops = -1;
@@ -342,7 +332,9 @@ int __imc_get_loop_pass_count(int loop_id) {
         imc_energy_at_start = energy;
 
         #if(imcLOOP_OPT_DEBUG && imcLOOP_OPT_DEBUG_VERBOSE)
-            printf("(DEBUG) ratio: %d\r\n", ratio);
+            portDISABLE_INTERRUPTS();
+            printf("(DEBUG) loopId: %d, ratio: %d\r\n", loop_id, ratio);
+            portENABLE_INTERRUPTS();
         #endif
 
         float current_estimation = loop_pass_estimation[loop_id];
@@ -355,17 +347,37 @@ int __imc_get_loop_pass_count(int loop_id) {
             imc_is_passing_loops_backup = -1;
             // imc_sbc_power_off();
             #if(imcLOOP_OPT_DEBUG && imcLOOP_OPT_DEBUG_VERBOSE)
+                portDISABLE_INTERRUPTS();
                 printf("(DEBUG) ratio: %d\r\n", ratio);
+                portENABLE_INTERRUPTS();
             #endif
             // while(1) { __ASM(" nop"); }
             // printf("__imc_get_loop_pass_count(%d) called; %d returned\r\n", loop_id, last_loop_skip_count);
         }
         else {
             double r = (double)(ratio) / 100.0;
-            last_loop_skip_count = (int)((double)current_estimation * r);
+            double target_skip_count = current_estimation * r;
+            #if(imcLOOP_OPT_DEBUG && imcLOOP_OPT_DEBUG_VERBOSE)
+                portDISABLE_INTERRUPTS();
+                printf("(DEBUG) loop_id:%d, current estimation: %f, target_skip_count: %f, r: %f\r\n", loop_id, current_estimation, target_skip_count, r);
+                portENABLE_INTERRUPTS();
+            #endif
+            const int max_skip_count = 10000;
+            // if(target_skip_count > max_skip_count) {
+            //     last_loop_skip_count = max_skip_count;
+            // } else {
+            //     last_loop_skip_count = (int)target_skip_count;
+            // }
+            if (isnan(target_skip_count) || target_skip_count < 1.0) { 
+                last_loop_skip_count = 0;
+            } else if (target_skip_count > max_skip_count) {
+                last_loop_skip_count = max_skip_count;
+            } else {
+                last_loop_skip_count = (int)target_skip_count;
+            }
             // last_loop_skip_count = loop_pass_estimation[loop_id] * ((double)(ratio) / 100.0);
         }
-        // printf("loopId: %d, volt: %d, energy: %d, total: %d, ratio: %d, est:%d, count:%d\r\n", loop_id, volt, energy, imcENERGY_TOTAL, ratio, current_estimation, last_loop_skip_count);
+        // printf("loopId: %d, volt: %d, energy: %d, total: %d, ratio: %d, est:%f, count:%d\r\n", loop_id, volt, energy, imcENERGY_TOTAL, ratio, current_estimation, last_loop_skip_count);
         #if (imcPRINT_LATENCY_OVERHEAD)
             unsigned int end_tick = DWT->CYCCNT;
             imc_ticks_getCSC += end_tick - start_tick;

+ 2 - 1
Core/Src/stm32l5xx_it.c

@@ -22,6 +22,7 @@
 #include "stm32l5xx_it.h"
 /* Private includes ----------------------------------------------------------*/
 /* USER CODE BEGIN Includes */
+#include <stdio.h>
 /* USER CODE END Includes */
 
 /* Private typedef -----------------------------------------------------------*/
@@ -87,7 +88,7 @@ void NMI_Handler(void)
 void HardFault_Handler(void)
 {
   /* USER CODE BEGIN HardFault_IRQn 0 */
-
+  printf("HardFault_Handler\r\n");
   /* USER CODE END HardFault_IRQn 0 */
   while (1)
   {

文件差異過大導致無法顯示
+ 66 - 6
imc/exprs/date2025/5_size_overhead/draw_graph_expr_5.ipynb


+ 67 - 0
imc/exprs/date2025/6_varying_power/voltage_trace_simulator.py

@@ -0,0 +1,67 @@
+import time
+import pandas as pd
+
+
+def parse_energy_trace_nrel(data_file, include_datetime=False, target_panel_size_m2=0.00013, target_input_voltage=3.3):
+    """
+    Convert an NREL solar irradiance trace (CSV) into a harvested-current trace in mA.
+
+    The third CSV column is read as irradiance in W/m^2 and converted as
+    irradiance * 1000 (mW/m^2) * panel area (m^2) / voltage (V) = current (mA).
+
+    Args:
+        data_file: Path to the CSV file containing NREL solar data
+        include_datetime: If True, return the whole DataFrame with the third column
+            replaced by the current trace; if False, return only the current trace
+        target_panel_size_m2: Panel area in m^2 (default 0.00013 m^2 = 1.3 cm^2)
+        target_input_voltage: Voltage used to convert power to current (default 3.3 V)
+
+    Returns:
+        A pandas Series of current values in mA, or the DataFrame described above.
+    """
+    df = pd.read_csv(data_file)
+    power_col = df.columns[2]
+    # W/m^2 -> mW/m^2 -> mW for the given panel area -> mA at the target voltage.
+    trace_mA = df[power_col] * 1000 * target_panel_size_m2 / target_input_voltage
+
+    if include_datetime:
+        # Write back to the column that was read; the old hard-coded
+        # "Global Horizontal [W/m^2]" key added a stray column for other headers.
+        df[power_col] = trace_mA
+        return df
+    return trace_mA
+
+
+def simulate_current(smu, wait_s, trace_mA, verbose=True):
+    """
+    Replay a current trace on an SMU by stepping its current limit.
+
+    Args:
+        smu: Device object exposing set_current_limit(); called with curr / 1000
+        wait_s: Seconds to sleep after each step
+        trace_mA: Iterable of current values in mA; len() is used for the progress line
+        verbose: If True, print one progress line per step, after the sleep
+    """
+    for step, current_mA in enumerate(trace_mA, start=1):
+        smu.set_current_limit(current_mA / 1000)
+        time.sleep(wait_s)
+        if verbose:
+            print(f"Set current to {current_mA:.3f}mA ({step}/{len(trace_mA)})")
+
+
+def simulate_current_with_pps(pps, channel, wait_s, trace_mA, verbose=True):
+    """
+    Replay a current trace on one channel of a PPS by stepping its current setting.
+
+    Args:
+        pps: Device object exposing set_current(); called as set_current(curr / 1000, channel)
+        channel: Channel number forwarded to set_current()
+        wait_s: Seconds to sleep after each step
+        trace_mA: Iterable of current values in mA; len() is used for the progress line
+        verbose: If True, print one progress line per step, after the sleep
+    """
+    for step, current_mA in enumerate(trace_mA, start=1):
+        pps.set_current(current_mA / 1000, channel)
+        time.sleep(wait_s)
+        if verbose:
+            print(f"Set current to {current_mA:.3f}mA ({step}/{len(trace_mA)})")

文件差異過大導致無法顯示
+ 69 - 3
imc/exprs/tvlsi2025/8_size_tradeoff/draw_graph_expr_8.ipynb


+ 90 - 34
imc/exprs/tvlsi2025/8_size_tradeoff/draw_graph_expr_8.py

@@ -9,18 +9,28 @@ import plot_utils
 
 def draw_graph():
     benchmarks = [
+        "vFFT",
         "vBasicMath",
         "vCrc",
-        "vFFT",
         "vSha",
         "vStringSearch",
-        "vMatMul",
-        "vConv2d",
         "vAes",
+        "vConv2d",
+        "vMatMul",
     ]
     duplication_ratios = list(range(10, 101, 10))
+    # duplication_ratios = list(range(20, 101, 20))
     config_name = "adaptive"
 
+    # Load base file sizes for normalization
+    try:
+        base_df = pd.read_pickle("output/base_file_sizes.pickle")
+        base_sizes = dict(zip(base_df['benchmark'], base_df['func']))
+        print("Base sizes loaded:", base_sizes)
+    except FileNotFoundError:
+        print("Warning: base_file_sizes.pickle not found. Using raw sizes.")
+        base_sizes = {}
+
     all_dfs = []
 
     for benchmark in benchmarks:
@@ -30,64 +40,110 @@ def draw_graph():
                 all_dfs.append(df)
             except FileNotFoundError:
                 continue
-    
+
     all_df = pd.concat(all_dfs, ignore_index=True)
-    print(all_df)
+    print(all_df.columns)
+    columns = ["benchmark", "time_taken", "duplication_ratio", "func", "text"]
+    df = all_df[columns].copy()
+    df = df.groupby(["benchmark", "duplication_ratio"]).mean()
+    df = df.reset_index()
+    df["duplication_ratio"] = df["duplication_ratio"].astype(str)
+    
+    # Normalize function sizes against base sizes
+    if base_sizes:
+        df["func_normalized"] = df.apply(
+            lambda row: row["func"] / base_sizes.get(row["benchmark"], 1), axis=1
+        )
+        print("Normalized function sizes:")
+        print(df[["benchmark", "duplication_ratio", "func", "func_normalized"]])
+    else:
+        df["func_normalized"] = df["func"]
+    
+    print(df)
 
     rc = {
         "lines.linewidth": 1.5,
         "axes.titlepad": 10,
-        "ytick.major.pad": -4,
+        "axes.labelpad": 7,
+        "ytick.major.pad": 1,
         "ytick.labelsize": 20,
         "legend.fontsize": 23,
-        "axes.labelsize": 25,
+        "axes.labelsize": 26,
+        "axes.titlesize": 24,
     }
     plot_utils.set_theme_seaborn(kind="bar", rc_custom=rc)
 
-    fig_size = (18, 6)
+    fig_size = (24, 7)
     n_rows = 2
     n_cols = 4
-    hspace = 0.05
+    hspace = 0.07
     fig = plt.figure(figsize=fig_size)
     axes = fig.subplots(n_rows, n_cols, sharex=True, gridspec_kw={"hspace": hspace})
 
     for i, benchmark in enumerate(benchmarks):
         ax = axes.reshape(-1)[i]
-        mask = (all_df["benchmark"] == benchmark)
-        df = all_df[mask]
-        if df.empty:
+        mask = (df["benchmark"] == benchmark)
+        plot_df = df[mask]
+        if plot_df.empty:
             continue
-        sns.barplot(data=df, x="duplication_ratio", y="func", ax=ax)
-        ax.set_title(benchmark)
+        sns.barplot(data=plot_df, x="duplication_ratio", y="func_normalized", ax=ax)
+        title = benchmark[1:].lower()
+        ax.set_title(title)
         ax.set_xlabel("Duplication Ratio (%)")
-        ax.set_ylabel("Func Size (bytes)")
+        ax.set_ylabel("Func Size")
         ax.grid(True)
 
-        ax.set_title(benchmark)
-        ax.set_xlabel("")
-        ax.set_ylabel("")
-        if i / 4 >= 1:
-            ax.set_xlabel("Execution #")
-        if i in [0, 4]:
-            ax.set_ylabel("Func Size (kB)")
-
-        ax.yaxis.set_major_formatter(matplotlib.ticker.FuncFormatter(lambda x, pos: f"{x/1000:.1f}"))
+        # Format y-axis as ratio (e.g., 1.5x, 2.0x)
+        ax.yaxis.set_major_formatter(matplotlib.ticker.FuncFormatter(lambda x, pos: f"{x:.1f}x"))
         ax.yaxis.grid(visible=True, which="both")
         # ax.yaxis.set_major_locator(matplotlib.ticker.MaxNLocator(3))
         ax.yaxis.set_minor_locator(matplotlib.ticker.AutoMinorLocator(3))
 
-        ax.xaxis.set_minor_locator(matplotlib.ticker.AutoMinorLocator(2))
+        # ax.xaxis.set_minor_locator(matplotlib.ticker.AutoMinorLocator(2))
         ax.xaxis.grid(visible=True, which="both")
 
+        # Remove any auto-generated legends from barplot
+        if ax.get_legend() is not None:
+            ax.get_legend().remove()
+
+        ax2 = ax.twinx()
+        sns.lineplot(
+            data=plot_df,
+            x="duplication_ratio",
+            y="time_taken",
+            ax=ax2,
+            color="black",  # Optional: for contrast
+            linewidth=1.5,
+            markers="o",
+        )
+        ax2.yaxis.grid(False)  # Prevent grid clash with ax
 
+        # Add legend only to the first subplot
         if i == 0:
-            ax.legend(
-                ncol=1, loc="upper left", bbox_to_anchor=(0.33, 0.995), labelspacing=0.3,
-                handlelength=1,
+            # Get legend handles directly from the existing plot elements
+            bar_handle = ax.patches[0]  # Get the first bar patch
+            line_handle = ax2.lines[0]  # Get the line plot from ax2
+            
+            # Set labels for the handles
+            bar_handle.set_label('Func Size')
+            line_handle.set_label('Time taken')
+            
+            # Add legend to the first subplot
+            legend = ax2.legend(
+                handles=[bar_handle, line_handle],
+                ncol=1, loc="upper left", bbox_to_anchor=(0.0, 0.995), labelspacing=0.3,
+                handlelength=1, framealpha=0.6,
             )
-            # ax.get_legend().remove()
-        else:
-            if ax.get_legend() is not None:
-                ax.get_legend().remove()
-    
-    return fig
+            legend.set_zorder(10)
+
+        ax.set_xlabel("")
+        ax.set_ylabel("")
+        ax2.set_ylabel("")
+        if i / 4 >= 1:
+            ax.set_xlabel("Duplication Ratio (%)")
+        if i in [0, 4]:
+            ax.set_ylabel("Func Size")
+        if i in [3, 7]:
+            ax2.set_ylabel("Time taken (s)")
+
+    return fig

+ 187 - 0
imc/exprs/tvlsi2025/8_size_tradeoff/get_base_file_size.py

@@ -0,0 +1,187 @@
+import tempfile
+import pickle
+import pandas as pd
+import os
+import subprocess
+
+from imc_utils.build_config.cortex_m33 import BuildConfigM33
+from imc_utils.build_config.test_env import TestEnv
+
+WORKSPACE_ROOT = "/home/ybkim/workspace/imc/imc_freertos_app_m33"
+NVM_RESET_BIN = f"{WORKSPACE_ROOT}/imc/utils/nvm_reset.elf"
+OPENOCD_SCRIPT = f"{WORKSPACE_ROOT}/imc_freertos_app_m33.cfg"
+
+
+def get_build_config(benchmark, config_name):
+    """Return the default build config specialised for `benchmark` and `config_name`.
+
+    Recognised names are "original", "pass_count", "adaptive" and "unroll";
+    any other name applies no overrides beyond the benchmark name and repeat count.
+    """
+    cfg = get_default_build_config()
+    cfg.bench_name = benchmark
+    cfg.bench_repeat_count = cfg.bench_repeat_count_small[benchmark]
+
+    if config_name == "original":
+        cfg.insert_compiler_checkpoints = False
+    elif config_name == "pass_count":
+        cfg.use_checkpoint_pass_counter = True
+        cfg.checkpoint_pass_count = cfg.pass_count_10ms[benchmark]
+    elif config_name == "adaptive":
+        cfg.use_checkpoint_voltage_check = True
+        cfg.split_loop = True
+        cfg.enable_adaptive_loop_pass_count = True
+        cfg.max_loop_ids = 30
+        cfg.loop_opt_debug = False
+    elif config_name == "unroll":
+        cfg.custom_unroll = True
+    return cfg
+
+
+def get_flash_size(binary):
+    """Sum the sizes of the flash-resident sections `objdump -h` reports for `binary`.
+
+    Returns (total, text): total bytes over the sections listed below, and the size
+    of `.text` alone (0 if absent).
+    Raises subprocess.CalledProcessError if objdump exits with a non-zero status.
+    """
+    flash_sections = {
+        ".isr_vector", ".text", ".rodata", ".ARM.extab", ".ARM",
+        ".preinit_array", ".init_array", ".fini_array", ".data",
+    }
+
+    # Pass argv as a list so a path containing spaces is not split; check=True makes a
+    # failed objdump raise instead of silently yielding (0, 0).
+    output = subprocess.run(["objdump", "-h", binary], capture_output=True, check=True)
+    total = 0
+    text = 0
+    for line in output.stdout.decode().split("\n"):
+        tokens = line.split()
+        # Section rows look like "<idx> <name> <size-hex> ..."; skip everything else.
+        if len(tokens) < 3 or not tokens[0].isdigit():
+            continue
+        section, size_in_hex = tokens[1], tokens[2]
+        if section in flash_sections:
+            size = int(size_in_hex, base=16)
+            print(section, size_in_hex, size)
+            total += size
+            if section == ".text":
+                text = size
+    return total, text
+
+
+def get_func_size(binary, bench_name):
+    """Return the combined size in bytes of the functions that implement `bench_name`.
+
+    Args:
+        binary: Path to the ELF file passed to `nm -S -t d`
+        bench_name: Benchmark key; must be one of the keys of `function_names`
+
+    Raises:
+        KeyError: if `bench_name` is not a known benchmark
+        subprocess.CalledProcessError: if nm exits with a non-zero status
+    """
+    function_names = {
+        "vBasicMath": ["vBasicMath"],
+        "vCrc": ["updateCRC32", "crc32file", "crc32buf", "vCrc"],
+        "vFFT": ["vFFT"],
+        "vSha": [
+            "sha_init", "sha_update",
+            "byte_reverse", "sha_transform",
+            "sha_final", "sha_stream",
+            "sha_print", "vSha",
+        ],
+        "vStringSearch": ["vStringSearch"],
+        "vMatMul": ["vMatMul"],
+        "vConv2d": ["vConv2d"],
+        "vAes": [
+            "AES_init_ctx_iv", "KeyExpansion",
+            "AES_CBC_encrypt_buffer", "XorWithIv",
+            "Cipher", "vAes",
+            "AddRoundKey", "SubBytes",
+            "ShiftRows", "MixColumns",
+            "xtime",
+        ],
+    }
+    funcs = set(function_names[bench_name])
+    # argv as a list keeps a path with spaces intact; check=True surfaces nm failures.
+    output = subprocess.run(["nm", "-S", "-t", "d", binary], capture_output=True, check=True)
+    total_size = 0
+    for line in output.stdout.decode().split("\n"):
+        tokens = line.split()
+        # Sized symbols print as "<value> <size> <type> <name>"; undefined or unsized
+        # symbols have fewer fields, where tokens[1] would be the type letter.
+        if len(tokens) != 4 or tokens[3] not in funcs:
+            continue
+        total_size += int(tokens[1])
+    return total_size
+
+
+def get_default_build_config():
+    """Build a BuildConfigM33 preset with the baseline flags, applied in the order listed."""
+    defaults = {
+        "insert_compiler_checkpoints": True, "enable_extension": True,
+        "use_checkpoint_pass_counter": False, "use_checkpoint_voltage_check": False,
+        "bench_infinite_loop": True, "print_recovery_message": True,
+        "split_loop": False, "enable_static_loop_pass_count": False,
+        "enable_adaptive_loop_pass_count": False, "print_stats": True,
+        "custom_unroll": False,
+    }
+    base = BuildConfigM33()
+    for attr, value in defaults.items():
+        setattr(base, attr, value)
+    return base
+
+
+def save_records(df):
+    """Write `df` to output/base_file_sizes.pickle and .csv, creating output/ if needed."""
+    # exist_ok avoids the check-then-create race of os.path.exists() + os.makedirs().
+    os.makedirs("output", exist_ok=True)
+    with open("output/base_file_sizes.pickle", "wb") as f:
+        pickle.dump(df, f)
+    df.to_csv("output/base_file_sizes.csv", index=False)
+
+
+def main():
+    """Build each benchmark in the "original" configuration and record its sizes.
+
+    For every benchmark the binary is built in a temporary directory, measured with
+    get_flash_size() and get_func_size(), and the rows are handed to save_records().
+    """
+    benchmarks = (
+        "vBasicMath", "vCrc", "vFFT", "vSha",
+        "vStringSearch", "vMatMul", "vConv2d", "vAes",
+    )
+
+    config_name = "original"
+    env = TestEnv(WORKSPACE_ROOT, NVM_RESET_BIN, OPENOCD_SCRIPT)
+    rows = []
+
+    for bench in benchmarks:
+        print(f"Processing benchmark: {bench}")
+        build_cfg = get_build_config(bench, config_name)
+
+        # Measure inside the `with`: build_dir (and presumably the binary in it)
+        # is deleted as soon as the block exits.
+        with tempfile.TemporaryDirectory() as build_dir:
+            elf = env.build_binary(build_cfg, build_dir)
+            flash_size, text_size = get_flash_size(elf)
+            func_size = get_func_size(elf, bench)
+
+        rows.append({
+            "benchmark": bench,
+            "config": config_name,
+            "flash": flash_size,
+            "text": text_size,
+            "func": func_size,
+        })
+        print(f"  Flash: {flash_size}, Text: {text_size}, Func: {func_size}")
+
+    table = pd.DataFrame(rows)
+    print("\nResults:")
+    print(table)
+    save_records(table)
+
+
+if __name__ == "__main__":
+    main()

+ 44 - 17
imc/exprs/tvlsi2025/8_size_tradeoff/run_expr_8.py

@@ -15,7 +15,6 @@ WORKSPACE_ROOT = "/home/ybkim/workspace/imc/imc_freertos_app_m33"
 NVM_RESET_BIN = f"{WORKSPACE_ROOT}/imc/utils/nvm_reset.elf"
 OPENOCD_SCRIPT = f"{WORKSPACE_ROOT}/imc_freertos_app_m33.cfg"
 
-
 def get_build_config(benchmark, config_name):
     config = get_default_build_config()
     config.bench_name = benchmark
@@ -34,6 +33,7 @@ def get_build_config(benchmark, config_name):
         config.split_loop = True
         config.enable_adaptive_loop_pass_count = True
         config.max_loop_ids = 30
+        config.loop_opt_debug = False
     
     if config_name == "unroll":
         config.custom_unroll = True
@@ -124,20 +124,27 @@ def main():
     config = get_default_build_config()
 
     benchmarks = [
-        "vBasicMath",
-        "vCrc",
-        "vFFT",
-        "vSha",
+        # "vBasicMath",
+        # "vCrc",
+        # "vFFT",
+        # "vSha",
         "vStringSearch",
         "vMatMul",
         "vConv2d",
         "vAes",
     ]
     # benchmarks = [
-    #     "vFFT"
+    #     "vSha",
     # ]
 
+    total_iterations = 5
+    target_current_limit = 0.015
+
+    pps = PPS_E36311A()
+
     duplication_ratios = list(range(10, 101, 10))
+    # duplication_ratios = [100]
+    # duplication_ratios = list(range(20, 101, 20))
 
     for benchmark in benchmarks:
         for duplication_ratio in duplication_ratios:
@@ -146,23 +153,43 @@ def main():
             config = get_build_config(benchmark, config_name)
             config.duplication_ratio = duplication_ratio
 
+            pps.set_voltage(3.3, 1)
+            pps.set_current(0.1, 1)
+            pps.output_on(1)
+
             env = TestEnv(WORKSPACE_ROOT, NVM_RESET_BIN, OPENOCD_SCRIPT)
 
             with tempfile.TemporaryDirectory() as build_dir:
                 binary = env.build_binary(config, build_dir)
                 size, text_size = get_flash_size(binary)
                 func_size = get_func_size(binary, benchmark)
-            
-            record = {
-                "benchmark": benchmark,
-                # "config": config_name,
-                "duplication_ratio": duplication_ratio,
-                "flash": size,
-                "text": text_size,
-                "func": func_size,
-            }
-
-            all_records.append(record)
+                env.clear_nvm_and_load_binary(binary, resume=False)
+
+            pps.set_current(target_current_limit, 1)
+            time.sleep(1)
+
+            env.resume_board(terminate=True)
+            watcher = SerialWatcher(benchmark, total_iterations)
+            records = watcher.run()
+
+            for record in records:
+                record.update({
+                    "benchmark": benchmark,
+                    # "config": config_name,
+                    "duplication_ratio": duplication_ratio,
+                    "flash": size,
+                    "text": text_size,
+                    "func": func_size,
+                })
+                all_records.append(record)
+            # record = {
+            #     "benchmark": benchmark,
+            #     # "config": config_name,
+            #     "duplication_ratio": duplication_ratio,
+            #     "flash": size,
+            #     "text": text_size,
+            #     "func": func_size,
+            # }
             df = pd.DataFrame(all_records)
             print(df)
             save_records(benchmark, duplication_ratio, df)

+ 34 - 9
imc_extension.cmake

@@ -192,17 +192,24 @@ function(compile_imc_files)
     endforeach()
     string(REPLACE " " ";" C_FLAGS ${CMAKE_C_FLAGS})
 
-    set(BASIC_OPT_PASSES "mem2reg,indvars,instnamer,unify-loop-exits,lcssa,simplifycfg,loop-simplify")
-    set(IMC_OPT_PASSES "")
+    set(IMC_BASIC_PASSES "")
+    set(BASIC_OPT_PASSES "mem2reg,indvars,instnamer,simplifycfg,lcssa,loop-simplify")
+    foreach(PASS ${BASIC_OPT_PASSES})
+        list(APPEND IMC_BASIC_PASSES "function\\(${PASS}\\)")
+    endforeach()
+
+    set(IMC_MODULE_PASSES "")
+    set(IMC_FUNCTION_PASSES "")
     set(PLUGINS "")
 
     if(IMC_CUSTOM_UNROLL)
-        list(APPEND IMC_OPT_PASSES "imc-loop-unroll")
+        # Wrap function-level pass with function(...) for the new pass manager; escape parens for sh
+        list(APPEND IMC_FUNCTION_PASSES "function\\(imc-loop-unroll\\)")
         list(APPEND PLUGINS /home/ybkim/workspace/imc/loop_duplication/build/lib/libImcLoopUnroll.so)
     endif()
 
     if(IMC_SPLIT_LOOP)
-        list(APPEND IMC_OPT_PASSES "imc-loop-opt")
+        list(APPEND IMC_MODULE_PASSES "imc-loop-opt")
         list(APPEND PLUGINS /home/ybkim/workspace/imc/loop_duplication/build/lib/libImcLoopOpt.so)
         add_compile_definitions(imcENABLE_STATIC_LOOP_PASS_COUNT=${IMC_ENABLE_STATIC_LOOP_PASS_COUNT})
         add_compile_definitions(imcENABLE_ADAPTIVE_LOOP_PASS_COUNT=${IMC_ENABLE_ADAPTIVE_LOOP_PASS_COUNT})
@@ -211,17 +218,33 @@ function(compile_imc_files)
     endif()
 
     if(IMC_INSERT_COMPILER_CHECKPOINTS)
-        list(APPEND IMC_OPT_PASSES "imc-insert-checkpoints")
+        # Wrap function-level pass with function(...) for the new pass manager; escape parens for sh
+        list(APPEND IMC_FUNCTION_PASSES "function\\(imc-insert-checkpoints\\)")
         list(APPEND PLUGINS /home/ybkim/workspace/imc/loop_duplication/build/lib/libImcInsertCheckpoints.so)
     endif()
 
-    list(JOIN IMC_OPT_PASSES "," IMC_OPT_PASSES)
+    list(JOIN IMC_BASIC_PASSES "," IMC_BASIC_PASSES)
+    list(JOIN IMC_MODULE_PASSES "," IMC_MODULE_PASSES)
+    list(JOIN IMC_FUNCTION_PASSES "," IMC_FUNCTION_PASSES)
     set(PLUGIN_OPTIONS "")
     foreach(PLUGIN ${PLUGINS})
         list(APPEND PLUGIN_OPTIONS -load-pass-plugin=${PLUGIN})
     endforeach()
 
-    set(OPT_OPTIONS "-imc-loop-opt-duplication-ratio=${IMC_DUPLICATION_RATIO}")
+    # Build -passes list without trailing commas
+    set(PASSES "${IMC_BASIC_PASSES}")
+    if(IMC_MODULE_PASSES)
+        set(PASSES "${PASSES},${IMC_MODULE_PASSES}")
+    endif()
+    if(IMC_FUNCTION_PASSES)
+        set(PASSES "${PASSES},${IMC_FUNCTION_PASSES}")
+    endif()
+
+    # Only pass plugin-specific options when the relevant optimization is enabled
+    set(OPT_OPTIONS "")
+    if(IMC_SPLIT_LOOP)
+        set(OPT_OPTIONS "-imc-loop-opt-duplication-ratio=${IMC_DUPLICATION_RATIO}")
+    endif()
 
     set(ADDITIONAL_DEFS
         "-D imcBENCH_REPEAT_COUNT=${IMC_BENCH_REPEAT_COUNT}"
@@ -229,7 +252,7 @@ function(compile_imc_files)
     )
 
     set(ADDITIONAL_OPT_FLAGS
-        # "--disable-loop-unrolling"
+        "--disable-loop-unrolling"
     )
 
     foreach(APP_SRC ${IMC_APP_FILES})
@@ -237,7 +260,9 @@ function(compile_imc_files)
             OUTPUT ${PROJECT_BINARY_DIR}/${APP_SRC}.s
             DEPENDS ${APP_SRC}
             COMMAND ${CMAKE_C_COMPILER} -g ${C_FLAGS} -O0 -Xclang -disable-O0-optnone ${INCLUDE_DIRS} -D USE_HAL_DRIVER -D STM32L552xx ${ADDITIONAL_DEFS} -c -emit-llvm ${PROJECT_SOURCE_DIR}/${APP_SRC} -o ${PROJECT_BINARY_DIR}/${APP_SRC}.bc
-            COMMAND ${OPT_BIN} -o ${PROJECT_BINARY_DIR}/${APP_SRC}_imc.bc ${PLUGIN_OPTIONS} -passes=${BASIC_OPT_PASSES},${IMC_OPT_PASSES} ${OPT_OPTIONS} ${PROJECT_BINARY_DIR}/${APP_SRC}.bc
+            COMMAND ${OPT_BIN} -o ${PROJECT_BINARY_DIR}/${APP_SRC}_imc.bc ${PLUGIN_OPTIONS} "-passes=${PASSES}" ${OPT_OPTIONS} ${PROJECT_BINARY_DIR}/${APP_SRC}.bc
+            # COMMAND ${OPT_BIN} -o ${PROJECT_BINARY_DIR}/${APP_SRC}_imc.bc ${PLUGIN_OPTIONS} "-passes=${IMC_MODULE_PASSES},${IMC_FUNCTION_PASSES}" ${OPT_OPTIONS} ${PROJECT_BINARY_DIR}/${APP_SRC}.bc
+            # COMMAND ${OPT_BIN} -o ${PROJECT_BINARY_DIR}/${APP_SRC}_imc.bc ${PLUGIN_OPTIONS} -passes=${BASIC_OPT_PASSES},${IMC_OPT_PASSES} ${OPT_OPTIONS} ${PROJECT_BINARY_DIR}/${APP_SRC}.bc
             COMMAND ${OPT_BIN} ${OPT} ${ADDITIONAL_OPT_FLAGS} -o ${PROJECT_BINARY_DIR}/${APP_SRC}.opt.bc ${PROJECT_BINARY_DIR}/${APP_SRC}_imc.bc
             COMMAND ${LLC_BIN} ${CPU} ${OPT} -o ${PROJECT_BINARY_DIR}/${APP_SRC}.s ${PROJECT_BINARY_DIR}/${APP_SRC}.opt.bc
         )

部分文件因文件數量過多而無法顯示