From 885084d7f7d4288935fb2f1ff8cd395c1d3a76cb Mon Sep 17 00:00:00 2001 From: Zexin Fu Date: Tue, 3 Dec 2024 13:51:31 +0100 Subject: [PATCH] [Profile] Add interconnect profiling testbench support and python scripts. --- config/config.mk | 7 +- hardware/Makefile | 7 +- hardware/scripts/gen_xbar.py | 8 +- hardware/scripts/noc_profiling.py | 393 ++++++++++++++++++++++++++ hardware/scripts/noc_profiling_bw.py | 280 ++++++++++++++++++ hardware/src/mempool_pkg.sv | 25 ++ hardware/tb/mempool_tb.sv | 407 ++++++++++++++++++++++++++- 7 files changed, 1109 insertions(+), 18 deletions(-) create mode 100644 hardware/scripts/noc_profiling.py create mode 100644 hardware/scripts/noc_profiling_bw.py diff --git a/config/config.mk b/config/config.mk index b040d0a96..04d195083 100644 --- a/config/config.mk +++ b/config/config.mk @@ -81,7 +81,10 @@ dram_axi_width_interleaved ?= 16 spm_bank_id_remap ?= 0 # Enable tile id remapping inside of each group -tile_id_remap ?= 1 +tile_id_remap ?= 0 # Enable the spm access pattern profiling -spm_profiling ?= 0 \ No newline at end of file +spm_profiling ?= 0 + +# Enable the interconnect access pattern profiling +noc_profiling ?= 0 \ No newline at end of file diff --git a/hardware/Makefile b/hardware/Makefile index 6a8b992f0..7ccaf0214 100644 --- a/hardware/Makefile +++ b/hardware/Makefile @@ -146,6 +146,9 @@ vlog_defs += -DDRAM_AXI_WIDTH_INTERLEAVED=${dram_axi_width_interleaved} ifeq (1, $(spm_profiling)) vlog_defs += -DSPM_PROFILING=$(spm_profiling) endif +ifeq (1, $(noc_profiling)) + vlog_defs += -DNOC_PROFILING=$(noc_profiling) +endif ifeq (1, $(spm_bank_id_remap)) vlog_defs += -DSPM_BANK_ID_REMAP=$(spm_bank_id_remap) endif @@ -238,7 +241,7 @@ $(buildpath)/$(dpi_library)/mempool_dpi.so: $(dpi) # Elaboration .PHONY: elabvcs -elabvcs: dpivcs $(buildpath) $(buildpath)/compilevcs.sh update_opcodes +elabvcs: gen_selector dpivcs $(buildpath) $(buildpath)/compilevcs.sh update_opcodes $(buildpath)/compilevcs.sh: $(bender) $(config_mk) Makefile 
$(MEMPOOL_DIR)/Bender.yml $(shell find {src,tb,deps} -type f) $(bender) script vcs --vlogan-bin="$(vcs_cmd) vlogan" --vlog-arg="$(vlogan_args)" $(vlog_defs) -t rtl -t mempool_vsim > $(buildpath)/compilevcs.sh echo "exit" >> $(buildpath)/compilevcs.sh @@ -309,7 +312,7 @@ ifneq (${CLANG_PATH},) VERILATOR_FLAGS += -LDFLAGS "-L $(CLANG_PATH)/lib -Wl,-rpath,$(CLANG_PATH)/lib -lc++ -nostdlib++" endif -$(VERILATOR_MK): $(VERILATOR_CONF) $(VERILATOR_WAIVE) $(MEMPOOL_DIR)/Bender.yml $(shell find {src,tb,deps} -type f) $(bender) $(config_mk) Makefile +$(VERILATOR_MK): gen_selector $(VERILATOR_CONF) $(VERILATOR_WAIVE) $(MEMPOOL_DIR)/Bender.yml $(shell find {src,tb,deps} -type f) $(bender) $(config_mk) Makefile rm -rf $(verilator_build); mkdir -p $(verilator_build) # Overwrite Bootaddress to L2 base while we don't have a DPI to write a wake-up $(eval boot_addr=$(l2_base)) diff --git a/hardware/scripts/gen_xbar.py b/hardware/scripts/gen_xbar.py index ec84bd575..444b3b7c5 100755 --- a/hardware/scripts/gen_xbar.py +++ b/hardware/scripts/gen_xbar.py @@ -47,11 +47,11 @@ def generate_crossbar(InNum, OutNum): assertion = ( "initial begin\n" f" assert (InNum == {InNum}) else " - "$fatal(1, \"Parameter InNum does not\n" - f" match expected value {InNum}\");\n" + "$fatal(1, \n\"Parameter InNum does not " + f"match expected value {InNum}\");\n" f" assert (OutNum == {OutNum}) else " - "$fatal(1, \"Parameter OutNum does not\n" - f" match expected value {OutNum}\");\n" + "$fatal(1, \n\"Parameter OutNum does not " + f"match expected value {OutNum}\");\n" "end\n" ) diff --git a/hardware/scripts/noc_profiling.py b/hardware/scripts/noc_profiling.py new file mode 100644 index 000000000..cc9e133da --- /dev/null +++ b/hardware/scripts/noc_profiling.py @@ -0,0 +1,393 @@ +#!/usr/bin/env python3 +# Copyright 2024 ETH Zurich and University of Bologna. +# Licensed under the Apache License, Version 2.0, see LICENSE for details. 
+# SPDX-License-Identifier: Apache-2.0 + +import pandas as pd +import numpy as np +import matplotlib.pyplot as plt +import matplotlib.colors as mcolors +import ast + + +scale_factor = 10 + + +# Function to plot congestion intervals inline +def plot_intervals_inline(interval_results, output_int): + # Extract intervals and percentages + intervals = list(interval_results.keys()) + percentages = list(interval_results.values()) + + # Plot the data + plt.figure(figsize=(10, 6)) + bars = plt.bar( + intervals, percentages, color="skyblue", edgecolor="black", alpha=0.7 + ) + + # Add labels on top of each bar + for bar, percent in zip(bars, percentages): + plt.text( + bar.get_x() + bar.get_width() / 2, + bar.get_height(), + f"{percent:.1f}%", + ha="center", + va="bottom", + fontsize=10, + ) + + plt.xlabel("Intervals", fontsize=14) + plt.ylabel("Percentage (%)", fontsize=14) + plt.title("Percentage of Values in 10% Intervals", fontsize=16) + plt.xticks(rotation=45, fontsize=12) + plt.yticks(fontsize=12) + plt.tight_layout() + # Save the output as PNG and PDF + plt.savefig(output_int, format="png", bbox_inches="tight") + # plt.savefig(output_pdf, format='pdf', bbox_inches='tight') + plt.clf() + plt.close() + + +def visualize_mesh_noc_congestion_optimized( + file_path, output_png, output_pdf, output_int, req_rsp, bw, NumX=4, NumY=4 +): + if bw == 0: + target = "Congestion" + else: + target = "Bandwidth" + + # Load and preprocess the data + with open(file_path, "r") as file: + raw_content = file.readlines() + + parsed_data = [] + for line in raw_content: + line = line.strip() + if line.startswith("{") and not line.endswith("}"): + line += "}" # Append a closing brace if missing + try: + entry = ast.literal_eval(line) # Parse JSON-like entries + parsed_data.append(entry) + except Exception: + continue + + df = pd.DataFrame(parsed_data) + if df.empty: + print("Error: No valid data found in the file.") + return + + # Filter data for REQ_RSP == 0 (request NoC) + req_noc_data = 
df[df["REQ_RSP"] == req_rsp] + + # Calculate congestion for inbound and outbound links (1 - handshake/valid) + if bw: + max_hsk = req_noc_data["out_hsk_cyc_num"].max() + # req_noc_data['in_congestion'] = (req_noc_data['in_hsk_cyc_num'] / + # (max_hsk)) + req_noc_data["out_congestion"] = req_noc_data["out_hsk_cyc_num"] / ( + max_hsk + ) + # req_noc_data['out_congestion'] = (req_noc_data['out_hsk_cyc_num']) + filtered_req_noc_data = req_noc_data + + # Normalize congestion for color mapping + # (0: least congested, 1: most congested) + # req_noc_data['in_congestion_norm'] = + # np.clip(req_noc_data['in_congestion'], 0, 1) + req_noc_data["out_congestion_norm"] = np.clip( + req_noc_data["out_congestion"], 0, 1 + ) + + # Collect normalized congestion values into a list + data_list = req_noc_data["out_congestion_norm"].tolist() + + print( + "Total flits transmitted:", req_noc_data["out_hsk_cyc_num"].sum() + ) + + else: + req_noc_data["in_congestion"] = 1 - ( + req_noc_data["in_hsk_cyc_num"] + / (req_noc_data["in_vld_cyc_num"] + 1e-5) + ) + req_noc_data["out_congestion"] = 1 - ( + req_noc_data["out_hsk_cyc_num"] + / (req_noc_data["out_vld_cyc_num"] + 1e-5) + ) + + # Remove entries equal to 1 in 'in_congestion' before normalization + filtered_req_noc_data = req_noc_data[ + req_noc_data["out_congestion"] < 1 + ] + + # Normalize congestion for color mapping + # (0: least congested, 1: most congested) + max_out_congestion = filtered_req_noc_data["out_congestion"].max() + filtered_req_noc_data["out_congestion_norm"] = ( + np.clip( + filtered_req_noc_data["out_congestion"], 0, max_out_congestion + ) + / max_out_congestion + ) + + # # Normalize congestion for color mapping + # # (0: least congested, 1: most congested) + # req_noc_data['in_congestion_norm'] = + # np.clip(filtered_req_in_noc_data, 0, 1) + # req_noc_data['out_congestion_norm'] = + # np.clip(req_noc_data['out_congestion'], 0, 1) + + # Collect normalized congestion values into a list + data_list = 
filtered_req_noc_data["out_congestion_norm"].tolist() + + # Calculate the average congestion + average_congestion = np.mean(data_list) + print(f"Average {target}: {average_congestion:.2f}") + + # draw interval + total_count = len(data_list) + + # Define intervals for 10% ranges + intervals = [ + (i / 10, (i + 1) / 10) for i in range(10) + ] # [(0.0, 0.1), (0.1, 0.2), ..., (0.9, 1.0)] + + # Initialize a dictionary to store percentages for each interval + interval_results = {} + + # Calculate the percentage of values within each interval + for lower, upper in intervals: + count_in_interval = np.sum( + (np.array(data_list) >= lower) & (np.array(data_list) < upper) + ) + percent_in_interval = (count_in_interval / total_count) * 100 + interval_results[f"{lower:.1f}-{upper:.1f}"] = percent_in_interval + + # Display the results + for interval, percent in interval_results.items(): + print(f"Percentage of values in {interval}: {percent:.2f}%") + + plot_intervals_inline(interval_results, output_int) + + # Define a color map for congestion visualization (green -> yellow -> red) + congestion_cmap = plt.cm.get_cmap("RdYlGn_r") + + # Helper function to get router coordinates from group ID + def get_router_coords(group_id): + x = group_id // NumX # Column index + y = group_id % NumY # Row index + # Reverse Y-axis for visualization + # return x * scale_factor, ((NumY-1) - y) * scale_factor + return ( + x * scale_factor, + y * scale_factor, + ) # Reverse Y-axis for visualization + + # Draw the mesh NoC with congestion-based links + plt.figure(figsize=(10, 8.4)) + for _, row in filtered_req_noc_data.iterrows(): + src_coords = get_router_coords(row["GROUP"]) + + # Skip invalid links based on edge and corner conditions + if ( + row["DIR"] == 0 and src_coords[1] == 3 * scale_factor + ): # North link for top row routers + continue + if ( + row["DIR"] == 1 and src_coords[0] == 3 * scale_factor + ): # East link for rightmost column routers + continue + if ( + row["DIR"] == 2 and 
src_coords[1] == 0 * scale_factor + ): # South link for bottom row routers + continue + if ( + row["DIR"] == 3 and src_coords[0] == 0 * scale_factor + ): # West link for leftmost column routers + continue + + # Determine destination coordinates + if row["DIR"] == 0: # North + dest_coords = (src_coords[0], src_coords[1] + 1 * scale_factor) + elif row["DIR"] == 1: # East + dest_coords = (src_coords[0] + 1 * scale_factor, src_coords[1]) + elif row["DIR"] == 2: # South + dest_coords = (src_coords[0], src_coords[1] - 1 * scale_factor) + elif row["DIR"] == 3: # West + dest_coords = (src_coords[0] - 1 * scale_factor, src_coords[1]) + else: + continue + + # Determine the congestion level and color + # congestion_level = + # (row['in_congestion_norm'] + row['out_congestion_norm']) / 2 + + # we only need outbound, because it is the inbound of its pair routers + congestion_level = row["out_congestion_norm"] + link_color = congestion_cmap(congestion_level) + + # Offset + granularity = 0.05 + offset_x = 0 + offset_y = 0 + if row["DIR"] == 1 or row["DIR"] == 3: + offset_y = ( + row["TILE"] * granularity * 2 + row["PORT"] * granularity + ) + else: + offset_x = ( + row["TILE"] * granularity * 2 + row["PORT"] * granularity + ) + + if row["DIR"] == 1: + offset_y += granularity * 20 + elif row["DIR"] == 3: + offset_y -= granularity * 20 + elif row["DIR"] == 0: + offset_x += granularity * 20 + elif row["DIR"] == 2: + offset_x -= granularity * 20 + else: + continue + + plt.plot( + [src_coords[0] + offset_x, dest_coords[0] + offset_x], + [src_coords[1] + offset_y, dest_coords[1] + offset_y], + color=link_color, + linewidth=1, + alpha=1, + ) + + # Add routers as nodes + offset_dir = 0.8 + for group_id in range(NumX * NumY): # 4x4 mesh + x, y = get_router_coords(group_id) + plt.scatter( + x + offset_dir, + y + offset_dir, + color="orange", + s=1200, + edgecolor="black", + zorder=11, + ) + plt.text( + x + offset_dir, + y + offset_dir, + f"R{group_id}", + ha="center", + va="center", + 
fontsize=15, + zorder=12, + ) + + for direction in range(4): # 4 router directions + offset_x = 0 + offset_y = 0 + x_2 = x + y_2 = y + offset_arrow = 0.5 + + # Skip invalid links based on edge and corner conditions + if ( + y == (NumY - 1) * scale_factor and direction == 0 + ): # North link for top row routers + continue + if ( + x == (NumX - 1) * scale_factor and direction == 1 + ): # East link for rightmost column routers + continue + if y == 0 and direction == 2: # South link for bottom row routers + continue + if ( + x == 0 and direction == 3 + ): # West link for leftmost column routers + continue + + if direction == 0: # North link for top row routers + offset_x = 0 + offset_y = scale_factor / 2 + x_2 = x + offset_arrow + offset_dir + elif direction == 1: # East link for rightmost column routers + offset_x = scale_factor / 2 + offset_y = 0 + y_2 = y + offset_arrow + offset_dir + elif direction == 2: # South link for bottom row routers + offset_x = 0 + offset_y = -scale_factor / 2 + x_2 = x - offset_arrow + offset_dir + elif direction == 3: # West link for leftmost column routers + offset_x = -scale_factor / 2 + offset_y = 0 + y_2 = y - offset_arrow + offset_dir + else: + continue + + plt.arrow( + x_2, + y_2, + offset_x, + offset_y, + head_width=1, + head_length=1, + color="black", + length_includes_head=True, + alpha=0.8, + width=0.02, + zorder=10, + ) + + # Configure plot + if req_rsp: + plt.title( + f"4x4 Mesh NoC {target} Visualization (resp network)", fontsize=16 + ) + else: + plt.title( + f"4x4 Mesh NoC {target} Visualization (req network)", fontsize=16 + ) + plt.axis("off") + plt.colorbar( + plt.cm.ScalarMappable( + cmap=congestion_cmap, norm=mcolors.Normalize(vmin=0, vmax=1) + ), + label=f"{target} Level", + ) + + # Save the output as PNG and PDF + plt.savefig(output_png, format="png", bbox_inches="tight") + plt.savefig(output_pdf, format="pdf", bbox_inches="tight") + # plt.show() + plt.clf() + plt.close() + + +req_rsp = 0 +for bw in range(2): + if bw 
== 0: + target = "congestion" + else: + target = "bw" + + for req_rsp in range(2): + # Define file paths + file_path = "spm_profiling/run_logs_f_1024/tests/router_level_profile_q_00038000.log" + output_png = f"out/mesh_noc_{target}_{req_rsp}.png" + output_pdf = f"out/mesh_noc_{target}_{req_rsp}.pdf" + output_int = f"out/mesh_noc_{target}_{req_rsp}_intreval.png" + + # Call the visualization function + visualize_mesh_noc_congestion_optimized( + file_path, output_png, output_pdf, output_int, req_rsp, bw + ) + + # Define file paths + file_path = "spm_profiling/run_logs_remap_f_1024/tests/router_level_profile_q_00038000.log" + output_png = f"out/mesh_noc_remap_{target}_{req_rsp}.png" + output_pdf = f"out/mesh_noc_remap_{target}_{req_rsp}.pdf" + output_int = f"out/mesh_noc_remap_{target}_{req_rsp}_intreval.png" + + # Call the visualization function + visualize_mesh_noc_congestion_optimized( + file_path, output_png, output_pdf, output_int, req_rsp, bw + ) diff --git a/hardware/scripts/noc_profiling_bw.py b/hardware/scripts/noc_profiling_bw.py new file mode 100644 index 000000000..b698b85fb --- /dev/null +++ b/hardware/scripts/noc_profiling_bw.py @@ -0,0 +1,280 @@ +#!/usr/bin/env python3 +# Copyright 2024 ETH Zurich and University of Bologna. +# Licensed under the Apache License, Version 2.0, see LICENSE for details. 
+# SPDX-License-Identifier: Apache-2.0 + +import pandas as pd +import numpy as np +import matplotlib.pyplot as plt +import matplotlib.colors as mcolors +import ast + + +scale_factor = 10 + + +def visualize_mesh_noc_congestion_optimized( + file_path, output_png, output_pdf, req_rsp, NumX=4, NumY=4 +): + # Load and preprocess the data + with open(file_path, "r") as file: + raw_content = file.readlines() + + parsed_data = [] + for line in raw_content: + line = line.strip() + if line.startswith("{") and not line.endswith("}"): + line += "}" # Append a closing brace if missing + try: + entry = ast.literal_eval(line) # Parse JSON-like entries + parsed_data.append(entry) + except Exception: + continue + + df = pd.DataFrame(parsed_data) + if df.empty: + print("Error: No valid data found in the file.") + return + + # Filter data for REQ_RSP == 0 (request NoC) + req_noc_data = df[df["REQ_RSP"] == req_rsp] + + # Calculate congestion for inbound and outbound links (1 - handshake/valid) + max_hsk = req_noc_data["out_hsk_cyc_num"].max() + # req_noc_data['in_congestion'] = + # (req_noc_data['in_hsk_cyc_num'] / (max_hsk)) + req_noc_data["out_congestion"] = req_noc_data["out_hsk_cyc_num"] / ( + max_hsk + ) + + # Normalize congestion for color mapping + # (0: least congested, 1: most congested) + # req_noc_data['in_congestion_norm'] = + # np.clip(req_noc_data['in_congestion'], 0, 1) + req_noc_data["out_congestion_norm"] = np.clip( + req_noc_data["out_congestion"], 0, 1 + ) + print(req_noc_data) + + # Define a color map for congestion visualization (green -> yellow -> red) + congestion_cmap = plt.cm.get_cmap("RdYlGn_r") + + # Helper function to get router coordinates from group ID + def get_router_coords(group_id): + x = group_id // NumX # Column index + y = group_id % NumY # Row index + # Reverse Y-axis for visualization + # return x * scale_factor, ((NumY-1) - y) * scale_factor + return ( + x * scale_factor, + y * scale_factor, + ) # Reverse Y-axis for visualization + + # Draw the 
mesh NoC with congestion-based links + plt.figure(figsize=(10, 8.4)) + for _, row in req_noc_data.iterrows(): + src_coords = get_router_coords(row["GROUP"]) + + # print(row) + + # Skip invalid links based on edge and corner conditions + if ( + row["DIR"] == 0 and src_coords[1] == 3 * scale_factor + ): # North link for top row routers + continue + if ( + row["DIR"] == 1 and src_coords[0] == 3 * scale_factor + ): # East link for rightmost column routers + continue + if ( + row["DIR"] == 2 and src_coords[1] == 0 * scale_factor + ): # South link for bottom row routers + continue + if ( + row["DIR"] == 3 and src_coords[0] == 0 * scale_factor + ): # West link for leftmost column routers + continue + + # Determine destination coordinates + if row["DIR"] == 0: # North + dest_coords = (src_coords[0], src_coords[1] + 1 * scale_factor) + elif row["DIR"] == 1: # East + dest_coords = (src_coords[0] + 1 * scale_factor, src_coords[1]) + elif row["DIR"] == 2: # South + dest_coords = (src_coords[0], src_coords[1] - 1 * scale_factor) + elif row["DIR"] == 3: # West + dest_coords = (src_coords[0] - 1 * scale_factor, src_coords[1]) + else: + continue + + # Determine the congestion level and color + # congestion_level = + # (row['in_congestion_norm'] + row['out_congestion_norm']) / 2 + # we only need outbound, because it is the inbound of its pair routers + congestion_level = row["out_congestion_norm"] + link_color = congestion_cmap(congestion_level) + + # Offset + granularity = 0.05 + offset_x = 0 + offset_y = 0 + if row["DIR"] == 1 or row["DIR"] == 3: + offset_y = ( + row["TILE"] * granularity * 2 + row["PORT"] * granularity + ) + else: + offset_x = ( + row["TILE"] * granularity * 2 + row["PORT"] * granularity + ) + + if row["DIR"] == 1: + offset_y += granularity * 20 + elif row["DIR"] == 3: + offset_y -= granularity * 20 + elif row["DIR"] == 0: + offset_x += granularity * 20 + elif row["DIR"] == 2: + offset_x -= granularity * 20 + else: + continue + + # Draw the link + # plt.plot( + 
# [src_coords[0] + offset_x, dest_coords[0] + offset_x], + # [src_coords[1] + offset_y, dest_coords[1] + offset_y], + # color=link_color, + # linewidth=1, + # alpha=1, + # ) + plt.plot( + [src_coords[0] + offset_x, dest_coords[0] + offset_x], + [src_coords[1] + offset_y, dest_coords[1] + offset_y], + color=link_color, + linewidth=1, + alpha=1, + ) + + # Add routers as nodes + offset_dir = 0.8 + for group_id in range(NumX * NumY): # 4x4 mesh + x, y = get_router_coords(group_id) + plt.scatter( + x + offset_dir, + y + offset_dir, + color="orange", + s=1200, + edgecolor="black", + zorder=11, + ) + plt.text( + x + offset_dir, + y + offset_dir, + f"R{group_id}", + ha="center", + va="center", + fontsize=15, + zorder=12, + ) + + for direction in range(4): # 4 router directions + offset_x = 0 + offset_y = 0 + x_2 = x + y_2 = y + offset_arrow = 0.5 + + # Skip invalid links based on edge and corner conditions + if ( + y == (NumY - 1) * scale_factor and direction == 0 + ): # North link for top row routers + continue + if ( + x == (NumX - 1) * scale_factor and direction == 1 + ): # East link for rightmost column routers + continue + if y == 0 and direction == 2: # South link for bottom row routers + continue + if ( + x == 0 and direction == 3 + ): # West link for leftmost column routers + continue + + if direction == 0: # North link for top row routers + offset_x = 0 + offset_y = scale_factor / 2 + x_2 = x + offset_arrow + offset_dir + elif direction == 1: # East link for rightmost column routers + offset_x = scale_factor / 2 + offset_y = 0 + y_2 = y + offset_arrow + offset_dir + elif direction == 2: # South link for bottom row routers + offset_x = 0 + offset_y = -scale_factor / 2 + x_2 = x - offset_arrow + offset_dir + elif direction == 3: # West link for leftmost column routers + offset_x = -scale_factor / 2 + offset_y = 0 + y_2 = y - offset_arrow + offset_dir + else: + continue + + plt.arrow( + x_2, + y_2, + offset_x, + offset_y, + head_width=1, + head_length=1, + 
color="black", + length_includes_head=True, + alpha=0.8, + width=0.02, + zorder=10, + ) + + # Configure plot + if req_rsp: + plt.title( + "4x4 Mesh NoC Bandwidth Visualization (resp network)", fontsize=16 + ) + else: + plt.title( + "4x4 Mesh NoC Bandwidth Visualization (req network)", fontsize=16 + ) + plt.axis("off") + plt.colorbar( + plt.cm.ScalarMappable( + cmap=congestion_cmap, norm=mcolors.Normalize(vmin=0, vmax=1) + ), + label="Bandwidth Level", + ) + + # Save the output as PNG and PDF + plt.savefig(output_png, format="png", bbox_inches="tight") + plt.savefig(output_pdf, format="pdf", bbox_inches="tight") + # plt.show() + plt.clf() + plt.close() + + +req_rsp = 0 +for req_rsp in range(2): + # Define file paths + file_path = "spm_profiling/run_logs_f_1024/tests/router_level_profile_q_00038000.log" + output_png = f"out/mesh_noc_bw_{req_rsp}.png" + output_pdf = f"out/mesh_noc_bw_{req_rsp}.pdf" + + # Call the visualization function + visualize_mesh_noc_congestion_optimized( + file_path, output_png, output_pdf, req_rsp + ) + + # Define file paths + file_path = "spm_profiling/run_logs_remap_f_1024/tests/router_level_profile_q_00038000.log" + output_png = f"out/mesh_noc_remap_bw_{req_rsp}.png" + output_pdf = f"out/mesh_noc_remap_bw_{req_rsp}.pdf" + + # Call the visualization function + visualize_mesh_noc_congestion_optimized( + file_path, output_png, output_pdf, req_rsp + ) diff --git a/hardware/src/mempool_pkg.sv b/hardware/src/mempool_pkg.sv index 4f6c43e31..3317bdfe2 100644 --- a/hardware/src/mempool_pkg.sv +++ b/hardware/src/mempool_pkg.sv @@ -390,6 +390,7 @@ package mempool_pkg; localparam bit PostLayoutGr = `ifdef POSTLAYOUTGR `POSTLAYOUTGR `else 0 `endif; `ifndef TARGET_VERILATOR + // tcdm memory pattern profile typedef struct { int unsigned initiated; int unsigned initial_cycle; @@ -402,6 +403,30 @@ package mempool_pkg; int unsigned read_cycles[$]; // dynamic array to store cycles of read accesses int unsigned write_cycles[$]; // dynamic array to store cycles 
of write accesses } profile_t; + + // tile level profiling + typedef struct { + // tile remote ports profile + int unsigned req_vld_cyc_num[NumRemotePortsPerTile-1]; + int unsigned req_hsk_cyc_num[NumRemotePortsPerTile-1]; + } tile_level_profile_t; + + // group level profiling + typedef struct { + // group xbar ports profile + int unsigned req_vld_cyc_num [NumRemotePortsPerTile-1]; + int unsigned req_hsk_cyc_num [NumRemotePortsPerTile-1]; + int unsigned req_vld_cyc_more_than_one_hit_same_bank_num; + } group_level_profile_t; + + // router level profile + typedef struct { + // noc router ports profile + int unsigned in_vld_cyc_num [4]; // 4: 4 directions + int unsigned in_hsk_cyc_num [4]; // 4: 4 directions + int unsigned out_vld_cyc_num[4]; // 4: 4 directions + int unsigned out_hsk_cyc_num[4]; // 4: 4 directions + } router_level_profile_t; `endif diff --git a/hardware/tb/mempool_tb.sv b/hardware/tb/mempool_tb.sv index 63a0ab2da..c5ac8a6e2 100644 --- a/hardware/tb/mempool_tb.sv +++ b/hardware/tb/mempool_tb.sv @@ -23,6 +23,7 @@ module mempool_tb; import mempool_pkg::*; import axi_pkg::xbar_cfg_t; import axi_pkg::xbar_rule_32_t; + import cf_math_pkg::idx_width; `ifdef BOOT_ADDR localparam BootAddr = `BOOT_ADDR; @@ -448,6 +449,402 @@ module mempool_tb; `ifndef TARGET_SYNTHESIS `ifndef TARGET_VERILATOR + logic [63:0] cycle_q; + always_ff @(posedge clk or negedge rst_n) begin + if(~rst_n) begin + cycle_q <= '0; + end else begin + cycle_q <= cycle_q + 64'd1; + end + end + + // always_comb begin + // if(cycle_q > 3600) + // $finish; + // end + +`ifdef NOC_PROFILING + int f_2, f_final_2; + string fn_2, fn_final_2; + int f_3, f_final_3; + string fn_3, fn_final_3; + int f_4, f_final_4; + string fn_4, fn_final_4; + string dump_time; + + string app; + string log_path; + initial begin + void'($value$plusargs("APP=%s", app)); + // $sformat(log_path, "../scripts/spm_profiling/run_logs_remap_%1d/%s", NumCores, app); + // $sformat(log_path, "../scripts/spm_profiling/run_logs_%1d/%s", 
NumCores, app); + $sformat(log_path, "../scripts/spm_profiling/run_logs_remap_f_%1d/%s", NumCores, app); + // $sformat(log_path, "../scripts/spm_profiling/run_logs_f_%1d/%s", NumCores, app); + end + + tile_level_profile_t tile_level_profile_d, tile_level_profile_q [NumGroups-1:0][NumTilesPerGroup-1:0]; + group_level_profile_t group_level_profile_d, group_level_profile_q [NumGroups-1:0]; + router_level_profile_t router_level_profile_d, router_level_profile_q[NumGroups-1:0][NumTilesPerGroup-1:0][(NumRemotePortsPerTile-1)-1:0][2-1:0]; // 2: req+rsp noc + + // tile level profiling + generate + for (genvar g = 0; g < NumGroups; g++) begin + for (genvar t = 0; t < NumTilesPerGroup; t++) begin + always_ff @(posedge clk or negedge rst_n) begin + if(!rst_n) begin + for (int p = 0; p < (NumRemotePortsPerTile-1); p++) begin + tile_level_profile_q[g][t].req_vld_cyc_num[p] = '0; + tile_level_profile_q[g][t].req_hsk_cyc_num[p] = '0; + end + end else begin + for (int p = 0; p < (NumRemotePortsPerTile-1); p++) begin + tile_level_profile_q[g][t].req_vld_cyc_num[p] = tile_level_profile_q[g][t].req_vld_cyc_num[p] + + $countones( + dut.i_mempool_cluster.gen_groups_x[g/NumY].gen_groups_y[g%NumY].i_group.i_mempool_group.gen_tiles[t].i_tile.tcdm_master_req_valid_o[p+1] + ); + tile_level_profile_q[g][t].req_hsk_cyc_num[p] = tile_level_profile_q[g][t].req_hsk_cyc_num[p] + + $countones( + dut.i_mempool_cluster.gen_groups_x[g/NumY].gen_groups_y[g%NumY].i_group.i_mempool_group.gen_tiles[t].i_tile.tcdm_master_req_valid_o[p+1] & + dut.i_mempool_cluster.gen_groups_x[g/NumY].gen_groups_y[g%NumY].i_group.i_mempool_group.gen_tiles[t].i_tile.tcdm_master_req_ready_i[p+1] + ); + + end + end + end + end + end + endgenerate + + // group level profiling + logic [NumGroups-1:0][NumTilesPerGroup*NumBanksPerTile-1:0][$clog2(NumTilesPerGroup*(NumRemotePortsPerTile-1)):0] group_xbar_req_to_same_bank_count; + logic 
[NumGroups-1:0][NumTilesPerGroup*NumBanksPerTile-1:0][$clog2(NumTilesPerGroup*(NumRemotePortsPerTile-1)):0] group_xbar_req_to_same_bank_conflict_count; + logic [NumGroups-1:0][$clog2(NumTilesPerGroup*(NumRemotePortsPerTile-1)):0] group_xbar_req_to_same_bank_conflict_count_sum; + + logic [NumX-1:0][NumY-1:0][NumRemotePortsPerTile-1-1:0][NumTilesPerGroup-1:0] tcdm_slave_req_valid; + logic [NumX-1:0][NumY-1:0][NumRemotePortsPerTile-1-1:0][NumTilesPerGroup-1:0][idx_width(NumTilesPerGroup)+idx_width(NumBanksPerTile)-1:0] tcdm_slave_req_tgt_addr; + + generate + for(genvar x_dim = 0; x_dim < NumX; x_dim++) begin + for(genvar y_dim = 0; y_dim < NumY; y_dim++) begin + for (genvar p = 0; p < (NumRemotePortsPerTile-1); p++) begin + for(genvar t_i = 0; t_i < NumTilesPerGroup; t_i++) begin + assign tcdm_slave_req_valid [x_dim][y_dim][p][t_i] = dut.i_mempool_cluster.gen_groups_x[x_dim].gen_groups_y[y_dim].i_group.floo_req_from_router_before_xbar_valid_per_port[p+1][t_i]; + assign tcdm_slave_req_tgt_addr[x_dim][y_dim][p][t_i] = dut.i_mempool_cluster.gen_groups_x[x_dim].gen_groups_y[y_dim].i_group.floo_req_from_router[t_i][p+1].hdr.tgt_addr[idx_width(NumTilesPerGroup)+idx_width(NumBanksPerTile)-1:0]; + end + end + end + end + endgenerate + + always_comb begin + group_xbar_req_to_same_bank_count = '0; + for(int g = 0; g < NumGroups; g++) begin + for (int p = 0; p < (NumRemotePortsPerTile-1); p++) begin + for(int t_i = 0; t_i < NumTilesPerGroup; t_i++) begin + if( + tcdm_slave_req_valid [g/NumY][g%NumY][p][t_i] // if source port from router is valid + ) begin + group_xbar_req_to_same_bank_count[g][ + tcdm_slave_req_tgt_addr[g/NumY][g%NumY][p][t_i] + ] += 1; // then destination port count +1 + end + end + end + end + end + + always_comb begin + group_xbar_req_to_same_bank_conflict_count = '0; + group_xbar_req_to_same_bank_conflict_count_sum = '0; + for(int g = 0; g < NumGroups; g++) begin + for(int b = 0; b < NumTilesPerGroup*NumBanksPerTile; b++) begin + 
if(group_xbar_req_to_same_bank_count[g][b] > 0) begin + group_xbar_req_to_same_bank_conflict_count[g][b] = group_xbar_req_to_same_bank_count[g][b] - 1; // minus the one that is not conflict. + end + group_xbar_req_to_same_bank_conflict_count_sum[g] += group_xbar_req_to_same_bank_conflict_count[g][b]; + end + end + end + + generate + for (genvar g = 0; g < NumGroups; g++) begin + always_ff @(posedge clk or negedge rst_n) begin + if(!rst_n) begin + for (int p = 0; p < (NumRemotePortsPerTile-1); p++) begin + group_level_profile_q[g].req_vld_cyc_num[p] = '0; + group_level_profile_q[g].req_hsk_cyc_num[p] = '0; + end + group_level_profile_q[g].req_vld_cyc_more_than_one_hit_same_bank_num = '0; + end else begin + for (int p = 0; p < (NumRemotePortsPerTile-1); p++) begin + group_level_profile_q[g].req_vld_cyc_num[p] = group_level_profile_q[g].req_vld_cyc_num[p] + + $countones( + dut.i_mempool_cluster.gen_groups_x[g/NumY].gen_groups_y[g%NumY].i_group.floo_req_from_router_before_xbar_valid_per_port[p+1][NumTilesPerGroup-1:0] + ); + group_level_profile_q[g].req_hsk_cyc_num[p] = group_level_profile_q[g].req_hsk_cyc_num[p] + + $countones( + dut.i_mempool_cluster.gen_groups_x[g/NumY].gen_groups_y[g%NumY].i_group.floo_req_from_router_before_xbar_valid_per_port[p+1][NumTilesPerGroup-1:0] & + dut.i_mempool_cluster.gen_groups_x[g/NumY].gen_groups_y[g%NumY].i_group.floo_req_from_router_before_xbar_ready_per_port[p+1][NumTilesPerGroup-1:0] + ); + end + group_level_profile_q[g].req_vld_cyc_more_than_one_hit_same_bank_num = group_level_profile_q[g].req_vld_cyc_more_than_one_hit_same_bank_num + + group_xbar_req_to_same_bank_conflict_count_sum[g]; + end + end + end + endgenerate + + // router level profiling + generate + for (genvar g = 0; g < NumGroups; g++) begin: gen_router_profile_per_group + for(genvar t = 0; t < NumTilesPerGroup; t++) begin: gen_router_profile_per_tile + for(genvar p = 0; p < (NumRemotePortsPerTile-1); p++) begin: gen_router_profile_per_remote_port + always_ff 
@(posedge clk or negedge rst_n) begin // per-router counters; entry [0] = request router, entry [1] = response router
+            if(!rst_n) begin
+              for(int router_p = 0; router_p < 4; router_p++) begin
+                router_level_profile_q[g][t][p][0].in_vld_cyc_num[router_p] <= '0;
+                router_level_profile_q[g][t][p][0].in_hsk_cyc_num[router_p] <= '0;
+                router_level_profile_q[g][t][p][0].out_vld_cyc_num[router_p] <= '0;
+                router_level_profile_q[g][t][p][0].out_hsk_cyc_num[router_p] <= '0;
+                router_level_profile_q[g][t][p][1].in_vld_cyc_num[router_p] <= '0;
+                router_level_profile_q[g][t][p][1].in_hsk_cyc_num[router_p] <= '0;
+                router_level_profile_q[g][t][p][1].out_vld_cyc_num[router_p] <= '0;
+                router_level_profile_q[g][t][p][1].out_hsk_cyc_num[router_p] <= '0;
+              end
+            end else begin
+              for(int router_p = 0; router_p < 4; router_p++) begin // counter index router_p tracks router port router_p+1; router port 0 is not profiled
+                // req router (non-blocking updates, so same-edge readers of the counters see a deterministic value)
+                router_level_profile_q[g][t][p][0].in_vld_cyc_num[router_p] <= router_level_profile_q[g][t][p][0].in_vld_cyc_num[router_p] +
+                  $countones(
+                    dut.i_mempool_cluster.gen_groups_x[g/NumY].gen_groups_y[g%NumY].i_group.gen_router_router_i[t].gen_router_router_j[p+1].i_floo_req_router.valid_i[router_p+1]
+                  );
+                router_level_profile_q[g][t][p][0].in_hsk_cyc_num[router_p] <= router_level_profile_q[g][t][p][0].in_hsk_cyc_num[router_p] +
+                  $countones(
+                    dut.i_mempool_cluster.gen_groups_x[g/NumY].gen_groups_y[g%NumY].i_group.gen_router_router_i[t].gen_router_router_j[p+1].i_floo_req_router.valid_i[router_p+1] &
+                    dut.i_mempool_cluster.gen_groups_x[g/NumY].gen_groups_y[g%NumY].i_group.gen_router_router_i[t].gen_router_router_j[p+1].i_floo_req_router.ready_o[router_p+1]
+                  );
+                router_level_profile_q[g][t][p][0].out_vld_cyc_num[router_p] <= router_level_profile_q[g][t][p][0].out_vld_cyc_num[router_p] +
+                  $countones(
+                    dut.i_mempool_cluster.gen_groups_x[g/NumY].gen_groups_y[g%NumY].i_group.gen_router_router_i[t].gen_router_router_j[p+1].i_floo_req_router.valid_o[router_p+1]
+                  );
+                router_level_profile_q[g][t][p][0].out_hsk_cyc_num[router_p] <= router_level_profile_q[g][t][p][0].out_hsk_cyc_num[router_p] +
+                  $countones(
+                    dut.i_mempool_cluster.gen_groups_x[g/NumY].gen_groups_y[g%NumY].i_group.gen_router_router_i[t].gen_router_router_j[p+1].i_floo_req_router.valid_o[router_p+1] &
+                    dut.i_mempool_cluster.gen_groups_x[g/NumY].gen_groups_y[g%NumY].i_group.gen_router_router_i[t].gen_router_router_j[p+1].i_floo_req_router.ready_i[router_p+1]
+                  );
+                // resp router
+                router_level_profile_q[g][t][p][1].in_vld_cyc_num[router_p] <= router_level_profile_q[g][t][p][1].in_vld_cyc_num[router_p] +
+                  $countones(
+                    dut.i_mempool_cluster.gen_groups_x[g/NumY].gen_groups_y[g%NumY].i_group.gen_router_router_i[t].gen_router_router_j[p+1].i_floo_resp_router.valid_i[router_p+1]
+                  );
+                router_level_profile_q[g][t][p][1].in_hsk_cyc_num[router_p] <= router_level_profile_q[g][t][p][1].in_hsk_cyc_num[router_p] +
+                  $countones(
+                    dut.i_mempool_cluster.gen_groups_x[g/NumY].gen_groups_y[g%NumY].i_group.gen_router_router_i[t].gen_router_router_j[p+1].i_floo_resp_router.valid_i[router_p+1] &
+                    dut.i_mempool_cluster.gen_groups_x[g/NumY].gen_groups_y[g%NumY].i_group.gen_router_router_i[t].gen_router_router_j[p+1].i_floo_resp_router.ready_o[router_p+1]
+                  );
+                router_level_profile_q[g][t][p][1].out_vld_cyc_num[router_p] <= router_level_profile_q[g][t][p][1].out_vld_cyc_num[router_p] +
+                  $countones(
+                    dut.i_mempool_cluster.gen_groups_x[g/NumY].gen_groups_y[g%NumY].i_group.gen_router_router_i[t].gen_router_router_j[p+1].i_floo_resp_router.valid_o[router_p+1]
+                  );
+                router_level_profile_q[g][t][p][1].out_hsk_cyc_num[router_p] <= router_level_profile_q[g][t][p][1].out_hsk_cyc_num[router_p] +
+                  $countones(
+                    dut.i_mempool_cluster.gen_groups_x[g/NumY].gen_groups_y[g%NumY].i_group.gen_router_router_i[t].gen_router_router_j[p+1].i_floo_resp_router.valid_o[router_p+1] &
+                    dut.i_mempool_cluster.gen_groups_x[g/NumY].gen_groups_y[g%NumY].i_group.gen_router_router_i[t].gen_router_router_j[p+1].i_floo_resp_router.ready_i[router_p+1]
+                  );
+              end
+            end
+          end
+        end
+      end
+    end
+  endgenerate
+
+
+  always_ff @(posedge clk) begin
+    if (rst_n)
begin
+      // Periodic dump: every 0x400 cycles while cycle_q < 0x8000 (including cycle 0), afterwards whenever cycle_q[15:0] == 0x8000.
+      if(
+        ((cycle_q[63:0] < 'h8000) && ((cycle_q[10:0] == 11'h400) || (cycle_q[10:0] == 11'h000))) ||
+        (cycle_q[15:0] == 'h8000)
+      ) begin
+
+        $sformat(fn_2, "%s/tile_level_profile_q_%8x.log", log_path, cycle_q);
+        f_2 = $fopen(fn_2, "w");
+        $display("[Tracer] Final Logging Banks to %s", fn_2);
+
+        $sformat(fn_3, "%s/group_level_profile_q_%8x.log", log_path, cycle_q);
+        f_3 = $fopen(fn_3, "w");
+        $display("[Tracer] Final Logging Banks to %s", fn_3);
+
+        $sformat(fn_4, "%s/router_level_profile_q_%8x.log", log_path, cycle_q);
+        f_4 = $fopen(fn_4, "w");
+        $display("[Tracer] Final Logging Banks to %s", fn_4);
+
+        $timeformat(-9, 0, "", 10);
+        $sformat(dump_time, "dump time %t, cycle %8d #;\n", $time, cycle_q); // header line shared by all three files
+        $fwrite(f_2, dump_time);
+        $fwrite(f_3, dump_time);
+        $fwrite(f_4, dump_time);
+
+        // tile level: util = handshakes / valid cycles, reported as 0 when no valid cycle was seen (avoids 0/0 -> NaN in the log). NOTE(review): records open with '{' but never close it - confirm the log parser expects that.
+        for(int g = 0; g < NumGroups; g++) begin
+          for(int t_i = 0; t_i < NumTilesPerGroup; t_i++) begin
+            for (int p = 0; p < (NumRemotePortsPerTile-1); p++) begin
+              automatic string extras_str_2;
+              extras_str_2 = $sformatf("{'GROUP': %03d, 'TILE': %03d, 'PORT': %03d, 'req_vld_cyc_num': %03d, 'req_hsk_cyc_num': %03d, 'util': %.2f\n",
+                g, t_i, p,
+                tile_level_profile_q[g][t_i].req_vld_cyc_num[p],
+                tile_level_profile_q[g][t_i].req_hsk_cyc_num[p],
+                tile_level_profile_q[g][t_i].req_vld_cyc_num[p] > 0 ? (tile_level_profile_q[g][t_i].req_hsk_cyc_num[p]*1.0)/(tile_level_profile_q[g][t_i].req_vld_cyc_num[p]*1.0) : 0.0
+              );
+              $fwrite(f_2, extras_str_2);
+            end
+          end
+        end
+        $fclose(f_2);
+
+        // group level: counters are summed over the profiled ports; util excludes the conflict count from the denominator and is 0 when that denominator would be <= 0
+        for(int g = 0; g < NumGroups; g++) begin
+          int unsigned req_vld_cyc_num_sum;
+          int unsigned req_hsk_cyc_num_sum;
+          automatic string extras_str_3;
+          req_vld_cyc_num_sum = 0;
+          req_hsk_cyc_num_sum = 0;
+          for (int p = 0; p < (NumRemotePortsPerTile-1); p++) begin
+            req_vld_cyc_num_sum += group_level_profile_q[g].req_vld_cyc_num[p];
+            req_hsk_cyc_num_sum += group_level_profile_q[g].req_hsk_cyc_num[p];
+          end
+          extras_str_3 = $sformatf("{'GROUP': %03d, 'req_vld_cyc_num': %03d, 'req_hsk_cyc_num': %03d, 'req_vld_cyc_more_than_one_hit_same_bank_num': %03d, 'util': %.2f\n",
+            g,
+            req_vld_cyc_num_sum,
+            req_hsk_cyc_num_sum,
+            group_level_profile_q[g].req_vld_cyc_more_than_one_hit_same_bank_num,
+            req_vld_cyc_num_sum > group_level_profile_q[g].req_vld_cyc_more_than_one_hit_same_bank_num ? (req_hsk_cyc_num_sum*1.0)/((req_vld_cyc_num_sum-group_level_profile_q[g].req_vld_cyc_more_than_one_hit_same_bank_num)*1.0) : 0.0
+          );
+          $fwrite(f_3, extras_str_3);
+        end
+        $fclose(f_3);
+
+        // router level: one record per group / tile / port / router (REQ_RSP 0 = request, 1 = response) / direction
+        for(int g = 0; g < NumGroups; g++) begin
+          for(int t = 0; t < NumTilesPerGroup; t++) begin
+            for(int p = 0; p < (NumRemotePortsPerTile-1); p++) begin
+              for(int req_rsp = 0; req_rsp < 2; req_rsp++) begin
+                for(int dir = 0; dir < 4; dir++) begin
+                  automatic string extras_str_4;
+                  extras_str_4 = $sformatf("{'GROUP': %03d, 'TILE': %03d, 'PORT': %03d, 'REQ_RSP': %03d, 'DIR': %03d, 'in_vld_cyc_num': %03d, 'in_hsk_cyc_num': %03d, 'out_vld_cyc_num': %03d, 'out_hsk_cyc_num': %03d, 'in_util': %.2f, 'out_util': %.2f\n",
+                    g, t, p, req_rsp, dir,
+                    router_level_profile_q[g][t][p][req_rsp].in_vld_cyc_num[dir],
+                    router_level_profile_q[g][t][p][req_rsp].in_hsk_cyc_num[dir],
+                    router_level_profile_q[g][t][p][req_rsp].out_vld_cyc_num[dir],
+                    router_level_profile_q[g][t][p][req_rsp].out_hsk_cyc_num[dir],
+                    router_level_profile_q[g][t][p][req_rsp].in_vld_cyc_num[dir] > 0 ? (router_level_profile_q[g][t][p][req_rsp].in_hsk_cyc_num[dir]*1.0)/(router_level_profile_q[g][t][p][req_rsp].in_vld_cyc_num[dir]*1.0) : 0.0,
+                    router_level_profile_q[g][t][p][req_rsp].out_vld_cyc_num[dir] > 0 ? (router_level_profile_q[g][t][p][req_rsp].out_hsk_cyc_num[dir]*1.0)/(router_level_profile_q[g][t][p][req_rsp].out_vld_cyc_num[dir]*1.0) : 0.0
+                  );
+                  $fwrite(f_4, extras_str_4);
+                end
+              end
+            end
+          end
+        end
+        $fclose(f_4);
+      end
+    end
+  end
+
+
+
+
+
+
+  // End-of-simulation dump: same three reports as the periodic dump, written to fixed file names (no cycle suffix).
+  final begin
+    $sformat(fn_final_2, "%s/tile_level_profile_q.log", log_path);
+    f_final_2 = $fopen(fn_final_2, "w");
+    $display("[Tracer] Final Logging Banks to %s", fn_final_2);
+
+    $sformat(fn_final_3, "%s/group_level_profile_q.log", log_path);
+    f_final_3 = $fopen(fn_final_3, "w");
+    $display("[Tracer] Final Logging Banks to %s", fn_final_3);
+
+    $sformat(fn_final_4, "%s/router_level_profile_q.log", log_path);
+    f_final_4 = $fopen(fn_final_4, "w");
+    $display("[Tracer] Final Logging Banks to %s", fn_final_4);
+
+    $timeformat(-9, 0, "", 10);
+    $sformat(dump_time, "dump time %t, cycle %8d #;\n", $time, cycle_q);
+    $fwrite(f_final_2, dump_time);
+    $fwrite(f_final_3, dump_time);
+    $fwrite(f_final_4, dump_time);
+
+    // tile level (util is 0 when the port never saw a valid request, instead of 0/0 -> NaN)
+    for(int g = 0; g < NumGroups; g++) begin
+      for(int t_i = 0; t_i < NumTilesPerGroup; t_i++) begin
+        for (int p = 0; p < (NumRemotePortsPerTile-1); p++) begin
+          automatic string extras_str_final_2;
+          extras_str_final_2 = $sformatf("{'GROUP': %03d, 'TILE': %03d, 'PORT': %03d, 'req_vld_cyc_num': %03d, 'req_hsk_cyc_num': %03d, 'util': %.2f\n",
+            g, t_i, p,
+            tile_level_profile_q[g][t_i].req_vld_cyc_num[p],
+            tile_level_profile_q[g][t_i].req_hsk_cyc_num[p],
+            tile_level_profile_q[g][t_i].req_vld_cyc_num[p] > 0 ? (tile_level_profile_q[g][t_i].req_hsk_cyc_num[p]*1.0)/(tile_level_profile_q[g][t_i].req_vld_cyc_num[p]*1.0) : 0.0
+          );
+          $fwrite(f_final_2, extras_str_final_2);
+        end
+      end
+    end
+    $fclose(f_final_2);
+
+    // group level (util is 0 when valid cycles do not exceed the conflict count, so the unsigned subtraction cannot underflow or divide by zero)
+    for(int g = 0; g < NumGroups; g++) begin
+      int unsigned req_vld_cyc_num_sum;
+      int unsigned req_hsk_cyc_num_sum;
+      automatic string extras_str_final_3;
+      req_vld_cyc_num_sum = 0;
+      req_hsk_cyc_num_sum = 0;
+      for (int p = 0; p < (NumRemotePortsPerTile-1); p++) begin
+        req_vld_cyc_num_sum += group_level_profile_q[g].req_vld_cyc_num[p];
+        req_hsk_cyc_num_sum += group_level_profile_q[g].req_hsk_cyc_num[p];
+      end
+      extras_str_final_3 = $sformatf("{'GROUP': %03d, 'req_vld_cyc_num': %03d, 'req_hsk_cyc_num': %03d, 'req_vld_cyc_more_than_one_hit_same_bank_num': %03d, 'util': %.2f\n",
+        g,
+        req_vld_cyc_num_sum,
+        req_hsk_cyc_num_sum,
+        group_level_profile_q[g].req_vld_cyc_more_than_one_hit_same_bank_num,
+        req_vld_cyc_num_sum > group_level_profile_q[g].req_vld_cyc_more_than_one_hit_same_bank_num ? (req_hsk_cyc_num_sum*1.0)/((req_vld_cyc_num_sum-group_level_profile_q[g].req_vld_cyc_more_than_one_hit_same_bank_num)*1.0) : 0.0
+      );
+      $fwrite(f_final_3, extras_str_final_3);
+    end
+    $fclose(f_final_3);
+
+    // router level (ratios guarded exactly like the periodic dump, so idle router ports report 0.00)
+    for(int g = 0; g < NumGroups; g++) begin
+      for(int t = 0; t < NumTilesPerGroup; t++) begin
+        for(int p = 0; p < (NumRemotePortsPerTile-1); p++) begin
+          for(int req_rsp = 0; req_rsp < 2; req_rsp++) begin
+            for(int dir = 0; dir < 4; dir++) begin
+              automatic string extras_str_final_4;
+              extras_str_final_4 = $sformatf("{'GROUP': %03d, 'TILE': %03d, 'PORT': %03d, 'REQ_RSP': %03d, 'DIR': %03d, 'in_vld_cyc_num': %03d, 'in_hsk_cyc_num': %03d, 'out_vld_cyc_num': %03d, 'out_hsk_cyc_num': %03d, 'in_util': %.2f, 'out_util': %.2f\n",
+                g, t, p, req_rsp, dir,
+                router_level_profile_q[g][t][p][req_rsp].in_vld_cyc_num[dir],
+                router_level_profile_q[g][t][p][req_rsp].in_hsk_cyc_num[dir],
+                router_level_profile_q[g][t][p][req_rsp].out_vld_cyc_num[dir],
+                router_level_profile_q[g][t][p][req_rsp].out_hsk_cyc_num[dir],
+                router_level_profile_q[g][t][p][req_rsp].in_vld_cyc_num[dir] > 0 ? (router_level_profile_q[g][t][p][req_rsp].in_hsk_cyc_num[dir]*1.0)/(router_level_profile_q[g][t][p][req_rsp].in_vld_cyc_num[dir]*1.0) : 0.0,
+                router_level_profile_q[g][t][p][req_rsp].out_vld_cyc_num[dir] > 0 ? (router_level_profile_q[g][t][p][req_rsp].out_hsk_cyc_num[dir]*1.0)/(router_level_profile_q[g][t][p][req_rsp].out_vld_cyc_num[dir]*1.0) : 0.0
+              );
+              $fwrite(f_final_4, extras_str_final_4);
+            end
+          end
+        end
+      end
+    end
+    $fclose(f_final_4);
+
+  end
+
+
+`endif
+
 `ifdef SPM_PROFILING
   int f_0, f_final_0;
   int f_1, f_final_1;
@@ -464,16 +861,6 @@ module mempool_tb;
   profile_t
dbg_profile_q[NumGroups-1:0][NumTilesPerGroup-1:0][NumBanksPerTile-1:0][2**TCDMAddrMemWidth-1:0]; - logic [63:0] cycle_q; - - always_ff @(posedge clk or negedge rst_n) begin - if(~rst_n) begin - cycle_q <= '0; - end else begin - cycle_q <= cycle_q + 64'd1; - end - end - generate for (genvar g = 0; g < NumGroups; g++) begin for (genvar t = 0; t < NumTilesPerGroup; t++) begin