Skip to content

Commit ba75079

Browse files
committed
hw: Add SPM mode, to substitute L1 I$ with cacheable SPM
- Non-cacheable regions still bypass both L0 and L1, generating an AXI transaction which brings an instruction directly into the core IF
- Cacheable regions exploit the L0 prefetcher normally
- If L1 is enabled, L0 misses cause a fetch from L1, which in turn can refill from AXI
- If SPM mode is enabled, the L1 I$ is not instantiated, and L0 misses directly generate an AXI request
1 parent 64e21ae commit ba75079

File tree

3 files changed

+249
-3
lines changed

3 files changed

+249
-3
lines changed

Bender.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ sources:
2525
- src/snitch_icache_pkg.sv
2626
- src/riscv_instr_branch.sv
2727
- src/multi_accept_rr_arb.sv
28+
- src/snitch_icache_l0_to_axi.sv
2829
# Level 1
2930
- src/snitch_axi_to_cache.sv
3031
- src/snitch_icache_l0.sv

src/snitch_icache.sv

Lines changed: 104 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,15 +26,17 @@ module snitch_icache import snitch_icache_pkg::*; #(
2626
parameter int unsigned FILL_AW = -1,
2727
/// Fill interface data width. Power of two; >= 8.
2828
parameter int unsigned FILL_DW = -1,
29+
/// Instruction SPM size in kB. If > 0, bypasses L1 entirely
30+
parameter int unsigned InstrSpmSize = 0,
2931
/// Allow fetches to have priority over prefetches for L0 to L1
3032
parameter bit FETCH_PRIORITY = 1'b0,
3133
/// Merge L0-L1 fetches if requesting the same address
3234
parameter bit MERGE_FETCHES = 1'b0,
33-
/// Serialize the L1 lookup (parallel tag/data lookup by default)
35+
/// Serialize the L1 lookup (parallel tag/data lookup by default) - UNUSED when InstrSpmSize > 0
3436
parameter bit SERIAL_LOOKUP = 0,
35-
/// Replace the L1 tag banks with latch-based SCM.
37+
/// Replace the L1 tag banks with latch-based SCM. - UNUSED when InstrSpmSize > 0
3638
parameter bit L1_TAG_SCM = 0,
37-
/// Number of pending response beats for the L1 cache.
39+
/// Number of pending response beats for the L1 cache. - UNUSED when InstrSpmSize > 0
3840
parameter int unsigned NUM_AXI_OUTSTANDING = 2,
3941
/// This reduces area impact at the cost of
4042
/// increased hassle of having latches in
@@ -78,6 +80,7 @@ module snitch_icache import snitch_icache_pkg::*; #(
7880
input sram_cfg_data_t sram_cfg_data_i,
7981
input sram_cfg_tag_t sram_cfg_tag_i,
8082

83+
// AXI master interface (for L0 prefetchers when InstrSpmSize > 0, or L1 refill otherwise)
8184
output axi_req_t axi_req_o,
8285
input axi_rsp_t axi_rsp_i
8386
);
@@ -479,6 +482,102 @@ module snitch_icache import snitch_icache_pkg::*; #(
479482
end
480483
assign prefetch_lookup_rsp_ready = |prefetch_rsp_ready;
481484

485+
// ==============================================
486+
// L0 Prefetcher Backend: SPM mode vs L1 mode
487+
// ==============================================
488+
489+
if (InstrSpmSize > 0) begin : gen_spm_mode
490+
// SPM mode: L0 prefetchers access instruction SPM via AXI (cacheable)
491+
// Bypass path still active for non-cacheable bootrom accesses
492+
// No L1 cache instantiated
// In this branch the prefetch lookup stream is fed straight into the refill
// module (shared with the bypass stream) instead of an L1 lookup/handler.
493+
494+
miss_refill_req_t spm_req;
495+
logic spm_req_valid, spm_req_ready;
496+
497+
// Build SPM request (cacheable path)
// NOTE(review): only the low PENDING_IW bits of the lookup ID are forwarded
// to the refill module; the remaining ID bits are not carried on this path.
498+
assign spm_req.addr = prefetch_lookup_req.addr;
499+
assign spm_req.id = prefetch_lookup_req.id[CFG.PENDING_IW-1:0];
500+
assign spm_req.bypass = 1'b0;
501+
502+
// Store full ID for response reconstruction
// NOTE(review): neither register below has an enable. Assuming the FF macro
// registers its input on every clock edge, spm_id_q simply trails
// prefetch_lookup_req.id by one cycle (it does not hold the ID of the accepted
// request), and spm_id_valid is high only in the cycle after a handshake.
// The response ID mux further down therefore yields '0 unless the refill
// response arrives in exactly that cycle - TODO confirm; an enable on the
// request handshake (or use of refill_rsp.id) looks intended.
503+
logic [CFG.ID_WIDTH-1:0] spm_id_q;
504+
logic spm_id_valid;
505+
506+
`FF(spm_id_q, prefetch_lookup_req.id, '0)
507+
`FF(spm_id_valid, spm_req_valid && spm_req_ready && !spm_req.bypass, 1'b0)
508+
509+
// Arbitrate between cacheable (SPM) and non-cacheable (bootrom) requests
// Input 0 is the SPM (prefetch) request, input 1 is the bypass request.
510+
stream_arbiter #(
511+
.DATA_T ( miss_refill_req_t ),
512+
.N_INP ( 2 )
513+
) i_stream_arbiter_spm_bypass (
514+
.clk_i,
515+
.rst_ni,
516+
.inp_data_i ( {bypass_req_q, spm_req} ),
517+
.inp_valid_i ( {bypass_req_valid_q, spm_req_valid} ),
518+
.inp_ready_o ( {bypass_req_ready_q, spm_req_ready} ),
519+
.oup_data_o ( refill_req ),
520+
.oup_valid_o ( refill_req_valid ),
521+
.oup_ready_i ( refill_req_ready )
522+
);
523+
524+
assign spm_req_valid = prefetch_lookup_req_valid;
525+
assign prefetch_lookup_req_ready = spm_req_ready;
526+
527+
// Use refill module to handle AXI transactions for both paths
528+
snitch_icache_refill #(
529+
.CFG ( CFG ),
530+
.axi_req_t ( axi_req_t ),
531+
.axi_rsp_t ( axi_rsp_t )
532+
) i_refill (
533+
.clk_i,
534+
.rst_ni,
535+
536+
.in_req_addr_i ( refill_req.addr ),
537+
.in_req_id_i ( refill_req.id ),
538+
.in_req_bypass_i ( refill_req.bypass ),
539+
.in_req_valid_i ( refill_req_valid ),
540+
.in_req_ready_o ( refill_req_ready ),
541+
542+
.in_rsp_data_o ( refill_rsp.data ),
543+
.in_rsp_error_o ( refill_rsp.error ),
544+
.in_rsp_id_o ( refill_rsp.id ),
545+
.in_rsp_bypass_o ( refill_rsp.bypass ),
546+
.in_rsp_valid_o ( refill_rsp_valid ),
547+
.in_rsp_ready_i ( refill_rsp_ready ),
548+
549+
.axi_req_o ( axi_req_o ),
550+
.axi_rsp_i ( axi_rsp_i )
551+
);
552+
553+
// Demux responses back to cacheable (L0) and bypass paths
// The select is the response's bypass flag: output 0 is the L0 prefetch
// path, output 1 is the bypass path (matching the arbiter input order).
554+
stream_demux #(
555+
.N_OUP ( 2 )
556+
) i_stream_demux_spm_bypass (
557+
.inp_valid_i ( refill_rsp_valid ),
558+
.inp_ready_o ( refill_rsp_ready ),
559+
560+
.oup_sel_i ( refill_rsp.bypass ),
561+
562+
.oup_valid_o ( {bypass_rsp_valid, prefetch_lookup_rsp_valid} ),
563+
.oup_ready_i ( {bypass_rsp_ready, prefetch_lookup_rsp_ready} )
564+
);
565+
566+
// Route responses - restore full ID for SPM responses
// NOTE(review): the ID comes from spm_id_q/spm_id_valid, not from
// refill_rsp.id; see the note at the registers above.
567+
assign prefetch_lookup_rsp.data = refill_rsp.data;
568+
assign prefetch_lookup_rsp.error = refill_rsp.error;
569+
assign prefetch_lookup_rsp.id = spm_id_valid ? spm_id_q : '0;
570+
assign bypass_rsp = refill_rsp;
571+
572+
// Tie off events and flush
573+
assign icache_l1_events_o = '0;
574+
logic flush_valid;
575+
assign flush_valid = |flush_valid_i;
576+
assign flush_ready_o = {CFG.NR_FETCH_PORTS{flush_valid}}; // Immediate ack, broadcast to every port whenever any port requests a flush
577+
578+
end else begin : gen_l1_mode
579+
// L1 cache mode: Full lookup, handler, and refill logic
580+
482581
/// Tag lookup
483582

484583
// The lookup module contains the actual cache RAMs and performs lookups.
@@ -703,6 +802,8 @@ module snitch_icache import snitch_icache_pkg::*; #(
703802
.axi_rsp_i (axi_rsp_i)
704803
);
705804

805+
end // gen_l1_mode
806+
706807
endmodule
707808

708809
// Translate register interface to refill requests.

src/snitch_icache_l0_to_axi.sv

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
// Copyright 2025 ETH Zurich and University of Bologna.
2+
// Solderpad Hardware License, Version 0.51, see LICENSE for details.
3+
// SPDX-License-Identifier: SHL-0.51
4+
//
5+
// L0 Prefetcher to AXI Adapter (SPM Mode)
6+
// Directly converts L0 prefetch requests to AXI burst reads
7+
// Bypasses L1 cache completely when instruction SPM is present
8+
9+
`include "common_cells/registers.svh"
10+
11+
module snitch_icache_l0_to_axi import snitch_icache_pkg::*; #(
12+
parameter snitch_icache_pkg::config_t CFG = '0,
13+
parameter type axi_req_t = logic,
14+
parameter type axi_rsp_t = logic
15+
) (
16+
input logic clk_i,
17+
input logic rst_ni,
18+
19+
// L0 prefetcher interface
// Request side (in_*): valid/ready handshake carrying address and ID.
// in_ready_o is only asserted once the response has been taken on out_*.
20+
input logic [CFG.FETCH_AW-1:0] in_addr_i,
21+
input logic [CFG.ID_WIDTH-1:0] in_id_i,
22+
input logic in_valid_i,
23+
output logic in_ready_o,
24+
// Response side (out_*): the assembled line, the request ID and an error flag.
25+
output logic [CFG.ID_WIDTH-1:0] out_id_o,
26+
output logic [CFG.LINE_WIDTH-1:0] out_data_o,
27+
output logic out_error_o,
28+
output logic out_valid_o,
29+
input logic out_ready_i,
30+
31+
// AXI master interface
// Only the AR and R channels are driven; everything else is left at '0.
32+
output axi_req_t axi_req_o,
33+
input axi_rsp_t axi_rsp_i
34+
);
35+
// Number of AXI beats needed to fill one line, and the beat counter width.
36+
localparam int unsigned AxiDataWidth = CFG.FILL_DW;
37+
localparam int unsigned BeatsPerLine = CFG.LINE_WIDTH / AxiDataWidth;
38+
localparam int unsigned BeatCntWidth = BeatsPerLine > 1 ? $clog2(BeatsPerLine) : 1;
39+
40+
// Align address to cache line boundary
// NOTE(review): derived combinationally from in_addr_i and used in WAIT_AR,
// while the request is only acknowledged in PRESENT; this relies on the
// requester holding in_addr_i stable until in_ready_o - confirm.
41+
logic [CFG.FETCH_AW-1:0] addr_aligned;
42+
assign addr_aligned = {in_addr_i[CFG.FETCH_AW-1:CFG.LINE_ALIGN], {CFG.LINE_ALIGN{1'b0}}};
43+
44+
// State machine
// IDLE -> WAIT_AR -> WAIT_R -> PRESENT -> IDLE; one transaction at a time.
45+
typedef enum logic [1:0] {
46+
IDLE,
47+
WAIT_AR,
48+
WAIT_R,
49+
PRESENT
50+
} state_e;
51+
52+
state_e state_d, state_q;
53+
54+
// Transaction tracking
// ID, accumulated line data, sticky error flag and beat index of the
// transaction currently in flight.
55+
logic [CFG.ID_WIDTH-1:0] trans_id_d, trans_id_q;
56+
logic [CFG.LINE_WIDTH-1:0] trans_data_d, trans_data_q;
57+
logic trans_error_d, trans_error_q;
58+
logic [BeatCntWidth-1:0] beat_cnt_d, beat_cnt_q;
59+
60+
// State machine logic
61+
always_comb begin
// Defaults: hold all registers, deassert handshakes, drive AXI idle.
62+
state_d = state_q;
63+
trans_id_d = trans_id_q;
64+
trans_data_d = trans_data_q;
65+
trans_error_d = trans_error_q;
66+
beat_cnt_d = beat_cnt_q;
67+
68+
in_ready_o = 1'b0;
69+
out_valid_o = 1'b0;
70+
out_id_o = trans_id_q;
71+
out_data_o = trans_data_q;
72+
out_error_o = trans_error_q;
73+
74+
axi_req_o = '0;
75+
76+
case (state_q)
77+
IDLE: begin
78+
if (in_valid_i) begin
79+
// Latch request
// Only the ID is latched; data, error and beat counter are cleared.
80+
trans_id_d = in_id_i;
81+
trans_data_d = '0;
82+
trans_error_d = 1'b0;
83+
beat_cnt_d = '0;
84+
state_d = WAIT_AR;
85+
end
86+
end
87+
88+
WAIT_AR: begin
89+
// Issue AXI AR (read address) transaction
90+
axi_req_o.ar_valid = 1'b1;
91+
axi_req_o.ar.addr = addr_aligned;
92+
axi_req_o.ar.len = BeatsPerLine - 1; // AXI len = beats - 1
93+
axi_req_o.ar.size = $clog2(AxiDataWidth/8); // Bytes per beat
94+
axi_req_o.ar.burst = 2'b01; // INCR burst
95+
axi_req_o.ar.lock = 1'b0;
96+
axi_req_o.ar.cache = 4'b0010; // Normal non-cacheable non-bufferable (ARCACHE 0b0010)
97+
axi_req_o.ar.prot = 3'b100; // Instruction, secure, unprivileged (PROT[2]=1, PROT[1]=0, PROT[0]=0)
98+
axi_req_o.ar.qos = 4'b0000;
99+
axi_req_o.ar.region = 4'b0000;
100+
axi_req_o.ar.id = '0;
101+
axi_req_o.ar.user = '0;
102+
103+
if (axi_rsp_i.ar_ready) begin
104+
state_d = WAIT_R;
105+
end
106+
end
107+
108+
WAIT_R: begin
109+
// Receive AXI R (read data) beats
110+
axi_req_o.r_ready = 1'b1;
111+
112+
if (axi_rsp_i.r_valid) begin
113+
// Accumulate data
// Each beat is written into the slice selected by the beat counter;
// any non-OKAY response marks the whole line as erroneous.
114+
trans_data_d[(beat_cnt_q * AxiDataWidth) +: AxiDataWidth] = axi_rsp_i.r.data;
115+
trans_error_d = trans_error_q | (axi_rsp_i.r.resp != 2'b00);
116+
beat_cnt_d = beat_cnt_q + 1;
117+
118+
if (axi_rsp_i.r.last) begin
119+
state_d = PRESENT;
120+
end
121+
end
122+
end
123+
124+
PRESENT: begin
125+
out_valid_o = 1'b1;
126+
127+
if (out_ready_i) begin
128+
in_ready_o = 1'b1; // Acknowledge original request only once the response is consumed
129+
state_d = IDLE;
130+
end
131+
end
132+
133+
default: state_d = IDLE;
134+
endcase
135+
end
136+
137+
// Registers
138+
`FF(state_q, state_d, IDLE)
139+
`FF(trans_id_q, trans_id_d, '0)
140+
`FF(trans_data_q, trans_data_d, '0)
141+
`FF(trans_error_q, trans_error_d, '0)
142+
`FF(beat_cnt_q, beat_cnt_d, '0)
143+
144+
endmodule

0 commit comments

Comments
 (0)