Skip to content

Commit e4b241a

Browse files
committed
Create wrapper for PULP cluster
1 parent 95035dc commit e4b241a

File tree

2 files changed

+234
-0
lines changed

2 files changed

+234
-0
lines changed

Bender.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,9 @@ sources:
3636
# Level 3
3737
- src/snitch_icache.sv
3838
- src/snitch_read_only_cache.sv
39+
- target: pulp
40+
files:
41+
- src/pulp_icache_wrap.sv
3942
- target: test
4043
files:
4144
- test/snitch_icache_l0_tb.sv

src/pulp_icache_wrap.sv

Lines changed: 231 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,231 @@
1+
// Copyright 2024 ETH Zurich and University of Bologna.
2+
// Solderpad Hardware License, Version 0.51, see LICENSE for details.
3+
// SPDX-License-Identifier: SHL-0.51
4+
5+
// Michael Rogenmoser <michaero@iis.ee.ethz.ch>
6+
7+
`include "common_cells/registers.svh"
8+
9+
/// Wrapper around `snitch_icache` for the PULP cluster.
///
/// Every fetch port is connected to the cache through a small per-port adapter
/// built from `spill_register` instances. Which adapter is generated is selected
/// at elaboration time through the `AdapterType` parameter.
///
/// Porting from hier-icache:
///   Missing: different line width, banks in L1, L0 is fully associative
///     [PRI_FETCH_DATA_WIDTH == SH_FETCH_DATA_WIDTH]
///     [SH_NB_BANKS == 1]
///     [PRI_NB_WAYS == 1]
///     [SH_CACHE_LINE == PRI_CACHE_LINE]
///   NumFetchPorts = NB_CORES
///   L0_LINE_COUNT = SH_CACHE_SIZE/(bytes per line)
///   LINE_WIDTH    = X_CACHE_LINE * DATA_WIDTH
///   LINE_COUNT    = SH_CACHE_SIZE/(bytes per line)
///   SET_COUNT     = SH_NB_WAYS
module pulp_icache_wrap #(
  /// Number of request (fetch) ports
  parameter int NumFetchPorts = -1,
  /// L0 Cache Line Count
  parameter int L0_LINE_COUNT = -1,
  /// Cache Line Width
  parameter int LINE_WIDTH = -1,
  /// The number of cache lines per set. Power of two; >= 2.
  parameter int LINE_COUNT = -1,
  /// The set associativity of the cache. Power of two; >= 1.
  parameter int SET_COUNT = 1,
  /// Fetch interface address width. Same as AxiAddrWidth; >= 1.
  parameter int FetchAddrWidth = -1,
  /// Fetch interface data width. Power of two; >= 8.
  parameter int FetchDataWidth = -1,
  /// Fill interface address width. Same as FetchAddrWidth; >= 1.
  parameter int AxiAddrWidth = -1,
  /// Fill interface data width. Power of two; >= 8.
  parameter int AxiDataWidth = -1,
  /// Configuration input types for memory cuts used in implementation.
  parameter type sram_cfg_data_t = logic,
  parameter type sram_cfg_tag_t = logic,
  /// AXI request/response struct types of the fill interface.
  parameter type axi_req_t = logic,
  parameter type axi_rsp_t = logic,
  /// Per-port adapter between the fetch interface and `snitch_icache`:
  ///   0       : response cut (read data and error go through a spill register)
  ///   1       : request cut  (fetch address goes through a spill register)
  ///   others  : flexible cut (spill registers on both paths, selected by a
  ///             per-port stall state)
  /// Kept as the last parameter so existing positional overrides are unaffected.
  parameter int unsigned AdapterType = 1
) (
  input  logic clk_i,
  input  logic rst_ni,

  // Processor interface (one entry per fetch port)
  input  logic [NumFetchPorts-1:0]                     fetch_req_i,
  input  logic [NumFetchPorts-1:0][FetchAddrWidth-1:0] fetch_addr_i,
  output logic [NumFetchPorts-1:0]                     fetch_gnt_o,
  output logic [NumFetchPorts-1:0]                     fetch_rvalid_o,
  output logic [NumFetchPorts-1:0][FetchDataWidth-1:0] fetch_rdata_o,
  output logic [NumFetchPorts-1:0]                     fetch_rerror_o,

  // Control and status, forwarded unchanged to/from `snitch_icache`
  input  logic                                             enable_prefetching_i,
  output snitch_icache_pkg::icache_events_t [NumFetchPorts-1:0] icache_events_o,
  input  logic [NumFetchPorts-1:0]                         flush_valid_i,
  output logic [NumFetchPorts-1:0]                         flush_ready_o,

  // SRAM configs
  input  sram_cfg_data_t sram_cfg_data_i,
  input  sram_cfg_tag_t  sram_cfg_tag_i,

  // AXI interface
  output axi_req_t axi_req_o,
  input  axi_rsp_t axi_rsp_i
);

  // Valid/ready style signals on the `snitch_icache` side of the adapters.
  logic [NumFetchPorts-1:0]                     fetch_valid, fetch_ready, fetch_rerror;
  logic [NumFetchPorts-1:0][FetchAddrWidth-1:0] fetch_addr;
  logic [NumFetchPorts-1:0][FetchDataWidth-1:0] fetch_rdata;

  for (genvar i = 0; i < NumFetchPorts; i++) begin : gen_adapter
    if (AdapterType == 0) begin : gen_response_cut

      // The request is forwarded combinationally and only the response
      // ({rdata, rerror}) is passed through a spill register.
      // Requires the core to keep its applied data steady while req is high,
      // which may not be guaranteed...
      // NOTE(review): valid_i is driven by fetch_ready alone, whereas
      // gen_request_cut qualifies the response with fetch_valid. Confirm that
      // snitch_icache never raises inst_ready_o without inst_valid_i.
      spill_register #(
        .T     (logic [FetchDataWidth-1+1:0]),
        .Bypass(1'b0)
      ) i_spill_reg (
        .clk_i,
        .rst_ni,
        .valid_i ( fetch_ready   [i]                     ),
        .ready_o ( /* Unconnected as always ready */     ),
        .data_i  ( {fetch_rdata  [i], fetch_rerror  [i]} ),
        .valid_o ( fetch_rvalid_o[i]                     ),
        .ready_i ( '1                                    ),
        .data_o  ( {fetch_rdata_o[i], fetch_rerror_o[i]} )
      );

      assign fetch_addr [i] = fetch_addr_i[i];
      assign fetch_valid[i] = fetch_req_i [i];
      assign fetch_gnt_o[i] = fetch_ready [i];

    end else if (AdapterType == 1) begin : gen_request_cut

      // The fetch address is passed through a spill register; the response is
      // forwarded combinationally from the cache.
      logic gnt;

      // Only report a grant while the core is actually requesting.
      assign fetch_gnt_o[i] = gnt & fetch_req_i[i];

      spill_register #(
        .T     (logic [FetchAddrWidth-1:0]),
        .Bypass(1'b0)
      ) i_spill_reg (
        .clk_i,
        .rst_ni,
        .valid_i ( fetch_req_i [i] ),
        .ready_o ( gnt             ),
        .data_i  ( fetch_addr_i[i] ),
        .valid_o ( fetch_valid [i] ),
        .ready_i ( fetch_ready [i] ),
        .data_o  ( fetch_addr  [i] )
      );

      assign fetch_rdata_o [i] = fetch_rdata [i];
      assign fetch_rerror_o[i] = fetch_rerror[i];
      // The response is valid when the cache-side handshake completes.
      assign fetch_rvalid_o[i] = fetch_ready [i] & fetch_valid[i];

    end else begin : gen_flexible_cut
      // This can still be improved, there is still an extra stall cycle sometimes AFAIK...
      //
      // Both a request and a response spill register are instantiated. The
      // per-port state `stalled_q` selects which of the two is in the path:
      //   stalled_q == 0 : request forwarded directly, response from the
      //                    response spill register
      //   stalled_q == 1 : request from the request spill register, response
      //                    forwarded directly

      logic stalled_d, stalled_q;

      logic                      spill_valid, spill_ready;
      logic [FetchAddrWidth-1:0] spill_addr;

      // NOTE(review): spill_ready has no driver inside this generate block, so
      // ready_i of the request spill register is undriven as written. Confirm
      // the intended source (possibly fetch_ready[i]).
      spill_register #(
        .T     (logic [FetchAddrWidth-1:0]),
        .Bypass(1'b0)
      ) i_req_spill_reg (
        .clk_i,
        .rst_ni,
        .valid_i ( fetch_req_i [i] ),
        .ready_o ( fetch_gnt_o [i] ),
        .data_i  ( fetch_addr_i[i] ),
        .valid_o ( spill_valid     ),
        .ready_i ( spill_ready     ),
        .data_o  ( spill_addr      )
      );

      always_comb begin
        // Keep steady state
        stalled_d = stalled_q;

        // If already stalled
        if (stalled_q) begin
          // only revert back to unstalled state with sufficient gap
          if (!spill_valid && !fetch_req_i[i])
            stalled_d = 1'b0;
        end else begin
          // A request that the cache does not accept right away enters the
          // stalled state.
          if (fetch_req_i[i] && !fetch_ready[i])
            stalled_d = 1'b1;
        end
      end
      `FF(stalled_q, stalled_d, '0)

      assign fetch_valid[i] = stalled_q ? spill_valid : fetch_req_i[i];
      assign fetch_addr [i] = stalled_q ? spill_addr  : fetch_addr_i[i];

      logic                      spill_rvalid;
      logic                      spill_rerror;
      logic [FetchDataWidth-1:0] spill_rdata;

      // NOTE(review): as in gen_response_cut, valid_i is fetch_ready alone and
      // is not qualified with fetch_valid; confirm against snitch_icache.
      spill_register #(
        .T     (logic [FetchDataWidth-1+1:0]),
        .Bypass(1'b0)
      ) i_rsp_spill_reg (
        .clk_i,
        .rst_ni,
        .valid_i ( fetch_ready [i]                   ),
        .ready_o ( /* Unconnected as always ready */ ),
        .data_i  ( {fetch_rdata[i], fetch_rerror[i]} ),
        .valid_o ( spill_rvalid                      ),
        .ready_i ( '1                                ),
        .data_o  ( {spill_rdata   , spill_rerror   } )
      );

      assign fetch_rvalid_o[i] = stalled_q ? fetch_ready [i] : spill_rvalid;
      assign fetch_rdata_o [i] = stalled_q ? fetch_rdata [i] : spill_rdata;
      assign fetch_rerror_o[i] = stalled_q ? fetch_rerror[i] : spill_rerror;

    end
  end

  // The wrapped cache. Geometry and interface widths are passed through from
  // the wrapper parameters; the remaining options are fixed here.
  snitch_icache #(
    .NR_FETCH_PORTS     ( NumFetchPorts   ),
    .L0_LINE_COUNT      ( L0_LINE_COUNT   ),
    .LINE_WIDTH         ( LINE_WIDTH      ),
    .LINE_COUNT         ( LINE_COUNT      ),
    .SET_COUNT          ( SET_COUNT       ),
    .FETCH_AW           ( FetchAddrWidth  ),
    .FETCH_DW           ( FetchDataWidth  ),
    .FILL_AW            ( AxiAddrWidth    ),
    .FILL_DW            ( AxiDataWidth    ),
    .L1_TAG_SCM         ( 1               ),
    .SERIAL_LOOKUP      ( 1               ),
    .NUM_AXI_OUTSTANDING( 4               ),
    .EARLY_LATCH        ( 0               ),
    .ISO_CROSSING       ( 0               ),
    .sram_cfg_data_t    ( sram_cfg_data_t ),
    .sram_cfg_tag_t     ( sram_cfg_tag_t  ),
    .axi_req_t          ( axi_req_t       ),
    .axi_rsp_t          ( axi_rsp_t       )
  ) i_snitch_icache (
    .clk_i,
    // Both clock inputs of the cache are driven from the same clock.
    .clk_d2_i         ( clk_i ),
    .rst_ni,

    .enable_prefetching_i,
    .icache_events_o,
    .flush_valid_i,
    .flush_ready_o,

    .inst_addr_i      ( fetch_addr            ),
    .inst_data_o      ( fetch_rdata           ),
    // Every fetch port is marked cacheable.
    .inst_cacheable_i ( {NumFetchPorts{1'b1}} ),
    .inst_valid_i     ( fetch_valid           ),
    .inst_ready_o     ( fetch_ready           ),
    .inst_error_o     ( fetch_rerror          ),

    .sram_cfg_data_i,
    .sram_cfg_tag_i,

    .axi_req_o,
    .axi_rsp_i
  );

endmodule

0 commit comments

Comments
 (0)