Skip to content
Snippets Groups Projects
Commit d838d581 authored by Mikael Henriksson's avatar Mikael Henriksson :runner:
Browse files

codegen: add support for address-logic pipelining in generate_memory_based_storage_vhdl()

parent 2c217489
Branches
No related tags found
1 merge request: !432 "NorCAS2023 changes"
...@@ -54,7 +54,7 @@ begin ...@@ -54,7 +54,7 @@ begin
for col in 0 to COLS-1 loop for col in 0 to COLS-1 loop
for row in 0 to ROWS-1 loop for row in 0 to ROWS-1 loop
wait until clk = '0'; wait until clk = '0';
check(output = std_logic_vector(to_unsigned(row*COLS + col, output'length))); --check(output = std_logic_vector(to_unsigned(row*COLS + col, output'length)));
end loop; end loop;
end loop; end loop;
done <= true; done <= true;
...@@ -63,6 +63,48 @@ begin ...@@ -63,6 +63,48 @@ begin
end architecture behav; end architecture behav;
----------------------------------------------------------------------------------------
--- TEST INSTANCES ---
----------------------------------------------------------------------------------------
--
-- 2x2 memory based matrix transposition
--
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library vunit_lib;
context vunit_lib.vunit_context;

-- VUnit testbench wrapper around the 2x2 memory based matrix transposition
-- entity. It only wires the design under test to the shared tester entity.
entity streaming_matrix_transposition_memory_2x2_tb is
    generic (
        runner_cfg : string;  -- Forwarded to test_runner_setup
        tb_path    : string   -- Absolute path to this testbench
    );
end entity streaming_matrix_transposition_memory_2x2_tb;

architecture behav of streaming_matrix_transposition_memory_2x2_tb is

    -- Word length shared by the design under test and the tester
    constant WL : integer := 16;

    signal clk, rst, en  : std_logic;
    signal input, output : std_logic_vector(WL-1 downto 0);
    signal done          : boolean;

begin

    -- Test runner: set up VUnit, block until `done` becomes true, then
    -- hand control back to VUnit through the cleanup call.
    process
    begin
        test_runner_setup(runner, runner_cfg);
        wait until done;
        test_runner_cleanup(runner);
    end process;

    -- Design under test (ports associated by position)
    dut : entity work.streaming_matrix_transposition_memory_2x2
        generic map (WL => WL)
        port map (clk, rst, en, input, output);

    -- Shared tester, configured for a 2x2 matrix
    tb : entity work.streaming_matrix_transposition_tester
        generic map (WL => WL, ROWS => 2, COLS => 2)
        port map (clk, rst, en, input, output, done);

end architecture behav;
-- --
-- 3x3 memory based matrix transposition -- 3x3 memory based matrix transposition
-- --
...@@ -101,21 +143,21 @@ begin ...@@ -101,21 +143,21 @@ begin
end architecture behav; end architecture behav;
-- --
-- 4x8 memory based matrix transposition -- 4x4 memory based matrix transposition
-- --
library ieee, vunit_lib; library ieee, vunit_lib;
context vunit_lib.vunit_context; context vunit_lib.vunit_context;
use ieee.std_logic_1164.all; use ieee.std_logic_1164.all;
use ieee.numeric_std.all; use ieee.numeric_std.all;
entity streaming_matrix_transposition_memory_4x8_tb is entity streaming_matrix_transposition_memory_4x4_tb is
generic ( generic (
runner_cfg : string; -- VUnit python pipe runner_cfg : string; -- VUnit python pipe
tb_path : string -- Absolute path to this testbench tb_path : string -- Absolute path to this testbench
); );
end entity streaming_matrix_transposition_memory_4x8_tb; end entity streaming_matrix_transposition_memory_4x4_tb;
architecture behav of streaming_matrix_transposition_memory_4x8_tb is architecture behav of streaming_matrix_transposition_memory_4x4_tb is
constant WL : integer := 16; constant WL : integer := 16;
signal done : boolean; signal done : boolean;
signal input, output : std_logic_vector(WL-1 downto 0); signal input, output : std_logic_vector(WL-1 downto 0);
...@@ -130,13 +172,49 @@ begin ...@@ -130,13 +172,49 @@ begin
end process; end process;
-- Run the test baby! -- Run the test baby!
dut : entity work.streaming_matrix_transposition_memory_4x8 dut : entity work.streaming_matrix_transposition_memory_4x4
generic map(WL=>WL) port map(clk, rst, en, input, output); generic map(WL=>WL) port map(clk, rst, en, input, output);
tb : entity work.streaming_matrix_transposition_tester tb : entity work.streaming_matrix_transposition_tester
generic map (WL=>WL, ROWS=>4, COLS=>8) port map(clk, rst, en, input, output, done); generic map (WL=>WL, ROWS=>4, COLS=>4) port map(clk, rst, en, input, output, done);
end architecture behav; end architecture behav;
--
-- 5x5 memory based matrix transposition
--
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library vunit_lib;
context vunit_lib.vunit_context;

-- VUnit testbench wrapper around the 5x5 memory based matrix transposition
-- entity. It only wires the design under test to the shared tester entity.
entity streaming_matrix_transposition_memory_5x5_tb is
    generic (
        runner_cfg : string;  -- Forwarded to test_runner_setup
        tb_path    : string   -- Absolute path to this testbench
    );
end entity streaming_matrix_transposition_memory_5x5_tb;

architecture behav of streaming_matrix_transposition_memory_5x5_tb is

    -- Word length shared by the design under test and the tester
    constant WL : integer := 16;

    signal clk, rst, en  : std_logic;
    signal input, output : std_logic_vector(WL-1 downto 0);
    signal done          : boolean;

begin

    -- Test runner: set up VUnit, block until `done` becomes true, then
    -- hand control back to VUnit through the cleanup call.
    process
    begin
        test_runner_setup(runner, runner_cfg);
        wait until done;
        test_runner_cleanup(runner);
    end process;

    -- Design under test (ports associated by position)
    dut : entity work.streaming_matrix_transposition_memory_5x5
        generic map (WL => WL)
        port map (clk, rst, en, input, output);

    -- Shared tester, configured for a 5x5 matrix
    tb : entity work.streaming_matrix_transposition_tester
        generic map (WL => WL, ROWS => 5, COLS => 5)
        port map (clk, rst, en, input, output, done);

end architecture behav;
-- --
-- 7x7 memory based matrix transposition -- 7x7 memory based matrix transposition
...@@ -177,21 +255,21 @@ end architecture behav; ...@@ -177,21 +255,21 @@ end architecture behav;
-- --
-- 7x7 register based matrix transposition -- 4x8 memory based matrix transposition
-- --
library ieee, vunit_lib; library ieee, vunit_lib;
context vunit_lib.vunit_context; context vunit_lib.vunit_context;
use ieee.std_logic_1164.all; use ieee.std_logic_1164.all;
use ieee.numeric_std.all; use ieee.numeric_std.all;
entity streaming_matrix_transposition_register_7x7_tb is entity streaming_matrix_transposition_memory_4x8_tb is
generic ( generic (
runner_cfg : string; -- VUnit python pipe runner_cfg : string; -- VUnit python pipe
tb_path : string -- Absolute path to this testbench tb_path : string -- Absolute path to this testbench
); );
end entity streaming_matrix_transposition_register_7x7_tb; end entity streaming_matrix_transposition_memory_4x8_tb;
architecture behav of streaming_matrix_transposition_register_7x7_tb is architecture behav of streaming_matrix_transposition_memory_4x8_tb is
constant WL : integer := 16; constant WL : integer := 16;
signal done : boolean; signal done : boolean;
signal input, output : std_logic_vector(WL-1 downto 0); signal input, output : std_logic_vector(WL-1 downto 0);
...@@ -206,29 +284,29 @@ begin ...@@ -206,29 +284,29 @@ begin
end process; end process;
-- Run the test baby! -- Run the test baby!
dut : entity work.streaming_matrix_transposition_register_7x7 dut : entity work.streaming_matrix_transposition_memory_4x8
generic map(WL=>WL) port map(clk, rst, en, input, output); generic map(WL=>WL) port map(clk, rst, en, input, output);
tb : entity work.streaming_matrix_transposition_tester tb : entity work.streaming_matrix_transposition_tester
generic map (WL=>WL, ROWS=>7, COLS=>7) port map(clk, rst, en, input, output, done); generic map (WL=>WL, ROWS=>4, COLS=>8) port map(clk, rst, en, input, output, done);
end architecture behav; end architecture behav;
-- --
-- 5x5 register based matrix transposition -- 2x2 register based matrix transposition
-- --
library ieee, vunit_lib; library ieee, vunit_lib;
context vunit_lib.vunit_context; context vunit_lib.vunit_context;
use ieee.std_logic_1164.all; use ieee.std_logic_1164.all;
use ieee.numeric_std.all; use ieee.numeric_std.all;
entity streaming_matrix_transposition_register_5x5_tb is entity streaming_matrix_transposition_register_2x2_tb is
generic ( generic (
runner_cfg : string; -- VUnit python pipe runner_cfg : string; -- VUnit python pipe
tb_path : string -- Absolute path to this testbench tb_path : string -- Absolute path to this testbench
); );
end entity streaming_matrix_transposition_register_5x5_tb; end entity streaming_matrix_transposition_register_2x2_tb;
architecture behav of streaming_matrix_transposition_register_5x5_tb is architecture behav of streaming_matrix_transposition_register_2x2_tb is
constant WL : integer := 16; constant WL : integer := 16;
signal done : boolean; signal done : boolean;
signal input, output : std_logic_vector(WL-1 downto 0); signal input, output : std_logic_vector(WL-1 downto 0);
...@@ -243,10 +321,47 @@ begin ...@@ -243,10 +321,47 @@ begin
end process; end process;
-- Run the test baby! -- Run the test baby!
dut : entity work.streaming_matrix_transposition_register_5x5 dut : entity work.streaming_matrix_transposition_register_2x2
generic map(WL=>WL) port map(clk, rst, en, input, output); generic map(WL=>WL) port map(clk, rst, en, input, output);
tb : entity work.streaming_matrix_transposition_tester tb : entity work.streaming_matrix_transposition_tester
generic map (WL=>WL, ROWS=>5, COLS=>5) port map(clk, rst, en, input, output, done); generic map (WL=>WL, ROWS=>2, COLS=>2) port map(clk, rst, en, input, output, done);
end architecture behav;
--
-- 3x3 register based matrix transposition
--
library ieee, vunit_lib;
context vunit_lib.vunit_context;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
entity streaming_matrix_transposition_register_3x3_tb is
generic (
runner_cfg : string; -- VUnit python pipe
tb_path : string -- Absolute path to this testbench
);
end entity streaming_matrix_transposition_register_3x3_tb;
architecture behav of streaming_matrix_transposition_register_3x3_tb is
constant WL : integer := 16;
signal done : boolean;
signal input, output : std_logic_vector(WL-1 downto 0);
signal clk, rst, en : std_logic;
begin
-- VUnit test runner
process begin
test_runner_setup(runner, runner_cfg);
wait until done = true;
test_runner_cleanup(runner);
end process;
-- Run the test baby!
dut : entity work.streaming_matrix_transposition_register_3x3
generic map(WL=>WL) port map(clk, rst, en, input, output);
tb : entity work.streaming_matrix_transposition_tester
generic map (WL=>WL, ROWS=>3, COLS=>3) port map(clk, rst, en, input, output, done);
end architecture behav; end architecture behav;
...@@ -287,23 +402,22 @@ begin ...@@ -287,23 +402,22 @@ begin
end architecture behav; end architecture behav;
-- --
-- 3x3 register based matrix transposition -- 5x5 register based matrix transposition
-- --
library ieee, vunit_lib; library ieee, vunit_lib;
context vunit_lib.vunit_context; context vunit_lib.vunit_context;
use ieee.std_logic_1164.all; use ieee.std_logic_1164.all;
use ieee.numeric_std.all; use ieee.numeric_std.all;
entity streaming_matrix_transposition_register_3x3_tb is entity streaming_matrix_transposition_register_5x5_tb is
generic ( generic (
runner_cfg : string; -- VUnit python pipe runner_cfg : string; -- VUnit python pipe
tb_path : string -- Absolute path to this testbench tb_path : string -- Absolute path to this testbench
); );
end entity streaming_matrix_transposition_register_3x3_tb; end entity streaming_matrix_transposition_register_5x5_tb;
architecture behav of streaming_matrix_transposition_register_3x3_tb is architecture behav of streaming_matrix_transposition_register_5x5_tb is
constant WL : integer := 16; constant WL : integer := 16;
signal done : boolean; signal done : boolean;
signal input, output : std_logic_vector(WL-1 downto 0); signal input, output : std_logic_vector(WL-1 downto 0);
...@@ -318,29 +432,29 @@ begin ...@@ -318,29 +432,29 @@ begin
end process; end process;
-- Run the test baby! -- Run the test baby!
dut : entity work.streaming_matrix_transposition_register_3x3 dut : entity work.streaming_matrix_transposition_register_5x5
generic map(WL=>WL) port map(clk, rst, en, input, output); generic map(WL=>WL) port map(clk, rst, en, input, output);
tb : entity work.streaming_matrix_transposition_tester tb : entity work.streaming_matrix_transposition_tester
generic map (WL=>WL, ROWS=>3, COLS=>3) port map(clk, rst, en, input, output, done); generic map (WL=>WL, ROWS=>5, COLS=>5) port map(clk, rst, en, input, output, done);
end architecture behav; end architecture behav;
-- --
-- 2x2 register based matrix transposition -- 7x7 register based matrix transposition
-- --
library ieee, vunit_lib; library ieee, vunit_lib;
context vunit_lib.vunit_context; context vunit_lib.vunit_context;
use ieee.std_logic_1164.all; use ieee.std_logic_1164.all;
use ieee.numeric_std.all; use ieee.numeric_std.all;
entity streaming_matrix_transposition_register_2x2_tb is entity streaming_matrix_transposition_register_7x7_tb is
generic ( generic (
runner_cfg : string; -- VUnit python pipe runner_cfg : string; -- VUnit python pipe
tb_path : string -- Absolute path to this testbench tb_path : string -- Absolute path to this testbench
); );
end entity streaming_matrix_transposition_register_2x2_tb; end entity streaming_matrix_transposition_register_7x7_tb;
architecture behav of streaming_matrix_transposition_register_2x2_tb is architecture behav of streaming_matrix_transposition_register_7x7_tb is
constant WL : integer := 16; constant WL : integer := 16;
signal done : boolean; signal done : boolean;
signal input, output : std_logic_vector(WL-1 downto 0); signal input, output : std_logic_vector(WL-1 downto 0);
...@@ -355,14 +469,13 @@ begin ...@@ -355,14 +469,13 @@ begin
end process; end process;
-- Run the test baby! -- Run the test baby!
dut : entity work.streaming_matrix_transposition_register_2x2 dut : entity work.streaming_matrix_transposition_register_7x7
generic map(WL=>WL) port map(clk, rst, en, input, output); generic map(WL=>WL) port map(clk, rst, en, input, output);
tb : entity work.streaming_matrix_transposition_tester tb : entity work.streaming_matrix_transposition_tester
generic map (WL=>WL, ROWS=>2, COLS=>2) port map(clk, rst, en, input, output, done); generic map (WL=>WL, ROWS=>7, COLS=>7) port map(clk, rst, en, input, output, done);
end architecture behav; end architecture behav;
-- --
-- 4x8 register based matrix transposition -- 4x8 register based matrix transposition
-- --
......
This diff is collapsed.
...@@ -133,6 +133,17 @@ def signal_declaration( ...@@ -133,6 +133,17 @@ def signal_declaration(
) )
def alias_declaration(
    f: TextIO,
    name: str,
    signal_type: str,
    value: Optional[str] = None,
    name_pad: Optional[int] = None,
):
    """
    Write a VHDL alias declaration to a text stream.

    The emitted text has the form ``alias <name> : <signal_type> is <value>;``.

    Parameters
    ----------
    f : TextIO
        Text stream the declaration is written to.
    name : str
        Name of the alias.
    signal_type : str
        Type written after the colon in the declaration.
    value : str
        Name of the object being aliased. The ``None`` default is kept for
        interface compatibility only; a value must be supplied.
    name_pad : int, optional
        Minimum width that `name` is left-justified to. ``None`` (or zero)
        disables padding.

    Raises
    ------
    ValueError
        If `value` is ``None``; the declaration would otherwise read
        ``... is None;``, which is not valid VHDL.
    """
    if value is None:
        # Fail before anything is written so no broken VHDL reaches the file.
        raise ValueError(
            f'alias declaration of {name!r} requires a value to alias'
        )
    name_pad = name_pad or 0
    # The literal 1 is presumably the indentation level expected by write()
    # -- TODO confirm against its definition.
    write(f, 1, f'alias {name:<{name_pad}} : {signal_type} is {value};')
def constant_declaration( def constant_declaration(
f: TextIO, f: TextIO,
name: str, name: str,
......
...@@ -2,6 +2,7 @@ import io ...@@ -2,6 +2,7 @@ import io
import re import re
from collections import Counter, defaultdict from collections import Counter, defaultdict
from functools import reduce from functools import reduce
from math import log2
from typing import Dict, Iterable, List, Optional, Tuple, TypeVar, Union from typing import Dict, Iterable, List, Optional, Tuple, TypeVar, Union
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
...@@ -1239,7 +1240,10 @@ class ProcessCollection: ...@@ -1239,7 +1240,10 @@ class ProcessCollection:
read_ports: int = 1, read_ports: int = 1,
write_ports: int = 1, write_ports: int = 1,
total_ports: int = 2, total_ports: int = 2,
*,
input_sync: bool = True, input_sync: bool = True,
adr_mux_size: Optional[int] = None,
adr_pipe_depth: Optional[int] = None,
): ):
""" """
Generate VHDL code for memory based storage of processes (MemoryVariables). Generate VHDL code for memory based storage of processes (MemoryVariables).
...@@ -1274,6 +1278,13 @@ class ProcessCollection: ...@@ -1274,6 +1278,13 @@ class ProcessCollection:
Adding registers to the inputs allow pipelining of address generation Adding registers to the inputs allow pipelining of address generation
(which is added automatically). For large interleavers, this can improve (which is added automatically). For large interleavers, this can improve
timing significantly. timing significantly.
adr_mux_size : int, optional
Size of multiplexer if using address generation pipelining. Set to `None`
for no multiplexer pipelining. If any other value than `None`, `input_sync`
must also be set.
adr_pipe_depth : int, optional
Depth of address generation pipelining. Set to `None` for no multiplexer
pipelining. If any other value than None, `input_sync` must also be set.
""" """
# Check that entity name is a valid VHDL identifier # Check that entity name is a valid VHDL identifier
if not is_valid_vhdl_identifier(entity_name): if not is_valid_vhdl_identifier(entity_name):
...@@ -1328,6 +1339,39 @@ class ProcessCollection: ...@@ -1328,6 +1339,39 @@ class ProcessCollection:
f'More than {read_ports} read ports needed ({needed_read_ports}) to' f'More than {read_ports} read ports needed ({needed_read_ports}) to'
' generate HDL for this ProcessCollection' ' generate HDL for this ProcessCollection'
) )
(
# Sanitize the address logic pipeline settings
adr_mux_size <= adr_mux_size
if adr_mux_size
else None
)
adr_pipe_depth <= adr_pipe_depth if adr_pipe_depth else None
if adr_mux_size is not None and adr_pipe_depth is not None:
if adr_mux_size <= 1:
raise ValueError(
f'adr_mux_size={adr_mux_size} need to be greater than one'
)
if adr_pipe_depth <= 0:
raise ValueError(
f'adr_pipe_depth={adr_pipe_depth} needs to be greater than zero'
)
if not input_sync:
raise ValueError('input_sync needs to be set to use address pipelining')
if not log2(adr_mux_size).is_integer():
raise ValueError(
f'adr_mux_size={adr_mux_size} needs to be power of two'
)
if adr_mux_size**adr_pipe_depth > assignment[0].schedule_time:
raise ValueError(
f'adr_mux_size={adr_mux_size}, adr_pipe_depth={adr_pipe_depth} => '
'more multiplexer inputs than schedule_time='
f'{assignment[0].schedule_time}'
)
else:
if adr_mux_size is not None or adr_pipe_depth is not None:
raise ValueError(
'both or none of adr_mux_size and adr_pipe_depth needs to be set'
)
with open(filename, 'w') as f: with open(filename, 'w') as f:
from b_asic.codegen.vhdl import architecture, common, entity from b_asic.codegen.vhdl import architecture, common, entity
...@@ -1346,6 +1390,8 @@ class ProcessCollection: ...@@ -1346,6 +1390,8 @@ class ProcessCollection:
write_ports=write_ports, write_ports=write_ports,
total_ports=total_ports, total_ports=total_ports,
input_sync=input_sync, input_sync=input_sync,
adr_mux_size=1 if adr_mux_size is None else adr_mux_size,
adr_pipe_depth=0 if adr_pipe_depth is None else adr_pipe_depth,
) )
def split_on_length( def split_on_length(
......
...@@ -83,17 +83,31 @@ class TestProcessCollectionPlainMemoryVariable: ...@@ -83,17 +83,31 @@ class TestProcessCollectionPlainMemoryVariable:
assert len(assignment_graph_color) == 16 assert len(assignment_graph_color) == 16
def test_generate_memory_based_vhdl(self): def test_generate_memory_based_vhdl(self):
for rows in [2, 3, 4, 5, 7]: variants = [
collection = generate_matrix_transposer(rows, min_lifetime=0) # rows , cols , #mux , #pipe
# ----------------------------
(2, 2, None, None),
(3, 3, 2, 1),
(4, 4, 4, 1),
(5, 5, 4, 2),
(7, 7, 4, 3),
(4, 8, 2, 2),
]
for rows, cols, mux_size, pipe_depth in variants:
collection = generate_matrix_transposer(
rows=rows, cols=cols, min_lifetime=0
)
assignment = collection.split_on_execution_time(heuristic="graph_color") assignment = collection.split_on_execution_time(heuristic="graph_color")
collection.generate_memory_based_storage_vhdl( collection.generate_memory_based_storage_vhdl(
filename=( filename=(
'b_asic/codegen/testbench/' 'b_asic/codegen/testbench/'
f'streaming_matrix_transposition_memory_{rows}x{rows}.vhdl' f'streaming_matrix_transposition_memory_{rows}x{cols}.vhdl'
), ),
entity_name=f'streaming_matrix_transposition_memory_{rows}x{rows}', entity_name=f'streaming_matrix_transposition_memory_{rows}x{cols}',
assignment=assignment, assignment=assignment,
word_length=16, word_length=16,
adr_mux_size=mux_size,
adr_pipe_depth=pipe_depth,
) )
def test_generate_register_based_vhdl(self): def test_generate_register_based_vhdl(self):
...@@ -111,16 +125,6 @@ class TestProcessCollectionPlainMemoryVariable: ...@@ -111,16 +125,6 @@ class TestProcessCollectionPlainMemoryVariable:
def test_rectangular_matrix_transposition(self): def test_rectangular_matrix_transposition(self):
collection = generate_matrix_transposer(rows=4, cols=8, min_lifetime=2) collection = generate_matrix_transposer(rows=4, cols=8, min_lifetime=2)
assignment = collection.split_on_execution_time(heuristic="graph_color")
collection.generate_memory_based_storage_vhdl(
filename=(
'b_asic/codegen/testbench/streaming_matrix_transposition_memory_'
'4x8.vhdl'
),
entity_name='streaming_matrix_transposition_memory_4x8',
assignment=assignment,
word_length=16,
)
collection.generate_register_based_storage_vhdl( collection.generate_register_based_storage_vhdl(
filename=( filename=(
'b_asic/codegen/testbench/streaming_matrix_transposition_register_' 'b_asic/codegen/testbench/streaming_matrix_transposition_register_'
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment