diff --git a/.clang-format b/.clang-format
index 429ea9a55ee8fa6b0a502e3e1a7cb7c5231e60fc..22e04bab0e95d05981218e51cd6affb85f82a45f 100644
--- a/.clang-format
+++ b/.clang-format
@@ -12,7 +12,7 @@ AllowAllArgumentsOnNextLine: true
 AllowAllConstructorInitializersOnNextLine: false
 AllowAllParametersOfDeclarationOnNextLine: false
 AllowShortBlocksOnASingleLine: Empty
-AllowShortCaseLabelsOnASingleLine: true
+AllowShortCaseLabelsOnASingleLine: false
 AllowShortFunctionsOnASingleLine: Empty
 AllowShortIfStatementsOnASingleLine: Never
 AllowShortLambdasOnASingleLine: Inline
diff --git a/src/simulation/compile.cpp b/src/simulation/compile.cpp
index c0c80480b79707f8267d3cfe4d414560a2a1fe02..31538a87d293ffa109108dac6d8d1107f9c07685 100644
--- a/src/simulation/compile.cpp
+++ b/src/simulation/compile.cpp
@@ -1,406 +1,290 @@
 #include "compile.h"
 
-#include "../debug.h"
 #include "../algorithm.h"
+#include "../debug.h"
 #include "../span.h"
+#include "format_code.h"
 
 #include <Python.h>
-#include <unordered_map>
-#include <string_view>
 #include <fmt/format.h>
-#include <utility>
 #include <limits>
 #include <optional>
+#include <string_view>
+#include <unordered_map>
+#include <utility>
 
 namespace py = pybind11;
- 
-namespace asic {
-
-[[maybe_unused]] [[nodiscard]] static std::string format_number(number const& value) {
-    if (value.imag() == 0) {
-        return fmt::to_string(value.real());
-    }
-    if (value.real() == 0) {
-        return fmt::format("{}j", value.imag());
-    }
-    if (value.imag() < 0) {
-        return fmt::format("{}-{}j", value.real(), -value.imag());
-    }
-    return fmt::format("{}+{}j", value.real(), value.imag());
-}
-
-[[maybe_unused]] [[nodiscard]] static std::string format_compiled_simulation_code_result_keys(simulation_code const& code) {
-    auto result = std::string{};
-    for (auto const& [i, result_key] : enumerate(code.result_keys)) {
-        result += fmt::format("{:>2}: \"{}\"\n", i, result_key);
-    }
-    return result;
-}
-
-[[maybe_unused]] [[nodiscard]] static std::string format_compiled_simulation_code_delays(simulation_code const& code) {
-    auto result = std::string{};
-    for (auto const& [i, delay] : enumerate(code.delays)) {
-        ASIC_ASSERT(delay.result_index < code.result_keys.size());
-        result += fmt::format("{:>2}: Initial value: {}, Result: {}: \"{}\"\n", i, format_number(delay.initial_value), delay.result_index, code.result_keys[delay.result_index]);
-    }
-    return result;
-}
 
-[[maybe_unused]] [[nodiscard]] static std::string format_compiled_simulation_code_instruction(instruction const& instruction) {
-    switch (instruction.type) {
-        case instruction_type::push_input:
-            return fmt::format("push_input inputs[{}]", instruction.index);
-        case instruction_type::push_result:
-            return fmt::format("push_result results[{}]", instruction.index);
-        case instruction_type::push_constant:
-            return fmt::format("push_constant {}", format_number(instruction.value));
-        case instruction_type::truncate:
-            return fmt::format("truncate {:#018x}", instruction.bit_mask);
-        case instruction_type::addition:
-            return "addition";
-        case instruction_type::subtraction:
-            return "subtraction";
-        case instruction_type::multiplication:
-            return "multiplication";
-        case instruction_type::division:
-            return "division";
-        case instruction_type::min:
-            return "min";
-        case instruction_type::max:
-            return "max";
-        case instruction_type::square_root:
-            return "square_root";
-        case instruction_type::complex_conjugate:
-            return "complex_conjugate";
-        case instruction_type::absolute:
-            return "absolute";
-        case instruction_type::constant_multiplication:
-            return fmt::format("constant_multiplication {}", format_number(instruction.value));
-        case instruction_type::delay:
-            return fmt::format("delay delays[{}]", instruction.index);
-        case instruction_type::custom:
-            return fmt::format("custom custom_sources[{}]", instruction.index);
-        case instruction_type::forward_value:
-            return "forward_value";
-    }
-    return std::string{};
-}
-
-[[maybe_unused]] [[nodiscard]] static std::string format_compiled_simulation_code_instructions(simulation_code const& code) {
-    auto result = std::string{};
-    for (auto const& [i, instruction] : enumerate(code.instructions)) {
-        auto instruction_string = format_compiled_simulation_code_instruction(instruction);
-        if (instruction.result_index < code.result_keys.size()) {
-            instruction_string = fmt::format("{:<26} -> {}: \"{}\"", instruction_string, instruction.result_index, code.result_keys[instruction.result_index]);
-        }
-        result += fmt::format("{:>2}: {}\n", i, instruction_string);
-    }
-    return result;
-}
-
-[[maybe_unused]] [[nodiscard]] static std::string format_compiled_simulation_code(simulation_code const& code) {
-    return fmt::format(
-        "==============================================\n"
-        "> Code stats\n"
-        "==============================================\n"
-        "Input count: {}\n"
-        "Output count: {}\n"
-        "Instruction count: {}\n"
-        "Required stack size: {}\n"
-        "Delay count: {}\n"
-        "Result count: {}\n"
-        "Custom operation count: {}\n"
-        "Custom source count: {}\n"
-        "==============================================\n"
-        "> Delays\n"
-        "==============================================\n"
-        "{}"
-        "==============================================\n"
-        "> Result keys\n"
-        "==============================================\n"
-        "{}"
-        "==============================================\n"
-        "> Instructions\n"
-        "==============================================\n"
-        "{}"
-        "==============================================",
-        code.input_count,
-        code.output_count,
-        code.instructions.size(),
-        code.required_stack_size,
-        code.delays.size(),
-        code.result_keys.size(),
-        code.custom_operations.size(),
-        code.custom_sources.size(),
-        format_compiled_simulation_code_delays(code),
-        format_compiled_simulation_code_result_keys(code),
-        format_compiled_simulation_code_instructions(code)
-    );
-}
+namespace asic {
 
 [[nodiscard]] static result_key key_base(py::handle op, std::string_view prefix) {
-    auto const graph_id = op.attr("graph_id").cast<std::string_view>();
-    return (prefix.empty()) ? result_key{graph_id} : fmt::format("{}.{}", prefix, graph_id);
+	auto const graph_id = op.attr("graph_id").cast<std::string_view>(); // Read from the Python operation object.
+	return (prefix.empty()) ? result_key{graph_id} : fmt::format("{}.{}", prefix, graph_id); // "<prefix>.<graph_id>", or just the graph_id when there is no prefix.
 }
 
 [[nodiscard]] static result_key key_of_output(py::handle op, std::size_t output_index, std::string_view prefix) {
-    auto const base = key_base(op, prefix);
-    if (base.empty()) {
-        return fmt::to_string(output_index);
-    }
-    if (op.attr("output_count").cast<std::size_t>() == 1) {
-        return base;
-    }
-    return fmt::format("{}.{}", base, output_index);
+	auto const base = key_base(op, prefix);
+	if (base.empty()) { // Neither a prefix nor a graph_id: the output index alone is the key.
+		return fmt::to_string(output_index);
+	}
+	if (op.attr("output_count").cast<std::size_t>() == 1) { // A sole output is keyed by the operation's base key.
+		return base;
+	}
+	return fmt::format("{}.{}", base, output_index); // Otherwise "<base>.<output_index>".
 }
 
 class compiler final {
 public:
-    simulation_code compile(pybind11::handle sfg) {
-        ASIC_DEBUG_MSG("Compiling code...");
-        this->initialize_code(sfg.attr("input_count").cast<std::size_t>(), sfg.attr("output_count").cast<std::size_t>());
-        for (auto const i : range(m_code.output_count)) {
-            this->add_operation_output(sfg, i, std::string_view{}, sfg_info_stack{});
-        }
-        this->resolve_invalid_result_indices();
-        ASIC_DEBUG_MSG("Compiled code:\n{}\n", format_compiled_simulation_code(m_code));
-        return std::move(m_code);
-    }
+	simulation_code compile(pybind11::handle sfg) { // Compiles the given SFG object into simulation_code and returns it.
+		ASIC_DEBUG_MSG("Compiling code...");
+		this->initialize_code(sfg.attr("input_count").cast<std::size_t>(), sfg.attr("output_count").cast<std::size_t>());
+		for (auto const i : range(m_code.output_count)) {
+			this->add_operation_output(sfg, i, std::string_view{}, sfg_info_stack{}); // Top level: no key prefix and an empty SFG stack.
+		}
+		this->resolve_invalid_result_indices();
+		ASIC_DEBUG_MSG("Compiled code:\n{}\n", format_compiled_simulation_code(m_code));
+		return std::move(m_code); // m_code is a member, so it has to be moved out explicitly; it is left in a moved-from state.
+	}
 
 private:
-    struct sfg_info final {
-        py::handle sfg;
-        std::size_t prefix_length;
-
-        sfg_info(py::handle sfg, std::size_t prefix_length)
-            : sfg(sfg)
-            , prefix_length(prefix_length) {}
-
-        [[nodiscard]] std::size_t find_input_operation_index(py::handle op) const {
-            for (auto const& [i, in] : enumerate(sfg.attr("input_operations"))) {
-                if (in.is(op)) {
-                    return i;
-                }
-            }
-            throw py::value_error{"Stray Input operation in simulation SFG"};
-        }
-    };
-
-    using sfg_info_stack = std::vector<sfg_info>;
-    using added_output_cache = std::unordered_set<PyObject const*>;
-    using added_result_cache = std::unordered_map<PyObject const*, result_index_t>;
-    using added_custom_operation_cache = std::unordered_map<PyObject const*, std::size_t>;
-
-    static constexpr auto no_result_index = std::numeric_limits<result_index_t>::max();
-
-    void initialize_code(std::size_t input_count, std::size_t output_count) {
-        m_code.required_stack_size = 0;
-        m_code.input_count = input_count;
-        m_code.output_count = output_count;
-    }
-
-    void resolve_invalid_result_indices() {
-        for (auto& instruction : m_code.instructions) {
-            if (instruction.result_index == no_result_index) {
-                instruction.result_index = m_code.result_keys.size();
-            }
-        }
-    }
-
-    [[nodiscard]] static sfg_info_stack push_sfg(sfg_info_stack const& sfg_stack, py::handle sfg, std::size_t prefix_length) {
-        auto const new_size = static_cast<std::size_t>(sfg_stack.size() + 1);
-        auto new_sfg_stack = sfg_info_stack{};
-        new_sfg_stack.reserve(new_size);
-        for (auto const& info : sfg_stack) {
-            new_sfg_stack.push_back(info);
-        }
-        new_sfg_stack.emplace_back(sfg, prefix_length);
-        return new_sfg_stack;
-    }
-
-    [[nodiscard]] static sfg_info_stack pop_sfg(sfg_info_stack const& sfg_stack) {
-        ASIC_ASSERT(!sfg_stack.empty());
-        auto const new_size = static_cast<std::size_t>(sfg_stack.size() - 1);
-        auto new_sfg_stack = sfg_info_stack{};
-        new_sfg_stack.reserve(new_size);
-        for (auto const& info : span{sfg_stack}.first(new_size)) {
-            new_sfg_stack.push_back(info);
-        }
-        return new_sfg_stack;
-    }
-
-    instruction& add_instruction(instruction_type type, result_index_t result_index, std::ptrdiff_t stack_diff) {
-        m_stack_depth += stack_diff;
-        if (m_stack_depth < 0) {
-            throw py::value_error{"Detected input/output count mismatch in simulation SFG"};
-        }
-        if (auto const stack_size = static_cast<std::size_t>(m_stack_depth); stack_size > m_code.required_stack_size) {
-            m_code.required_stack_size = stack_size;
-        }
-        auto& instruction = m_code.instructions.emplace_back();
-        instruction.type = type;
-        instruction.result_index = result_index;
-        return instruction;
-    }
-
-    [[nodiscard]] std::optional<result_index_t> begin_operation_output(py::handle op, std::size_t output_index, std::string_view prefix) {
-        auto const pointer = op.attr("outputs")[py::int_{output_index}].ptr();
-        if (m_incomplete_outputs.count(pointer) != 0) {
-            // Make sure the output doesn't depend on its own value, unless it's a delay operation.
-            if (op.attr("type_name").cast<std::string_view>() != "t") {
-                throw py::value_error{"Direct feedback loop detected in simulation SFG"};
-            }
-        }
-        // Try to add a new result.
-        auto const [it, inserted] = m_added_results.try_emplace(pointer, static_cast<result_index_t>(m_code.result_keys.size()));
-        if (inserted) {
-            if (m_code.result_keys.size() >= static_cast<std::size_t>(std::numeric_limits<result_index_t>::max())) {
-                throw py::value_error{fmt::format("Simulation SFG requires too many outputs to be stored (limit: {})", std::numeric_limits<result_index_t>::max())};
-            }
-            m_code.result_keys.push_back(key_of_output(op, output_index, prefix));
-            m_incomplete_outputs.insert(pointer);
-            return it->second;
-        }
-        // If the result has already been added, we re-use the old result and
-        // return std::nullopt to indicate that we don't need to add all the required instructions again.
-        this->add_instruction(instruction_type::push_result, it->second, 1).index = static_cast<std::size_t>(it->second);
-        return std::nullopt;
-    }
-
-    void end_operation_output(py::handle op, std::size_t output_index) {
-        auto const pointer = op.attr("outputs")[py::int_{output_index}].ptr();
-        [[maybe_unused]] auto const erased = m_incomplete_outputs.erase(pointer);
-        ASIC_ASSERT(erased == 1);
-    }
-
-    [[nodiscard]] std::size_t try_add_custom_operation(py::handle op) {
-        auto const [it, inserted] = m_added_custom_operations.try_emplace(op.ptr(), m_added_custom_operations.size());
-        if (inserted) {
-            auto& custom_operation = m_code.custom_operations.emplace_back();
-            custom_operation.evaluate_output = op.attr("evaluate_output");
-            custom_operation.input_count = op.attr("input_count").cast<std::size_t>();
-            custom_operation.output_count = op.attr("output_count").cast<std::size_t>();
-        }
-        return it->second;
-    }
-
-    [[nodiscard]] std::size_t add_delay_info(number initial_value, result_index_t result_index) {
-        auto const delay_index = m_code.delays.size();
-        auto& delay = m_code.delays.emplace_back();
-        delay.initial_value = initial_value;
-        delay.result_index = result_index;
-        return delay_index;
-    }
-
-    void add_source(py::handle op, std::size_t input_index, std::string_view prefix, sfg_info_stack const& sfg_stack) {
-        auto const signal = py::object{op.attr("inputs")[py::int_{input_index}].attr("signals")[py::int_{0}]};
-        auto const src = py::handle{signal.attr("source")};
-        auto const operation = py::handle{src.attr("operation")};
-        auto const index = src.attr("index").cast<std::size_t>();
-        this->add_operation_output(operation, index, prefix, sfg_stack);
-        if (!signal.attr("bits").is_none()) {
-            auto const bits = signal.attr("bits").cast<std::size_t>();
-            if (bits > 64) {
-                throw py::value_error{"Cannot truncate to more than 64 bits"};
-            }
-            this->add_instruction(instruction_type::truncate, no_result_index, 0).bit_mask = static_cast<std::int64_t>(std::int64_t{1} << bits);
-        }
-    }
-
-    void add_unary_operation_output(py::handle op, result_index_t result_index, std::string_view prefix, sfg_info_stack const& sfg_stack, instruction_type type) {
-        this->add_source(op, 0, prefix, sfg_stack);
-        this->add_instruction(type, result_index, 0);
-    }
-
-    void add_binary_operation_output(py::handle op, result_index_t result_index, std::string_view prefix, sfg_info_stack const& sfg_stack, instruction_type type) {
-        this->add_source(op, 0, prefix, sfg_stack);
-        this->add_source(op, 1, prefix, sfg_stack);
-        this->add_instruction(type, result_index, -1);
-    }
-
-    void add_operation_output(py::handle op, std::size_t output_index, std::string_view prefix, sfg_info_stack const& sfg_stack) {
-        auto const type_name = op.attr("type_name").cast<std::string_view>();
-        if (type_name == "out") {
-            this->add_source(op, 0, prefix, sfg_stack);
-        } else if (auto const result_index = this->begin_operation_output(op, output_index, prefix)) {
-            if (type_name == "c") {
-                this->add_instruction(instruction_type::push_constant, *result_index, 1).value = op.attr("value").cast<number>();
-            } else if (type_name == "add") {
-                this->add_binary_operation_output(op, *result_index, prefix, sfg_stack, instruction_type::addition);
-            } else if (type_name == "sub") {
-                this->add_binary_operation_output(op, *result_index, prefix, sfg_stack, instruction_type::subtraction);
-            } else if (type_name == "mul") {
-                this->add_binary_operation_output(op, *result_index, prefix, sfg_stack, instruction_type::multiplication);
-            } else if (type_name == "div") {
-                this->add_binary_operation_output(op, *result_index, prefix, sfg_stack, instruction_type::division);
-            } else if (type_name == "min") {
-                this->add_binary_operation_output(op, *result_index, prefix, sfg_stack, instruction_type::min);
-            } else if (type_name == "max") {
-                this->add_binary_operation_output(op, *result_index, prefix, sfg_stack, instruction_type::max);
-            } else if (type_name == "sqrt") {
-                this->add_unary_operation_output(op, *result_index, prefix, sfg_stack, instruction_type::square_root);
-            } else if (type_name == "conj") {
-                this->add_unary_operation_output(op, *result_index, prefix, sfg_stack, instruction_type::complex_conjugate);
-            } else if (type_name == "abs") {
-                this->add_unary_operation_output(op, *result_index, prefix, sfg_stack, instruction_type::absolute);
-            } else if (type_name == "cmul") {
-                this->add_source(op, 0, prefix, sfg_stack);
-                this->add_instruction(instruction_type::constant_multiplication, *result_index, 0).value = op.attr("value").cast<number>();
-            } else if (type_name == "bfly") {
-                if (output_index == 0) {
-                    this->add_source(op, 0, prefix, sfg_stack);
-                    this->add_source(op, 1, prefix, sfg_stack);
-                    this->add_instruction(instruction_type::addition, *result_index, -1);
-                } else {
-                    this->add_source(op, 0, prefix, sfg_stack);
-                    this->add_source(op, 1, prefix, sfg_stack);
-                    this->add_instruction(instruction_type::subtraction, *result_index, -1);
-                }
-            } else if (type_name == "in") {
-                if (sfg_stack.empty()) {
-                    throw py::value_error{"Encountered Input operation outside SFG in simulation"};
-                }
-                auto const& info = sfg_stack.back();
-                auto const input_index = info.find_input_operation_index(op);
-                if (sfg_stack.size() == 1) {
-                    this->add_instruction(instruction_type::push_input, *result_index, 1).index = input_index;
-                } else {
-                    this->add_source(info.sfg, input_index, prefix.substr(0, info.prefix_length), pop_sfg(sfg_stack));
-                    this->add_instruction(instruction_type::forward_value, *result_index, 0);
-                }
-            } else if (type_name == "t") {
-                auto const delay_index = this->add_delay_info(op.attr("initial_value").cast<number>(), *result_index);
-                this->add_source(op, 0, prefix, sfg_stack);
-                this->add_instruction(instruction_type::delay, *result_index, 0).index = delay_index;
-            } else if (type_name == "sfg") {
-                this->add_source(op.attr("output_operations")[py::int_{output_index}], 0, key_base(op, prefix), push_sfg(sfg_stack, op, prefix.size()));
-                this->add_instruction(instruction_type::forward_value, *result_index, 0);
-            } else {
-                auto const custom_operation_index = this->try_add_custom_operation(op);
-                auto const& custom_operation = m_code.custom_operations[custom_operation_index];
-                for (auto const i : range(custom_operation.input_count)) {
-                    this->add_source(op, i, prefix, sfg_stack);
-                }
-                auto const custom_source_index = m_code.custom_sources.size();
-                auto& custom_source = m_code.custom_sources.emplace_back();
-                custom_source.custom_operation_index = custom_operation_index;
-                custom_source.output_index = output_index;
-                auto const stack_diff = std::ptrdiff_t{1} - static_cast<std::ptrdiff_t>(custom_operation.input_count);
-                this->add_instruction(instruction_type::custom, *result_index, stack_diff).index = custom_source_index;
-            }
-            this->end_operation_output(op, output_index);
-        }
-    }
-
-    simulation_code m_code;
-    added_output_cache m_incomplete_outputs;
-    added_result_cache m_added_results;
-    added_custom_operation_cache m_added_custom_operations;
-    std::ptrdiff_t m_stack_depth = 0;
+	struct sfg_info final { // One level of SFG nesting that is currently being compiled.
+		py::handle sfg; // The SFG operation of this nesting level.
+		std::size_t prefix_length; // Length of the key prefix that was in effect outside this SFG (see push_sfg's callers).
+
+		sfg_info(py::handle sfg, std::size_t prefix_length)
+			: sfg(sfg)
+			, prefix_length(prefix_length) {}
+
+		[[nodiscard]] std::size_t find_input_operation_index(py::handle op) const { // Position of op in sfg's input_operations, compared by identity.
+			for (auto const& [i, in] : enumerate(sfg.attr("input_operations"))) {
+				if (in.is(op)) {
+					return i;
+				}
+			}
+			throw py::value_error{"Stray Input operation in simulation SFG"}; // op is not one of this SFG's inputs.
+		}
+	};
+
+	using sfg_info_stack = std::vector<sfg_info>;
+	using added_output_cache = std::unordered_set<PyObject const*>;
+	using added_result_cache = std::unordered_map<PyObject const*, result_index_t>;
+	using added_custom_operation_cache = std::unordered_map<PyObject const*, std::size_t>;
+
+	static constexpr auto no_result_index = std::numeric_limits<result_index_t>::max();
+
+	void initialize_code(std::size_t input_count, std::size_t output_count) { // Stores the I/O counts and zeroes the required stack size.
+		m_code.input_count = input_count;
+		m_code.output_count = output_count;
+		m_code.required_stack_size = 0;
+	}
+
+	// Gives every instruction that was added with no_result_index the first index after the last result key.
+	void resolve_invalid_result_indices() {
+		auto const fallback_index = m_code.result_keys.size();
+		for (auto& entry : m_code.instructions) {
+			entry.result_index = (entry.result_index == no_result_index) ? fallback_index : entry.result_index;
+		}
+	}
+
+	// Returns a copy of sfg_stack with one more entry appended for sfg; sfg_stack itself is left untouched.
+	// prefix_length is stored alongside sfg in the new entry.
+	[[nodiscard]] static sfg_info_stack push_sfg(sfg_info_stack const& sfg_stack, py::handle sfg, std::size_t prefix_length) {
+		auto extended = sfg_info_stack{};
+		// Reserve the final size up front so that the copy and the append share one allocation.
+		extended.reserve(static_cast<std::size_t>(sfg_stack.size() + 1));
+		extended.insert(extended.end(), sfg_stack.begin(), sfg_stack.end());
+		extended.emplace_back(sfg, prefix_length);
+		return extended;
+	}
+
+	// Returns a copy of sfg_stack without its last entry; sfg_stack itself is left untouched.
+	// The stack must not be empty (asserted).
+	[[nodiscard]] static sfg_info_stack pop_sfg(sfg_info_stack const& sfg_stack) {
+		ASIC_ASSERT(!sfg_stack.empty());
+		auto const kept = static_cast<std::size_t>(sfg_stack.size() - 1);
+		auto shortened = sfg_info_stack{};
+		shortened.reserve(kept);
+		shortened.insert(shortened.end(), sfg_stack.begin(), sfg_stack.end() - 1);
+		return shortened;
+	}
+
+	// Appends an instruction of the given type, applies stack_diff to the tracked stack depth and records the peak depth.
+	instruction& add_instruction(instruction_type type, result_index_t result_index, std::ptrdiff_t stack_diff) {
+		m_stack_depth += stack_diff;
+		if (m_stack_depth < 0) {
+			throw py::value_error{"Detected input/output count mismatch in simulation SFG"};
+		}
+		auto const depth = static_cast<std::size_t>(m_stack_depth);
+		m_code.required_stack_size = (depth > m_code.required_stack_size) ? depth : m_code.required_stack_size;
+		auto& added = m_code.instructions.emplace_back();
+		added.type = type;
+		added.result_index = result_index;
+		return added;
+	}
+
+	// Registers output `output_index` of `op` as a result. Returns its result index when the caller must now emit the
+	// instructions that compute it, or std::nullopt when it was registered earlier (a push_result is emitted instead).
+	[[nodiscard]] std::optional<result_index_t> begin_operation_output(py::handle op, std::size_t output_index, std::string_view prefix) {
+		auto const output = op.attr("outputs")[py::int_{output_index}].ptr();
+		// Only a delay operation may be reached again while its own output is still being compiled.
+		if (m_incomplete_outputs.count(output) != 0 && op.attr("type_name").cast<std::string_view>() != "t") {
+			throw py::value_error{"Direct feedback loop detected in simulation SFG"};
+		}
+		auto const result_count = m_code.result_keys.size();
+		auto const [entry, is_new] = m_added_results.try_emplace(output, static_cast<result_index_t>(result_count));
+		if (!is_new) {
+			// Known result: push the stored value instead of emitting its instructions a second time.
+			this->add_instruction(instruction_type::push_result, entry->second, 1).index = static_cast<std::size_t>(entry->second);
+			return std::nullopt;
+		}
+		// The largest result_index_t value doubles as no_result_index, so it can never be handed out as a real index.
+		if (result_count >= static_cast<std::size_t>(std::numeric_limits<result_index_t>::max())) {
+			throw py::value_error{fmt::format("Simulation SFG requires too many outputs to be stored (limit: {})",
+											  std::numeric_limits<result_index_t>::max())};
+		}
+		m_code.result_keys.push_back(key_of_output(op, output_index, prefix));
+		m_incomplete_outputs.insert(output);
+		return entry->second;
+	}
+
+	void end_operation_output(py::handle op, std::size_t output_index) { // Marks the output as completely compiled.
+		auto const output = op.attr("outputs")[py::int_{output_index}].ptr();
+		[[maybe_unused]] auto const removed_count = m_incomplete_outputs.erase(output);
+		ASIC_ASSERT(removed_count == 1); // It must have been registered by begin_operation_output.
+	}
+
+	[[nodiscard]] std::size_t try_add_custom_operation(py::handle op) { // Returns op's custom operation index, registering op on first use.
+		auto const [entry, is_new] = m_added_custom_operations.try_emplace(op.ptr(), m_added_custom_operations.size());
+		if (is_new) {
+			auto& added = m_code.custom_operations.emplace_back(); // Lands at the index that was just cached for op.
+			added.input_count = op.attr("input_count").cast<std::size_t>();
+			added.output_count = op.attr("output_count").cast<std::size_t>();
+			added.evaluate_output = op.attr("evaluate_output");
+		}
+		return entry->second;
+	}
+
+	[[nodiscard]] std::size_t add_delay_info(number initial_value, result_index_t result_index) { // Returns the new entry's index in m_code.delays.
+		auto const new_index = m_code.delays.size();
+		auto& info = m_code.delays.emplace_back();
+		info.result_index = result_index;
+		info.initial_value = initial_value;
+		return new_index;
+	}
+
+	void add_source(py::handle op, std::size_t input_index, std::string_view prefix, sfg_info_stack const& sfg_stack) { // Emits the code that produces input `input_index` of op.
+		auto const signal = py::object{op.attr("inputs")[py::int_{input_index}].attr("signals")[py::int_{0}]}; // Only the input's first signal is used.
+		auto const src = py::handle{signal.attr("source")};
+		auto const operation = py::handle{src.attr("operation")};
+		auto const index = src.attr("index").cast<std::size_t>();
+		this->add_operation_output(operation, index, prefix, sfg_stack);
+		if (!signal.attr("bits").is_none()) { // NOTE(review): assumes the truncate instruction ANDs the value with bit_mask - confirm in the executor.
+			auto const bits = signal.attr("bits").cast<std::size_t>();
+			if (bits > 64) {
+				throw py::value_error{"Cannot truncate to more than 64 bits"};
+			}
+			auto const low_bits = (bits == 64) ? ~std::uint64_t{0} : (std::uint64_t{1} << bits) - 1; // 64 is special-cased: shifting by the full width is undefined.
+			this->add_instruction(instruction_type::truncate, no_result_index, 0).bit_mask = static_cast<std::int64_t>(low_bits); // Mask with the `bits` lowest bits set.
+		}
+	}
+
+	void add_unary_operation_output(py::handle op, result_index_t result_index, std::string_view prefix, sfg_info_stack const& sfg_stack,
+									instruction_type type) {
+		this->add_source(op, 0, prefix, sfg_stack); // Emit the code that computes the single operand (input 0).
+		this->add_instruction(type, result_index, 0); // Net stack change of 0.
+	}
+
+	void add_binary_operation_output(py::handle op, result_index_t result_index, std::string_view prefix, sfg_info_stack const& sfg_stack,
+									 instruction_type type) {
+		this->add_source(op, 0, prefix, sfg_stack); // Emit the code for the first operand (input 0)...
+		this->add_source(op, 1, prefix, sfg_stack); // ...and then for the second operand (input 1).
+		this->add_instruction(type, result_index, -1); // Net stack change of -1.
+	}
+
+	// Emits the instructions that produce output `output_index` of `op`, recursing through the operation's sources.
+	// `prefix` is the result key prefix currently in effect and `sfg_stack` holds the SFGs currently being descended into.
+	void add_operation_output(py::handle op, std::size_t output_index, std::string_view prefix, sfg_info_stack const& sfg_stack) {
+		auto const type_name = op.attr("type_name").cast<std::string_view>();
+		if (type_name == "out") {
+			// An Output operation registers no result of its own; only its source is compiled.
+			this->add_source(op, 0, prefix, sfg_stack);
+		} else if (auto const result_index = this->begin_operation_output(op, output_index, prefix)) {
+			if (type_name == "c") {
+				this->add_instruction(instruction_type::push_constant, *result_index, 1).value = op.attr("value").cast<number>();
+			} else if (type_name == "add") {
+				this->add_binary_operation_output(op, *result_index, prefix, sfg_stack, instruction_type::addition);
+			} else if (type_name == "sub") {
+				this->add_binary_operation_output(op, *result_index, prefix, sfg_stack, instruction_type::subtraction);
+			} else if (type_name == "mul") {
+				this->add_binary_operation_output(op, *result_index, prefix, sfg_stack, instruction_type::multiplication);
+			} else if (type_name == "div") {
+				this->add_binary_operation_output(op, *result_index, prefix, sfg_stack, instruction_type::division);
+			} else if (type_name == "min") {
+				this->add_binary_operation_output(op, *result_index, prefix, sfg_stack, instruction_type::min);
+			} else if (type_name == "max") {
+				this->add_binary_operation_output(op, *result_index, prefix, sfg_stack, instruction_type::max);
+			} else if (type_name == "sqrt") {
+				this->add_unary_operation_output(op, *result_index, prefix, sfg_stack, instruction_type::square_root);
+			} else if (type_name == "conj") {
+				this->add_unary_operation_output(op, *result_index, prefix, sfg_stack, instruction_type::complex_conjugate);
+			} else if (type_name == "abs") {
+				this->add_unary_operation_output(op, *result_index, prefix, sfg_stack, instruction_type::absolute);
+			} else if (type_name == "cmul") {
+				this->add_source(op, 0, prefix, sfg_stack);
+				this->add_instruction(instruction_type::constant_multiplication, *result_index, 0).value = op.attr("value").cast<number>();
+			} else if (type_name == "bfly") {
+				// Output 0 is the sum of the two inputs, any other output is their difference.
+				auto const type = (output_index == 0) ? instruction_type::addition : instruction_type::subtraction;
+				this->add_binary_operation_output(op, *result_index, prefix, sfg_stack, type);
+			} else if (type_name == "in") {
+				if (sfg_stack.empty()) {
+					throw py::value_error{"Encountered Input operation outside SFG in simulation"};
+				}
+				auto const& info = sfg_stack.back();
+				auto const input_index = info.find_input_operation_index(op);
+				if (sfg_stack.size() == 1) {
+					this->add_instruction(instruction_type::push_input, *result_index, 1).index = input_index;
+				} else {
+					// Nested SFG: the value comes from the matching input of the SFG operation, compiled with the outer prefix and stack.
+					this->add_source(info.sfg, input_index, prefix.substr(0, info.prefix_length), pop_sfg(sfg_stack));
+					this->add_instruction(instruction_type::forward_value, *result_index, 0);
+				}
+			} else if (type_name == "t") {
+				auto const delay_index = this->add_delay_info(op.attr("initial_value").cast<number>(), *result_index);
+				this->add_source(op, 0, prefix, sfg_stack);
+				this->add_instruction(instruction_type::delay, *result_index, 0).index = delay_index;
+			} else if (type_name == "sfg") {
+				this->add_source(
+					op.attr("output_operations")[py::int_{output_index}], 0, key_base(op, prefix), push_sfg(sfg_stack, op, prefix.size()));
+				this->add_instruction(instruction_type::forward_value, *result_index, 0);
+			} else {
+				auto const custom_operation_index = this->try_add_custom_operation(op);
+				// Copy the input count out of the vector: compiling the sources below can register further custom operations,
+				// which may reallocate m_code.custom_operations and would leave a reference into it dangling.
+				auto const input_count = m_code.custom_operations[custom_operation_index].input_count;
+				for (auto const i : range(input_count)) {
+					this->add_source(op, i, prefix, sfg_stack);
+				}
+				auto const custom_source_index = m_code.custom_sources.size();
+				auto& custom_source = m_code.custom_sources.emplace_back();
+				custom_source.custom_operation_index = custom_operation_index;
+				custom_source.output_index = output_index;
+				auto const stack_diff = std::ptrdiff_t{1} - static_cast<std::ptrdiff_t>(input_count);
+				this->add_instruction(instruction_type::custom, *result_index, stack_diff).index = custom_source_index;
+			}
+			this->end_operation_output(op, output_index);
+		}
+	}
+
+	simulation_code m_code;
+	added_output_cache m_incomplete_outputs;
+	added_result_cache m_added_results;
+	added_custom_operation_cache m_added_custom_operations;
+	std::ptrdiff_t m_stack_depth = 0;
 };
 
 simulation_code compile_simulation(pybind11::handle sfg) {
-    return compiler{}.compile(sfg);
+	return compiler{}.compile(sfg); // Uses a fresh compiler object for each call.
 }
 
 } // namespace asic
\ No newline at end of file
diff --git a/src/simulation/compile.h b/src/simulation/compile.h
index 9fb17b5f8fddbbdd86f974a356cfbe399fc3a005..883f4c5832978ea1bfd33c767fc947c1efde718e 100644
--- a/src/simulation/compile.h
+++ b/src/simulation/compile.h
@@ -3,40 +3,55 @@
 
 #include "instruction.h"
 
-#include <vector>
 #include <cstddef>
 #include <pybind11/pybind11.h>
 #include <string>
+#include <vector>
 
 namespace asic {
 
 using result_key = std::string;
 
 struct simulation_code final {
-    struct custom_operation final {
-        pybind11::object evaluate_output; // Python function used to evaluate the custom operation.
-        std::size_t input_count; // Number of inputs that the custom operation takes.
-        std::size_t output_count; // Number of outputs that the custom operation gives.
-    };
-
-    struct custom_source final {
-        std::size_t custom_operation_index; // Index into custom_operations where the custom_operation corresponding to this custom_source is located.
-        std::size_t output_index; // Output index of the custom_operation that this source gets it value from.
-    };
-
-    struct delay_info final {
-        number initial_value; // Initial value to set at the start of the simulation.
-        result_index_t result_index; // The result index where the current value should be stored at the start of each iteration.
-    };
-
-    std::vector<instruction> instructions; // Instructions to execute for one full iteration of the simulation.
-    std::vector<custom_operation> custom_operations; // Custom operations used by the simulation.
-    std::vector<custom_source> custom_sources; // Signal sources that use custom operations.
-    std::vector<delay_info> delays; // Info about the delay operations used in the simulation.
-    std::vector<result_key> result_keys; // Keys for each result produced by the simulation. The index of the key matches the index of the result in the simulation state.
-    std::size_t input_count; // Number of values expected as input to the simulation.
-    std::size_t output_count; // Number of values given as output from the simulation. This will be the number of values left on the stack after a full iteration of the simulation has been run.
-    std::size_t required_stack_size; // Maximum number of values that need to be able to fit on the stack in order to run a full iteration of the simulation.
+	struct custom_operation final { // An operation that is evaluated by calling back into Python.
+		// Python function used to evaluate the custom operation.
+		pybind11::object evaluate_output;
+		// Number of inputs that the custom operation takes.
+		std::size_t input_count;
+		// Number of outputs that the custom operation gives.
+		std::size_t output_count;
+	};
+
+	struct custom_source final { // One output of one custom operation, referenced by custom instructions.
+		// Index into custom_operations where the custom_operation corresponding to this custom_source is located.
+		std::size_t custom_operation_index;
+		// Output index of the custom_operation that this source gets its value from.
+		std::size_t output_index;
+	};
+
+	struct delay_info final {
+		// Initial value to set at the start of the simulation.
+		number initial_value;
+		// The result index where the current value should be stored at the start of each iteration.
+		result_index_t result_index;
+	};
+
+	// Instructions to execute for one full iteration of the simulation.
+	std::vector<instruction> instructions;
+	// Custom operations used by the simulation.
+	std::vector<custom_operation> custom_operations;
+	// Signal sources that use custom operations.
+	std::vector<custom_source> custom_sources;
+	// Info about the delay operations used in the simulation.
+	std::vector<delay_info> delays;
+	// Keys for each result produced by the simulation. The index of the key matches the index of the result in the simulation state.
+	std::vector<result_key> result_keys;
+	// Number of values expected as input to the simulation.
+	std::size_t input_count;
+	// Number of values given as output from the simulation. This will be the number of values left on the stack after a full iteration of the simulation has been run.
+	std::size_t output_count;
+	// Maximum number of values that need to be able to fit on the stack in order to run a full iteration of the simulation.
+	std::size_t required_stack_size;
 };
 
 [[nodiscard]] simulation_code compile_simulation(pybind11::handle sfg);
diff --git a/src/simulation/format_code.h b/src/simulation/format_code.h
new file mode 100644
index 0000000000000000000000000000000000000000..f50a3b7771c4a1da1eca5ff480c3fab8176dc370
--- /dev/null
+++ b/src/simulation/format_code.h
@@ -0,0 +1,128 @@
+#ifndef ASIC_SIMULATION_FORMAT_CODE_H
+#define ASIC_SIMULATION_FORMAT_CODE_H
+
+#include "../algorithm.h"
+#include "../debug.h"
+#include "../number.h"
+#include "compile.h"
+#include "instruction.h"
+
+#include <fmt/format.h>
+#include <string>
+
+namespace asic {
+
+[[maybe_unused]] [[nodiscard]] inline std::string format_number(number const& value) { // Formats value as text, marking the imaginary part with a "j" suffix.
+	if (value.imag() == 0) { // No imaginary part: print only the real part.
+		return fmt::to_string(value.real());
+	}
+	if (value.real() == 0) { // No real part: print only the imaginary part.
+		return fmt::format("{}j", value.imag());
+	}
+	if (value.imag() < 0) { // Negative imaginary part: negate it so that its sign becomes the "-" separator.
+		return fmt::format("{}-{}j", value.real(), -value.imag());
+	}
+	return fmt::format("{}+{}j", value.real(), value.imag()); // Both parts present, imaginary part positive.
+}
+
+[[maybe_unused]] [[nodiscard]] inline std::string format_compiled_simulation_code_result_keys(simulation_code const& code) { // Lists every result key on its own line.
+	auto result = std::string{};
+	for (auto const& [i, result_key] : enumerate(code.result_keys)) {
+		result += fmt::format("{:>2}: \"{}\"\n", i, result_key); // Index right-aligned to width 2, key inside double quotes.
+	}
+	return result; // Empty when there are no result keys.
+}
+
+[[maybe_unused]] [[nodiscard]] inline std::string format_compiled_simulation_code_delays(simulation_code const& code) { // Lists every delay on its own line.
+	auto result = std::string{};
+	for (auto const& [i, delay] : enumerate(code.delays)) {
+		ASIC_ASSERT(delay.result_index < code.result_keys.size()); // The lookup into result_keys below requires a valid index.
+		result += fmt::format("{:>2}: Initial value: {}, Result: {}: \"{}\"\n",
+							  i,
+							  format_number(delay.initial_value),
+							  delay.result_index,
+							  code.result_keys[delay.result_index]); // Key of the result that the delay is stored in.
+	}
+	return result;
+}
+
+[[maybe_unused]] [[nodiscard]] inline std::string format_compiled_simulation_code_instruction(instruction const& instruction) { // Returns the instruction name plus its operand, if it has one.
+	switch (instruction.type) { // Reads only the union member that belongs to the given instruction type.
+		// clang-format off
+		case instruction_type::push_input:              return fmt::format("push_input inputs[{}]", instruction.index);
+		case instruction_type::push_result:             return fmt::format("push_result results[{}]", instruction.index);
+		case instruction_type::push_constant:           return fmt::format("push_constant {}", format_number(instruction.value));
+		case instruction_type::truncate:                return fmt::format("truncate {:#018x}", instruction.bit_mask);
+		case instruction_type::addition:                return "addition";
+		case instruction_type::subtraction:             return "subtraction";
+		case instruction_type::multiplication:          return "multiplication";
+		case instruction_type::division:                return "division";
+		case instruction_type::min:                     return "min";
+		case instruction_type::max:                     return "max";
+		case instruction_type::square_root:             return "square_root";
+		case instruction_type::complex_conjugate:       return "complex_conjugate";
+		case instruction_type::absolute:                return "absolute";
+		case instruction_type::constant_multiplication: return fmt::format("constant_multiplication {}", format_number(instruction.value));
+		case instruction_type::delay:                   return fmt::format("delay delays[{}]", instruction.index);
+		case instruction_type::custom:                  return fmt::format("custom custom_sources[{}]", instruction.index);
+		case instruction_type::forward_value:           return "forward_value";
+			// clang-format on
+	}
+	return std::string{}; // Only reached if instruction.type holds a value that is not one of the enumerators above.
+}
+
+[[maybe_unused]] [[nodiscard]] inline std::string format_compiled_simulation_code_instructions(simulation_code const& code) { // Lists every instruction on its own line.
+	auto result = std::string{};
+	for (auto const& [i, instruction] : enumerate(code.instructions)) {
+		auto instruction_string = format_compiled_simulation_code_instruction(instruction);
+		if (instruction.result_index < code.result_keys.size()) { // An out-of-range result index has no key to show, so no annotation is added.
+			instruction_string = fmt::format(
+				"{:<26} -> {}: \"{}\"", instruction_string, instruction.result_index, code.result_keys[instruction.result_index]);
+		}
+		result += fmt::format("{:>2}: {}\n", i, instruction_string); // Prefix with the instruction's position in the program.
+	}
+	return result;
+}
+
+[[maybe_unused]] [[nodiscard]] inline std::string format_compiled_simulation_code(simulation_code const& code) { // Builds a readable dump: stats, delays, result keys and instructions.
+	return fmt::format(
+		"==============================================\n"
+		"> Code stats\n"
+		"==============================================\n"
+		"Input count: {}\n"
+		"Output count: {}\n"
+		"Instruction count: {}\n"
+		"Required stack size: {}\n"
+		"Delay count: {}\n"
+		"Result count: {}\n"
+		"Custom operation count: {}\n"
+		"Custom source count: {}\n"
+		"==============================================\n"
+		"> Delays\n"
+		"==============================================\n"
+		"{}"
+		"==============================================\n"
+		"> Result keys\n"
+		"==============================================\n"
+		"{}"
+		"==============================================\n"
+		"> Instructions\n"
+		"==============================================\n"
+		"{}"
+		"==============================================",
+		code.input_count, // Arguments follow in the same order as the placeholders above.
+		code.output_count,
+		code.instructions.size(),
+		code.required_stack_size,
+		code.delays.size(),
+		code.result_keys.size(),
+		code.custom_operations.size(),
+		code.custom_sources.size(),
+		format_compiled_simulation_code_delays(code), // The section helpers end each line they emit with '\n', so the bare "{}" needs none.
+		format_compiled_simulation_code_result_keys(code),
+		format_compiled_simulation_code_instructions(code));
+}
+
+} // namespace asic
+
+#endif // ASIC_SIMULATION_FORMAT_CODE_H
\ No newline at end of file
diff --git a/src/simulation/instruction.h b/src/simulation/instruction.h
index 60bed068827838b3fdd6f96cde16bb8455d56369..5fd95f92fb42ad54e1c6cf1c8d07e662129cdd6c 100644
--- a/src/simulation/instruction.h
+++ b/src/simulation/instruction.h
@@ -2,6 +2,7 @@
 #define ASIC_SIMULATION_INSTRUCTION_H
 
 #include "../number.h"
+
 #include <cstddef>
 #include <cstdint>
 #include <optional>
@@ -9,41 +10,45 @@
 namespace asic {
 
 enum class instruction_type : std::uint8_t {
-    push_input,                 // push(inputs[index])
-    push_result,                // push(results[index])
-    push_constant,              // push(value)
-    truncate,                   // push(trunc(pop(), bit_mask))
-    addition,                   // push(pop() + pop())
-    subtraction,                // push(pop() - pop())
-    multiplication,             // push(pop() * pop())
-    division,                   // push(pop() / pop())
-    min,                        // push(min(pop(), pop()))
-    max,                        // push(max(pop(), pop()))
-    square_root,                // push(sqrt(pop()))
-    complex_conjugate,          // push(conj(pop()))
-    absolute,                   // push(abs(pop()))
-    constant_multiplication,    // push(pop() * value)
-    delay,                      // auto const value = pop(); push(delays[index]); delays[index] = value
-    custom,                     // Custom operation. Uses custom_source[index].
-    forward_value               // Just forward the current value on the stack (i.e. do nothing).
+	push_input,              // push(inputs[index])
+	push_result,             // push(results[index])
+	push_constant,           // push(value)
+	truncate,                // push(trunc(pop(), bit_mask))
+	addition,                // push(pop() + pop())
+	subtraction,             // push(pop() - pop())
+	multiplication,          // push(pop() * pop())
+	division,                // push(pop() / pop())
+	min,                     // push(min(pop(), pop()))
+	max,                     // push(max(pop(), pop()))
+	square_root,             // push(sqrt(pop()))
+	complex_conjugate,       // push(conj(pop()))
+	absolute,                // push(abs(pop()))
+	constant_multiplication, // push(pop() * value)
+	delay,                   // auto const value = pop(); push(delays[index]); delays[index] = value
+	custom,                  // Custom operation. Uses custom_sources[index].
+	forward_value            // Forward the current value on the stack (push(pop()), i.e. do nothing).
 };
 
 using result_index_t = std::uint16_t;
 
 struct instruction final {
-    constexpr instruction() noexcept
-        : index(0)
-        , result_index(0)
-        , type(instruction_type::forward_value)
-    {}
-
-    union {
-        std::size_t index; // Index used by push_input, push_result, delay and custom.
-        std::int64_t bit_mask; // Bit mask used by truncate.
-        number value; // Constant value used by push_constant and constant_multiplication.
-    };
-    result_index_t result_index; // Index into where the result of the instruction will be stored. If the result should be ignored, this index will be one past the last valid result index.
-    instruction_type type; // Specifies what kind of operation the instruction should execute.
+	constexpr instruction() noexcept // Default state: a forward_value instruction with index 0 and result index 0.
+		: index(0) // Makes index the initially active member of the union below.
+		, result_index(0)
+		, type(instruction_type::forward_value) {}
+
+	union { // Operand of the instruction; which member is meaningful depends on type.
+		// Index used by push_input, push_result, delay and custom.
+		std::size_t index;
+		// Bit mask used by truncate.
+		std::int64_t bit_mask;
+		// Constant value used by push_constant and constant_multiplication.
+		number value;
+	};
+	// Index into where the result of the instruction will be stored. If the result should be ignored, this index will be one past the last valid result index.
+	result_index_t result_index;
+	// Specifies what kind of operation the instruction should execute.
+	instruction_type type;
 };
 
 } // namespace asic
diff --git a/src/simulation/run.cpp b/src/simulation/run.cpp
index b9fa8edb7e6b7b0766cb87f019c396ede27459d4..7120f8e42b06de161d5dc81e38c8c08eb9457a5b 100644
--- a/src/simulation/run.cpp
+++ b/src/simulation/run.cpp
@@ -1,187 +1,175 @@
 #include "run.h"
-#include "../debug.h"
+
 #include "../algorithm.h"
-#include <pybind11/pybind11.h>
-#include <pybind11/stl.h>
-#include <cstddef>
-#include <iterator>
+#include "../debug.h"
+#include "format_code.h"
+
 #include <algorithm>
+#include <complex>
+#include <cstddef>
 #include <fmt/format.h>
+#include <iterator>
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
 #include <stdexcept>
-#include <complex>
 
 namespace py = pybind11;
 
 namespace asic {
 
 [[nodiscard]] static number truncate_value(number value, std::int64_t bit_mask) {
-    if (value.imag() != 0) {
-        throw py::type_error{"Complex value cannot be truncated"};
-    }
-    return number{static_cast<number::value_type>(static_cast<std::int64_t>(value.real()) & bit_mask)};
+	if (value.imag() != 0) { // Masking is only defined here for values without an imaginary part.
+		throw py::type_error{"Complex value cannot be truncated"};
+	}
+	return number{static_cast<number::value_type>(static_cast<std::int64_t>(value.real()) & bit_mask)}; // Real part -> int64, AND with the mask, then back to number.
 }
 
 [[nodiscard]] static std::int64_t setup_truncation_parameters(bool& truncate, std::optional<std::uint8_t>& bits_override) {
-    if (truncate && bits_override) {
-        truncate = false; // Ignore truncate instructions, they will be truncated using bits_override instead.
-        if (*bits_override > 64) {
-		    throw py::value_error{"Cannot truncate to more than 64 bits"};
-        }
-        return static_cast<std::int64_t>(std::int64_t{1} << *bits_override); // Return the bit mask override to use.
-    }
-    bits_override.reset(); // Don't use bits_override if truncate is false.
-    return std::int64_t{};
+	if (truncate && bits_override) { // The override only applies while truncation is enabled.
+		truncate = false; // Ignore truncate instructions, they will be truncated using bits_override instead.
+		if (*bits_override > 64) {
+			throw py::value_error{"Cannot truncate to more than 64 bits"};
+		}
+		return static_cast<std::int64_t>(*bits_override >= 64 ? ~std::uint64_t{0} : (std::uint64_t{1} << *bits_override) - 1); // Mask keeping the low *bits_override bits; unsigned math avoids the undefined shift by 64.
+	}
+	bits_override.reset(); // Don't use bits_override if truncate is false.
+	return std::int64_t{}; // Unused by the caller, since bits_override is now empty.
 }
 
-simulation_state run_simulation(simulation_code const& code, span<number const> inputs, span<number> delays, std::optional<std::uint8_t> bits_override, bool truncate) {
-    ASIC_ASSERT(inputs.size() == code.input_count);
-    ASIC_ASSERT(delays.size() == code.delays.size());
-    ASIC_ASSERT(code.output_count <= code.required_stack_size);
+simulation_state run_simulation(simulation_code const& code, span<number const> inputs, span<number> delays,
+								std::optional<std::uint8_t> bits_override, bool truncate) { // Executes code.instructions once; updates delays in place.
+	ASIC_ASSERT(inputs.size() == code.input_count);
+	ASIC_ASSERT(delays.size() == code.delays.size());
+	ASIC_ASSERT(code.output_count <= code.required_stack_size); // The final erase() below relies on the outputs fitting in the stack.
 
-    auto state = simulation_state{};
+	auto state = simulation_state{};
 
-    // Setup results.
-    state.results.resize(code.result_keys.size() + 1); // Add one space to store ignored results.
-    // Initialize delay results to their current values.
-    for (auto const& [i, delay] : enumerate(code.delays)) {
-        state.results[delay.result_index] = delays[i];
-    }
+	// Setup results.
+	state.results.resize(code.result_keys.size() + 1); // Add one space to store ignored results.
+	// Initialize delay results to their current values.
+	for (auto const& [i, delay] : enumerate(code.delays)) {
+		state.results[delay.result_index] = delays[i];
+	}
 
-    // Setup stack.
-    state.stack.resize(code.required_stack_size);
-    auto stack_pointer = state.stack.data();
+	// Setup stack.
+	state.stack.resize(code.required_stack_size);
+	auto stack_pointer = state.stack.data(); // Points one past the top element; equals data() when the stack is empty.
 
-    // Utility functions to make the stack manipulation code below more readable.
-    // Should hopefully be inlined by the compiler.
-    auto const push = [&](number value) -> void {
-        ASIC_ASSERT(std::distance(state.stack.data(), stack_pointer) < static_cast<std::ptrdiff_t>(state.stack.size()));
-        *stack_pointer++ = value;
-    };
-    auto const pop = [&]() -> number {
-        ASIC_ASSERT(std::distance(state.stack.data(), stack_pointer) > std::ptrdiff_t{0});
-        return *--stack_pointer;
-    };
-    auto const peek = [&]() -> number {
-        ASIC_ASSERT(std::distance(state.stack.data(), stack_pointer) > std::ptrdiff_t{0});
-        ASIC_ASSERT(std::distance(state.stack.data(), stack_pointer) <= static_cast<std::ptrdiff_t>(state.stack.size()));
-        return *(stack_pointer - 1);
-    };
+	// Utility functions to make the stack manipulation code below more readable.
+	// Should hopefully be inlined by the compiler.
+	auto const push = [&](number value) -> void {
+		ASIC_ASSERT(std::distance(state.stack.data(), stack_pointer) < static_cast<std::ptrdiff_t>(state.stack.size()));
+		*stack_pointer++ = value;
+	};
+	auto const pop = [&]() -> number {
+		ASIC_ASSERT(std::distance(state.stack.data(), stack_pointer) > std::ptrdiff_t{0});
+		return *--stack_pointer;
+	};
+	auto const peek = [&]() -> number {
+		ASIC_ASSERT(std::distance(state.stack.data(), stack_pointer) > std::ptrdiff_t{0});
+		ASIC_ASSERT(std::distance(state.stack.data(), stack_pointer) <= static_cast<std::ptrdiff_t>(state.stack.size()));
+		return *(stack_pointer - 1); // Reads the top element without removing it.
+	};
 
-    // Check if results should be truncated.
-    auto const bit_mask_override = setup_truncation_parameters(truncate, bits_override);
+	// Check if results should be truncated.
+	auto const bit_mask_override = setup_truncation_parameters(truncate, bits_override); // May also modify truncate and bits_override.
 
-    // Hot instruction evaluation loop.
-    for (auto const& instruction : code.instructions) {
-        // Execute the instruction.
-        switch (instruction.type) {
-            case instruction_type::push_input:
-                ASIC_DEBUG_MSG("Evaluating push_input.");
-                push(inputs[instruction.index]);
-                break;
-            case instruction_type::push_result:
-                ASIC_DEBUG_MSG("Evaluating push_result.");
-                push(state.results[instruction.index]);
-                break;
-            case instruction_type::push_constant:
-                ASIC_DEBUG_MSG("Evaluating push_constant.");
-                push(instruction.value);
-                break;
-            case instruction_type::truncate:
-                ASIC_DEBUG_MSG("Evaluating truncate.");
-                if (truncate) {
-                    push(truncate_value(pop(), instruction.bit_mask));
-                }
-                break;
-            case instruction_type::addition:
-                ASIC_DEBUG_MSG("Evaluating addition.");
-                push(pop() + pop());
-                break;
-            case instruction_type::subtraction:
-                ASIC_DEBUG_MSG("Evaluating subtraction.");
-                push(pop() - pop());
-                break;
-            case instruction_type::multiplication:
-                ASIC_DEBUG_MSG("Evaluating multiplication.");
-                push(pop() * pop());
-                break;
-            case instruction_type::division:
-                ASIC_DEBUG_MSG("Evaluating division.");
-                push(pop() / pop());
-                break;
-            case instruction_type::min: {
-                ASIC_DEBUG_MSG("Evaluating min.");
-                auto const lhs = pop();
-                auto const rhs = pop();
-                if (lhs.imag() != 0 || rhs.imag() != 0) {
-                    throw std::runtime_error{"Min does not support complex numbers."};
-                }
-                push(std::min(lhs.real(), rhs.real()));
-                break;
-            }
-            case instruction_type::max: {
-                ASIC_DEBUG_MSG("Evaluating max.");
-                auto const lhs = pop();
-                auto const rhs = pop();
-                if (lhs.imag() != 0 || rhs.imag() != 0) {
-                    throw std::runtime_error{"Max does not support complex numbers."};
-                }
-                push(std::max(lhs.real(), rhs.real()));
-                break;
-            }
-            case instruction_type::square_root:
-                ASIC_DEBUG_MSG("Evaluating sqrt.");
-                push(std::sqrt(pop()));
-                break;
-            case instruction_type::complex_conjugate:
-                ASIC_DEBUG_MSG("Evaluating conj.");
-                push(std::conj(pop()));
-                break;
-            case instruction_type::absolute:
-                ASIC_DEBUG_MSG("Evaluating abs.");
-                push(number{std::abs(pop())});
-                break;
-            case instruction_type::constant_multiplication:
-                ASIC_DEBUG_MSG("Evaluating cmul.");
-                push(pop() * instruction.value);
-                break;
-            case instruction_type::delay: {
-                ASIC_DEBUG_MSG("Evaluating delay.");
-                auto const value = delays[instruction.index];
-                delays[instruction.index] = pop();
-                push(value);
-                break;
-            }
-            case instruction_type::custom: {
-                ASIC_DEBUG_MSG("Evaluating custom.");
-                using namespace pybind11::literals;
-                auto const& src = code.custom_sources[instruction.index];
-                auto const& op = code.custom_operations[src.custom_operation_index];
-                auto input_values = std::vector<number>{};
-                input_values.reserve(op.input_count);
-                for (auto i = std::size_t{0}; i < op.input_count; ++i) {
-                    input_values.push_back(pop());
-                }
-                push(op.evaluate_output(src.output_index, std::move(input_values), "truncate"_a = truncate).cast<number>());
-                break;
-            }
-            case instruction_type::forward_value:
-                ASIC_DEBUG_MSG("Evaluating forward_value.");
-                // Doing push(pop()) would be wasteful, so we just do nothing.
-                break;
-        }
-        // If we've been given a global override for how many bits to use, always truncate the result.
-        if (bits_override) {
-            push(truncate_value(pop(), bit_mask_override));
-        }
-        // Store the result.
-        state.results[instruction.result_index] = peek();
-    }
+	// Hot instruction evaluation loop.
+	for (auto const& instruction : code.instructions) {
+		ASIC_DEBUG_MSG("Evaluating {}.", format_compiled_simulation_code_instruction(instruction)); // NOTE(review): builds a string per instruction unless ASIC_DEBUG_MSG drops its arguments when disabled - confirm.
+		// Execute the instruction.
+		switch (instruction.type) {
+			case instruction_type::push_input:
+				push(inputs[instruction.index]);
+				break;
+			case instruction_type::push_result:
+				push(state.results[instruction.index]);
+				break;
+			case instruction_type::push_constant:
+				push(instruction.value);
+				break;
+			case instruction_type::truncate:
+				if (truncate) { // When false, the value is left untouched on the stack.
+					push(truncate_value(pop(), instruction.bit_mask));
+				}
+				break;
+			case instruction_type::addition:
+				push(pop() + pop());
+				break;
+			case instruction_type::subtraction:
+				push(pop() - pop()); // NOTE(review): C++ does not specify which pop() is evaluated first, so the operand order is compiler-dependent.
+				break;
+			case instruction_type::multiplication:
+				push(pop() * pop());
+				break;
+			case instruction_type::division:
+				push(pop() / pop()); // NOTE(review): same unspecified operand order as for subtraction.
+				break;
+			case instruction_type::min: {
+				auto const lhs = pop();
+				auto const rhs = pop();
+				if (lhs.imag() != 0 || rhs.imag() != 0) {
+					throw std::runtime_error{"Min does not support complex numbers."};
+				}
+				push(std::min(lhs.real(), rhs.real()));
+				break;
+			}
+			case instruction_type::max: {
+				auto const lhs = pop();
+				auto const rhs = pop();
+				if (lhs.imag() != 0 || rhs.imag() != 0) {
+					throw std::runtime_error{"Max does not support complex numbers."};
+				}
+				push(std::max(lhs.real(), rhs.real()));
+				break;
+			}
+			case instruction_type::square_root:
+				push(std::sqrt(pop()));
+				break;
+			case instruction_type::complex_conjugate:
+				push(std::conj(pop()));
+				break;
+			case instruction_type::absolute:
+				push(number{std::abs(pop())});
+				break;
+			case instruction_type::constant_multiplication:
+				push(pop() * instruction.value);
+				break;
+			case instruction_type::delay: {
+				auto const value = delays[instruction.index]; // Read the stored value before it is overwritten below.
+				delays[instruction.index] = pop();
+				push(value);
+				break;
+			}
+			case instruction_type::custom: {
+				using namespace pybind11::literals;
+				auto const& src = code.custom_sources[instruction.index];
+				auto const& op = code.custom_operations[src.custom_operation_index];
+				auto input_values = std::vector<number>{};
+				input_values.reserve(op.input_count);
+				for (auto i = std::size_t{0}; i < op.input_count; ++i) {
+					input_values.push_back(pop()); // Values are collected in pop order, i.e. the top of the stack comes first.
+				}
+				push(op.evaluate_output(src.output_index, std::move(input_values), "truncate"_a = truncate).cast<number>());
+				break;
+			}
+			case instruction_type::forward_value:
+				// Do nothing, since doing push(pop()) would be pointless.
+				break;
+		}
+		// If we've been given a global override for how many bits to use, always truncate the result.
+		if (bits_override) {
+			push(truncate_value(pop(), bit_mask_override));
+		}
+		// Store the result.
+		state.results[instruction.result_index] = peek(); // Ignored results are written to the extra slot added to state.results above.
+	}
 
-    // Remove the space that we used for ignored results.
-    state.results.pop_back();
-    // Erase the portion of the stack that does not contain the output values.
-    state.stack.erase(state.stack.begin() + static_cast<std::ptrdiff_t>(code.output_count), state.stack.end());
-    return state;
+	// Remove the space that we used for ignored results.
+	state.results.pop_back();
+	// Erase the portion of the stack that does not contain the output values.
+	state.stack.erase(state.stack.begin() + static_cast<std::ptrdiff_t>(code.output_count), state.stack.end());
+	return state;
 }
 
 } // namespace asic
\ No newline at end of file
diff --git a/src/simulation/run.h b/src/simulation/run.h
index 173fb5a88cea80e17036500e0ca30488d5e4ae9f..2174c571ef59f3e12236471e3e064f2619c38a60 100644
--- a/src/simulation/run.h
+++ b/src/simulation/run.h
@@ -1,20 +1,22 @@
 #ifndef ASIC_SIMULATION_RUN_H
 #define ASIC_SIMULATION_RUN_H
 
-#include "compile.h"
-#include "../span.h"
 #include "../number.h"
-#include <vector>
+#include "../span.h"
+#include "compile.h"
+
 #include <cstdint>
+#include <vector>
 
 namespace asic {
 
 struct simulation_state final {
-    std::vector<number> stack;
-    std::vector<number> results;
+	std::vector<number> stack; // As returned by run_simulation: the first output_count stack values, i.e. the outputs.
+	std::vector<number> results; // As returned by run_simulation: one value per entry of simulation_code::result_keys.
 };
 
-simulation_state run_simulation(simulation_code const& code, span<number const> inputs, span<number> delays, std::optional<std::uint8_t> bits_override, bool truncate);
+simulation_state run_simulation(simulation_code const& code, span<number const> inputs, span<number> delays,
+								std::optional<std::uint8_t> bits_override, bool truncate); // Executes the instructions in code once; the values in delays are updated in place.
 
 } // namespace asic
 
diff --git a/src/simulation/simulation.cpp b/src/simulation/simulation.cpp
index 808b40cd073491fbdd6338f1dcfbe02cff97153f..455b928733c9fd081e39c5cd14a00adc01ca2c42 100644
--- a/src/simulation/simulation.cpp
+++ b/src/simulation/simulation.cpp
@@ -1,14 +1,14 @@
 #include "simulation.h"
+
 #include "../algorithm.h"
 #include "../debug.h"
-
 #include "compile.h"
 #include "run.h"
 
-#include <pybind11/numpy.h>
 #include <fmt/format.h>
-#include <utility>
 #include <limits>
+#include <pybind11/numpy.h>
+#include <utility>
 
 namespace py = pybind11;
 
@@ -64,25 +64,27 @@ std::vector<number> simulation::step(bool save_results, std::optional<std::uint8
 	return this->run_for(1, save_results, bits_override, truncate);
 }
 
-std::vector<number> simulation::run_until(iteration_t iteration, bool save_results, std::optional<std::uint8_t> bits_override, bool truncate) {
+std::vector<number> simulation::run_until(iteration_t iteration, bool save_results, std::optional<std::uint8_t> bits_override,
+										  bool truncate) { // Runs iterations while m_iteration < iteration; returns the outputs of the last one run.
 	auto result = std::vector<number>{};
 	while (m_iteration < iteration) {
 		ASIC_DEBUG_MSG("Running simulation iteration.");
-        auto inputs = std::vector<number>(m_code.input_count);
+		auto inputs = std::vector<number>(m_code.input_count); // One value per simulation input, filled in by the loop below.
 		for (auto&& [input, function] : zip(inputs, m_input_functions)) {
 			input = function(m_iteration);
 		}
-        auto state = run_simulation(m_code, inputs, m_delays, bits_override, truncate);
-        result = std::move(state.stack);
+		auto state = run_simulation(m_code, inputs, m_delays, bits_override, truncate);
+		result = std::move(state.stack); // Overwritten every iteration, so only the latest outputs are kept.
 		if (save_results) {
-            m_results.push_back(std::move(state.results));
+			m_results.push_back(std::move(state.results)); // Appends this iteration's results to the saved history.
 		}
 		++m_iteration;
 	}
 	return result;
 }
 
-std::vector<number> simulation::run_for(iteration_t iterations, bool save_results, std::optional<std::uint8_t> bits_override, bool truncate) {
+std::vector<number> simulation::run_for(iteration_t iterations, bool save_results, std::optional<std::uint8_t> bits_override,
+										bool truncate) {
 	if (iterations > std::numeric_limits<iteration_t>::max() - m_iteration) {
 		throw py::value_error("Simulation iteration type overflow!");
 	}
@@ -101,17 +103,17 @@ iteration_t simulation::iteration() const noexcept {
 }
 
 pybind11::dict simulation::results() const noexcept {
-    auto results = py::dict{};
-    if (!m_results.empty()) {
-        for (auto const& [i, key] : enumerate(m_code.result_keys)) {
-            auto values = std::vector<number>{};
-            values.reserve(m_results.size());
-            for (auto const& result : m_results) {
-                values.push_back(result[i]);
-            }
-            results[py::str{key}] = py::array{static_cast<py::ssize_t>(values.size()), values.data()};
-        }
-    }
+	auto results = py::dict{}; // NOTE(review): this function is noexcept, yet the allocations below can throw, which would terminate the process.
+	if (!m_results.empty()) { // With no saved iterations the dict stays empty (no keys at all).
+		for (auto const& [i, key] : enumerate(m_code.result_keys)) {
+			auto values = std::vector<number>{};
+			values.reserve(m_results.size());
+			for (auto const& result : m_results) {
+				values.push_back(result[i]); // Collect result i from every saved iteration, oldest first.
+			}
+			results[py::str{key}] = py::array{static_cast<py::ssize_t>(values.size()), values.data()};
+		}
+	}
 	return results;
 }
 
diff --git a/src/simulation/simulation.h b/src/simulation/simulation.h
index 6839f34ee552a4257e1a6c15901c3d632c85aeb1..c1a36cbc93492494af14a198c970a6a534477794 100644
--- a/src/simulation/simulation.h
+++ b/src/simulation/simulation.h
@@ -4,15 +4,15 @@
 #include "../number.h"
 #include "compile.h"
 
-#include <pybind11/pybind11.h>
-#include <pybind11/stl.h>
-#include <pybind11/functional.h>
+#include <cstddef>
 #include <cstdint>
 #include <functional>
+#include <optional>
+#include <pybind11/functional.h>
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
 #include <variant>
 #include <vector>
-#include <optional>
-#include <cstddef>
 
 namespace asic {
 
@@ -28,8 +28,10 @@ public:
 	void set_inputs(std::vector<std::optional<input_provider_t>> input_providers);
 
 	[[nodiscard]] std::vector<number> step(bool save_results, std::optional<std::uint8_t> bits_override, bool truncate);
-	[[nodiscard]] std::vector<number> run_until(iteration_t iteration, bool save_results, std::optional<std::uint8_t> bits_override, bool truncate);
-	[[nodiscard]] std::vector<number> run_for(iteration_t iterations, bool save_results, std::optional<std::uint8_t> bits_override, bool truncate);
+	[[nodiscard]] std::vector<number> run_until(iteration_t iteration, bool save_results, std::optional<std::uint8_t> bits_override,
+												bool truncate);
+	[[nodiscard]] std::vector<number> run_for(iteration_t iterations, bool save_results, std::optional<std::uint8_t> bits_override,
+											  bool truncate);
 	[[nodiscard]] std::vector<number> run(bool save_results, std::optional<std::uint8_t> bits_override, bool truncate);
 
 	[[nodiscard]] iteration_t iteration() const noexcept;
@@ -40,7 +42,7 @@ public:
 
 private:
 	simulation_code m_code;
-    std::vector<number> m_delays;
+	std::vector<number> m_delays;
 	std::vector<input_function_t> m_input_functions;
 	std::optional<iteration_t> m_input_length;
 	iteration_t m_iteration = 0;