cpu.mode fastest code on the internet
solution

sol_3159247_1781243189810890613_3

C++ Scalar sparse merge dot 2 runs
01 source
Submitted source 723 bytes
Compiler clang++ Flags -O3 -march=native -std=c++20
show source
#include <cstddef>
#include <cstdint>

/// Sparse dot product over two index/weight pairs, accumulated in double.
///
/// Walks `a` and `b` with one cursor each. Whenever the two current indices
/// are equal, the product of the matching weights is added to the running
/// total; otherwise only the cursor pointing at the smaller index advances.
/// The walk stops as soon as either array is exhausted.
///
/// NOTE(review): this merge only finds every shared index if both `a` and `b`
/// are sorted in ascending order -- presumably guaranteed by the caller;
/// nothing here checks it.
///
/// @param a          indices of the first vector (`a_length` entries)
/// @param b          indices of the second vector (`b_length` entries)
/// @param a_weights  weight for each entry of `a`
/// @param b_weights  weight for each entry of `b`
/// @param a_length   number of entries in `a` / `a_weights`
/// @param b_length   number of entries in `b` / `b_weights`
/// @param product    receives the accumulated sum (0.0 when nothing matches)
extern "C" void nk_sparse_dot_u32f32(
    const std::uint32_t *a,
    const std::uint32_t *b,
    const float *a_weights,
    const float *b_weights,
    std::size_t a_length,
    std::size_t b_length,
    double *product
) {
    double accumulated = 0.0;
    std::size_t a_pos = 0;
    std::size_t b_pos = 0;

    for (;;) {
        // Stop once either side has no entries left to compare.
        if (a_pos >= a_length || b_pos >= b_length) {
            break;
        }

        const std::uint32_t lhs = a[a_pos];
        const std::uint32_t rhs = b[b_pos];

        // Weights are only read for matching indices; widen to double before
        // multiplying so the product is formed in double precision.
        if (lhs == rhs) {
            const double lhs_weight = a_weights[a_pos];
            const double rhs_weight = b_weights[b_pos];
            accumulated += lhs_weight * rhs_weight;
        }

        // Equal indices advance both cursors; otherwise only the cursor
        // holding the smaller index moves forward.
        a_pos += (lhs <= rhs) ? 1u : 0u;
        b_pos += (lhs >= rhs) ? 1u : 0u;
    }

    *product = accumulated;
}
02 jobs
Systems 02 jobs
03 counters
Performance counters 31 counters
cycles
183,852,174
Show more
branch_instructions
46,441,289
branch_misses
5,746,297
cycle_activity.stalls_l1d_miss
1,126,711
cycle_activity.stalls_l2_miss
767,658
cycle_activity.stalls_l3_miss
629,534
cycle_activity.stalls_total
83,654,264
dtlb_load_misses.walk_completed
794
exe_activity.bound_on_loads
16,207,050
exe_activity.bound_on_stores
21,557
instructions
116,102,038
machine_clears
1,095
mem_inst_retired.split_loads
879
mem_load_retired.l1_miss
89,759
mem_load_retired.l2_miss
5,815
mem_load_retired.l3_miss
4,157
tma_backend_bound
99,715,542
tma_bad_speculation
320,242,324
tma_branch_mispredict_slots
320,314,365
tma_frontend_bound
606,833,559
tma_memory_bound
13,152,294
tma_retiring
84,976,111
tma_slots
1,106,047,146
uops_dispatched.port_0
12,138,645
uops_dispatched.port_1
9,414,186
uops_dispatched.port_2_3_10
60,631,555
uops_dispatched.port_4_9
848,655
uops_dispatched.port_5_11
31,848,281
uops_dispatched.port_6
80,935,650
uops_dispatched.port_7_8
860,189
uops_retired.ms
0
04 top down
Top-down analysis Raptor Cove P-core
05 profile
load profile