Sorted summary for file /box/solution-bin ---------------------------------------------- 23.41 /tmp/cpu-mode-161014-1777968509765480994-44/solution/solution.cpp:44 8.21 /tmp/cpu-mode-161014-1777968509765480994-44/solution/solution.cpp:15 7.97 /tmp/cpu-mode-161014-1777968509765480994-44/solution/solution.cpp:17 6.98 /tmp/cpu-mode-161014-1777968509765480994-44/solution/solution.cpp:24 6.37 /tmp/cpu-mode-161014-1777968509765480994-44/solution/solution.cpp:14 5.53 /tmp/cpu-mode-161014-1777968509765480994-44/solution/solution.cpp:16 5.00 /tmp/cpu-mode-161014-1777968509765480994-44/solution/solution.cpp:48 4.90 /tmp/cpu-mode-161014-1777968509765480994-44/solution/solution.cpp:49 4.55 /tmp/cpu-mode-161014-1777968509765480994-44/solution/solution.cpp:43 3.60 /tmp/cpu-mode-161014-1777968509765480994-44/solution/solution.cpp:50 3.58 /tmp/cpu-mode-161014-1777968509765480994-44/solution/solution.cpp:0 3.51 /tmp/cpu-mode-161014-1777968509765480994-44/solution/solution.cpp:13 2.69 /tmp/cpu-mode-161014-1777968509765480994-44/solution/solution.cpp:12 2.60 /tmp/cpu-mode-161014-1777968509765480994-44/solution/solution.cpp:57 1.92 /tmp/cpu-mode-161014-1777968509765480994-44/solution/solution.cpp:26 0.94 /tmp/cpu-mode-161014-1777968509765480994-44/solution/solution.cpp:60 Samples | Source code & Disassembly of /box/solution-bin for cpu_core/cycles/P (4383 samples, percent: local period) -------------------------------------------------------------------------------------------------------------------------- : : : : 3 Disassembly of section .text: : : 5 0000000000001170
: : 6 } while (*p != '\n'); : 7 ++p; : 8 return v; : 9 } : : 11 int main() { 0 : 1170: push rbp 0 : 1171: mov rbp,rsp 0 : 1174: push r15 0 : 1176: push r14 0 : 1178: push r12 0 : 117a: push rbx 0 : 117b: sub rsp,0x90 0 : 1182: lea rsi,[rbp-0xb0] : 20 struct stat st; : 21 if (fstat(0, &st) != 0 || st.st_size <= 0) return 0; 0 : 1189: xor edi,edi 0 : 118b: call 1050 0 : 1190: test eax,eax 0 : 1192: jne 155d 0 : 1198: mov rbx,QWORD PTR [rbp-0x80] 0 : 119c: test rbx,rbx 0 : 119f: jle 155d : 29 const size_t len = (size_t)st.st_size; : 30 void* map = mmap(nullptr, len, PROT_READ, MAP_PRIVATE, 0, 0); 0 : 11a5: xor edi,edi 0 : 11a7: mov rsi,rbx 0 : 11aa: mov edx,0x1 0 : 11af: mov ecx,0x2 0 : 11b4: xor r8d,r8d 0 : 11b7: xor r9d,r9d 0 : 11ba: call 1060 : 38 if (map == MAP_FAILED) return 0; 0 : 11bf: cmp rax,0xffffffffffffffff 0 : 11c3: je 155d 0 : 11c9: movabs rsi,0xfffffffee1e5d000 0 : 11d3: movabs rdi,0xcfcfcfcfcfcfcfd0 0 : 11dd: movabs r8,0xff00ff00ff00ff 0 : 11e7: movabs r9,0xffff0000ffff : : 46 const char* p = (const char*)map; : 47 const char* end = p + len; 0 : 11f1: mov rcx,rax 0 : 11f4: add rcx,rbx 0 : 11f7: xor r14d,r14d 0 : 11fa: mov rdx,rax : 52 return v; : 53 } : 54 return parse_slow(q); : 55 }; : : 57 while (p + 22 <= end) { 0 : 11fd: cmp rbx,0x16 0 : 1201: jae 1320 : 60 _mm_prefetch(p + 2048, _MM_HINT_NTA); : 61 sum += parse_one(p); : 62 sum += parse_one(p); : 63 } : 64 while (p + 11 <= end) { 0 : 1207: lea r10,[rdx+0xb] 0 : 120b: cmp r10,rcx 0 : 120e: ja 1513 0 : 1214: data16 data16 cs nop WORD PTR [rax+rax*1+0x0] : 69 _mm_prefetch(p + 1536, _MM_HINT_NTA); 0 : 1220: prefetchnta BYTE PTR [rdx+0x600] : 71 if (__builtin_expect(q[9] == '\n', 1)) { 0 : 1227: movsx r11,BYTE PTR [rdx+0x9] 0 : 122c: cmp r11,0xa 0 : 1230: jne 1290 : 75 uint64_t v = (uint64_t)(q[0] - '0') * 100000000ULL + (uint64_t)parse8(q + 1); 0 : 1232: movsx r10,BYTE PTR [rdx] 0 : 1236: imul r10,r10,0x5f5e100 0 : 123d: add r10,rsi 0 : 1240: mov r11,QWORD PTR [rdx+0x1] : 80 v -= 0x3030303030303030ULL; 0 : 1244: add r11,rdi : 82 v = (v * 10ULL) + (v >> 8); 0 : 1247: lea r15,[r11+r11*4] 0 : 124b: shr r11,0x8 0 : 124f: lea r11,[r11+r15*2] : 86 v &= 0x00FF00FF00FF00FFULL; 0 : 1253: and r11,r8 : 88 v = (v * 100ULL) + (v >> 16); 0 : 1256: imul r15,r11,0x64 0 : 125a: shr r11,0x10 0 : 125e: add r11,r15 : 92 v &= 0x0000FFFF0000FFFFULL; 0 : 1261: and r11,r9 : 94 v = (v * 10000ULL) + (v >> 32); 0 : 1264: imul r15d,r11d,0x2710 0 : 126b: shr r11,0x20 0 : 126f: add r11d,r15d : 98 uint64_t v = (uint64_t)(q[0] - '0') * 100000000ULL + (uint64_t)parse8(q + 1); 0 : 1272: add r11,r10 : 100 q += 10; 0 : 1275: add rdx,0xa 0 : 1279: mov r10,rdx 0 : 127c: mov rdx,r10 : 104 sum += parse_one(p); 0 : 127f: add r14,r11 : 106 while (p + 11 <= end) { 0 : 1282: add r10,0xb 0 : 1286: cmp r10,rcx 0 : 1289: jbe 1220 0 : 128b: jmp 1513 : 111 if (__builtin_expect(q[10] == '\n', 1)) { 0 : 1290: cmp BYTE PTR [rdx+0xa],0xa 0 : 1294: jne 12e5 0 : 1296: mov r15,QWORD PTR [rdx] : 115 v -= 0x3030303030303030ULL; 0 : 1299: add r15,rdi : 117 v = (v * 10ULL) + (v >> 8); 0 : 129c: lea r12,[r15+r15*4] 0 : 12a0: shr r15,0x8 0 : 12a4: lea r15,[r15+r12*2] : 121 v &= 0x00FF00FF00FF00FFULL; 0 : 12a8: and r15,r8 : 123 v = (v * 100ULL) + (v >> 16); 0 : 12ab: imul r12,r15,0x64 0 : 12af: shr r15,0x10 0 : 12b3: add r15,r12 : 127 v &= 0x0000FFFF0000FFFFULL; 0 : 12b6: and r15,r9 : 129 v = (v * 10000ULL) + (v >> 32); 0 : 12b9: imul r12d,r15d,0x2710 0 : 12c0: shr r15,0x20 0 : 12c4: add r15d,r12d : 133 uint64_t v = (uint64_t)parse8(q) * 100ULL; 0 : 12c7: imul r15,r15,0x64 : 135 v += (uint64_t)(q[8] - '0') * 10ULL + (uint64_t)(q[9] - '0'); 0 : 12cb: movsx rdx,BYTE PTR [rdx+0x8] 0 : 12d0: lea rdx,[rdx+rdx*4] 0 : 12d4: lea rdx,[r11+rdx*2] 0 : 12d8: lea r11,[r15+rdx*1] 0 : 12dc: add r11,0xfffffffffffffdf0 0 : 12e3: jmp 127c : 142 v = v * 10 + (uint64_t)(*p - '0'); 0 : 12e5: movzx r15d,BYTE PTR [rdx] : 144 do { 0 : 12e9: inc rdx 0 : 12ec: mov r10,rdx 0 : 12ef: xor r11d,r11d : 148 v = v * 10 + (uint64_t)(*p - '0'); 0 : 12f2: lea rdx,[r11+r11*4] 0 : 12f6: movsx r11,r15b 0 : 12fa: lea r11,[r11+rdx*2] 0 : 12fe: add r11,0xffffffffffffffd0 : 153 } while (*p != '\n'); 0 : 1302: movzx r15d,BYTE PTR [r10] 0 : 1306: inc r10 0 : 1309: cmp r15b,0xa 0 : 130d: jne 12f2 0 : 130f: jmp 127c 0 : 1314: data16 data16 cs nop WORD PTR [rax+rax*1+0x0] : 160 _mm_prefetch(p + 2048, _MM_HINT_NTA); 16 : 1320: prefetchnta BYTE PTR [rdx+0x800] : 162 if (__builtin_expect(q[9] == '\n', 1)) { 24 : 1327: movsx r10,BYTE PTR [rdx+0x9] // /tmp/cpu-mode-161014-1777968509765480994-44/solution/solution.cpp:43 12 : 132c: cmp r10,0xa 78 : 1330: jne 13eb : 166 uint64_t v = (uint64_t)(q[0] - '0') * 100000000ULL + (uint64_t)parse8(q + 1); 246 : 1336: movsx r10,BYTE PTR [rdx] // /tmp/cpu-mode-161014-1777968509765480994-44/solution/solution.cpp:44 119 : 133a: imul r11,r10,0x5f5e100 107 : 1341: add r11,rsi 20 : 1344: mov r10,QWORD PTR [rdx+0x1] : 171 v -= 0x3030303030303030ULL; 15 : 1348: add r10,rdi : 173 v = (v * 10ULL) + (v >> 8); 4 : 134b: lea r15,[r10+r10*4] 38 : 134f: shr r10,0x8 // /tmp/cpu-mode-161014-1777968509765480994-44/solution/solution.cpp:13 4 : 1353: lea r10,[r10+r15*2] : 177 v &= 0x00FF00FF00FF00FFULL; 93 : 1357: and r10,r8 // /tmp/cpu-mode-161014-1777968509765480994-44/solution/solution.cpp:14 : 179 v = (v * 100ULL) + (v >> 16); 93 : 135a: imul r15,r10,0x64 // /tmp/cpu-mode-161014-1777968509765480994-44/solution/solution.cpp:15 7 : 135e: shr r10,0x10 24 : 1362: add r10,r15 : 183 v &= 0x0000FFFF0000FFFFULL; 64 : 1365: and r10,r9 // /tmp/cpu-mode-161014-1777968509765480994-44/solution/solution.cpp:16 : 185 v = (v * 10000ULL) + (v >> 32); 84 : 1368: imul r15d,r10d,0x2710 // /tmp/cpu-mode-161014-1777968509765480994-44/solution/solution.cpp:17 0 : 136f: shr r10,0x20 44 : 1373: add r10d,r15d : 189 uint64_t v = (uint64_t)(q[0] - '0') * 100000000ULL + (uint64_t)parse8(q + 1); 43 : 1376: add r10,r11 // /tmp/cpu-mode-161014-1777968509765480994-44/solution/solution.cpp:44 : 191 q += 10; 9 : 1379: add rdx,0xa : 193 if (__builtin_expect(q[9] == '\n', 1)) { 28 : 137d: movsx r11,BYTE PTR [rdx+0x9] // /tmp/cpu-mode-161014-1777968509765480994-44/solution/solution.cpp:43 18 : 1382: cmp r11,0xa 69 : 1386: jne 144a : 197 uint64_t v = (uint64_t)(q[0] - '0') * 100000000ULL + (uint64_t)parse8(q + 1); 224 : 138c: movsx r11,BYTE PTR [rdx] // /tmp/cpu-mode-161014-1777968509765480994-44/solution/solution.cpp:44 150 : 1390: imul r15,r11,0x5f5e100 99 : 1397: add r15,rsi 17 : 139a: mov r11,QWORD PTR [rdx+0x1] : 202 v -= 0x3030303030303030ULL; 3 : 139e: add r11,rdi : 204 v = (v * 10ULL) + (v >> 8); 15 : 13a1: lea r12,[r11+r11*4] 85 : 13a5: shr r11,0x8 // /tmp/cpu-mode-161014-1777968509765480994-44/solution/solution.cpp:13 12 : 13a9: lea r11,[r11+r12*2] : 208 v &= 0x00FF00FF00FF00FFULL; 26 : 13ad: and r11,r8 // /tmp/cpu-mode-161014-1777968509765480994-44/solution/solution.cpp:14 : 210 v = (v * 100ULL) + (v >> 16); 110 : 13b0: imul r12,r11,0x64 // /tmp/cpu-mode-161014-1777968509765480994-44/solution/solution.cpp:15 16 : 13b4: shr r11,0x10 26 : 13b8: add r11,r12 : 214 v &= 0x0000FFFF0000FFFFULL; 37 : 13bb: and r11,r9 // /tmp/cpu-mode-161014-1777968509765480994-44/solution/solution.cpp:16 : 216 v = (v * 10000ULL) + (v >> 32); 103 : 13be: imul r12d,r11d,0x2710 // /tmp/cpu-mode-161014-1777968509765480994-44/solution/solution.cpp:17 0 : 13c5: shr r11,0x20 39 : 13c9: add r11d,r12d : 220 uint64_t v = (uint64_t)(q[0] - '0') * 100000000ULL + (uint64_t)parse8(q + 1); 38 : 13cc: add r11,r15 // /tmp/cpu-mode-161014-1777968509765480994-44/solution/solution.cpp:44 : 222 q += 10; 19 : 13cf: add rdx,0xa 1 : 13d3: add r14,r10 : 225 sum += parse_one(p); 41 : 13d6: add r14,r11 // /tmp/cpu-mode-161014-1777968509765480994-44/solution/solution.cpp:60 : 227 while (p + 22 <= end) { 6 : 13d9: lea r10,[rdx+0x16] 0 : 13dd: cmp r10,rcx 114 : 13e0: jbe 1320 // /tmp/cpu-mode-161014-1777968509765480994-44/solution/solution.cpp:57 0 : 13e6: jmp 1207 : 232 if (__builtin_expect(q[10] == '\n', 1)) { 75 : 13eb: cmp BYTE PTR [rdx+0xa],0xa // /tmp/cpu-mode-161014-1777968509765480994-44/solution/solution.cpp:48 30 : 13ef: jne 14a9 12 : 13f5: mov r11,QWORD PTR [rdx] : 236 v -= 0x3030303030303030ULL; 64 : 13f8: add r11,rdi // /tmp/cpu-mode-161014-1777968509765480994-44/solution/solution.cpp:12 : 238 v = (v * 10ULL) + (v >> 8); 13 : 13fb: lea r15,[r11+r11*4] 2 : 13ff: shr r11,0x8 21 : 1403: lea r11,[r11+r15*2] : 242 v &= 0x00FF00FF00FF00FFULL; 74 : 1407: and r11,r8 // /tmp/cpu-mode-161014-1777968509765480994-44/solution/solution.cpp:14 : 244 v = (v * 100ULL) + (v >> 16); 50 : 140a: imul r15,r11,0x64 // /tmp/cpu-mode-161014-1777968509765480994-44/solution/solution.cpp:15 0 : 140e: shr r11,0x10 10 : 1412: add r11,r15 : 248 v &= 0x0000FFFF0000FFFFULL; 71 : 1415: and r11,r9 // /tmp/cpu-mode-161014-1777968509765480994-44/solution/solution.cpp:16 : 250 v = (v * 10000ULL) + (v >> 32); 46 : 1418: imul r15d,r11d,0x2710 // /tmp/cpu-mode-161014-1777968509765480994-44/solution/solution.cpp:17 1 : 141f: shr r11,0x20 9 : 1423: add r11d,r15d : 254 uint64_t v = (uint64_t)parse8(q) * 100ULL; 117 : 1426: imul r11,r11,0x64 // /tmp/cpu-mode-161014-1777968509765480994-44/solution/solution.cpp:49 : 256 v += (uint64_t)(q[8] - '0') * 10ULL + (uint64_t)(q[9] - '0'); 1 : 142a: movsx r15,BYTE PTR [rdx+0x8] 1 : 142f: lea r15,[r15+r15*4] 2 : 1433: lea r10,[r10+r15*2] 89 : 1437: add r10,r11 // /tmp/cpu-mode-161014-1777968509765480994-44/solution/solution.cpp:50 3 : 143a: add r10,0xfffffffffffffdf0 : 262 q += 11; 1 : 1441: add rdx,0xb 72 : 1445: jmp 137d // /tmp/cpu-mode-161014-1777968509765480994-44/solution/solution.cpp:0 : 265 if (__builtin_expect(q[10] == '\n', 1)) { 82 : 144a: cmp BYTE PTR [rdx+0xa],0xa // /tmp/cpu-mode-161014-1777968509765480994-44/solution/solution.cpp:48 32 : 144e: jne 14d5 11 : 1454: mov r15,QWORD PTR [rdx] : 269 v -= 0x3030303030303030ULL; 54 : 1457: add r15,rdi // /tmp/cpu-mode-161014-1777968509765480994-44/solution/solution.cpp:12 : 271 v = (v * 10ULL) + (v >> 8); 6 : 145a: lea r12,[r15+r15*4] 3 : 145e: shr r15,0x8 31 : 1462: lea r15,[r15+r12*2] // /tmp/cpu-mode-161014-1777968509765480994-44/solution/solution.cpp:13 : 275 v &= 0x00FF00FF00FF00FFULL; 86 : 1466: and r15,r8 // /tmp/cpu-mode-161014-1777968509765480994-44/solution/solution.cpp:14 : 277 v = (v * 100ULL) + (v >> 16); 57 : 1469: imul r12,r15,0x64 // /tmp/cpu-mode-161014-1777968509765480994-44/solution/solution.cpp:15 1 : 146d: shr r15,0x10 14 : 1471: add r15,r12 : 281 v &= 0x0000FFFF0000FFFFULL; 70 : 1474: and r15,r9 // /tmp/cpu-mode-161014-1777968509765480994-44/solution/solution.cpp:16 : 283 v = (v * 10000ULL) + (v >> 32); 33 : 1477: imul r12d,r15d,0x2710 // /tmp/cpu-mode-161014-1777968509765480994-44/solution/solution.cpp:17 1 : 147e: shr r15,0x20 19 : 1482: add r15d,r12d : 287 uint64_t v = (uint64_t)parse8(q) * 100ULL; 98 : 1485: imul r15,r15,0x64 // /tmp/cpu-mode-161014-1777968509765480994-44/solution/solution.cpp:49 : 289 v += (uint64_t)(q[8] - '0') * 10ULL + (uint64_t)(q[9] - '0'); 3 : 1489: movsx r12,BYTE PTR [rdx+0x8] 1 : 148e: lea r12,[r12+r12*4] 1 : 1492: lea r11,[r11+r12*2] 69 : 1496: add r11,r15 // /tmp/cpu-mode-161014-1777968509765480994-44/solution/solution.cpp:50 0 : 1499: add r11,0xfffffffffffffdf0 : 295 q += 11; 0 : 14a0: add rdx,0xb 85 : 14a4: jmp 13d3 // /tmp/cpu-mode-161014-1777968509765480994-44/solution/solution.cpp:0 : 298 v = v * 10 + (uint64_t)(*p - '0'); 31 : 14a9: movzx r11d,BYTE PTR [rdx] // /tmp/cpu-mode-161014-1777968509765480994-44/solution/solution.cpp:24 : 300 do { 0 : 14ad: inc rdx 0 : 14b0: xor r10d,r10d : 303 v = v * 10 + (uint64_t)(*p - '0'); 2 : 14b3: lea r10,[r10+r10*4] 4 : 14b7: movsx r11,r11b 75 : 14bb: lea r10,[r11+r10*2] 44 : 14bf: add r10,0xffffffffffffffd0 : 308 } while (*p != '\n'); 1 : 14c3: movzx r11d,BYTE PTR [rdx] 0 : 14c7: inc rdx 0 : 14ca: cmp r11b,0xa 49 : 14ce: jne 14b3 // /tmp/cpu-mode-161014-1777968509765480994-44/solution/solution.cpp:26 10 : 14d0: jmp 137d : 314 v = v * 10 + (uint64_t)(*p - '0'); 37 : 14d5: movzx r15d,BYTE PTR [rdx] // /tmp/cpu-mode-161014-1777968509765480994-44/solution/solution.cpp:24 : 316 do { 0 : 14d9: inc rdx 0 : 14dc: xor r11d,r11d : 319 v = v * 10 + (uint64_t)(*p - '0'); 2 : 14df: lea r11,[r11+r11*4] 4 : 14e3: movsx r15,r15b 84 : 14e7: lea r11,[r15+r11*2] 35 : 14eb: add r11,0xffffffffffffffd0 : 324 } while (*p != '\n'); 3 : 14ef: movzx r15d,BYTE PTR [rdx] 0 : 14f3: inc rdx 1 : 14f6: cmp r15b,0xa 35 : 14fa: jne 14df // /tmp/cpu-mode-161014-1777968509765480994-44/solution/solution.cpp:26 5 : 14fc: jmp 13d3 0 : 1501: data16 data16 data16 data16 data16 cs nop WORD PTR [rax+rax*1+0x0] : 331 while (p < end) { : 332 char c = *p++; : 333 if (c == '\n') break; : 334 v = v * 10 + (uint64_t)(c - '0'); : 335 } : 336 sum += v; 0 : 1510: add r14,rsi : 338 while (p < end) { 0 : 1513: cmp rdx,rcx 0 : 1516: jae 1543 0 : 1518: xor edi,edi 0 : 151a: nop WORD PTR [rax+rax*1+0x0] : 343 uint64_t v = 0; 0 : 1520: mov rsi,rdi : 345 while (p < end) { 0 : 1523: cmp rdx,rcx 0 : 1526: jae 1510 : 348 char c = *p++; 0 : 1528: movsx r8,BYTE PTR [rdx] 0 : 152c: inc rdx : 351 if (c == '\n') break; 0 : 152f: lea rdi,[rsi+rsi*4] 0 : 1533: lea rdi,[r8+rdi*2] 0 : 1537: add rdi,0xffffffffffffffd0 : 355 char c = *p++; 0 : 153b: cmp r8,0xa 0 : 153f: jne 1520 0 : 1541: jmp 1510 : 359 } : : 361 munmap(map, len); 0 : 1543: mov rdi,rax 0 : 1546: mov rsi,rbx 0 : 1549: call 1030 : 365 operator<<(long __n) : 366 { return _M_insert(__n); } : : 368 __ostream_type& : 369 operator<<(unsigned long __n) : 370 { return _M_insert(__n); } 0 : 154e: mov rdi,QWORD PTR [rip+0x2a73] # 3fc8 0 : 1555: mov rsi,r14 0 : 1558: call 1040 (unsigned long)@plt> : 374 std::cout << sum; : 375 return 0; : 376 } 0 : 155d: xor eax,eax 0 : 155f: add rsp,0x90 0 : 1566: pop rbx 0 : 1567: pop r12 0 : 1569: pop r14 0 : 156b: pop r15 0 : 156d: pop rbp 0 : 156e: ret