We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
This bug report is similar to others but in my opinion sufficiently different. This code:
/// Runs a partition-counting recurrence over a fixed table and returns `ways[N]` for `N = 100`.
///
/// `ways` has `N + 1` entries and is seeded with `ways[0] = 1`. For every part size `j` in
/// `1 .. N` (the upper bound is exclusive, so `N` itself is never used as a part), each
/// `ways[i]` with `i` from `j` up to and including `N` accumulates `ways[i - j]`.
///
/// The inner loop is written with the inclusive range `j ..= N`; this is the variant the
/// report says compiles to code that still contains a `panic_bounds_check`.
pub fn e76() -> u32 { const N: usize = 100; let mut ways = [0_u32; N + 1]; ways[0] = 1; for j in 1 .. N { for i in j ..= N { ways[i] += ways[i - j]; } } ways[N] }
Compiled with rustc 1.47.0-nightly cfc572cae 2020-07-30, the generated assembly contains a panic_bounds_check:
rustc 1.47.0-nightly cfc572cae 2020-07-30
example::e76: sub rsp, 408 vxorps xmm0, xmm0, xmm0 vmovups ymmword ptr [rsp + 376], ymm0 vmovups ymmword ptr [rsp + 360], ymm0 vmovups ymmword ptr [rsp + 328], ymm0 vmovups ymmword ptr [rsp + 296], ymm0 vmovups ymmword ptr [rsp + 264], ymm0 vmovups ymmword ptr [rsp + 232], ymm0 vmovups ymmword ptr [rsp + 200], ymm0 vmovups ymmword ptr [rsp + 168], ymm0 vmovups ymmword ptr [rsp + 136], ymm0 vmovups ymmword ptr [rsp + 104], ymm0 vmovups ymmword ptr [rsp + 72], ymm0 vmovups ymmword ptr [rsp + 40], ymm0 vmovups ymmword ptr [rsp + 8], ymm0 mov dword ptr [rsp + 4], 1 mov ecx, 1 mov r8d, 100 .LBB0_1: lea rdx, [rcx + 1] mov rax, rcx .LBB0_2: mov rdi, rax sub rdi, rcx cmp rdi, 100 ja .LBB0_11 lea rsi, [rax + 1] cmp rax, 100 cmovae rsi, r8 mov edi, dword ptr [rsp + 4*rdi + 4] add dword ptr [rsp + 4*rax + 4], edi cmp rsi, 100 ja .LBB0_4 cmp rax, 99 mov rax, rsi jbe .LBB0_2 .LBB0_4: cmp rdx, 100 je .LBB0_5 add rcx, 2 mov rsi, rdx .LBB0_8: mov rdi, rsi sub rdi, rdx cmp rdi, 100 ja .LBB0_11 lea rax, [rsi + 1] cmp rsi, 100 cmovae rax, r8 mov edi, dword ptr [rsp + 4*rdi + 4] add dword ptr [rsp + 4*rsi + 4], edi cmp rax, 100 ja .LBB0_1 cmp rsi, 99 mov rsi, rax jbe .LBB0_8 jmp .LBB0_1 .LBB0_5: mov eax, dword ptr [rsp + 404] add rsp, 408 vzeroupper ret .LBB0_11: lea rdx, [rip + .L__unnamed_1] mov esi, 101 vzeroupper call qword ptr [rip + core::panicking::panic_bounds_check@GOTPCREL] ud2
While the same code with ".. N + 1" instead of "..= N":
/// Runs a partition-counting recurrence over a fixed table and returns `ways[N]` for `N = 100`.
///
/// `ways` has `N + 1` entries and is seeded with `ways[0] = 1`. For every part size `j` in
/// `1 .. N` (the upper bound is exclusive, so `N` itself is never used as a part), each
/// `ways[i]` with `i` from `j` up to and including `N` accumulates `ways[i - j]`.
///
/// The inner loop is written with the half-open range `j .. N + 1`, which visits the same
/// indices as `j ..= N`; this is the variant the report says compiles without a
/// `panic_bounds_check` and gets vectorized.
pub fn e76() -> u32 { const N: usize = 100; let mut ways = [0_u32; N + 1]; ways[0] = 1; for j in 1 .. N { for i in j .. N + 1 { ways[i] += ways[i - j]; } } ways[N] }
Contains no panic_bounds_check (and gets vectorized):
example::e76: push rbp push r15 push r14 push r13 push r12 push rbx sub rsp, 404 vpxor xmm0, xmm0, xmm0 vmovdqu ymmword ptr [rsp + 372], ymm0 vmovdqu ymmword ptr [rsp + 356], ymm0 vmovdqu ymmword ptr [rsp + 324], ymm0 vmovdqu ymmword ptr [rsp + 292], ymm0 vmovdqu ymmword ptr [rsp + 260], ymm0 vmovdqu ymmword ptr [rsp + 228], ymm0 vmovdqu ymmword ptr [rsp + 196], ymm0 vmovdqu ymmword ptr [rsp + 164], ymm0 vmovdqu ymmword ptr [rsp + 132], ymm0 vmovdqu ymmword ptr [rsp + 100], ymm0 vmovdqu ymmword ptr [rsp + 68], ymm0 lea rax, [rsp + 4] vmovdqu ymmword ptr [rsp + 36], ymm0 vmovdqu ymmword ptr [rsp + 4], ymm0 mov dword ptr [rsp], 1 lea rcx, [rsp + 228] mov r9d, 1 mov r8d, 4 mov r10d, 92 mov r11, -4 xor r15d, r15d jmp .LBB0_2 .LBB0_1: inc r15 add rcx, 4 add r8, 4 dec r10 add r11, -4 add rax, -4 cmp r9, 100 je .LBB0_18 .LBB0_2: mov rsi, r9 mov r13d, 100 sub r13, r15 inc r9 cmp r13, 8 jb .LBB0_13 lea rdx, [rsp + 4*r13] lea rdi, [rsp + 4*r15] add rdi, 4 cmp rdi, rdx jb .LBB0_13 mov edi, 92 sub rdi, r15 mov rdx, rdi shr rdx, 3 inc rdx mov r14d, edx and r14d, 7 cmp rdi, 56 jae .LBB0_6 xor ebx, ebx jmp .LBB0_8 .LBB0_6: sub rdx, r14 xor ebx, ebx .LBB0_7: vmovdqu ymm0, ymmword ptr [rcx + 4*rbx - 224] vmovdqu ymm1, ymmword ptr [rcx + 4*rbx - 192] vmovdqu ymm2, ymmword ptr [rcx + 4*rbx - 160] vmovdqu ymm3, ymmword ptr [rcx + 4*rbx - 128] vpaddd ymm0, ymm0, ymmword ptr [rsp + 4*rbx] vmovdqu ymmword ptr [rcx + 4*rbx - 224], ymm0 vpaddd ymm0, ymm1, ymmword ptr [rsp + 4*rbx + 32] vmovdqu ymmword ptr [rcx + 4*rbx - 192], ymm0 vpaddd ymm0, ymm2, ymmword ptr [rsp + 4*rbx + 64] vmovdqu ymmword ptr [rcx + 4*rbx - 160], ymm0 vpaddd ymm0, ymm3, ymmword ptr [rsp + 4*rbx + 96] vmovdqu ymmword ptr [rcx + 4*rbx - 128], ymm0 vmovdqu ymm0, ymmword ptr [rcx + 4*rbx - 96] vpaddd ymm0, ymm0, ymmword ptr [rsp + 4*rbx + 128] vmovdqu ymmword ptr [rcx + 4*rbx - 96], ymm0 vmovdqu ymm0, ymmword ptr [rcx + 4*rbx - 64] vpaddd ymm0, ymm0, ymmword ptr [rsp + 4*rbx + 160] vmovdqu ymmword ptr [rcx + 4*rbx - 
64], ymm0 vmovdqu ymm0, ymmword ptr [rcx + 4*rbx - 32] vpaddd ymm0, ymm0, ymmword ptr [rsp + 4*rbx + 192] vmovdqu ymmword ptr [rcx + 4*rbx - 32], ymm0 vmovdqu ymm0, ymmword ptr [rcx + 4*rbx] vpaddd ymm0, ymm0, ymmword ptr [rsp + 4*rbx + 224] vmovdqu ymmword ptr [rcx + 4*rbx], ymm0 add rbx, 64 add rdx, -8 jne .LBB0_7 .LBB0_8: mov r12, r13 and r12, -8 test r14, r14 je .LBB0_11 mov edx, r10d shr dl, 3 inc dl movzx r14d, dl and r14d, 7 shl r14, 5 lea rbx, [rsp + 4*rbx] lea rdi, [rbx + r8] xor edx, edx .LBB0_10: vmovdqu ymm0, ymmword ptr [rdi + rdx] vpaddd ymm0, ymm0, ymmword ptr [rbx + rdx] vmovdqu ymmword ptr [rdi + rdx], ymm0 add rdx, 32 cmp r14, rdx jne .LBB0_10 .LBB0_11: cmp r13, r12 je .LBB0_1 add rsi, r12 .LBB0_13: mov edi, 1 sub edi, esi mov edx, 100 sub rdx, rsi and rdi, 3 je .LBB0_16 lea rbx, [rsp + r11] .LBB0_15: mov ebp, dword ptr [rbx + 4*rsi] add dword ptr [rsp + 4*rsi], ebp inc rsi dec rdi jne .LBB0_15 .LBB0_16: cmp rdx, 3 jb .LBB0_1 .LBB0_17: mov edx, dword ptr [rax + 4*rsi - 8] add dword ptr [rsp + 4*rsi], edx mov edx, dword ptr [rax + 4*rsi - 4] add dword ptr [rsp + 4*rsi + 4], edx mov edx, dword ptr [rax + 4*rsi] add dword ptr [rsp + 4*rsi + 8], edx mov edx, dword ptr [rax + 4*rsi + 4] add dword ptr [rsp + 4*rsi + 12], edx add rsi, 4 cmp rsi, 101 jne .LBB0_17 jmp .LBB0_1 .LBB0_18: mov eax, dword ptr [rsp + 400] add rsp, 404 pop rbx pop r12 pop r13 pop r14 pop r15 pop rbp vzeroupper ret
The text was updated successfully, but these errors were encountered:
No branches or pull requests
This bug report is similar to others but in my opinion sufficiently different. This code:
Using
rustc 1.47.0-nightly cfc572cae 2020-07-30
contains a panic_bounds_check:
While the same code with ".. N + 1" instead of "..= N":
Contains no panic_bounds_check (and gets vectorized):
The text was updated successfully, but these errors were encountered: