Skip to content

Commit 878a679

Browse files
test: dont optimize to invalid bitcasts
1 parent 5a3e2a4 commit 878a679

File tree

2 files changed

+43
-0
lines changed

2 files changed

+43
-0
lines changed
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,17 @@
1+
//@ build-pass
2+
//@ compile-flags: -Copt-level=3
3+
4+
// regression test for https://github.com./rust-lang/rust/issues/110722
5+
// in --release we were optimizing to invalid bitcasts, due to a combination of MIR inlining and
6+
// mostly bad repr(simd) lowering which prevented even basic splats from working
7+
#![crate_type = "rlib"]
8+
#![feature(portable_simd)]
9+
use std::simd::*;
10+
use std::simd::num::*;
11+
12+
// Expands the 8-bit bitmask `mask` into an `[i32; 8]` by going through a
// `masksizex8` SIMD mask, so the mask -> integer-vector -> array lowering is
// exercised under `-Copt-level=3`.
//
// NOTE(review): nothing in the visible body needs `unsafe`; the qualifier looks
// like part of the original reproducer. Keep the statement shape as-is, since
// this is a codegen regression test.
pub unsafe fn mask_to_array(mask: u8) -> [i32; 8] {
13+
// Zero-initialised destination; its element type is fixed to `i32` by the return type.
let mut output = [0; 8];
14+
// `as _` lets inference choose whatever integer type `from_bitmask` accepts.
let m = masksizex8::from_bitmask(mask as _);
15+
// Mask -> integer lanes (per `Mask::to_int`: -1 for set lanes, 0 otherwise),
// cast each lane to `i32`, then copy the resulting array into `output`.
output.copy_from_slice(&m.to_int().cast::<i32>().to_array());
16+
output
17+
}
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,26 @@
1+
//@ build-pass
2+
//@ compile-flags: -Copt-level=3
3+
//@ only-x86_64
4+
5+
// regression test for https://github.com./rust-lang/rust/issues/110707
6+
// in --release we were optimizing to invalid bitcasts, due to a combination of MIR inlining and
7+
// mostly bad repr(simd) lowering which prevented even basic splats from working
8+
9+
#![crate_type = "rlib"]
10+
#![feature(portable_simd)]
11+
use std::simd::*;
12+
use std::arch::x86_64::*;
13+
14+
// Rounds the eight lanes of `i` by splitting the vector into two 4-lane halves,
// rounding each half with the SSE4.1 `_mm_round_ps` intrinsic, and stitching
// the halves back together into an `f32x8`.
//
// Safety: compiled with `sse4.1` enabled via `#[target_feature]`, so the caller
// must only invoke this on a CPU that supports SSE4.1.
//
// This is a codegen regression test: the array <-> SIMD round trips and the
// transmutes are what it exercises, so keep the statement shape as-is.
#[target_feature(enable = "sse4.1")]
15+
pub unsafe fn fast_round_sse(i: f32x8) -> f32x8 {
16+
let a = i.to_array();
17+
// Reinterpret the 8 floats as two consecutive groups of 4 (low half, high half).
let [low, high]: [[f32; 4]; 2] =
18+
// SAFETY: `[f32; 8]` and `[[f32; 4]; 2]` are both eight contiguous `f32`s;
// the explicit turbofish makes `transmute` check the sizes at compile time.
unsafe { std::mem::transmute::<[f32; 8], [[f32; 4]; 2]>(a) };
19+
20+
// Each half: array -> `f32x4` -> `__m128` (via `.into()`) -> `_mm_round_ps` -> back to `f32x4`.
// The const argument selects the rounding mode: by the flag names, round to
// nearest integer with floating-point exceptions suppressed.
let low = f32x4::from(_mm_round_ps::<{_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC}>(f32x4::from_array(low).into()));
21+
let high = f32x4::from(_mm_round_ps::<{_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC}>(f32x4::from_array(high).into()));
22+
23+
// Reassemble the two rounded halves into one flat array of 8 floats.
let a: [f32; 8] =
24+
// SAFETY: inverse of the reinterpretation above; same element type and total size.
unsafe { std::mem::transmute::<[[f32; 4]; 2], [f32; 8]>([low.to_array(), high.to_array()]) };
25+
f32x8::from_array(a)
26+
}

0 commit comments

Comments
 (0)