`f16` generates code that uses the incorrect ABI for compiler-rt #123885

tgross35 · 2024-04-13T04:19:55Z

On the playground and godbolt, the following is non-deterministic:

#![feature(f16,f128)]
use std::mem::transmute;

#[inline(never)]
pub fn add(a: f16) -> f16 {
    10.0f16 + a
}

fn main() {
    let a = add(10.0);
    println!("{:#06x}", unsafe { transmute::<_, u16>(a) });
}

Running different times gives me different values like 0x9200, 0x06e0, 0xc000. The generated assembly is always the same:

example::add::hb3d3e297b1262388:
        push    rax
        mov     rax, qword ptr [rip + __extendhfsf2@GOTPCREL]
        call    rax
        movss   xmm1, dword ptr [rip + .LCPI1_0]
        addss   xmm0, xmm1
        mov     rax, qword ptr [rip + __truncsfhf2@GOTPCREL]
        call    rax
        pop     rax
        ret

Not really sure what would be going on here, seems unlikely to be a bug in extend/trunc.

This shows up on both the playground (nightly 2024-04-12 7942405) and godbolt (2024-04-11), but I cannot reproduce locally (nightly 2024-04-12, aarch64-darwin).

Links: https://play.rust-lang.org/?version=nightly&mode=debug&edition=2021&gist=b261d218028b8bddfe769658c195f529, https://rust.godbolt.org/z/fT1xcMMWz

@rustbot label +F-f16_and_f128

The text was updated successfully, but these errors were encountered:

tgross35 · 2024-04-13T04:20:08Z

@rustbot label -needs-triage

Noratrieb · 2024-04-13T06:14:06Z

sounds like something is not compiling correctly and it's just reading completely wrong data ...

Noratrieb · 2024-04-13T06:31:59Z

the full code

#![feature(f16,f128)]
use std::mem::transmute;

#[inline(never)]
pub fn add(a: f16) -> f16 {
    10.0f16 + a
}

#[inline(never)]
pub fn x() -> u16 {
    let a = add(10.0);
    unsafe { transmute::<_, u16>(a) }
}

results in

.LCPI0_0:
        .long   0x41200000
example::add:
        push    rax
        call    qword ptr [rip + __extendhfsf2@GOTPCREL]
        addss   xmm0, dword ptr [rip + .LCPI0_0]
        call    qword ptr [rip + __truncsfhf2@GOTPCREL]
        pop     rax
        ret

.LCPI1_0:
        .short  0x4900
example::x:
        push    rax
        pinsrw  xmm0, word ptr [rip + .LCPI1_0], 0
        call    qword ptr [rip + example::add@GOTPCREL]
        pextrw  eax, xmm0, 0
        pop     rcx
        ret

The 10.0 for add is stored as the full 0 padded 4 bytes and moved normally, while the 10.0 for x is only stored as 2 bytes and pinsrw is used, which, from what I understand, leaves the upper bits untouched. So it does seem like it just adds whatever garbage happens to be around in xmm0?

Noratrieb · 2024-04-13T06:35:16Z

The LLVM IR contains the exact same constants for both cases...

source_filename = "example.1b362aa86f0e2f27-cgu.0"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

define noundef half @_ZN7example3add17hb3d3e297b1262388E(half noundef %a) unnamed_addr #0 {
  %_0 = fadd half %a, 0xH4900
  ret half %_0
}

define noundef i16 @_ZN7example1x17h714cc7590e57d450E() unnamed_addr #0 {
  %a = tail call noundef half @_ZN7example3add17hb3d3e297b1262388E(half noundef 0xH4900)
  %_0 = bitcast half %a to i16
  ret i16 %_0
}

attributes #0 = { mustprogress nofree noinline norecurse nosync nounwind nonlazybind willreturn memory(none) uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" }

!llvm.module.flags = !{!0, !1}
!llvm.ident = !{!2}

!0 = !{i32 8, !"PIC Level", i32 2}
!1 = !{i32 2, !"RtLibUseGOT", i32 1}
!2 = !{!"rustc version 1.79.0-nightly (a07f3eb43 2024-04-11)"}

tgross35 · 2024-04-13T06:56:54Z

It seems like the bitcast is causing the weird codegen, ~~the C version has similar IR but a much simpler x https://llvm.godbolt.org/z/8cafr6KT7:~~ (I was looking at the wrong thing) https://llvm.godbolt.org/z/s1boEjqn8:

define dso_local half @add(half noundef %a) {
entry:
  %a.addr = alloca half, align 2
  store half %a, ptr %a.addr, align 2
  %0 = load half, ptr %a.addr, align 2
  %ext = fpext half %0 to float
  %add = fadd float 1.000000e+01, %ext
  %unpromotion = fptrunc float %add to half
  ret half %unpromotion
}

declare void @llvm.dbg.declare(metadata, metadata, metadata) #1

define dso_local signext i16 @x() {
entry:
  %a = alloca half, align 2
  %b = alloca half, align 2
  store half 0xH4900, ptr %a, align 2
  %0 = load half, ptr %a, align 2
  %call = call half @add(half noundef %0)
  store half %call, ptr %b, align 2
  %1 = load i16, ptr %b, align 2
  ret i16 %1
}

.LCPI0_0:
        .long   0x41200000                      # float 10
add:                                    # @add
        push    rbp
        mov     rbp, rsp
        sub     rsp, 16
        pextrw  eax, xmm0, 0
        mov     word ptr [rbp - 2], ax
        pinsrw  xmm0, word ptr [rbp - 2], 0
        call    __extendhfsf2@PLT
        movss   xmm1, dword ptr [rip + .LCPI0_0] # xmm1 = [1.0E+1,0.0E+0,0.0E+0,0.0E+0]
        addss   xmm0, xmm1
        call    __truncsfhf2@PLT
        add     rsp, 16
        pop     rbp
        ret
.LCPI1_0:
        .short  0x4900                          # half 10
x:                                      # @x
        push    rbp
        mov     rbp, rsp
        sub     rsp, 16
        pinsrw  xmm0, word ptr [rip + .LCPI1_0], 0
        pextrw  eax, xmm0, 0
        mov     word ptr [rbp - 2], ax
        pinsrw  xmm0, word ptr [rbp - 2], 0
        call    add
        pextrw  eax, xmm0, 0
        mov     word ptr [rbp - 4], ax
        movsx   eax, word ptr [rbp - 4]
        add     rsp, 16
        pop     rbp
        ret

.long 0x41200000 is the f32 value of 10.0, I guess it can do f16->f32 extension of the const operand at comptime

tgross35 · 2024-04-13T06:57:26Z

@rustbot label +A-LLVM

beetrees · 2024-04-13T23:52:25Z

It's an ABI issue with __extendhfsf2 and __truncsfhf2 from compiler-rt. LLVM is expecting f16s to be passed and returned in xmm0, but compiler-rt appears to have been compiled to pass them like u16s. In this example (playground link):

#![feature(f16)]

#[inline(never)]
pub fn f16_as_f32_cast(x: f16) -> f32 {
    x as f32
}

#[inline(never)]
pub fn f16_as_f32_intrinsic(x: f16) -> f32 {
    extern "C" {
        fn __extendhfsf2(x: u16) -> f32;
    }
    unsafe { __extendhfsf2(std::mem::transmute(x)) } 
}

fn main() {
	dbg!(f16_as_f32_cast(10.0));
	dbg!(f16_as_f32_intrinsic(10.0));
}

f16_as_f32_cast will return garbage whereas f16_as_f32_intrinsic will return the correct result.

tgross35 · 2024-04-14T05:41:50Z

Oh, yuck, if I am understanding this correctly it seems like compiler-rt emits a different ABI based on whether or not SSE2 is enabled llvm/llvm-project#56854. I guess that Rust must be building compiler-rt without SSE since the integer ABI works, but LLVM is expecting the +SSE rt that uses the float ABI.

Seems like there isn't an easy way to force the integer ABI either, running with -Ctarget-feature=-sse

error: example.rs:6:5: in function _ZN7example3add17hb3d3e297b1262388E half (half): SSE register return with SSE disabled

Any idea what Rust could do here?

Tangentially related #114479

beetrees · 2024-04-14T06:43:27Z

Defining the COMPILER_RT_HAS_FLOAT16 macro when compiling the relevant C intrinsics in compiler-builtins build.rs will fix the issue (if that macro is not defined, compiler-rt will use uint16_t instead of _Float16 to define the intrinsics). Note that _Float16 has limited platform support that depends on the C compiler (clang, gcc), so the long-term solution is to port the intrinsics to Rust in compiler-builtins.

beetrees · 2024-04-14T07:42:26Z

I've posted a PR that should fix this at rust-lang/compiler-builtins#593.

beetrees · 2024-04-14T09:05:43Z

Note that this bug affects stable Rust when C code that uses _Float16 is being compiled into a Rust binary. In this scenario the C code will also use the incorrectly-compiled builtin. Example to reproduce using the cc crate:

// src/main.rs
fn main() {
    extern "C" {
        fn do_cast(ignored: u32, x: u16) -> f32;
    }
    dbg!(unsafe { do_cast(123456, 0x4900 /* 10.0_f16.to_bits() */) });
}

// src/code.c
float do_cast(unsigned int ignored, unsigned short x) {
	union { _Float16 f; unsigned short bits; } u = { .bits = x };
	return (float) u.f;
}

// build.rs
fn main() {
	cc::Build::new().file("src/code.c").compile("code");
}

# Cargo.toml
[package]
name = "example"
version = "0.1.0"
edition = "2021"

[build-dependencies]
cc = "1.0.94"

This example should print 10.0, but doesn't on current stable on x86_64 due to the incorrectly-compiled builtin from Rust's build of compiler-rt (as opposed to using the correctly-compiled builtin that ships with gcc or clang).

usamoi · 2024-04-15T16:03:29Z

related issue: #118813

RalfJung · 2024-04-16T05:12:43Z

Seems like there isn't an easy way to force the integer ABI either, running with -Ctarget-feature=-sse

Disabling such basic ABI-relevant target features is anyway wildly unsafe and should probably be rejected by rustc. Lucky enough, on 64bit targets LLVM catches this rather than generating code with a non-standard ABI.

chorman0773 · 2024-04-17T17:19:36Z

It seems to be worse, though, because the non-standard ABI here is the one the library is expecting.
This is just a plain bug in compiler-rt.

tgross35 · 2024-04-18T23:30:37Z

@rustbot label -T-libs

beetrees · 2024-07-29T11:22:08Z

This has been fixed by #125016.

tgross35 · 2024-07-29T14:46:21Z

Amazing, thanks for fixing this!

rustbot added needs-triage This issue may need triage. Remove it if it has been sufficiently triaged. F-f16_and_f128 `#![feature(f16)]`, `#![feature(f128)]` T-libs Relevant to the library team, which will review and decide on the PR/issue. labels Apr 13, 2024

rustbot removed the needs-triage This issue may need triage. Remove it if it has been sufficiently triaged. label Apr 13, 2024

Noratrieb added T-compiler Relevant to the compiler team, which will review and decide on the PR/issue. C-bug Category: This is a bug. and removed T-libs Relevant to the library team, which will review and decide on the PR/issue. labels Apr 13, 2024

rustbot added the A-LLVM Area: Code generation parts specific to LLVM. Both correctness bugs and optimization-related issues. label Apr 13, 2024

Noratrieb added the I-unsound Issue: A soundness hole (worst kind of bug), see: https://en.wikipedia.org/wiki/Soundness label Apr 13, 2024

rustbot added the I-prioritize Issue: Indicates that prioritization has been requested for this issue. label Apr 13, 2024

Noratrieb added requires-nightly This issue requires a nightly compiler in some way. and removed I-prioritize Issue: Indicates that prioritization has been requested for this issue. labels Apr 13, 2024

Noratrieb added the A-ABI Area: Concerning the application binary interface (ABI) label Apr 14, 2024

beetrees mentioned this issue Apr 14, 2024

Add builtins for f16/f128 float conversions rust-lang/compiler-builtins#593

Merged

tgross35 mentioned this issue Apr 15, 2024

Add f16 and f128 #114607

Closed

tgross35 changed the title ~~f16 seems non-deterministic~~ f16 generates code that uses the incorrect ABI for compiler-rt Apr 18, 2024

rustbot added the T-libs Relevant to the library team, which will review and decide on the PR/issue. label Apr 18, 2024

rustbot removed the T-libs Relevant to the library team, which will review and decide on the PR/issue. label Apr 18, 2024

beetrees mentioned this issue Jun 19, 2024

Add more constants, functions, and tests for f16 and f128 #126608

Merged

tgross35 mentioned this issue Jun 19, 2024

Tracking Issue for f16 and f128 float types #116909

Open

84 tasks

beetrees mentioned this issue Jun 22, 2024

Replace f16 and f128 pattern matching stubs with real implementations #123088

Merged

tgross35 closed this as completed Jul 29, 2024

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

`f16` generates code that uses the incorrect ABI for compiler-rt #123885

`f16` generates code that uses the incorrect ABI for compiler-rt #123885

tgross35 commented Apr 13, 2024 •

edited

Loading

tgross35 commented Apr 13, 2024

Noratrieb commented Apr 13, 2024

Noratrieb commented Apr 13, 2024

Noratrieb commented Apr 13, 2024

tgross35 commented Apr 13, 2024 •

edited

Loading

tgross35 commented Apr 13, 2024

beetrees commented Apr 13, 2024

tgross35 commented Apr 14, 2024

beetrees commented Apr 14, 2024

beetrees commented Apr 14, 2024

beetrees commented Apr 14, 2024 •

edited

Loading

usamoi commented Apr 15, 2024

RalfJung commented Apr 16, 2024

chorman0773 commented Apr 17, 2024

tgross35 commented Apr 18, 2024

beetrees commented Jul 29, 2024

tgross35 commented Jul 29, 2024

f16 generates code that uses the incorrect ABI for compiler-rt #123885

f16 generates code that uses the incorrect ABI for compiler-rt #123885

Comments

tgross35 commented Apr 13, 2024 • edited Loading

tgross35 commented Apr 13, 2024

Noratrieb commented Apr 13, 2024

Noratrieb commented Apr 13, 2024

Noratrieb commented Apr 13, 2024

tgross35 commented Apr 13, 2024 • edited Loading

tgross35 commented Apr 13, 2024

beetrees commented Apr 13, 2024

tgross35 commented Apr 14, 2024

beetrees commented Apr 14, 2024

beetrees commented Apr 14, 2024

beetrees commented Apr 14, 2024 • edited Loading

usamoi commented Apr 15, 2024

RalfJung commented Apr 16, 2024

chorman0773 commented Apr 17, 2024

tgross35 commented Apr 18, 2024

beetrees commented Jul 29, 2024

tgross35 commented Jul 29, 2024

`f16` generates code that uses the incorrect ABI for compiler-rt #123885

`f16` generates code that uses the incorrect ABI for compiler-rt #123885

tgross35 commented Apr 13, 2024 •

edited

Loading

tgross35 commented Apr 13, 2024 •

edited

Loading

beetrees commented Apr 14, 2024 •

edited

Loading