Illegal instruction in std::remainder (libsystem_m.dylib) when floating point exceptions enabled on M1 mac

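Calling std::remainder(double(411.0), int(365)) with floating-point exception traps enabled crashes with "Illegal instruction: 4": an fadd inside remainder in libsystem_m.dylib operates on a NaN (see register d3 below). An MCVE program, an lldb backtrace with disassembly, and a system report are provided below.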


$ clang++ -g -arch arm64 -std=c++20 main.cpp -o test
$ ./test
ori_fpcr=0, new_fpcr=1792
std::fmod(simTimeInDays, numDays) = 46
Illegal instruction: 4

main.cpp

#include <cassert>
#include <cfenv>
#include <cmath>
#include <iostream>

#if !defined(__arm64__) || !defined(__APPLE__)
#  error "Meant to be run on arm64 apple"
#endif

/// Enables trapping for the requested floating-point exceptions by setting
/// the matching enable bits in the FPCR image of the current floating-point
/// environment (Apple arm64 `fenv_t::__fpcr`).
///
/// Bits are only ever added: exceptions that were already enabled stay enabled.
///
/// @param excepts Bitwise OR of `FE_*` exception macros; bits outside
///                `FE_ALL_EXCEPT` are ignored.
/// @return The set of exceptions that were enabled before the call (as `FE_*`
///         bits), or -1 if the environment could not be read or written.
inline int feenableexcept(unsigned int excepts) {
  // Automatic storage on purpose: the floating-point environment is
  // per-thread, so a shared function-local static buffer would be raced on
  // by concurrent callers.
  std::fenv_t env;
  if (std::fegetenv(&env) != 0) {
    return -1;
  }

  // The enable bits live 8 bits above the corresponding FE_* flag bits.
  constexpr unsigned int kEnableShift = 8u;
  const unsigned int valid_mask = static_cast<unsigned int>(FE_ALL_EXCEPT);

  const unsigned long long previous_fpcr = env.__fpcr;
  const unsigned int previously_enabled =
      static_cast<unsigned int>(previous_fpcr >> kEnableShift) & valid_mask;

  // Drop any bits that are not valid exception flags, then move them into
  // the enable-bit position.
  const unsigned int requested = excepts & valid_mask;
  const unsigned long long enable_bits = requested << kEnableShift;

  env.__fpcr = env.__fpcr | enable_bits;

  if (std::fesetenv(&env) != 0) {
    return -1;
  }
  return static_cast<int>(previously_enabled);
}

/// MCVE driver: enables trapping of divide-by-zero, invalid-operation and
/// overflow floating-point exceptions, prints the FPCR value before and
/// after, then evaluates std::fmod and std::remainder on the same operands.
///
/// @return 0 on normal completion, 1 if the exception traps could not be
///         enabled (in which case the reproduction would be meaningless).
int main([[maybe_unused]] int argc, [[maybe_unused]] const char** argv) {

  // Exceptions to trap on, and compile-time checks that shifting the FE_*
  // flags left by 8 yields exactly the FPCR trap-enable bits.
  constexpr unsigned int flags = FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW;
  static_assert(flags == 7);
  constexpr uint32_t fpcr_flags_shifted = flags << 8;
  constexpr uint32_t fpcr_flags = (__fpcr_trap_divbyzero | __fpcr_trap_invalid | __fpcr_trap_overflow);
  static_assert(fpcr_flags_shifted == fpcr_flags);
  static_assert(fpcr_flags_shifted == 1792);

  // Read the FPCR register directly before and after to show the traps were
  // really switched on.
  const uint32_t ori_fpcr = __builtin_arm_rsr("fpcr");
  if (feenableexcept(flags) == -1) {
    // Without the traps enabled the crash below cannot be reproduced.
    std::cerr << "feenableexcept failed\n";
    return 1;
  }
  const uint32_t new_fpcr = __builtin_arm_rsr("fpcr");

  // std::cout << "(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW) = " << flags << '\n';
  // std::cout << "((FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW) << 8) = " << fpcr_flags_shifted << '\n';
  // std::cout << "(__fpcr_trap_divbyzero | __fpcr_trap_invalid | __fpcr_trap_overflow) = " << fpcr_flags << '\n';
  std::cout << "ori_fpcr=" << ori_fpcr << ", new_fpcr=" << new_fpcr << '\n';

  const double simTimeInDays = 411.0;
  const int numDays = 365;

  // This is fine
  std::cout << "std::fmod(simTimeInDays, numDays) = " << std::fmod(simTimeInDays, numDays) << '\n';

  // This isn't: the call traps inside libsystem_m's remainder.
  // (Label fixed: it previously said std::fmod although std::remainder is called.)
  std::cout << "std::remainder(simTimeInDays, numDays) = " << std::remainder(simTimeInDays, numDays) << '\n';

  return 0;
}

backtrace: see attachment

(lldb) pro lau
Process 25418 launched: '/Users/julien/Software/test_floatingpointexceptions_cpp/mcve/test' (arm64)
ori_fpcr=0, new_fpcr=1792
std::fmod(simTimeInDays, numDays) = 46
Process 25418 stopped
* thread #1, queue = 'com.apple.main-thread', stop reason = EXC_BAD_INSTRUCTION (code=1, subcode=0x1e642863)
    frame #0: 0x000000018c1f29ec libsystem_m.dylib`remainder + 160
libsystem_m.dylib`remainder:
->  0x18c1f29ec <+160>: fadd   d3, d3, d4
    0x18c1f29f0 <+164>: fmov   x10, d3
    0x18c1f29f4 <+168>: lsr    x10, x10, #52
    0x18c1f29f8 <+172>: sub    w10, w10, #0x7fd
Target 0: (test) stopped.
(lldb) disassemble --frame
libsystem_m.dylib`remainder:
    0x18c1f294c <+0>:   fcmp   d0, d0
    0x18c1f2950 <+4>:   fccmp  d1, d1, #0x1, vc
    0x18c1f2954 <+8>:   b.vc   0x18c1f2960               ; <+20>
    0x18c1f2958 <+12>:  fadd   d0, d0, d1
    0x18c1f295c <+16>:  ret
    0x18c1f2960 <+20>:  fabs   d2, d0
    0x18c1f2964 <+24>:  mov    x8, #0x7ff0000000000000
    0x18c1f2968 <+28>:  fmov   d3, x8
    0x18c1f296c <+32>:  fcmp   d2, d3
    0x18c1f2970 <+36>:  b.eq   0x18c1f2a6c               ; <+288>
    0x18c1f2974 <+40>:  fcmp   d1, #0.0
    0x18c1f2978 <+44>:  b.eq   0x18c1f2a6c               ; <+288>
    0x18c1f297c <+48>:  fabs   d1, d1
    0x18c1f2980 <+52>:  mov    x8, #0x7ff0000000000000
    0x18c1f2984 <+56>:  fmov   d3, x8
    0x18c1f2988 <+60>:  fcmp   d1, d3
    0x18c1f298c <+64>:  b.eq   0x18c1f2be8               ; <+668>
    0x18c1f2990 <+68>:  fcmp   d0, #0.0
    0x18c1f2994 <+72>:  b.eq   0x18c1f2be8               ; <+668>
    0x18c1f2998 <+76>:  fmov   x8, d2
    0x18c1f299c <+80>:  and    x9, x8, #0x7fffffffffffffff
    0x18c1f29a0 <+84>:  ubfx   x8, x8, #52, #11
    0x18c1f29a4 <+88>:  sub    w10, w8, #0x1
    0x18c1f29a8 <+92>:  sub    w8, w8, #0x3ff
    0x18c1f29ac <+96>:  orr    x9, x9, #0x3ff0000000000000
    0x18c1f29b0 <+100>: fmov   d3, x9
    0x18c1f29b4 <+104>: fmov   d4, #-1.00000000
    0x18c1f29b8 <+108>: fadd   d3, d3, d4
    0x18c1f29bc <+112>: fmov   x9, d3
    0x18c1f29c0 <+116>: lsr    x9, x9, #52
    0x18c1f29c4 <+120>: sub    w9, w9, #0x7fd
    0x18c1f29c8 <+124>: cmp    w10, #0x7fe
    0x18c1f29cc <+128>: csel   w9, w8, w9, lo
    0x18c1f29d0 <+132>: fmov   x8, d1
    0x18c1f29d4 <+136>: and    x10, x8, #0x7fffffffffffffff
    0x18c1f29d8 <+140>: ubfx   x8, x8, #52, #11
    0x18c1f29dc <+144>: sub    w11, w8, #0x1
    0x18c1f29e0 <+148>: sub    w8, w8, #0x3ff
    0x18c1f29e4 <+152>: orr    x10, x10, #0x3ff0000000000000
    0x18c1f29e8 <+156>: fmov   d3, x10
->  0x18c1f29ec <+160>: fadd   d3, d3, d4
    0x18c1f29f0 <+164>: fmov   x10, d3
    0x18c1f29f4 <+168>: lsr    x10, x10, #52
    0x18c1f29f8 <+172>: sub    w10, w10, #0x7fd
    0x18c1f29fc <+176>: cmp    w11, #0x7fe
    0x18c1f2a00 <+180>: csel   w8, w8, w10, lo
    0x18c1f2a04 <+184>: subs   w10, w9, w8
    0x18c1f2a08 <+188>: b.mi   0x18c1f2a80               ; <+308>
    0x18c1f2a0c <+192>: cbz    w10, 0x18c1f2b88          ; <+572>
    0x18c1f2a10 <+196>: negs   w10, w8
    0x18c1f2a14 <+200>: cneg   w11, w8, pl
    0x18c1f2a18 <+204>: cmp    w11, #0x3ff
    0x18c1f2a1c <+208>: b.lo   0x18c1f2a88               ; <+316>
    0x18c1f2a20 <+212>: asr    w11, w10, #31
    0x18c1f2a24 <+216>: eor    w12, w11, #0x3fe
    0x18c1f2a28 <+220>: sub    w11, w12, w11
    0x18c1f2a2c <+224>: add    w12, w11, #0x3ff
    0x18c1f2a30 <+228>: lsl    x12, x12, #52
    0x18c1f2a34 <+232>: fmov   d4, x12
    0x18c1f2a38 <+236>: fmov   d3, d1
    0x18c1f2a3c <+240>: cmp    w8, #0x0
    0x18c1f2a40 <+244>: b.le   0x18c1f2a58               ; <+268>
    0x18c1f2a44 <+248>: fmul   d3, d3, d4
    0x18c1f2a48 <+252>: sub    w10, w10, w11
    0x18c1f2a4c <+256>: cmn    w10, #0x3fe
    0x18c1f2a50 <+260>: b.lt   0x18c1f2a44               ; <+248>
    0x18c1f2a54 <+264>: b      0x18c1f2a8c               ; <+320>
    0x18c1f2a58 <+268>: fmul   d3, d3, d4
    0x18c1f2a5c <+272>: sub    w10, w10, w11
    0x18c1f2a60 <+276>: cmp    w10, #0x3fe
    0x18c1f2a64 <+280>: b.gt   0x18c1f2a58               ; <+268>
    0x18c1f2a68 <+284>: b      0x18c1f2a8c               ; <+320>
    0x18c1f2a6c <+288>: adrp   x8, 14
    0x18c1f2a70 <+292>: ldr    d0, [x8, #0x8f0]
    0x18c1f2a74 <+296>: ldr    d1, [x8, #0x8f0]
    0x18c1f2a78 <+300>: fsub   d0, d0, d1
    0x18c1f2a7c <+304>: ret
    0x18c1f2a80 <+308>: mov    w8, #0x0
    0x18c1f2a84 <+312>: b      0x18c1f2b9c               ; <+592>
    0x18c1f2a88 <+316>: fmov   d3, d1
    0x18c1f2a8c <+320>: mov    x11, #0x3ff0000000000000
    0x18c1f2a90 <+324>: add    x10, x11, x10, lsl #52
    0x18c1f2a94 <+328>: fmov   d4, x10
    0x18c1f2a98 <+332>: negs   w12, w9
    0x18c1f2a9c <+336>: cneg   w10, w9, pl
    0x18c1f2aa0 <+340>: cmp    w10, #0x3ff
    0x18c1f2aa4 <+344>: b.lo   0x18c1f2aec               ; <+416>
    0x18c1f2aa8 <+348>: asr    w10, w12, #31
    0x18c1f2aac <+352>: eor    w13, w10, #0x3fe
    0x18c1f2ab0 <+356>: sub    w10, w13, w10
    0x18c1f2ab4 <+360>: add    w13, w10, #0x3ff
    0x18c1f2ab8 <+364>: lsl    x13, x13, #52
    0x18c1f2abc <+368>: fmov   d5, x13
    0x18c1f2ac0 <+372>: cmp    w9, #0x0
    0x18c1f2ac4 <+376>: b.le   0x18c1f2adc               ; <+400>
    0x18c1f2ac8 <+380>: fmul   d2, d2, d5
    0x18c1f2acc <+384>: sub    w12, w12, w10
    0x18c1f2ad0 <+388>: cmn    w12, #0x3fe
    0x18c1f2ad4 <+392>: b.lt   0x18c1f2ac8               ; <+380>
    0x18c1f2ad8 <+396>: b      0x18c1f2aec               ; <+416>
    0x18c1f2adc <+400>: fmul   d2, d2, d5
    0x18c1f2ae0 <+404>: sub    w12, w12, w10
    0x18c1f2ae4 <+408>: cmp    w12, #0x3fe
    0x18c1f2ae8 <+412>: b.gt   0x18c1f2adc               ; <+400>
    0x18c1f2aec <+416>: mov    w10, #0x0
    0x18c1f2af0 <+420>: fmul   d3, d3, d4
    0x18c1f2af4 <+424>: add    x11, x11, x12, lsl #52
    0x18c1f2af8 <+428>: fmov   d4, x11
    0x18c1f2afc <+432>: fmul   d2, d2, d4
    0x18c1f2b00 <+436>: fcmp   d2, d3
    0x18c1f2b04 <+440>: cset   w11, ge
    0x18c1f2b08 <+444>: fsub   d4, d2, d3
    0x18c1f2b0c <+448>: orr    w10, w10, w11
    0x18c1f2b10 <+452>: fcsel  d2, d2, d4, lt
    0x18c1f2b14 <+456>: lsl    w10, w10, #1
    0x18c1f2b18 <+460>: fadd   d2, d2, d2
    0x18c1f2b1c <+464>: sub    w9, w9, #0x1
    0x18c1f2b20 <+468>: cmp    w8, w9
    0x18c1f2b24 <+472>: b.ne   0x18c1f2b00               ; <+436>
    0x18c1f2b28 <+476>: cmp    w8, #0x0
    0x18c1f2b2c <+480>: cneg   w9, w8, mi
    0x18c1f2b30 <+484>: cmp    w9, #0x3ff
    0x18c1f2b34 <+488>: b.lo   0x18c1f2b78               ; <+556>
    0x18c1f2b38 <+492>: asr    w9, w8, #31
    0x18c1f2b3c <+496>: eor    w11, w9, #0x3fe
    0x18c1f2b40 <+500>: sub    w9, w11, w9
    0x18c1f2b44 <+504>: add    w11, w9, #0x3ff
    0x18c1f2b48 <+508>: lsl    x11, x11, #52
    0x18c1f2b4c <+512>: fmov   d3, x11
    0x18c1f2b50 <+516>: tbnz   w8, #0x1f, 0x18c1f2b68    ; <+540>
    0x18c1f2b54 <+520>: fmul   d2, d2, d3
    0x18c1f2b58 <+524>: sub    w8, w8, w9
    0x18c1f2b5c <+528>: cmp    w8, #0x3fe
    0x18c1f2b60 <+532>: b.gt   0x18c1f2b54               ; <+520>
    0x18c1f2b64 <+536>: b      0x18c1f2b78               ; <+556>
    0x18c1f2b68 <+540>: fmul   d2, d2, d3
    0x18c1f2b6c <+544>: sub    w8, w8, w9
    0x18c1f2b70 <+548>: cmn    w8, #0x3fe
    0x18c1f2b74 <+552>: b.lt   0x18c1f2b68               ; <+540>
    0x18c1f2b78 <+556>: mov    x9, #0x3ff0000000000000
    0x18c1f2b7c <+560>: add    x8, x9, x8, lsl #52
    0x18c1f2b80 <+564>: fmov   d3, x8
    0x18c1f2b84 <+568>: fmul   d2, d2, d3
    0x18c1f2b88 <+572>: fcmp   d2, d1
    0x18c1f2b8c <+576>: cset   w8, ge
    0x18c1f2b90 <+580>: fsub   d3, d2, d1
    0x18c1f2b94 <+584>: orr    w8, w10, w8
    0x18c1f2b98 <+588>: fcsel  d2, d2, d3, lt
    0x18c1f2b9c <+592>: mov    x9, #0x7fd0000000000000
    0x18c1f2ba0 <+596>: fmov   d3, x9
    0x18c1f2ba4 <+600>: fcmp   d2, d3
    0x18c1f2ba8 <+604>: b.pl   0x18c1f2bbc               ; <+624>
    0x18c1f2bac <+608>: fadd   d3, d2, d2
    0x18c1f2bb0 <+612>: fcmp   d3, d1
    0x18c1f2bb4 <+616>: b.le   0x18c1f2bcc               ; <+640>
    0x18c1f2bb8 <+620>: b      0x18c1f2bd8               ; <+652>
    0x18c1f2bbc <+624>: fmov   d3, #0.50000000
    0x18c1f2bc0 <+628>: fmul   d3, d1, d3
    0x18c1f2bc4 <+632>: fcmp   d2, d3
    0x18c1f2bc8 <+636>: b.gt   0x18c1f2bd8               ; <+652>
    0x18c1f2bcc <+640>: cset   w9, eq
    0x18c1f2bd0 <+644>: tst    w8, w9
    0x18c1f2bd4 <+648>: b.eq   0x18c1f2bdc               ; <+656>
    0x18c1f2bd8 <+652>: fsub   d2, d2, d1
    0x18c1f2bdc <+656>: fneg   d1, d2
    0x18c1f2be0 <+660>: fcmp   d0, #0.0
    0x18c1f2be4 <+664>: fcsel  d0, d1, d2, mi
    0x18c1f2be8 <+668>: ret
(lldb) register read d0 d1 d2 d3 x8 x9 w10 w8
      d0 = 411
      d1 = 365
      d2 = 411
      d3 = nan
      x8 = 0x0000000000000008
      x9 = 0x0000000000000008
     w10 = 0x00000000
      w8 = 0x00000008

$ system_profiler SPSoftwareDataType SPHardwareDataType
Software:

    System Software Overview:

      System Version: macOS 13.2 (22D49)
      Kernel Version: Darwin 22.3.0
      Boot Volume: Macintosh HD
      Boot Mode: Normal
      Secure Virtual Memory: Enabled
      System Integrity Protection: Enabled
      Time since boot: 7 hours, 58 minutes

Hardware:

    Hardware Overview:

      Model Name: MacBook Pro
      Model Identifier: MacBookPro18,2
      Model Number: Z14V000NBFN/A
      Chip: Apple M1 Max
      Total Number of Cores: 10 (8 performance and 2 efficiency)
      Memory: 64 GB
      System Firmware Version: 8419.80.7
      OS Loader Version: 8419.80.7
      Activation Lock Status: Enabled
$ otool -L test
test:
	/usr/lib/libc++.1.dylib (compatibility version 1.0.0, current version 1300.36.0)
	/usr/lib/libSystem.B.dylib (compatibility version 1.0.0, current version 1319.0.0)
$ clang++ --version
Apple clang version 14.0.0 (clang-1400.0.29.202)
Target: arm64-apple-darwin22.3.0
Thread model: posix
InstalledDir: /Library/Developer/CommandLineTools/usr/bin
Answered by Apple Staff in 802402022

Please try macOS 15 beta where we expect this to be resolved.

Please try macOS 15 beta where we expect this to be resolved.

Illegal instruction in std::remainder (libsystem_m.dylib) when floating point exceptions enabled on M1 mac
 
 
Q