Current Path : /usr/src/contrib/compiler-rt/lib/arm/ |
FreeBSD hs32.drive.ne.jp 9.1-RELEASE FreeBSD 9.1-RELEASE #1: Wed Jan 14 12:18:08 JST 2015 root@hs32.drive.ne.jp:/sys/amd64/compile/hs32 amd64 |
Current File : //usr/src/contrib/compiler-rt/lib/arm/udivmodsi4.S |
/*===-- udivmodsi4.S - 32-bit unsigned integer divide and modulus ---------===//
 *
 *                     The LLVM Compiler Infrastructure
 *
 * This file is dual licensed under the MIT and the University of Illinois Open
 * Source Licenses. See LICENSE.TXT for details.
 *
 *===----------------------------------------------------------------------===//
 *
 * This file implements the __udivmodsi4 (32-bit unsigned integer divide and
 * modulus) function for the ARM architecture.  A naive digit-by-digit
 * computation is employed for simplicity.
 *
 *===----------------------------------------------------------------------===*/

#include "../assembly.h"

/*
 * Prologue and epilogue.
 *
 * r4 and lr are both overwritten by the routine (see the register roles
 * below), so they are saved on entry together with r7.  After the push the
 * stack holds r4 at [sp], r7 at [sp, #4] and lr at [sp, #8]; r7 is then set
 * to sp + 4, i.e. it points at the saved r7 slot.  The epilogue restores r4
 * and r7 and returns by popping the saved lr straight into pc.
 */
#define ESTABLISH_FRAME \
    push   {r4, r7, lr}    ;\
    add     r7,     sp, #4
#define CLEAR_FRAME_AND_RETURN \
    pop    {r4, r7, pc}

/*
 * Register roles.
 *
 *   numer   (r0)  in: dividend; during the loop: the running remainder
 *   denom   (r1)  in: divisor (never modified)
 *   r2            in: address that receives the remainder (used unaliased)
 *   shamt   (r3)  current left-shift applied to the divisor
 *   trial   (r4)  result of the trial subtraction; also a temporary for
 *                 clz(dividend) during setup
 *   quot    (ip)  quotient being accumulated
 *   bit_one (lr)  the constant 1, shifted into place to set quotient bits
 */
#define numer   r0
#define denom   r1
#define shamt   r3
#define trial   r4
#define quot    ip
#define bit_one lr

.syntax unified
.align 3
/*
 * __udivmodsi4
 *
 *   In:   r0 = dividend, r1 = divisor, r2 = address for the remainder
 *   Out:  r0 = quotient; the remainder is stored to [r2]
 *
 *   A zero divisor does not trap: the quotient returned is 0 and the
 *   dividend is stored unchanged as the remainder.
 *
 *   NOTE(review): this presumably backs a C prototype along the lines of
 *   unsigned __udivmodsi4(unsigned a, unsigned b, unsigned *rem); confirm
 *   against the declaration used by callers.
 */
DEFINE_COMPILERRT_FUNCTION(__udivmodsi4)
//  Setup.  Shift-and-subtract division needs the divisor lined up under the
//  dividend, so compute shamt = clz(divisor) - clz(dividend).  Because shamt
//  never exceeds clz(divisor), shifting the divisor left by shamt cannot
//  lose any set bits.  Two early exits leave the quotient at zero and the
//  dividend untouched as the remainder: a zero divisor, and a negative
//  shamt (the dividend has fewer significant bits than the divisor).
    ESTABLISH_FRAME
    clz     trial,   numer              // trial = clz(dividend), temporary
    tst     denom,   denom              // Z <- (divisor == 0)
    clz     shamt,   denom              // clz and mov leave the flags alone,
    mov     quot,    #0                 //   so beq still sees the tst result
    beq     LOCAL_LABEL(return)         // divisor is zero: quotient stays 0
    mov     bit_one, #1
    subs    shamt,   shamt,  trial      // shamt = clz(divisor) - clz(dividend)
    blt     LOCAL_LABEL(return)         // shamt < 0: quotient stays 0

LOCAL_LABEL(mainLoop):
//  One quotient bit per pass, from the highest candidate bit downwards:
//
//      do {
//          if (numer >= denom << shamt) {
//              numer -= denom << shamt;
//              quot  |= 1 << shamt;
//              if (numer == 0) break;
//          }
//      } while (--shamt);
//
//  How the flags carry the control flow:
//    * The first subs sets C (condition HS) when the shifted divisor fits
//      and Z when it fits exactly.  orrhs and movhs do not touch the flags.
//    * subsne is skipped on an exact fit, so C = 1 and Z = 1 reach bhi,
//      which then falls through; this is the early break with numer == 0.
//    * Otherwise subsne decrements shamt and bhi loops again only while the
//      decremented shamt is still greater than zero.
//  A single conditional branch therefore serves both exits.  The price is
//  that the loop stops before processing shamt == 0 whenever it arrives
//  there by decrementing; that last bit is produced after the loop.
    subs    trial,   numer,  denom, lsl shamt
    orrhs   quot,    quot,   bit_one, lsl shamt
    movhs   numer,   trial
    subsne  shamt,   shamt,  #1
    bhi     LOCAL_LABEL(mainLoop)

//  Bit 0 of the quotient: one more trial subtraction with the unshifted
//  divisor.  If the loop already consumed bit 0 (it was entered with
//  shamt == 0) or ended on an exact fit, the remainder is already smaller
//  than the divisor, so this subtraction borrows and nothing changes.
    subs    trial,   numer,  denom
    orrhs   quot,    quot,   #1
    movhs   numer,   trial

LOCAL_LABEL(return):
//  Write the remainder through the pointer passed in r2 and hand the
//  quotient back in r0.
    str     numer,   [r2]
    mov     r0,      quot
    CLEAR_FRAME_AND_RETURN