From 92ab87522f241442304de5f0714b1cf22553737f Mon Sep 17 00:00:00 2001 From: "yuqiang.xian@intel.com" Date: Mon, 21 Nov 2011 04:47:00 +0000 Subject: [PATCH] Improve modulo operation on 32bit platforms https://bugs.webkit.org/show_bug.cgi?id=72501 Reviewed by Filip Pizlo. Extend softModulo to support X86 and MIPS in baseline JIT. Apply the same optimization to 32bit DFG JIT. 1% gain on Kraken, tested on Linux Core i7 Nehalem 32bit. * dfg/DFGSpeculativeJIT.h: * dfg/DFGSpeculativeJIT32_64.cpp: (JSC::DFG::SpeculativeJIT::compileSoftModulo): (JSC::DFG::SpeculativeJIT::compile): * jit/JITArithmetic32_64.cpp: (JSC::JIT::emit_op_mod): (JSC::JIT::emitSlow_op_mod): * jit/JITOpcodes32_64.cpp: (JSC::JIT::softModulo): * wtf/Platform.h: git-svn-id: http://svn.webkit.org/repository/webkit/trunk@100881 268f45cc-cd09-0410-ab3c-d52691b4dbfc --- Source/JavaScriptCore/ChangeLog | 22 +++ Source/JavaScriptCore/dfg/DFGSpeculativeJIT.h | 5 +- .../dfg/DFGSpeculativeJIT32_64.cpp | 157 +++++++++++++----- .../JavaScriptCore/jit/JITArithmetic32_64.cpp | 74 +-------- Source/JavaScriptCore/jit/JITOpcodes32_64.cpp | 27 +-- Source/JavaScriptCore/wtf/Platform.h | 6 + 6 files changed, 165 insertions(+), 126 deletions(-) diff --git a/Source/JavaScriptCore/ChangeLog b/Source/JavaScriptCore/ChangeLog index e182269f48b..8e3268f0a13 100644 --- a/Source/JavaScriptCore/ChangeLog +++ b/Source/JavaScriptCore/ChangeLog @@ -1,3 +1,25 @@ +2011-11-20 Yuqiang Xian <yuqiang.xian@intel.com> + + Improve modulo operation on 32bit platforms + https://bugs.webkit.org/show_bug.cgi?id=72501 + + Reviewed by Filip Pizlo. + + Extend softModulo to support X86 and MIPS in baseline JIT. + Apply the same optimization to 32bit DFG JIT. + 1% gain on Kraken, tested on Linux Core i7 Nehalem 32bit. 
+ + * dfg/DFGSpeculativeJIT.h: + * dfg/DFGSpeculativeJIT32_64.cpp: + (JSC::DFG::SpeculativeJIT::compileSoftModulo): + (JSC::DFG::SpeculativeJIT::compile): + * jit/JITArithmetic32_64.cpp: + (JSC::JIT::emit_op_mod): + (JSC::JIT::emitSlow_op_mod): + * jit/JITOpcodes32_64.cpp: + (JSC::JIT::softModulo): + * wtf/Platform.h: + 2011-11-18 Filip Pizlo Inline caches that refer to otherwise dead objects should be cleared diff --git a/Source/JavaScriptCore/dfg/DFGSpeculativeJIT.h b/Source/JavaScriptCore/dfg/DFGSpeculativeJIT.h index b7300457813..5ffee8d622b 100644 --- a/Source/JavaScriptCore/dfg/DFGSpeculativeJIT.h +++ b/Source/JavaScriptCore/dfg/DFGSpeculativeJIT.h @@ -1917,7 +1917,10 @@ private: void compileValueToInt32(Node&); void compileGetByValOnByteArray(Node&); void compilePutByValForByteArray(GPRReg base, GPRReg property, Node&); - +#if USE(JSVALUE32_64) + void compileSoftModulo(Node&); +#endif + // It is acceptable to have structure be equal to scratch, so long as you're fine // with the structure GPR being clobbered. 
template diff --git a/Source/JavaScriptCore/dfg/DFGSpeculativeJIT32_64.cpp b/Source/JavaScriptCore/dfg/DFGSpeculativeJIT32_64.cpp index 49b6bd46cbc..a53d6ed3ecb 100644 --- a/Source/JavaScriptCore/dfg/DFGSpeculativeJIT32_64.cpp +++ b/Source/JavaScriptCore/dfg/DFGSpeculativeJIT32_64.cpp @@ -2052,6 +2052,121 @@ void SpeculativeJIT::emitBranch(Node& node) } } +void SpeculativeJIT::compileSoftModulo(Node& node) +{ + SpeculateIntegerOperand op1(this, node.child1()); + SpeculateIntegerOperand op2(this, node.child2()); + GPRReg op1Gpr = op1.gpr(); + GPRReg op2Gpr = op2.gpr(); + + speculationCheck(JSValueRegs(), NoNode, m_jit.branchTest32(JITCompiler::Zero, op2Gpr)); + +#if CPU(X86) + GPRTemporary eax(this, X86Registers::eax); + GPRTemporary edx(this, X86Registers::edx); + GPRReg temp2 = InvalidGPRReg; + if (op2Gpr == X86Registers::eax || op2Gpr == X86Registers::edx) { + temp2 = allocate(); + m_jit.move(op2Gpr, temp2); + op2Gpr = temp2; + } + GPRReg resultGPR = edx.gpr(); + GPRReg scratchGPR = eax.gpr(); +#else + GPRTemporary result(this); + GPRTemporary scratch(this); + GPRReg resultGPR = result.gpr(); + GPRReg scratchGPR = scratch.gpr(); +#endif + + GPRTemporary scratch2(this); + GPRReg scratchGPR2 = scratch2.gpr(); + JITCompiler::JumpList exitBranch; + + // resultGPR is to hold the ABS value of the dividend before final result is produced + m_jit.move(op1Gpr, resultGPR); + // scratchGPR2 is to hold the ABS value of the divisor + m_jit.move(op2Gpr, scratchGPR2); + + // Check for negative result remainder + // According to ECMA-262, the sign of the result equals the sign of the dividend + JITCompiler::Jump positiveDividend = m_jit.branch32(JITCompiler::GreaterThanOrEqual, op1Gpr, TrustedImm32(0)); + m_jit.neg32(resultGPR); + m_jit.move(TrustedImm32(1), scratchGPR); + JITCompiler::Jump saveCondition = m_jit.jump(); + + positiveDividend.link(&m_jit); + m_jit.move(TrustedImm32(0), scratchGPR); + + // Save the condition for negative remainder + saveCondition.link(&m_jit); + 
m_jit.push(scratchGPR); + + JITCompiler::Jump positiveDivisor = m_jit.branch32(JITCompiler::GreaterThanOrEqual, op2Gpr, TrustedImm32(0)); + m_jit.neg32(scratchGPR2); + + positiveDivisor.link(&m_jit); + exitBranch.append(m_jit.branch32(JITCompiler::LessThan, resultGPR, scratchGPR2)); + + // Power of two fast case + m_jit.move(scratchGPR2, scratchGPR); + m_jit.sub32(TrustedImm32(1), scratchGPR); + JITCompiler::Jump notPowerOfTwo = m_jit.branchTest32(JITCompiler::NonZero, scratchGPR, scratchGPR2); + m_jit.and32(scratchGPR, resultGPR); + exitBranch.append(m_jit.jump()); + + notPowerOfTwo.link(&m_jit); + +#if CPU(X86) + m_jit.move(resultGPR, eax.gpr()); + m_jit.assembler().cdq(); + m_jit.assembler().idivl_r(scratchGPR2); +#elif CPU(ARM_THUMB2) + GPRTemporary scratch3(this); + GPRReg scratchGPR3 = scratch3.gpr(); + m_jit.countLeadingZeros32(scratchGPR2, scratchGPR); + m_jit.countLeadingZeros32(resultGPR, scratchGPR3); + m_jit.sub32(scratchGPR3, scratchGPR); + + JITCompiler::Jump useFullTable = m_jit.branch32(JITCompiler::Equal, scratchGPR, TrustedImm32(31)); + + m_jit.neg32(scratchGPR); + m_jit.add32(TrustedImm32(31), scratchGPR); + + int elementSizeByShift = -1; + elementSizeByShift = 3; + m_jit.relativeTableJump(scratchGPR, elementSizeByShift); + + useFullTable.link(&m_jit); + // Modulo table + for (int i = 31; i > 0; --i) { + ShiftTypeAndAmount shift(SRType_LSL, i); + m_jit.assembler().sub_S(scratchGPR, resultGPR, scratchGPR2, shift); + m_jit.assembler().it(ARMv7Assembler::ConditionCS); + m_jit.assembler().mov(resultGPR, scratchGPR); + } + + JITCompiler::Jump lower = m_jit.branch32(JITCompiler::Below, resultGPR, scratchGPR2); + m_jit.sub32(scratchGPR2, resultGPR); + lower.link(&m_jit); +#endif // CPU(X86) + + exitBranch.link(&m_jit); + + // Check for negative remainder + m_jit.pop(scratchGPR); + JITCompiler::Jump positiveResult = m_jit.branch32(JITCompiler::Equal, scratchGPR, TrustedImm32(0)); + m_jit.neg32(resultGPR); + positiveResult.link(&m_jit); + + 
integerResult(resultGPR, m_compileIndex); + +#if CPU(X86) + if (temp2 != InvalidGPRReg) + unlock(temp2); +#endif +} + void SpeculativeJIT::compile(Node& node) { NodeType op = node.op; @@ -2541,36 +2656,11 @@ void SpeculativeJIT::compile(Node& node) } case ArithMod: { -#if CPU(X86) if (!at(node.child1()).shouldNotSpeculateInteger() && !at(node.child2()).shouldNotSpeculateInteger() && node.canSpeculateInteger()) { - SpeculateIntegerOperand op1(this, node.child1()); - SpeculateIntegerOperand op2(this, node.child2()); - GPRTemporary eax(this, X86Registers::eax); - GPRTemporary edx(this, X86Registers::edx); - GPRReg op1Gpr = op1.gpr(); - GPRReg op2Gpr = op2.gpr(); - - speculationCheck(JSValueRegs(), NoNode, m_jit.branchTest32(JITCompiler::Zero, op2Gpr)); - - GPRReg temp2 = InvalidGPRReg; - if (op2Gpr == X86Registers::eax || op2Gpr == X86Registers::edx) { - temp2 = allocate(); - m_jit.move(op2Gpr, temp2); - op2Gpr = temp2; - } - - m_jit.move(op1Gpr, eax.gpr()); - m_jit.assembler().cdq(); - m_jit.assembler().idivl_r(op2Gpr); - - if (temp2 != InvalidGPRReg) - unlock(temp2); - - integerResult(edx.gpr(), m_compileIndex); + compileSoftModulo(node); break; } -#endif SpeculateDoubleOperand op1(this, node.child1()); SpeculateDoubleOperand op2(this, node.child2()); @@ -2584,21 +2674,6 @@ void SpeculativeJIT::compile(Node& node) callOperation(fmodAsDFGOperation, result.fpr(), op1FPR, op2FPR); -#if !CPU(X86) - if (!at(node.child1()).shouldNotSpeculateInteger() && !at(node.child2()).shouldNotSpeculateInteger() - && node.canSpeculateInteger()) { - FPRTemporary scratch(this, op2); - GPRTemporary intResult(this); - - JITCompiler::JumpList failureCases; - m_jit.branchConvertDoubleToInt32(result.fpr(), intResult.gpr(), failureCases, scratch.fpr()); - speculationCheck(JSValueRegs(), NoNode, failureCases); - - integerResult(intResult.gpr(), m_compileIndex); - break; - } -#endif - doubleResult(result.fpr(), m_compileIndex); break; } diff --git 
a/Source/JavaScriptCore/jit/JITArithmetic32_64.cpp b/Source/JavaScriptCore/jit/JITArithmetic32_64.cpp index 71bfb737cd7..5f8fa1ebb02 100644 --- a/Source/JavaScriptCore/jit/JITArithmetic32_64.cpp +++ b/Source/JavaScriptCore/jit/JITArithmetic32_64.cpp @@ -1191,14 +1191,14 @@ void JIT::emitSlow_op_div(Instruction* currentInstruction, Vector /* ------------------------------ BEGIN: OP_MOD ------------------------------ */ -#if CPU(X86) || CPU(X86_64) || CPU(MIPS) - void JIT::emit_op_mod(Instruction* currentInstruction) { unsigned dst = currentInstruction[1].u.operand; unsigned op1 = currentInstruction[2].u.operand; unsigned op2 = currentInstruction[3].u.operand; +#if ENABLE(JIT_USE_SOFT_MODULO) + #if CPU(X86) || CPU(X86_64) // Make sure registers are correct for x86 IDIV instructions. ASSERT(regT0 == X86Registers::eax); @@ -1207,74 +1207,6 @@ void JIT::emit_op_mod(Instruction* currentInstruction) ASSERT(regT3 == X86Registers::ebx); #endif - if (isOperandConstantImmediateInt(op2) && getConstantOperand(op2).asInt32() != 0) { - emitLoad(op1, regT1, regT0); - move(Imm32(getConstantOperand(op2).asInt32()), regT2); - addSlowCase(branch32(NotEqual, regT1, TrustedImm32(JSValue::Int32Tag))); - if (getConstantOperand(op2).asInt32() == -1) - addSlowCase(branch32(Equal, regT0, TrustedImm32(0x80000000))); // -2147483648 / -1 => EXC_ARITHMETIC - } else { - emitLoad2(op1, regT1, regT0, op2, regT3, regT2); - addSlowCase(branch32(NotEqual, regT1, TrustedImm32(JSValue::Int32Tag))); - addSlowCase(branch32(NotEqual, regT3, TrustedImm32(JSValue::Int32Tag))); - - addSlowCase(branch32(Equal, regT0, TrustedImm32(0x80000000))); // -2147483648 / -1 => EXC_ARITHMETIC - addSlowCase(branch32(Equal, regT2, TrustedImm32(0))); // divide by 0 - } - - move(regT0, regT3); // Save dividend payload, in case of 0. 
-#if CPU(X86) || CPU(X86_64) - m_assembler.cdq(); - m_assembler.idivl_r(regT2); -#elif CPU(MIPS) - m_assembler.div(regT0, regT2); - m_assembler.mfhi(regT1); -#endif - - // If the remainder is zero and the dividend is negative, the result is -0. - Jump storeResult1 = branchTest32(NonZero, regT1); - Jump storeResult2 = branchTest32(Zero, regT3, TrustedImm32(0x80000000)); // not negative - emitStore(dst, jsNumber(-0.0)); - Jump end = jump(); - - storeResult1.link(this); - storeResult2.link(this); - emitStoreInt32(dst, regT1, (op1 == dst || op2 == dst)); - end.link(this); -} - -void JIT::emitSlow_op_mod(Instruction* currentInstruction, Vector::iterator& iter) -{ - unsigned dst = currentInstruction[1].u.operand; - unsigned op1 = currentInstruction[2].u.operand; - unsigned op2 = currentInstruction[3].u.operand; - - if (isOperandConstantImmediateInt(op2) && getConstantOperand(op2).asInt32() != 0) { - linkSlowCase(iter); // int32 check - if (getConstantOperand(op2).asInt32() == -1) - linkSlowCase(iter); // 0x80000000 check - } else { - linkSlowCase(iter); // int32 check - linkSlowCase(iter); // int32 check - linkSlowCase(iter); // 0 check - linkSlowCase(iter); // 0x80000000 check - } - - JITStubCall stubCall(this, cti_op_mod); - stubCall.addArgument(op1); - stubCall.addArgument(op2); - stubCall.call(dst); -} - -#else // CPU(X86) || CPU(X86_64) || CPU(MIPS) - -void JIT::emit_op_mod(Instruction* currentInstruction) -{ - unsigned dst = currentInstruction[1].u.operand; - unsigned op1 = currentInstruction[2].u.operand; - unsigned op2 = currentInstruction[3].u.operand; - -#if ENABLE(JIT_USE_SOFT_MODULO) emitLoad2(op1, regT1, regT0, op2, regT3, regT2); addSlowCase(branch32(NotEqual, regT1, TrustedImm32(JSValue::Int32Tag))); addSlowCase(branch32(NotEqual, regT3, TrustedImm32(JSValue::Int32Tag))); @@ -1312,8 +1244,6 @@ void JIT::emitSlow_op_mod(Instruction* currentInstruction, Vector #endif } -#endif // CPU(X86) || CPU(X86_64) - /* ------------------------------ END: OP_MOD 
------------------------------ */ } // namespace JSC diff --git a/Source/JavaScriptCore/jit/JITOpcodes32_64.cpp b/Source/JavaScriptCore/jit/JITOpcodes32_64.cpp index cf25fb9b6ea..e056f8c4885 100644 --- a/Source/JavaScriptCore/jit/JITOpcodes32_64.cpp +++ b/Source/JavaScriptCore/jit/JITOpcodes32_64.cpp @@ -1660,11 +1660,10 @@ void JIT::emitSlow_op_get_argument_by_val(Instruction* currentInstruction, Vecto #if ENABLE(JIT_USE_SOFT_MODULO) void JIT::softModulo() { - push(regT1); - push(regT3); move(regT2, regT3); move(regT0, regT2); move(TrustedImm32(0), regT1); + JumpList exitBranch; // Check for negative result reminder Jump positiveRegT3 = branch32(GreaterThanOrEqual, regT3, TrustedImm32(0)); @@ -1680,19 +1679,26 @@ void JIT::softModulo() // Save the condition for negative reminder push(regT1); - Jump exitBranch = branch32(LessThan, regT2, regT3); + exitBranch.append(branch32(LessThan, regT2, regT3)); // Power of two fast case move(regT3, regT0); sub32(TrustedImm32(1), regT0); - Jump powerOfTwo = branchTest32(NonZero, regT0, regT3); + Jump notPowerOfTwo = branchTest32(NonZero, regT0, regT3); and32(regT0, regT2); - powerOfTwo.link(this); - - and32(regT3, regT0); + exitBranch.append(jump()); - Jump exitBranch2 = branchTest32(Zero, regT0); + notPowerOfTwo.link(this); +#if CPU(X86) || CPU(X86_64) + move(regT2, regT0); + m_assembler.cdq(); + m_assembler.idivl_r(regT3); + move(regT1, regT2); +#elif CPU(MIPS) + m_assembler.div(regT2, regT3); + m_assembler.mfhi(regT2); +#else countLeadingZeros32(regT2, regT0); countLeadingZeros32(regT3, regT1); sub32(regT0, regT1); @@ -1729,9 +1735,9 @@ void JIT::softModulo() Jump lower = branch32(Below, regT2, regT3); sub32(regT3, regT2); lower.link(this); +#endif exitBranch.link(this); - exitBranch2.link(this); // Check for negative reminder pop(regT1); @@ -1740,9 +1746,6 @@ void JIT::softModulo() positiveResult.link(this); move(regT2, regT0); - - pop(regT3); - pop(regT1); ret(); } #endif // ENABLE(JIT_USE_SOFT_MODULO) diff --git 
a/Source/JavaScriptCore/wtf/Platform.h b/Source/JavaScriptCore/wtf/Platform.h index 20d3d09a1a7..a686616422e 100644 --- a/Source/JavaScriptCore/wtf/Platform.h +++ b/Source/JavaScriptCore/wtf/Platform.h @@ -955,6 +955,12 @@ #endif #endif +#if CPU(X86) || CPU(X86_64) || CPU(MIPS) +#if !defined(ENABLE_JIT_USE_SOFT_MODULO) +#define ENABLE_JIT_USE_SOFT_MODULO 1 +#endif +#endif + #if CPU(X86) && COMPILER(MSVC) #define JSC_HOST_CALL __fastcall #elif CPU(X86) && COMPILER(GCC) -- GitLab