Improve modulo operation on 32bit platforms

https://bugs.webkit.org/show_bug.cgi?id=72501

Reviewed by Filip Pizlo.

Extend softModulo to support X86 and MIPS in baseline JIT.
Apply the same optimization to 32bit DFG JIT.
1% gain on Kraken, tested on Linux Core i7 Nehalem 32bit.

* dfg/DFGSpeculativeJIT.h:
* dfg/DFGSpeculativeJIT32_64.cpp:
(JSC::DFG::SpeculativeJIT::compileSoftModulo):
(JSC::DFG::SpeculativeJIT::compile):
* jit/JITArithmetic32_64.cpp:
(JSC::JIT::emit_op_mod):
(JSC::JIT::emitSlow_op_mod):
* jit/JITOpcodes32_64.cpp:
(JSC::JIT::softModulo):
* wtf/Platform.h:


git-svn-id: http://svn.webkit.org/repository/webkit/trunk@100881 268f45cc-cd09-0410-ab3c-d52691b4dbfc
parent a147a4d3
2011-11-20 Yuqiang Xian <yuqiang.xian@intel.com>
Improve modulo operation on 32bit platforms
https://bugs.webkit.org/show_bug.cgi?id=72501
Reviewed by Filip Pizlo.
Extend softModulo to support X86 and MIPS in baseline JIT.
Apply the same optimization to 32bit DFG JIT.
1% gain on Kraken, tested on Linux Core i7 Nehalem 32bit.
* dfg/DFGSpeculativeJIT.h:
* dfg/DFGSpeculativeJIT32_64.cpp:
(JSC::DFG::SpeculativeJIT::compileSoftModulo):
(JSC::DFG::SpeculativeJIT::compile):
* jit/JITArithmetic32_64.cpp:
(JSC::JIT::emit_op_mod):
(JSC::JIT::emitSlow_op_mod):
* jit/JITOpcodes32_64.cpp:
(JSC::JIT::softModulo):
* wtf/Platform.h:
2011-11-18 Filip Pizlo <fpizlo@apple.com>
Inline caches that refer to otherwise dead objects should be cleared
......@@ -1917,7 +1917,10 @@ private:
void compileValueToInt32(Node&);
void compileGetByValOnByteArray(Node&);
void compilePutByValForByteArray(GPRReg base, GPRReg property, Node&);
#if USE(JSVALUE32_64)
void compileSoftModulo(Node&);
#endif
// It is acceptable to have structure be equal to scratch, so long as you're fine
// with the structure GPR being clobbered.
template<typename T>
......
......@@ -2052,6 +2052,121 @@ void SpeculativeJIT::emitBranch(Node& node)
}
}
// Emits inline code that computes the integer remainder child1 % child2,
// speculating that both children are integers. The generated code:
//   1. fails speculation when the divisor is zero,
//   2. negates negative operands so the core computation runs on the negated
//      copies, remembering on the machine stack whether the dividend was negative,
//   3. short-circuits when the dividend copy is smaller than the divisor copy or
//      when the divisor is a power of two,
//   4. otherwise divides (idivl on X86, a shift-and-subtract table on ARM_THUMB2),
//   5. negates the remainder again if the dividend was negative.
// The result is reported through integerResult().
// NOTE(review): only CPU(X86) and CPU(ARM_THUMB2) emit a division sequence in
// step 4; on any other CPU the non-power-of-two path falls straight through to
// the exit label. Verify that this function is only reached on those two targets.
void SpeculativeJIT::compileSoftModulo(Node& node)
{
SpeculateIntegerOperand op1(this, node.child1());
SpeculateIntegerOperand op2(this, node.child2());
GPRReg op1Gpr = op1.gpr();
GPRReg op2Gpr = op2.gpr();
// Speculation check that fires when the divisor is zero.
speculationCheck(JSValueRegs(), NoNode, m_jit.branchTest32(JITCompiler::Zero, op2Gpr));
#if CPU(X86)
// The cdq/idivl sequence below works on eax and edx, so both are reserved here.
GPRTemporary eax(this, X86Registers::eax);
GPRTemporary edx(this, X86Registers::edx);
GPRReg temp2 = InvalidGPRReg;
// If the divisor already lives in eax or edx, copy it to a freshly allocated
// register so the sequence below does not overwrite it.
if (op2Gpr == X86Registers::eax || op2Gpr == X86Registers::edx) {
temp2 = allocate();
m_jit.move(op2Gpr, temp2);
op2Gpr = temp2;
}
// idivl leaves the remainder in edx, so edx doubles as the result register.
GPRReg resultGPR = edx.gpr();
GPRReg scratchGPR = eax.gpr();
#else
GPRTemporary result(this);
GPRTemporary scratch(this);
GPRReg resultGPR = result.gpr();
GPRReg scratchGPR = scratch.gpr();
#endif
GPRTemporary scratch2(this);
GPRReg scratchGPR2 = scratch2.gpr();
// Collects every early exit that skips the division sequence.
JITCompiler::JumpList exitBranch;
// resultGPR is to hold the ABS value of the dividend before final result is produced
m_jit.move(op1Gpr, resultGPR);
// scratchGPR2 is to hold the ABS value of the divisor
m_jit.move(op2Gpr, scratchGPR2);
// Check for negative result remainder
// According to ECMA-262, the sign of the result equals the sign of the dividend
JITCompiler::Jump positiveDividend = m_jit.branch32(JITCompiler::GreaterThanOrEqual, op1Gpr, TrustedImm32(0));
// Negative dividend: negate the copy and set the flag (scratchGPR) to 1.
m_jit.neg32(resultGPR);
m_jit.move(TrustedImm32(1), scratchGPR);
JITCompiler::Jump saveCondition = m_jit.jump();
positiveDividend.link(&m_jit);
// Non-negative dividend: the flag is 0.
m_jit.move(TrustedImm32(0), scratchGPR);
// Save the condition for negative remainder
saveCondition.link(&m_jit);
// The flag is kept on the machine stack so scratchGPR can be reused below;
// it is popped again right after exitBranch is linked.
m_jit.push(scratchGPR);
// Negate the divisor copy when the divisor is negative.
JITCompiler::Jump positiveDivisor = m_jit.branch32(JITCompiler::GreaterThanOrEqual, op2Gpr, TrustedImm32(0));
m_jit.neg32(scratchGPR2);
positiveDivisor.link(&m_jit);
// Dividend copy smaller than divisor copy (signed compare): the dividend copy
// already is the remainder, so skip the division.
exitBranch.append(m_jit.branch32(JITCompiler::LessThan, resultGPR, scratchGPR2));
// Power of two fast case
// scratchGPR = divisor - 1. When (divisor - 1) & divisor is zero the remainder
// is simply dividend & (divisor - 1).
m_jit.move(scratchGPR2, scratchGPR);
m_jit.sub32(TrustedImm32(1), scratchGPR);
JITCompiler::Jump notPowerOfTwo = m_jit.branchTest32(JITCompiler::NonZero, scratchGPR, scratchGPR2);
m_jit.and32(scratchGPR, resultGPR);
exitBranch.append(m_jit.jump());
notPowerOfTwo.link(&m_jit);
#if CPU(X86)
// Move the dividend copy into eax, sign-extend it into edx with cdq, and
// divide; the remainder ends up in edx, which is resultGPR.
m_jit.move(resultGPR, eax.gpr());
m_jit.assembler().cdq();
m_jit.assembler().idivl_r(scratchGPR2);
#elif CPU(ARM_THUMB2)
GPRTemporary scratch3(this);
GPRReg scratchGPR3 = scratch3.gpr();
// Presumably the arguments are (source, destination): scratchGPR receives the
// leading-zero count of the divisor and scratchGPR3 that of the dividend -- TODO confirm.
m_jit.countLeadingZeros32(scratchGPR2, scratchGPR);
m_jit.countLeadingZeros32(resultGPR, scratchGPR3);
m_jit.sub32(scratchGPR3, scratchGPR);
// A difference of 31 means the whole table below has to run.
JITCompiler::Jump useFullTable = m_jit.branch32(JITCompiler::Equal, scratchGPR, TrustedImm32(31));
// Otherwise scratchGPR = 31 - difference is used as the index for the table jump,
// presumably selecting how many leading table entries to skip -- TODO confirm.
m_jit.neg32(scratchGPR);
m_jit.add32(TrustedImm32(31), scratchGPR);
// NOTE(review): the -1 initializer is dead; the value is overwritten with 3 on
// the next line. Presumably each table entry occupies 1 << 3 bytes -- TODO confirm.
int elementSizeByShift = -1;
elementSizeByShift = 3;
m_jit.relativeTableJump(scratchGPR, elementSizeByShift);
useFullTable.link(&m_jit);
// Modulo table
// One entry per shift amount from 31 down to 1. Each entry appears to compute
// scratchGPR = resultGPR - (scratchGPR2 << i) while setting the flags, and to
// commit the difference to resultGPR only when the carry flag is set (the IT
// block makes the following mov conditional on CS).
for (int i = 31; i > 0; --i) {
ShiftTypeAndAmount shift(SRType_LSL, i);
m_jit.assembler().sub_S(scratchGPR, resultGPR, scratchGPR2, shift);
m_jit.assembler().it(ARMv7Assembler::ConditionCS);
m_jit.assembler().mov(resultGPR, scratchGPR);
}
// Final unshifted step: subtract the divisor once more unless the running
// remainder is already below it (unsigned compare).
JITCompiler::Jump lower = m_jit.branch32(JITCompiler::Below, resultGPR, scratchGPR2);
m_jit.sub32(scratchGPR2, resultGPR);
lower.link(&m_jit);
#endif // CPU(X86) / CPU(ARM_THUMB2)
exitBranch.link(&m_jit);
// Check for negative remainder
// Restore the dividend-sign flag pushed earlier; negate the result when it is non-zero.
m_jit.pop(scratchGPR);
JITCompiler::Jump positiveResult = m_jit.branch32(JITCompiler::Equal, scratchGPR, TrustedImm32(0));
m_jit.neg32(resultGPR);
positiveResult.link(&m_jit);
integerResult(resultGPR, m_compileIndex);
#if CPU(X86)
// Release the register that was allocated to move the divisor out of eax/edx.
if (temp2 != InvalidGPRReg)
unlock(temp2);
#endif
}
void SpeculativeJIT::compile(Node& node)
{
NodeType op = node.op;
......@@ -2541,36 +2656,11 @@ void SpeculativeJIT::compile(Node& node)
}
case ArithMod: {
#if CPU(X86)
if (!at(node.child1()).shouldNotSpeculateInteger() && !at(node.child2()).shouldNotSpeculateInteger()
&& node.canSpeculateInteger()) {
SpeculateIntegerOperand op1(this, node.child1());
SpeculateIntegerOperand op2(this, node.child2());
GPRTemporary eax(this, X86Registers::eax);
GPRTemporary edx(this, X86Registers::edx);
GPRReg op1Gpr = op1.gpr();
GPRReg op2Gpr = op2.gpr();
speculationCheck(JSValueRegs(), NoNode, m_jit.branchTest32(JITCompiler::Zero, op2Gpr));
GPRReg temp2 = InvalidGPRReg;
if (op2Gpr == X86Registers::eax || op2Gpr == X86Registers::edx) {
temp2 = allocate();
m_jit.move(op2Gpr, temp2);
op2Gpr = temp2;
}
m_jit.move(op1Gpr, eax.gpr());
m_jit.assembler().cdq();
m_jit.assembler().idivl_r(op2Gpr);
if (temp2 != InvalidGPRReg)
unlock(temp2);
integerResult(edx.gpr(), m_compileIndex);
compileSoftModulo(node);
break;
}
#endif
SpeculateDoubleOperand op1(this, node.child1());
SpeculateDoubleOperand op2(this, node.child2());
......@@ -2584,21 +2674,6 @@ void SpeculativeJIT::compile(Node& node)
callOperation(fmodAsDFGOperation, result.fpr(), op1FPR, op2FPR);
#if !CPU(X86)
if (!at(node.child1()).shouldNotSpeculateInteger() && !at(node.child2()).shouldNotSpeculateInteger()
&& node.canSpeculateInteger()) {
FPRTemporary scratch(this, op2);
GPRTemporary intResult(this);
JITCompiler::JumpList failureCases;
m_jit.branchConvertDoubleToInt32(result.fpr(), intResult.gpr(), failureCases, scratch.fpr());
speculationCheck(JSValueRegs(), NoNode, failureCases);
integerResult(intResult.gpr(), m_compileIndex);
break;
}
#endif
doubleResult(result.fpr(), m_compileIndex);
break;
}
......
......@@ -1191,14 +1191,14 @@ void JIT::emitSlow_op_div(Instruction* currentInstruction, Vector<SlowCaseEntry>
/* ------------------------------ BEGIN: OP_MOD ------------------------------ */
#if CPU(X86) || CPU(X86_64) || CPU(MIPS)
void JIT::emit_op_mod(Instruction* currentInstruction)
{
unsigned dst = currentInstruction[1].u.operand;
unsigned op1 = currentInstruction[2].u.operand;
unsigned op2 = currentInstruction[3].u.operand;
#if ENABLE(JIT_USE_SOFT_MODULO)
#if CPU(X86) || CPU(X86_64)
// Make sure registers are correct for x86 IDIV instructions.
ASSERT(regT0 == X86Registers::eax);
......@@ -1207,74 +1207,6 @@ void JIT::emit_op_mod(Instruction* currentInstruction)
ASSERT(regT3 == X86Registers::ebx);
#endif
if (isOperandConstantImmediateInt(op2) && getConstantOperand(op2).asInt32() != 0) {
emitLoad(op1, regT1, regT0);
move(Imm32(getConstantOperand(op2).asInt32()), regT2);
addSlowCase(branch32(NotEqual, regT1, TrustedImm32(JSValue::Int32Tag)));
if (getConstantOperand(op2).asInt32() == -1)
addSlowCase(branch32(Equal, regT0, TrustedImm32(0x80000000))); // -2147483648 / -1 => EXC_ARITHMETIC
} else {
emitLoad2(op1, regT1, regT0, op2, regT3, regT2);
addSlowCase(branch32(NotEqual, regT1, TrustedImm32(JSValue::Int32Tag)));
addSlowCase(branch32(NotEqual, regT3, TrustedImm32(JSValue::Int32Tag)));
addSlowCase(branch32(Equal, regT0, TrustedImm32(0x80000000))); // -2147483648 / -1 => EXC_ARITHMETIC
addSlowCase(branch32(Equal, regT2, TrustedImm32(0))); // divide by 0
}
move(regT0, regT3); // Save dividend payload, in case of 0.
#if CPU(X86) || CPU(X86_64)
m_assembler.cdq();
m_assembler.idivl_r(regT2);
#elif CPU(MIPS)
m_assembler.div(regT0, regT2);
m_assembler.mfhi(regT1);
#endif
// If the remainder is zero and the dividend is negative, the result is -0.
Jump storeResult1 = branchTest32(NonZero, regT1);
Jump storeResult2 = branchTest32(Zero, regT3, TrustedImm32(0x80000000)); // not negative
emitStore(dst, jsNumber(-0.0));
Jump end = jump();
storeResult1.link(this);
storeResult2.link(this);
emitStoreInt32(dst, regT1, (op1 == dst || op2 == dst));
end.link(this);
}
// Slow path for op_mod. Links every slow-case jump that emit_op_mod registered
// for this instruction, then calls the cti_op_mod stub with both operands and
// stores the stub's result into dst.
void JIT::emitSlow_op_mod(Instruction* currentInstruction, Vector<SlowCaseEntry>::iterator& iter)
{
    unsigned dst = currentInstruction[1].u.operand;
    unsigned op1 = currentInstruction[2].u.operand;
    unsigned op2 = currentInstruction[3].u.operand;

    // The number of slow cases to link must mirror what the fast path added:
    //  - general case (4): int32 tag check on op1, int32 tag check on op2,
    //    the 0x80000000 dividend check, and the divide-by-zero check;
    //  - non-zero constant divisor (1): int32 tag check on op1 only;
    //  - constant divisor of -1 (2): the tag check plus the 0x80000000 dividend check.
    unsigned slowCaseCount = 4;
    if (isOperandConstantImmediateInt(op2) && getConstantOperand(op2).asInt32() != 0)
        slowCaseCount = (getConstantOperand(op2).asInt32() == -1) ? 2 : 1;

    for (unsigned linked = 0; linked < slowCaseCount; ++linked)
        linkSlowCase(iter);

    JITStubCall stubCall(this, cti_op_mod);
    stubCall.addArgument(op1);
    stubCall.addArgument(op2);
    stubCall.call(dst);
}
#else // CPU(X86) || CPU(X86_64) || CPU(MIPS)
void JIT::emit_op_mod(Instruction* currentInstruction)
{
unsigned dst = currentInstruction[1].u.operand;
unsigned op1 = currentInstruction[2].u.operand;
unsigned op2 = currentInstruction[3].u.operand;
#if ENABLE(JIT_USE_SOFT_MODULO)
emitLoad2(op1, regT1, regT0, op2, regT3, regT2);
addSlowCase(branch32(NotEqual, regT1, TrustedImm32(JSValue::Int32Tag)));
addSlowCase(branch32(NotEqual, regT3, TrustedImm32(JSValue::Int32Tag)));
......@@ -1312,8 +1244,6 @@ void JIT::emitSlow_op_mod(Instruction* currentInstruction, Vector<SlowCaseEntry>
#endif
}
#endif // CPU(X86) || CPU(X86_64)
/* ------------------------------ END: OP_MOD ------------------------------ */
} // namespace JSC
......
......@@ -1660,11 +1660,10 @@ void JIT::emitSlow_op_get_argument_by_val(Instruction* currentInstruction, Vecto
#if ENABLE(JIT_USE_SOFT_MODULO)
void JIT::softModulo()
{
push(regT1);
push(regT3);
move(regT2, regT3);
move(regT0, regT2);
move(TrustedImm32(0), regT1);
JumpList exitBranch;
// Check for negative result remainder
Jump positiveRegT3 = branch32(GreaterThanOrEqual, regT3, TrustedImm32(0));
......@@ -1680,19 +1679,26 @@ void JIT::softModulo()
// Save the condition for negative remainder
push(regT1);
Jump exitBranch = branch32(LessThan, regT2, regT3);
exitBranch.append(branch32(LessThan, regT2, regT3));
// Power of two fast case
move(regT3, regT0);
sub32(TrustedImm32(1), regT0);
Jump powerOfTwo = branchTest32(NonZero, regT0, regT3);
Jump notPowerOfTwo = branchTest32(NonZero, regT0, regT3);
and32(regT0, regT2);
powerOfTwo.link(this);
and32(regT3, regT0);
exitBranch.append(jump());
Jump exitBranch2 = branchTest32(Zero, regT0);
notPowerOfTwo.link(this);
#if CPU(X86) || CPU(X86_64)
move(regT2, regT0);
m_assembler.cdq();
m_assembler.idivl_r(regT3);
move(regT1, regT2);
#elif CPU(MIPS)
m_assembler.div(regT2, regT3);
m_assembler.mfhi(regT2);
#else
countLeadingZeros32(regT2, regT0);
countLeadingZeros32(regT3, regT1);
sub32(regT0, regT1);
......@@ -1729,9 +1735,9 @@ void JIT::softModulo()
Jump lower = branch32(Below, regT2, regT3);
sub32(regT3, regT2);
lower.link(this);
#endif
exitBranch.link(this);
exitBranch2.link(this);
// Check for negative remainder
pop(regT1);
......@@ -1740,9 +1746,6 @@ void JIT::softModulo()
positiveResult.link(this);
move(regT2, regT0);
pop(regT3);
pop(regT1);
ret();
}
#endif // ENABLE(JIT_USE_SOFT_MODULO)
......
......@@ -955,6 +955,12 @@
#endif
#endif
/* Turn on JIT_USE_SOFT_MODULO by default on X86, X86_64 and MIPS, unless the
   build has already defined ENABLE_JIT_USE_SOFT_MODULO itself. */
#if CPU(X86) || CPU(X86_64) || CPU(MIPS)
#if !defined(ENABLE_JIT_USE_SOFT_MODULO)
#define ENABLE_JIT_USE_SOFT_MODULO 1
#endif
#endif
#if CPU(X86) && COMPILER(MSVC)
#define JSC_HOST_CALL __fastcall
#elif CPU(X86) && COMPILER(GCC)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment