Commit 90e5f0ea authored by fpizlo@apple.com's avatar fpizlo@apple.com

DFG JIT does not support to_primitive or strcat

https://bugs.webkit.org/show_bug.cgi?id=68582

Reviewed by Darin Adler.
        
This adds functional support for to_primitive and strcat. It focuses
on minimizing the amount of code emitted on to_primitive (if we know
that it is a primitive or can speculate cheaply, then we omit the
slow path) and on keeping the implementation of strcat simple while
leveraging whatever optimizations we have already. In particular,
unlike the Call and Construct nodes which require extending the size
of the DFG's callee registers, StrCat takes advantage of the fact
that no JS code can run while StrCat is in progress and uses a
scratch buffer, rather than the register file, to store the list of
values to concatenate. This was done mainly to keep the code simple,
but there are probably other benefits to keeping call frame sizes
down. Essentially, this patch ensures that the presence of an
op_strcat does not mess up any other optimizations we might do while
ensuring that if you do execute it, it'll work about as well as you'd
expect.
        
When combined with the previous patch for integer division, this is a
14% speed-up on Kraken. Without it, it would have been a 2% loss.

* assembler/AbstractMacroAssembler.h:
(JSC::AbstractMacroAssembler::TrustedImmPtr::TrustedImmPtr):
* dfg/DFGByteCodeParser.cpp:
(JSC::DFG::ByteCodeParser::parseBlock):
* dfg/DFGCapabilities.h:
(JSC::DFG::canCompileOpcode):
* dfg/DFGJITCodeGenerator.h:
(JSC::DFG::JITCodeGenerator::callOperation):
* dfg/DFGJITCompiler.cpp:
(JSC::DFG::JITCompiler::exitSpeculativeWithOSR):
* dfg/DFGNode.h:
* dfg/DFGOperations.cpp:
* dfg/DFGOperations.h:
* dfg/DFGPropagator.cpp:
(JSC::DFG::Propagator::propagateNodePredictions):
(JSC::DFG::Propagator::performNodeCSE):
* dfg/DFGSpeculativeJIT.cpp:
(JSC::DFG::SpeculativeJIT::compile):
* runtime/JSGlobalData.cpp:
(JSC::JSGlobalData::JSGlobalData):
(JSC::JSGlobalData::~JSGlobalData):
* runtime/JSGlobalData.h:
(JSC::JSGlobalData::scratchBufferForSize):



git-svn-id: http://svn.webkit.org/repository/webkit/trunk@95758 268f45cc-cd09-0410-ab3c-d52691b4dbfc
parent cabbc295
2011-09-21 Filip Pizlo <fpizlo@apple.com>
DFG JIT does not support to_primitive or strcat
https://bugs.webkit.org/show_bug.cgi?id=68582
Reviewed by Darin Adler.
This adds functional support for to_primitive and strcat. It focuses
on minimizing the amount of code emitted on to_primitive (if we know
that it is a primitive or can speculate cheaply, then we omit the
slow path) and on keeping the implementation of strcat simple while
leveraging whatever optimizations we have already. In particular,
unlike the Call and Construct nodes which require extending the size
of the DFG's callee registers, StrCat takes advantage of the fact
that no JS code can run while StrCat is in progress and uses a
scratch buffer, rather than the register file, to store the list of
values to concatenate. This was done mainly to keep the code simple,
but there are probably other benefits to keeping call frame sizes
down. Essentially, this patch ensures that the presence of an
op_strcat does not mess up any other optimizations we might do while
ensuring that if you do execute it, it'll work about as well as you'd
expect.
When combined with the previous patch for integer division, this is a
14% speed-up on Kraken. Without it, it would have been a 2% loss.
* assembler/AbstractMacroAssembler.h:
(JSC::AbstractMacroAssembler::TrustedImmPtr::TrustedImmPtr):
* dfg/DFGByteCodeParser.cpp:
(JSC::DFG::ByteCodeParser::parseBlock):
* dfg/DFGCapabilities.h:
(JSC::DFG::canCompileOpcode):
* dfg/DFGJITCodeGenerator.h:
(JSC::DFG::JITCodeGenerator::callOperation):
* dfg/DFGJITCompiler.cpp:
(JSC::DFG::JITCompiler::exitSpeculativeWithOSR):
* dfg/DFGNode.h:
* dfg/DFGOperations.cpp:
* dfg/DFGOperations.h:
* dfg/DFGPropagator.cpp:
(JSC::DFG::Propagator::propagateNodePredictions):
(JSC::DFG::Propagator::performNodeCSE):
* dfg/DFGSpeculativeJIT.cpp:
(JSC::DFG::SpeculativeJIT::compile):
* runtime/JSGlobalData.cpp:
(JSC::JSGlobalData::JSGlobalData):
(JSC::JSGlobalData::~JSGlobalData):
* runtime/JSGlobalData.h:
(JSC::JSGlobalData::scratchBufferForSize):
2011-09-22 Filip Pizlo <fpizlo@apple.com>
DFG JIT should support integer division
......@@ -161,6 +161,19 @@ public:
: m_value(value)
{
}
// This is only here so that TrustedImmPtr(0) does not confuse the C++
// overload handling rules.
explicit TrustedImmPtr(int value)
: m_value(0)
{
ASSERT_UNUSED(value, !value);
}
explicit TrustedImmPtr(size_t value)
: m_value(reinterpret_cast<void*>(value))
{
}
intptr_t asIntptr()
{
......
......@@ -968,6 +968,21 @@ bool ByteCodeParser::parseBlock(unsigned limit)
set(currentInstruction[1].u.operand, addToGraph(LogicalNot, value));
NEXT_OPCODE(op_not);
}
case op_to_primitive: {
NodeIndex value = get(currentInstruction[2].u.operand);
set(currentInstruction[1].u.operand, addToGraph(ToPrimitive, value));
NEXT_OPCODE(op_to_primitive);
}
case op_strcat: {
int startOperand = currentInstruction[2].u.operand;
int numOperands = currentInstruction[3].u.operand;
for (int operandIdx = startOperand; operandIdx < startOperand + numOperands; ++operandIdx)
addVarArgChild(get(operandIdx));
set(currentInstruction[1].u.operand, addToGraph(Node::VarArg, StrCat, OpInfo(0), OpInfo(0)));
NEXT_OPCODE(op_strcat);
}
case op_less: {
NodeIndex op1 = get(currentInstruction[2].u.operand);
......
......@@ -116,6 +116,8 @@ inline bool canCompileOpcode(OpcodeID opcodeID)
case op_call_put_result:
case op_resolve:
case op_resolve_base:
case op_strcat:
case op_to_primitive:
case op_throw:
case op_throw_reference_error:
return true;
......
......@@ -889,6 +889,17 @@ protected:
{
callOperation((J_DFGOperation_EP)operation, result, identifier);
}
void callOperation(J_DFGOperation_EPS operation, GPRReg result, void* pointer, size_t size)
{
ASSERT(isFlushed());
m_jit.move(JITCompiler::TrustedImmPtr(size), GPRInfo::argumentGPR2);
m_jit.move(JITCompiler::TrustedImmPtr(pointer), GPRInfo::argumentGPR1);
m_jit.move(GPRInfo::callFrameRegister, GPRInfo::argumentGPR0);
appendCallWithExceptionCheck(operation);
m_jit.move(GPRInfo::returnValueGPR, result);
}
void callOperation(J_DFGOperation_EJP operation, GPRReg result, GPRReg arg1, void* pointer)
{
ASSERT(isFlushed());
......
......@@ -218,7 +218,7 @@ void JITCompiler::exitSpeculativeWithOSR(const OSRExit& exit, SpeculationRecover
}
}
EncodedJSValue* scratchBuffer = static_cast<EncodedJSValue*>(globalData()->osrScratchBufferForSize(sizeof(EncodedJSValue) * (numberOfPoisonedVirtualRegisters + (numberOfDisplacedVirtualRegisters <= GPRInfo::numberOfRegisters ? 0 : numberOfDisplacedVirtualRegisters))));
EncodedJSValue* scratchBuffer = static_cast<EncodedJSValue*>(globalData()->scratchBufferForSize(sizeof(EncodedJSValue) * (numberOfPoisonedVirtualRegisters + (numberOfDisplacedVirtualRegisters <= GPRInfo::numberOfRegisters ? 0 : numberOfDisplacedVirtualRegisters))));
// From here on, the code assumes that it is profitable to maximize the distance
// between when something is computed and when it is stored.
......
......@@ -296,6 +296,8 @@ static inline const char* arithNodeFlagsAsString(ArithNodeFlags flags)
macro(CheckHasInstance, NodeMustGenerate) \
macro(InstanceOf, NodeResultBoolean) \
macro(LogicalNot, NodeResultBoolean | NodeMightClobber) \
macro(ToPrimitive, NodeResultJS | NodeMustGenerate | NodeClobbersWorld) \
macro(StrCat, NodeResultJS | NodeMustGenerate | NodeHasVarArgs | NodeClobbersWorld) \
\
/* Block terminals. */\
macro(Jump, NodeMustGenerate | NodeIsTerminal | NodeIsJump) \
......
......@@ -674,6 +674,16 @@ EncodedJSValue operationResolveBaseStrictPut(ExecState* exec, Identifier* proper
return JSValue::encode(base);
}
EncodedJSValue operationToPrimitive(ExecState* exec, EncodedJSValue value)
{
return JSValue::encode(JSValue::decode(value).toPrimitive(exec));
}
EncodedJSValue operationStrCat(ExecState* exec, void* start, size_t size)
{
return JSValue::encode(jsString(exec, static_cast<Register*>(start), size));
}
void operationThrowHasInstanceError(ExecState* exec, EncodedJSValue encodedBase)
{
JSValue base = JSValue::decode(encodedBase);
......
......@@ -45,6 +45,7 @@ typedef EncodedJSValue (*J_DFGOperation_EJ)(ExecState*, EncodedJSValue);
typedef EncodedJSValue (*J_DFGOperation_EJP)(ExecState*, EncodedJSValue, void*);
typedef EncodedJSValue (*J_DFGOperation_EJI)(ExecState*, EncodedJSValue, Identifier*);
typedef EncodedJSValue (*J_DFGOperation_EP)(ExecState*, void*);
typedef EncodedJSValue (*J_DFGOperation_EPS)(ExecState*, void*, size_t);
typedef EncodedJSValue (*J_DFGOperation_EI)(ExecState*, Identifier*);
typedef RegisterSizedBoolean (*Z_DFGOperation_EJ)(ExecState*, EncodedJSValue);
typedef RegisterSizedBoolean (*Z_DFGOperation_EJJ)(ExecState*, EncodedJSValue, EncodedJSValue);
......@@ -74,6 +75,8 @@ EncodedJSValue operationInstanceOf(ExecState*, EncodedJSValue value, EncodedJSVa
EncodedJSValue operationResolve(ExecState*, Identifier*);
EncodedJSValue operationResolveBase(ExecState*, Identifier*);
EncodedJSValue operationResolveBaseStrictPut(ExecState*, Identifier*);
EncodedJSValue operationToPrimitive(ExecState*, EncodedJSValue);
EncodedJSValue operationStrCat(ExecState*, void* start, size_t);
void operationThrowHasInstanceError(ExecState*, EncodedJSValue base);
void operationPutByValStrict(ExecState*, EncodedJSValue encodedBase, EncodedJSValue encodedProperty, EncodedJSValue encodedValue);
void operationPutByValNonStrict(ExecState*, EncodedJSValue encodedBase, EncodedJSValue encodedProperty, EncodedJSValue encodedValue);
......
......@@ -546,6 +546,31 @@ private:
changed |= setPrediction(makePrediction(PredictFinalObject, StrongPrediction));
break;
}
case StrCat: {
changed |= setPrediction(makePrediction(PredictString, StrongPrediction));
break;
}
case ToPrimitive: {
PredictedType child = m_predictions[node.child1()];
if (isStrongPrediction(child)) {
if (isObjectPrediction(child)) {
// I'd love to fold this case into the case below, but I can't, because
// removing PredictObjectMask from something that only has an object
// prediction and nothing else means we have an ill-formed PredictedType
// (strong predict-none). This should be killed once we remove all traces
// of static (aka weak) predictions.
changed |= mergePrediction(makePrediction(PredictString, StrongPrediction));
} else if (child & PredictObjectMask) {
// Objects get turned into strings. So if the input has hints of objectness,
// the output will have hinsts of stringiness.
changed |= mergePrediction(mergePredictions(child & ~PredictObjectMask, makePrediction(PredictString, StrongPrediction)));
} else
changed |= mergePrediction(child);
}
break;
}
#ifndef NDEBUG
// These get ignored because they don't return anything.
......@@ -1010,7 +1035,16 @@ private:
#if ENABLE(DFG_DEBUG_PROPAGATION_VERBOSE)
printf(" %s @%u: ", Graph::opName(m_graph[m_compileIndex].op), m_compileIndex);
#endif
// NOTE: there are some nodes that we deliberately don't CSE even though we
// probably could, like StrCat and ToPrimitive. That's because there is no
// evidence that doing CSE on these nodes would result in a performance
// progression. Hence considering these nodes in CSE would just mean that this
// code does more work with no win. Of course, we may want to reconsider this,
// since StrCat is trivially CSE-able. It's not trivially doable for
// ToPrimitive, but we could change that with some speculations if we really
// needed to.
switch (node.op) {
// Handle the pure nodes. These nodes never have any side-effects.
......
......@@ -1569,6 +1569,99 @@ void SpeculativeJIT::compile(Node& node)
terminateSpeculativeExecution();
break;
}
case ToPrimitive: {
if (shouldSpeculateInteger(node.child1())) {
// It's really profitable to speculate integer, since it's really cheap,
// it means we don't have to do any real work, and we emit a lot less code.
SpeculateIntegerOperand op1(this, node.child1());
GPRTemporary result(this, op1);
m_jit.move(op1.gpr(), result.gpr());
if (op1.format() == DataFormatInteger)
m_jit.orPtr(GPRInfo::tagTypeNumberRegister, result.gpr());
jsValueResult(result.gpr(), m_compileIndex);
break;
}
// FIXME: Add string speculation here.
bool wasPrimitive = isKnownNumeric(node.child1()) || isKnownBoolean(node.child1());
JSValueOperand op1(this, node.child1());
GPRTemporary result(this, op1);
GPRReg op1GPR = op1.gpr();
GPRReg resultGPR = result.gpr();
op1.use();
if (wasPrimitive)
m_jit.move(op1GPR, resultGPR);
else {
MacroAssembler::JumpList alreadyPrimitive;
alreadyPrimitive.append(m_jit.branchTestPtr(MacroAssembler::NonZero, op1GPR, GPRInfo::tagMaskRegister));
alreadyPrimitive.append(m_jit.branchPtr(MacroAssembler::Equal, MacroAssembler::Address(op1GPR), MacroAssembler::TrustedImmPtr(m_jit.globalData()->jsStringVPtr)));
silentSpillAllRegisters(resultGPR);
m_jit.move(op1GPR, GPRInfo::argumentGPR1);
m_jit.move(GPRInfo::callFrameRegister, GPRInfo::argumentGPR0);
appendCallWithExceptionCheck(operationToPrimitive);
m_jit.move(GPRInfo::returnValueGPR, resultGPR);
silentFillAllRegisters(resultGPR);
MacroAssembler::Jump done = m_jit.jump();
alreadyPrimitive.link(&m_jit);
m_jit.move(op1GPR, resultGPR);
done.link(&m_jit);
}
jsValueResult(resultGPR, m_compileIndex, UseChildrenCalledExplicitly);
break;
}
case StrCat: {
// We really don't want to grow the register file just to do a StrCat. Say we
// have 50 functions on the stack that all have a StrCat in them that has
// upwards of 10 operands. In the DFG this would mean that each one gets
// some random virtual register, and then to do the StrCat we'd need a second
// span of 10 operands just to have somewhere to copy the 10 operands to, where
// they'd be contiguous and we could easily tell the C code how to find them.
// Ugly! So instead we use the scratchBuffer infrastructure in JSGlobalData. That
// way, those 50 functions will share the same scratchBuffer for offloading their
// StrCat operands. It's about as good as we can do, unless we start doing
// virtual register coalescing to ensure that operands to StrCat get spilled
// in exactly the place where StrCat wants them, or else have the StrCat
// refer to those operands' SetLocal instructions to force them to spill in
// the right place. Basically, any way you cut it, the current approach
// probably has the best balance of performance and sensibility in the sense
// that it does not increase the complexity of the DFG JIT just to make StrCat
// fast and pretty.
EncodedJSValue* buffer = static_cast<EncodedJSValue*>(m_jit.globalData()->scratchBufferForSize(sizeof(EncodedJSValue) * node.numChildren()));
for (unsigned operandIdx = 0; operandIdx < node.numChildren(); ++operandIdx) {
JSValueOperand operand(this, m_jit.graph().m_varArgChildren[node.firstChild() + operandIdx]);
GPRReg opGPR = operand.gpr();
operand.use();
m_jit.storePtr(opGPR, buffer + operandIdx);
}
flushRegisters();
GPRResult result(this);
callOperation(operationStrCat, result.gpr(), buffer, node.numChildren());
jsValueResult(result.gpr(), m_compileIndex, UseChildrenCalledExplicitly);
break;
}
case ConvertThis: {
SpeculateCellOperand thisValue(this, node.child1());
......
......@@ -186,7 +186,7 @@ JSGlobalData::JSGlobalData(GlobalDataType globalDataType, ThreadStackType thread
, interpreter(0)
, heap(this, heapSize)
#if ENABLE(DFG_JIT)
, sizeOfLastOSRScratchBuffer(0)
, sizeOfLastScratchBuffer(0)
#endif
, dynamicGlobalObject(0)
, cachedUTCOffset(std::numeric_limits<double>::quiet_NaN())
......@@ -352,8 +352,8 @@ JSGlobalData::~JSGlobalData()
#endif
#if ENABLE(DFG_JIT)
for (unsigned i = 0; i < osrScratchBuffers.size(); ++i)
fastFree(osrScratchBuffers[i]);
for (unsigned i = 0; i < scratchBuffers.size(); ++i)
fastFree(scratchBuffers[i]);
#endif
}
......
......@@ -233,25 +233,25 @@ namespace JSC {
int64_t debugDataBuffer[64];
#endif
#if ENABLE(DFG_JIT)
Vector<void*> osrScratchBuffers;
size_t sizeOfLastOSRScratchBuffer;
Vector<void*> scratchBuffers;
size_t sizeOfLastScratchBuffer;
void* osrScratchBufferForSize(size_t size)
void* scratchBufferForSize(size_t size)
{
if (!size)
return 0;
if (size > sizeOfLastOSRScratchBuffer) {
if (size > sizeOfLastScratchBuffer) {
// Protect against a N^2 memory usage pathology by ensuring
// that at worst, we get a geometric series, meaning that the
// total memory usage is somewhere around
// max(scratch buffer size) * 4.
sizeOfLastOSRScratchBuffer = size * 2;
sizeOfLastScratchBuffer = size * 2;
osrScratchBuffers.append(fastMalloc(sizeOfLastOSRScratchBuffer));
scratchBuffers.append(fastMalloc(sizeOfLastScratchBuffer));
}
return osrScratchBuffers.last();
return scratchBuffers.last();
}
#endif
#endif
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment