Commit 9fe2f6a6 authored by jcivelli@chromium.org

2011-06-27 Jay Civelli <jcivelli@chromium.org>

        Reviewed by Darin Fisher.

        Adding binary part support to MHTML.
        https://bugs.webkit.org/show_bug.cgi?id=63310

        * mhtml/multi_frames_binary.mht: Added.
        * platform/chromium/mhtml/multi_frames_binary-expected.txt: Added.
2011-06-27  Jay Civelli  <jcivelli@chromium.org>

        Reviewed by Darin Fisher.

        Adding binary part support to MHTML.
        https://bugs.webkit.org/show_bug.cgi?id=63310

        * loader/archive/mhtml/MHTMLArchive.cpp:
        (WebCore::MHTMLArchive::generateMHTMLData):
        (WebCore::MHTMLArchive::generateMHTMLDataUsingBinaryEncoding):
        * loader/archive/mhtml/MHTMLArchive.h:
        * loader/archive/mhtml/MHTMLParser.cpp:
        (WebCore::MHTMLParser::parseNextPart):
        * platform/SharedBuffer.cpp:
        (WebCore::SharedBuffer::append):
        * platform/SharedBuffer.h:
        * platform/network/MIMEHeader.cpp:
        (WebCore::MIMEHeader::parseContentTransferEncoding):
        * platform/network/MIMEHeader.h:
2011-06-27  Jay Civelli  <jcivelli@chromium.org>

        Reviewed by Darin Fisher.

        Adding binary part support to MHTML.
        https://bugs.webkit.org/show_bug.cgi?id=63310

        * public/WebPageSerializer.h:
        * src/WebPageSerializer.cpp:
        (WebKit::WebPageSerializer::serializeToMHTML):
        (WebKit::WebPageSerializer::serializeToMHTMLUsingBinaryEncoding):

git-svn-id: http://svn.webkit.org/repository/webkit/trunk@89869 268f45cc-cd09-0410-ab3c-d52691b4dbfc
parent 9ac7ad2f
2011-06-27 Jay Civelli <jcivelli@chromium.org>
Reviewed by Darin Fisher.
Adding binary part support to MHTML.
https://bugs.webkit.org/show_bug.cgi?id=63310
* mhtml/multi_frames_binary.mht: Added.
* platform/chromium/mhtml/multi_frames_binary-expected.txt: Added.
2011-06-27 Levi Weintraub <leviw@chromium.org>
Reviewed by Eric Seidel.
......
This page contains several frames.
And a red square:
--------
Frame: '<!--framePath //<!--frame0-->-->'
--------
The first frame!
--------
Frame: '<!--framePath //<!--frame1-->-->'
--------
The second frame!
--------
Frame: '<!--framePath //<!--frame2-->-->'
--------
The third frame!
This one contains yet another frame. What a twist!
--------
Frame: '<!--framePath //<!--frame2-->/<!--frame0-->-->'
--------
This is frame 4!
2011-06-27 Jay Civelli <jcivelli@chromium.org>
Reviewed by Darin Fisher.
Adding binary part support to MHTML.
https://bugs.webkit.org/show_bug.cgi?id=63310
* loader/archive/mhtml/MHTMLArchive.cpp:
(WebCore::MHTMLArchive::generateMHTMLData):
(WebCore::MHTMLArchive::generateMHTMLDataUsingBinaryEncoding):
* loader/archive/mhtml/MHTMLArchive.h:
* loader/archive/mhtml/MHTMLParser.cpp:
(WebCore::MHTMLParser::parseNextPart):
* platform/SharedBuffer.cpp:
(WebCore::SharedBuffer::append):
* platform/SharedBuffer.h:
* platform/network/MIMEHeader.cpp:
(WebCore::MIMEHeader::parseContentTransferEncoding):
* platform/network/MIMEHeader.h:
2011-06-27 Levi Weintraub <leviw@chromium.org>
Reviewed by Eric Seidel.
......
......@@ -57,6 +57,7 @@ namespace WebCore {
// Names of the part encodings; the selected one is written after the
// "Content-Transfer-Encoding: " header of each generated MHTML part.
const char* const quotedPrintable = "quoted-printable";
const char* const base64 = "base64";
// Raw, unencoded part bytes (see generateMHTMLDataUsingBinaryEncoding).
const char* const binary = "binary";
static String generateRandomBoundary()
{
......@@ -123,6 +124,16 @@ PassRefPtr<MHTMLArchive> MHTMLArchive::create(const KURL& url, SharedBuffer* dat
}
// Serializes |page| to MHTML with binary part encoding disabled.
// Thin wrapper over the two-argument generateMHTMLData() overload.
PassRefPtr<SharedBuffer> MHTMLArchive::generateMHTMLData(Page* page)
{
    const bool useBinaryEncoding = false;
    return generateMHTMLData(page, useBinaryEncoding);
}
// Serializes |page| to MHTML with binary part encoding enabled.
// Thin wrapper over the two-argument generateMHTMLData() overload.
PassRefPtr<SharedBuffer> MHTMLArchive::generateMHTMLDataUsingBinaryEncoding(Page* page)
{
    const bool useBinaryEncoding = true;
    return generateMHTMLData(page, useBinaryEncoding);
}
PassRefPtr<SharedBuffer> MHTMLArchive::generateMHTMLData(Page* page, bool useBinaryEncoding)
{
Vector<PageSerializer::Resource> resources;
PageSerializer pageSerializer(&resources);
......@@ -167,7 +178,7 @@ PassRefPtr<SharedBuffer> MHTMLArchive::generateMHTMLData(Page* page)
stringBuilder.append("Content-Type: ");
stringBuilder.append(resource.mimeType);
const char* contentEncoding = MIMETypeRegistry::isSupportedJavaScriptMIMEType(resource.mimeType) || MIMETypeRegistry::isSupportedNonImageMIMEType(resource.mimeType) ? quotedPrintable : base64;
const char* contentEncoding = useBinaryEncoding ? binary : base64;
stringBuilder.append("\r\nContent-Transfer-Encoding: ");
stringBuilder.append(contentEncoding);
stringBuilder.append("\r\nContent-Location: ");
......@@ -177,27 +188,36 @@ PassRefPtr<SharedBuffer> MHTMLArchive::generateMHTMLData(Page* page)
asciiString = stringBuilder.toString().utf8();
mhtmlData->append(asciiString.data(), asciiString.length());
// FIXME: ideally we would encode the content as a stream without having to fetch it all.
const char* data = resource.data->data();
size_t dataLength = resource.data->size();
Vector<char> encodedData;
if (!strcmp(contentEncoding, quotedPrintable)) {
quotedPrintableEncode(data, dataLength, encodedData);
mhtmlData->append(encodedData.data(), encodedData.size());
mhtmlData->append("\r\n", 2);
if (!strcmp(contentEncoding, binary)) {
const char* data;
size_t position = 0;
while (size_t length = resource.data->getSomeData(data, position)) {
mhtmlData->append(data, length);
position += length;
}
} else {
ASSERT(!strcmp(contentEncoding, base64));
// We are not specifying insertLFs = true below as it would cut the lines with LFs and MHTML requires CRLFs.
base64Encode(data, dataLength, encodedData);
const size_t maximumLineLength = 76;
size_t index = 0;
size_t encodedDataLength = encodedData.size();
do {
size_t lineLength = std::min(encodedDataLength - index, maximumLineLength);
mhtmlData->append(encodedData.data() + index, lineLength);
// FIXME: ideally we would encode the content as a stream without having to fetch it all.
const char* data = resource.data->data();
size_t dataLength = resource.data->size();
Vector<char> encodedData;
if (!strcmp(contentEncoding, quotedPrintable)) {
quotedPrintableEncode(data, dataLength, encodedData);
mhtmlData->append(encodedData.data(), encodedData.size());
mhtmlData->append("\r\n", 2);
index += maximumLineLength;
} while (index < encodedDataLength);
} else {
ASSERT(!strcmp(contentEncoding, base64));
// We are not specifying insertLFs = true below as it would cut the lines with LFs and MHTML requires CRLFs.
base64Encode(data, dataLength, encodedData);
const size_t maximumLineLength = 76;
size_t index = 0;
size_t encodedDataLength = encodedData.size();
do {
size_t lineLength = std::min(encodedDataLength - index, maximumLineLength);
mhtmlData->append(encodedData.data() + index, lineLength);
mhtmlData->append("\r\n", 2);
index += maximumLineLength;
} while (index < encodedDataLength);
}
}
}
......
......@@ -49,8 +49,12 @@ public:
static PassRefPtr<MHTMLArchive> create(const KURL&, SharedBuffer*);
static PassRefPtr<SharedBuffer> generateMHTMLData(Page*);
// Binary encoding results in smaller MHTML files but they might not work in other browsers.
static PassRefPtr<SharedBuffer> generateMHTMLDataUsingBinaryEncoding(Page*);
private:
static PassRefPtr<SharedBuffer> generateMHTMLData(Page*, bool useBinaryEncoding);
friend class MHTMLParser;
MHTMLArchive();
};
......
......@@ -146,18 +146,48 @@ PassRefPtr<ArchiveResource> MHTMLParser::parseNextPart(const MIMEHeader& mimeHea
RefPtr<SharedBuffer> content = SharedBuffer::create();
const bool checkBoundary = !endOfPartBoundary.isEmpty();
bool endOfPartReached = false;
String line;
while (!(line = m_lineReader.nextChunkAsUTF8StringWithLatin1Fallback()).isNull()) {
if (checkBoundary && (line == endOfPartBoundary || line == endOfDocumentBoundary)) {
endOfArchiveReached = (line == endOfDocumentBoundary);
endOfPartReached = true;
break;
if (mimeHeader.contentTransferEncoding() == MIMEHeader::Binary) {
if (!checkBoundary) {
LOG_ERROR("Binary contents requires end of part");
return 0;
}
// Note that we use line.utf8() and not line.ascii() as ascii turns special characters (such as tab, line-feed...) into '?'.
content->append(line.utf8().data(), line.length());
if (mimeHeader.contentTransferEncoding() == MIMEHeader::QuotedPrintable) {
// The line reader removes the \r\n, but we need them for the content in this case as the QuotedPrintable decoder expects CR-LF terminated lines.
content->append("\r\n", 2);
m_lineReader.setSeparator(endOfPartBoundary.utf8().data());
Vector<char> part;
if (!m_lineReader.nextChunk(part)) {
LOG_ERROR("Binary contents requires end of part");
return 0;
}
content->append(part);
m_lineReader.setSeparator("\r\n");
Vector<char> nextChars;
if (m_lineReader.peek(nextChars, 2) != 2) {
LOG_ERROR("Invalid seperator.");
return 0;
}
endOfPartReached = true;
ASSERT(nextChars.size() == 2);
endOfArchiveReached = (nextChars[0] == '-' && nextChars[1] == '-');
if (!endOfArchiveReached) {
String line = m_lineReader.nextChunkAsUTF8StringWithLatin1Fallback();
if (!line.isEmpty()) {
LOG_ERROR("No CRLF at end of binary section.");
return 0;
}
}
} else {
String line;
while (!(line = m_lineReader.nextChunkAsUTF8StringWithLatin1Fallback()).isNull()) {
endOfArchiveReached = (line == endOfDocumentBoundary);
if (checkBoundary && (line == endOfPartBoundary || endOfArchiveReached)) {
endOfPartReached = true;
break;
}
// Note that we use line.utf8() and not line.ascii() as ascii turns special characters (such as tab, line-feed...) into '?'.
content->append(line.utf8().data(), line.length());
if (mimeHeader.contentTransferEncoding() == MIMEHeader::QuotedPrintable) {
// The line reader removes the \r\n, but we need them for the content in this case as the QuotedPrintable decoder expects CR-LF terminated lines.
content->append("\r\n", 2);
}
}
}
if (!endOfPartReached && checkBoundary) {
......@@ -177,6 +207,7 @@ PassRefPtr<ArchiveResource> MHTMLParser::parseNextPart(const MIMEHeader& mimeHea
quotedPrintableDecode(content->data(), content->size(), data);
break;
case MIMEHeader::SevenBit:
case MIMEHeader::Binary:
data.append(content->data(), content->size());
break;
default:
......
......@@ -165,6 +165,11 @@ void SharedBuffer::append(const char* data, unsigned length)
}
}
// Convenience overload: appends the whole contents of |data| to this buffer
// by forwarding to append(const char*, unsigned).
void SharedBuffer::append(const Vector<char>& data)
{
    const char* bytes = data.data();
    append(bytes, data.size());
}
void SharedBuffer::clear()
{
clearPlatformData();
......
......@@ -86,6 +86,8 @@ public:
void append(SharedBuffer*);
void append(const char*, unsigned);
void append(const Vector<char>&);
void clear();
const char* platformData() const;
unsigned platformDataSize() const;
......
......@@ -123,6 +123,8 @@ MIMEHeader::Encoding MIMEHeader::parseContentTransferEncoding(const String& text
return QuotedPrintable;
if (encoding == "7bit")
return SevenBit;
if (encoding == "binary")
return Binary;
LOG_ERROR("Unknown encoding '%s' found in MIME header.", text.ascii().data());
return Unknown;
}
......
......@@ -46,6 +46,7 @@ public:
QuotedPrintable,
Base64,
SevenBit,
Binary,
Unknown
};
......
2011-06-27 Jay Civelli <jcivelli@chromium.org>
Reviewed by Darin Fisher.
Adding binary part support to MHTML.
https://bugs.webkit.org/show_bug.cgi?id=63310
* public/WebPageSerializer.h:
* src/WebPageSerializer.cpp:
(WebKit::WebPageSerializer::serializeToMHTML):
(WebKit::WebPageSerializer::serializeToMHTMLUsingBinaryEncoding):
2011-05-17 Nat Duca <nduca@chromium.org>
Reviewed by James Robinson.
......
......@@ -61,6 +61,10 @@ public:
// Serializes the WebView contents to a MHTML representation.
WEBKIT_API static WebCString serializeToMHTML(WebView*);
// Similar to serializeToMHTML but uses binary encoding for the MHTML parts.
// This results in a smaller MHTML file but it might not be supported by other browsers.
WEBKIT_API static WebCString serializeToMHTMLUsingBinaryEncoding(WebView*);
// IMPORTANT:
// The API below is an older implementation of a pageserialization that
// will be removed soon.
......
......@@ -208,6 +208,13 @@ WebCString WebPageSerializer::serializeToMHTML(WebView* view)
return WebCString(mhtml->data(), mhtml->size());
}
// Serializes the page behind |view| to MHTML using binary-encoded parts and
// returns the resulting bytes as a WebCString.
WebCString WebPageSerializer::serializeToMHTMLUsingBinaryEncoding(WebView* view)
{
    WebViewImpl* viewImpl = static_cast<WebViewImpl*>(view);
    RefPtr<SharedBuffer> mhtml = MHTMLArchive::generateMHTMLDataUsingBinaryEncoding(viewImpl->page());
    // FIXME: this copies all of the data. Ideally we would have a WebSharedData().
    return WebCString(mhtml->data(), mhtml->size());
}
bool WebPageSerializer::serialize(WebFrame* frame,
bool recursive,
WebPageSerializerClient* client,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment