Code Snippets - DHTML Snapshot

Snapshot

Rich Internet Applications (RIAs) offer a highly interactive user experience, but I sometimes wish it were possible to save a self-contained snapshot of the page state that I could keep for reference or e-mail as an attachment.

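Snapshot is a code snippet which demonstrates a very simple, client-side method for creating a copy of a page with the following changes: all JavaScript is removed, external stylesheets are embedded, and png and gif image files are embedded as data URIs.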

The embedding of png or gif images is achieved using data URIs (wiki page). This useful feature is supported by Opera and Firefox, but in Internet Explorer apparently only from version 8.

To see how snapshot works, open this simple example page. In this page, clicking on the coloured block toggles the colour. Clicking on the "Take snapshot" link at the bottom of the page calls snapshot.js to create a snapshot of the page, which is then base 64 encoded and embedded into an "Open snapshot" link using a data URI (the base64 encoding is handled using Masanao Izumo's handy script base64encode.js). You can click on this link and view the page source to compare the HTML in the copied page and that of the original.

Snapshot does not require any server-side support except that a copy of png and gif images in base64-encoded format needs to be created with the original suffix replaced by .txt (this is trivial to do in Python, see data2base64.py). It is possible to do base64 encoding in JavaScript; however, the difficulty lies in transferring the binary contents of the image files to the client.

Caveat: Data URIs are not supported by some browsers (notably Internet Explorer 6 and 7). Other browsers place a restriction on the overall length of data URIs. Both of these factors limit the usefulness of this approach.

snapshot.js
// snapshot.js - take a snapshot of a dynamic web site
// Copyright (c) 2008 Niall McCarroll  
// Distributed under the MIT/X11 License (http://www.mccarroll.net/snippets/license.txt)
//
// Simple javascript utility which demonstrates a way
// to extract a static snapshot of the current page
//
// * all javascript is removed
// * external stylesheets are embedded
// * png and gif image files are embedded as data URIs  
//
// main entrypoint is the snapshot function (see end of this file)

// fetch_url utility function
//
// Use XHR to synchronously fetch an object from the server.
//
//   url - address of the object to fetch
//
// Returns the response body as text, or null if the object could not be
// fetched: no XHR implementation is available, the request throws, or the
// server answers with a non-success status (for example 404).

function fetch_url(url) {

    var req = null;
    // older Internet Explorer versions expose XHR only through ActiveX
    try { req = new ActiveXObject("Msxml2.XMLHTTP"); } catch(e) {}
    if (req == null) {
        try { req = new ActiveXObject("Microsoft.XMLHTTP"); } catch(e) {}
    }
    if (req == null) {
        try { req = new XMLHttpRequest(); } catch(e) {}
    }
    if (req == null) {
        return null;
    }

    try {
        req.open("get",url,false);
        req.send("");
        // Without this check the body of an error page (404, 500, ...)
        // would be handed back as if it were the requested object.
        // Status 0 is accepted because non-HTTP schemes such as file://
        // report it for successful loads.
        var status = req.status;
        if (status === 0 || (status >= 200 && status < 300)) {
            return req.responseText;
        }
        return null;
    } catch(e) {
        return null;
    }
}

// TagHandler is a class for transforming HTML tags.
// The constructor sets no instance fields; the transform_* methods,
// general_transform and parse are attached to TagHandler.prototype.
function TagHandler() {
    return this;
}

// Utility function - is a URL relative (local) or absolute?
//
//   url - the URL string to classify
//
// Returns true for relative URLs ("style.css", "/img/a.png") and false
// for anything that names its own scheme or host.
TagHandler.prototype.is_local_url = function(url) {
    // A leading "scheme:" (http:, https:, data:, ... matched
    // case-insensitively) or a protocol-relative "//host/..." prefix marks
    // the URL as absolute. Matching the scheme syntax, rather than the
    // first four characters, keeps relative names such as
    // "httpd-logo.png" local.
    return !/^\s*(?:[a-z][a-z0-9+.\-]*:|\/\/)/i.test(url);
};

// Remove scripts from the document.
// Always returns null; TagHandler.parse documents a null result as
// "ignore this element", so <script> elements are left out of the snapshot.
TagHandler.prototype.transform_script = function(parsedTag) {
    return null;
}

// Replace stylesheet links with embedded stylesheets.
//
//   parsedTag - result object (tagName, attrs, children) for a <link>
//
// Returns a replacement <style> result object holding the fetched CSS when
// the link is a local stylesheet that could be fetched; otherwise returns
// parsedTag unchanged.
//
// NOTE(review): relative url(...) references inside the fetched CSS are
// not rewritten - confirm whether the snapshot needs that.
TagHandler.prototype.transform_link = function(parsedTag) {
    var href = parsedTag.attrs['href'];
    var rel = parsedTag.attrs['rel'];
    // the type attribute is optional on stylesheet links; default to CSS
    var type = parsedTag.attrs['type'] || "text/css";
    var media = parsedTag.attrs['media'];
    // rel keywords are case-insensitive; only a plain "stylesheet" is
    // embedded (so "alternate stylesheet" is deliberately left alone)
    var is_stylesheet = false;
    if (rel) {
        is_stylesheet = (String(rel).replace(/^\s+|\s+$/g, "").toLowerCase() == "stylesheet");
    }
    if (href && is_stylesheet && this.is_local_url(href)) {
        var contents = fetch_url(href);
        if (contents) {
            var style_attrs = { 'type':type };
            // keep the media restriction, otherwise e.g. a print
            // stylesheet would apply to every medium once inlined
            if (media) {
                style_attrs['media'] = media;
            }
            return {
                'tagName':'style',
                'attrs': style_attrs,
                'children': [document.createTextNode(contents)]
                };
        }
    }
    return parsedTag;
};

// Replace local gif or png image files with embedded data URIs.
//
//   parsedTag - result object (tagName, attrs, children) for an <img>
//
// The base64 text is fetched from the image URL with its suffix swapped
// for ".txt". Returns parsedTag, with attrs['src'] rewritten to a data URI
// when that text could be fetched and looks like base64; otherwise the
// tag is returned untouched.
TagHandler.prototype.transform_img = function(parsedTag) {
    var src = parsedTag.attrs['src'];
    if (src && this.is_local_url(src)) {
        var index = src.lastIndexOf('.');
        if (index != -1) {
            // suffixes are compared case-insensitively so "logo.PNG" works
            var suffix = src.slice(index+1).toLowerCase();
            if (suffix == "png" || suffix == "gif") {
                // try to fetch the base-64 encoded version of the image file
                // by swapping the suffix with .txt
                var b64contents = fetch_url(src.slice(0,index)+".txt");
                if (b64contents) {
                    // base64 encoders commonly wrap their output; drop the
                    // line breaks so the value is a single clean token
                    b64contents = b64contents.replace(/\s+/g, "");
                }
                // only embed something that really is base64 - this keeps
                // e.g. a server error page out of the data URI
                if (b64contents && /^[A-Za-z0-9+\/]+={0,2}$/.test(b64contents)) {
                    // success - manipulate the src attribute to embed the data URI
                    parsedTag.attrs['src'] = "data:image/"+suffix+";base64,"+b64contents;
                }
            }
        }
    }
    return parsedTag;
};

// Default transforms, applied by parse to every tag it does not return
// early for (tags in class "snapshot-include" bypass them).
//
//   parsedTag - result object (tagName, attrs, children)
//
// Strips every attribute whose name starts with "on" (the javascript
// event handlers) from parsedTag.attrs and returns the same object.
TagHandler.prototype.general_transform = function(parsedTag) {
    var attributes = parsedTag.attrs;
    var handlerNames = [];
    var name;

    // first pass: note which attribute names look like event handlers
    for (name in attributes) {
        if (name.indexOf("on") === 0) {
            handlerNames.push(name);
        }
    }

    // second pass: drop them
    var k;
    for (k = 0; k < handlerNames.length; k++) {
        delete attributes[handlerNames[k]];
    }

    return parsedTag;
};

// TagHandler.parse is the main entry point.
//
//   ele - the DOM element to inspect
//
// Returns either a result object with attributes tagName, attrs, children
// or null (meaning: ignore this element).
TagHandler.prototype.parse = function(ele) {
    var tagName = ele.tagName.toLowerCase();
    var attrs = {};
    var children = [];
    var k;

    // copy every attribute that has both a name and a value
    var attributeList = ele.attributes;
    for ( k = 0; k < attributeList.length; k++ ) {
        var attribute = attributeList[k];
        if (attribute.name == undefined || attribute.value == undefined) {
            continue;
        }
        attrs[attribute.name] = attribute.value;
    }

    // keep only element and text children; other node types are skipped
    var nodes = ele.childNodes;
    for ( k = 0; k < nodes.length; k++ ) {
        var node = nodes[k];
        if (!(node instanceof Element) && !(node instanceof Text)) {
            continue;
        }
        children.push(node);
    }

    var parsedTag = { 'tagName':tagName, 'attrs':attrs, 'children':children };

    // the snapshot-exclude / snapshot-include classes short-circuit the
    // transforms; exclude is tested first
    if ('class' in attrs) {
        var classNames = attrs["class"];
        if (classNames.indexOf("snapshot-exclude") != -1) {
            return null;        // drop the tag entirely
        }
        if (classNames.indexOf("snapshot-include") != -1) {
            return parsedTag;   // keep the tag exactly as parsed
        }
    }

    // tag specific transform, looked up by name (transform_<tagName>)
    var handlerName = "transform_"+tagName;
    if (handlerName in this) {
        parsedTag = this[handlerName](parsedTag);
    }

    // general transforms only run if the tag survived the step above
    return parsedTag ? this.general_transform(parsedTag) : parsedTag;
};
		
// DocHandler is a class which serializes a Document object to text,
// applying transformations on the way.
//
//   doc - the Document to serialize; stored as this.doc
//
// Each instance gets its own TagHandler (this.tagHandler).
function DocHandler(doc) {
    var handler = new TagHandler();

    this.doc = doc;
    this.tagHandler = handler;
    return this;
}

// DocHandler.toString is the main entry point.
//
// Returns the serialized document: the doctype declaration (when the
// document has one) followed by the serialized root element.
DocHandler.prototype.toString = function() {
    var prefix = "";
    // Carry the doctype over; without it the snapshot would be rendered in
    // quirks mode even when the original page was in standards mode.
    var doctype = this.doc.doctype;
    if (doctype && doctype.name) {
        prefix = "<!DOCTYPE " + doctype.name;
        if (doctype.publicId) {
            prefix += ' PUBLIC "' + doctype.publicId + '"';
        } else if (doctype.systemId) {
            prefix += " SYSTEM";
        }
        if (doctype.systemId) {
            prefix += ' "' + doctype.systemId + '"';
        }
        prefix += ">\n";
    }
    return prefix + this.element2String(this.doc.documentElement);
};

// Serialize an attribute to text.
//
//   aname  - attribute name
//   avalue - attribute value
//
// Returns name="value" with the value escaped, so that quotes, ampersands
// and angle brackets inside it cannot break the generated markup.
DocHandler.prototype.attr2String = function(aname,avalue) {
    // "&" must be replaced first, otherwise the entities produced by the
    // later replacements would themselves be escaped again
    var escaped = String(avalue)
        .replace(/&/g, "&amp;")
        .replace(/"/g, "&quot;")
        .replace(/</g, "&lt;")
        .replace(/>/g, "&gt;");
    return aname + "=" + '"' + escaped + '"';
};

// Serialize an element (and its attributes and children) to text.
//
//   ele - the DOM element to serialize
//
// Returns the HTML text for the element, or "" when the tag handler asks
// for the element to be ignored (parse returns null).
DocHandler.prototype.element2String = function(ele) {
    // Elements that never have an end tag in HTML. Writing "<br></br>"
    // would be read back by an HTML parser as two line breaks.
    var VOID_TAGS = " area base basefont bgsound br col embed frame hr img input keygen link meta param source track wbr ";
    // Elements whose text content is written out verbatim, without
    // escaping (stylesheet text must not have its ">" or "&" rewritten).
    var RAW_TEXT_TAGS = " script style xmp iframe noembed noframes noscript plaintext ";

    var parsedTag = this.tagHandler.parse(ele);
    if (parsedTag == null) { return ""; }

    var tagName = parsedTag.tagName;
    var result = "<"+tagName;
    var aname;
    for ( aname in parsedTag.attrs ) {
        result += " " + this.attr2String(aname,parsedTag.attrs[aname]);
    }
    result += ">";

    if (VOID_TAGS.indexOf(" "+tagName+" ") != -1) {
        return result;
    }

    var is_raw_text = (RAW_TEXT_TAGS.indexOf(" "+tagName+" ") != -1);
    var children = parsedTag.children;
    var content = "";
    var i;
    // index loop rather than for..in: children is an array
    for ( i = 0; i < children.length; i++ ) {
        var c = children[i];
        if (c instanceof Element) {
            content += this.element2String(c);
        }
        else if (c instanceof Text) {
            if (is_raw_text) {
                content += c.data;
            } else {
                // text node data holds decoded characters; re-escape
                // them so "<" and "&" are not read back as markup
                content += String(c.data)
                    .replace(/&/g, "&amp;")
                    .replace(/</g, "&lt;")
                    .replace(/>/g, "&gt;");
            }
        }
    }
    return result + content + "</"+tagName+">";
};


// createLink - empty placeholder: the body contains no statements, so
// calling it has no effect and it returns undefined.
// NOTE(review): presumably meant to build the snapshot link from
// pagecontents (snapshot() currently does that inline) - confirm or remove.
function createLink(pagecontents) {
    
}

// Takes a snapshot of the current document, base64 encodes it and stores
// it as a data URI in the href of the element with id "download_snap",
// which is then made visible (display: inline).
//
// Takes no parameters and returns nothing. Does nothing when the page has
// no "download_snap" element.
function snapshot() {
    var dl = document.getElementById("download_snap");
    if (!dl) {
        return;     // nowhere to put the snapshot link
    }
    var doc = new DocHandler(window.document);
    var pagecontents = doc.toString();
    // base64 works on bytes, so turn the text into UTF-8 first (each
    // character of the result is one byte) and declare that charset in
    // the data URI; otherwise characters outside Latin-1 cannot survive.
    // NOTE(review): assumes base64encode treats each input character as
    // one byte - confirm against base64encode.js.
    var utf8contents = unescape(encodeURIComponent(pagecontents));
    var b64link = "data:text/html;charset=utf-8;base64,"+base64encode(utf8contents);
    dl.href = b64link;
    dl.style.display = 'inline';
}

 

Leave a comment

Anti-Spam Check
Comment