# ametadata.py - microcms plugin for collecting metadata from HTML pages
# Copyright (c) 2009 Niall McCarroll
# Distributed under the MIT/X11 License (http://www.mccarroll.net/snippets/license.txt)

import sys
import os
import os.path
from xml.dom import minidom
from datetime import datetime
from time import strptime

try:
    import StringIO  # Python 2
except ImportError:
    # Python 3 has no StringIO module; io provides a StringIO class under
    # the same attribute name, so ``StringIO.StringIO`` keeps resolving.
    import io as StringIO

# Format expected in the content of a <meta name="microcms-date"> element.
_DATE_FORMAT = "%Y-%m-%dT%H:%M:%S"


def consume(cfile, contents):
    """Parse metadata out of ``contents`` and attach it to ``cfile``.

    On success ``cfile.metadata`` is set to the dictionary returned by
    ``parsemeta`` and ``cfile.outpath`` is stored in it under ``"path"``.

    Args:
        cfile: object providing an ``outpath`` attribute; it receives a
            ``metadata`` attribute.
        contents: the page markup handed to ``parsemeta``.
    """
    metadata = parsemeta(contents)
    if metadata is not None:
        cfile.metadata = metadata
        cfile.metadata["path"] = cfile.outpath


def _node_text(node):
    """Return the concatenated text/CDATA children of ``node`` ('' if none)."""
    return "".join(
        child.data
        for child in node.childNodes
        if child.nodeType in (child.TEXT_NODE, child.CDATA_SECTION_NODE)
    )


def parsemeta(contents):
    """Collect page metadata from well-formed (X)HTML markup.

    The markup is parsed with ``xml.dom.minidom``, so it has to be
    well-formed XML.

    Collected keys:
        * ``title`` and ``microcms-title``: text of each ``<title>``
          element (the last one wins); an empty title yields ``""``.
        * ``microcms-description``: content of ``<meta name="description">``.
        * any ``<meta>`` whose name starts with ``microcms``: its content,
          stored under that name.
        * ``microcms-tags``: the content split on commas into a list, with
          surrounding whitespace stripped and empty entries dropped.
        * ``microcms-date``: the content converted to a ``datetime``.

    Args:
        contents: the markup to parse.

    Returns:
        dict mapping metadata names to values (empty if nothing was found).

    Raises:
        xml.parsers.expat.ExpatError: if ``contents`` is not well-formed.
        ValueError: if a ``microcms-date`` value does not match
            ``%Y-%m-%dT%H:%M:%S``.
    """
    # parseString replaces the StringIO wrapper the parser used to need.
    dom = minidom.parseString(contents)
    meta = {}

    for node in dom.getElementsByTagName("title"):
        # Join every text child instead of reading firstChild.data, which
        # failed on an empty <title> and ignored text after the first node.
        title = _node_text(node)
        meta['microcms-title'] = title
        meta['title'] = title

    for node in dom.getElementsByTagName("meta"):
        metaname = node.getAttribute("name")
        content = node.getAttribute("content")

        if metaname == 'description':
            meta['microcms-description'] = content

        if not metaname.startswith("microcms"):
            continue

        if metaname == "microcms-tags":
            # "a, b" must give ["a", "b"], not ["a", " b"].
            stripped = (tag.strip() for tag in content.split(","))
            meta[metaname] = [tag for tag in stripped if tag]
        elif metaname == 'microcms-date':
            # The first six struct_time fields are year..second.
            meta[metaname] = datetime(*strptime(content, _DATE_FORMAT)[0:6])
        else:
            meta[metaname] = content

    return meta