#!/usr/bin/env python import md5 import sgmllib def escape(data, escape_quote = False): data = data.replace("&", "&") data = data.replace("<", "<") if escape_quote: data = data.replace('"', """) return data class Parser(sgmllib.SGMLParser): from htmlentitydefs import entitydefs def __init__(self, fp, verbose = False): sgmllib.SGMLParser.__init__(self, verbose) self.fp = fp def reset(self): sgmllib.SGMLParser.reset(self) self.hashes = md5.new(), md5.new() self.passthrough = False self.row = None def feed(self, data): sgmllib.SGMLParser.feed(self, data) self.hashes[0].update(data) def write(self, data): self.fp.write(data) self.hashes[1].update(data) def digests(self): return [hash.hexdigest() for hash in self.hashes] # handle passthrough in generic overrides def handle_starttag(self, tag, method, attrs): sgmllib.SGMLParser.handle_starttag(self, tag, method, attrs) if self.passthrough: self.__write_tag(tag, attrs) def unknown_starttag(self, tag, attrs): sgmllib.SGMLParser.unknown_starttag(self, tag, attrs) if self.passthrough: self.__write_tag(tag, attrs) def handle_endtag(self, tag, method): if self.passthrough: self.__write_tag("/" + tag) sgmllib.SGMLParser.handle_endtag(self, tag, method) def unknown_endtag(self, tag): if self.passthrough: self.__write_tag("/" + tag) sgmllib.SGMLParser.unknown_endtag(self, tag) def handle_data(self, data): if self.passthrough: self.write(data) def __write_tag(self, tag, attrs = ()): self.write("<%s%s>" % (tag, "".join( [' %s="%s"' % (name, escape(value, True)) for name, value in attrs]))) # handle everything else in tag-specific overrides def start_table(self, attrs): for name, value in attrs: if name == "summary": if value == "methods and the premissions they require": self.passthrough = True self.row = 0 break if self.passthrough: self.write("\n
\n ") def end_table(self): if self.passthrough: self.write("\n \n\n") self.passthrough = False def start_tr(self, attrs): if self.passthrough: if self.row == 29: self.passthrough = False self.row_tagged = False elif self.row == 29: self.passthrough = True def end_tr(self): if self.passthrough: self.row += 1 def start_th(self, attrs): if self.passthrough: if not self.row_tagged: self.write("