"""Hand-written, object-oriented plain-text-to-HTML markup converter.

The original write-up presented this program as four files, which are kept
here as clearly separated sections of a single module:

* util.py     - cuts the input into blocks (runs of non-blank lines).
* rules.py    - the ``Rule`` base class and its subclasses.  Each subclass
                states when it applies to a block (``condition``) and what to
                do with it (``action``, which drives the handler).
* handlers.py - the ``Handler`` base class, which dispatches to methods by
                name, and ``HTMLRenderer``, which supplies the HTML tags.
* markup.py   - the ``Parser`` base class (holds the rules and filters and
                runs them over every block) and ``BasicTextParser``.

Originally posted by lzhenf on 2012-03-06 15:47; reproduced from
https://www.cnblogs.com/lzhenf/archive/2012/03/06/2382056.html
"""

import re
import sys

# In the four-file layout markup.py started with
#     from handlers import *
#     from util import *
#     from rules import *
# Those star-imports are not needed (and would not resolve) now that every
# definition lives in this one module.


# ---------------------------------------------------------------------------
# handlers.py
# ---------------------------------------------------------------------------

class Handler:
    """Receive method calls from the parser and dispatch them by name.

    The parser calls ``start(name)`` and ``end(name)`` around every block,
    using the block's type name as the argument.  ``sub(name)`` is used for
    regular-expression substitution: called with a name such as
    ``'emphasis'`` it returns the matching replacement function.
    """

    def callback(self, prefix, name, *args):
        """Call ``self.<prefix><name>(*args)`` if such a callable exists.

        Returns whatever that method returns, or None when the handler has
        no callable attribute of that name.
        """
        method = getattr(self, prefix + name, None)
        if callable(method):
            return method(*args)
        return None

    def start(self, name):
        """Announce the start of a block of type ``name``."""
        self.callback('start_', name)

    def end(self, name):
        """Announce the end of a block of type ``name``."""
        self.callback('end_', name)

    def sub(self, name):
        """Return a replacement function suitable for ``re.sub``.

        The returned function forwards each match to ``sub_<name>``.  When
        the handler has no such method (or it returns None) the text of the
        first capture group is used instead.
        """
        def substitution(match):
            result = self.callback('sub_', name, match)
            if result is None:
                # A pattern without any capture group has no group(1);
                # leave the matched text untouched in that case instead of
                # raising IndexError.
                if match.re.groups:
                    result = match.group(1)
                else:
                    result = match.group(0)
            return result
        return substitution


class HTMLRenderer(Handler):
    """Concrete handler that renders the document as HTML.

    Every method here is reached through the ``start()``, ``end()`` and
    ``sub()`` methods of the ``Handler`` base class; together they implement
    the basic HTML tags.

    ``out`` is an optional object with a ``write`` method that receives the
    output; when omitted, output goes to ``sys.stdout``.
    """

    # Class-level default so subclasses that define their own __init__
    # without calling this one still work.
    _out = None

    def __init__(self, out=None):
        self._out = out

    def _emit(self, text):
        """Write ``text`` followed by a newline to the output stream."""
        # sys.stdout is looked up on every call so that a later redirection
        # of sys.stdout is honoured.
        stream = sys.stdout if self._out is None else self._out
        stream.write('%s\n' % (text,))

    def start_document(self):
        self._emit('<html><head><title>...</title></head><body>')

    def end_document(self):
        self._emit('</body></html>')

    def start_paragraph(self):
        self._emit('<p>')

    def end_paragraph(self):
        self._emit('</p>')

    def start_heading(self):
        self._emit('<h2>')

    def end_heading(self):
        # Fixed: this used to emit a second opening tag.
        self._emit('</h2>')

    def start_list(self):
        self._emit('<ul>')

    def end_list(self):
        self._emit('</ul>')

    def start_listitem(self):
        self._emit('<li>')

    def end_listitem(self):
        self._emit('</li>')

    def start_title(self):
        self._emit('<h1>')

    def end_title(self):
        self._emit('</h1>')

    def sub_emphasis(self, match):
        # Fixed: the closing tag used to be a second opening '<em>'.
        return '<em>%s</em>' % match.group(1)

    def sub_url(self, match):
        return '<a href="%s">%s</a>' % (match.group(1), match.group(1))

    def sub_mail(self, match):
        return '<a href="mailto:%s">%s</a>' % (match.group(1), match.group(1))

    def feed(self, data):
        """Emit the text of a block.

        NOTE(review): ``data`` is written without HTML escaping.  Escaping
        here would also escape the tags the filters have already inserted,
        so it would have to happen before the filters run.
        """
        self._emit(data)


# ---------------------------------------------------------------------------
# rules.py
# ---------------------------------------------------------------------------

class Rule:
    """Base class for all rules.

    Subclasses provide a ``type`` name and a ``condition(block)`` method.
    """

    def action(self, block, handler):
        """Wrap ``block`` in the start/end calls for this rule's type.

        Returns True to tell the parser that the block is fully handled.
        """
        handler.start(self.type)
        handler.feed(block)
        handler.end(self.type)
        return True


class HeadingRule(Rule):
    """A heading is a single line of at most 70 characters that does not
    end with a colon."""

    type = 'heading'

    def condition(self, block):
        return ('\n' not in block
                and len(block) <= 70
                and not block.endswith(':'))


class TitleRule(HeadingRule):
    """The title is the first block of the document, provided that it
    qualifies as a heading."""

    type = 'title'
    first = True

    def condition(self, block):
        if not self.first:
            return False
        # Only the very first block seen by this instance can be the title.
        self.first = False
        return HeadingRule.condition(self, block)


class ListItemRule(Rule):
    """A list item is a block that begins with a hyphen; the hyphen is
    removed as part of the formatting."""

    type = 'listitem'

    def condition(self, block):
        return block.startswith('-')

    def action(self, block, handler):
        handler.start(self.type)
        handler.feed(block[1:].strip())
        handler.end(self.type)
        return True


class ListRule(ListItemRule):
    """Open a list before the first item of a run of list items and close
    it at the first following block that is not a list item.

    NOTE(review): a list that is still open when the input ends is never
    closed, because ``action`` only runs when another block arrives.
    """

    type = 'list'
    inside = False

    def condition(self, block):
        # Must look at every block to notice where a list starts and ends.
        return True

    def action(self, block, handler):
        is_item = ListItemRule.condition(self, block)
        if not self.inside and is_item:
            handler.start(self.type)
            self.inside = True
        elif self.inside and not is_item:
            handler.end(self.type)
            self.inside = False
        # Never consumes the block: later rules still have to render it.
        return False


class ParagraphRule(Rule):
    """A paragraph is any block that no other rule has claimed."""

    type = 'paragraph'

    def condition(self, block):
        return True


# ---------------------------------------------------------------------------
# util.py
# ---------------------------------------------------------------------------

def lines(file):
    """Yield every line of ``file`` followed by one extra blank line.

    The trailing blank line makes sure ``blocks`` flushes the last block.
    """
    for line in file:
        yield line
    yield '\n'


def blocks(file):
    """Yield the blocks of ``file``: runs of consecutive non-blank lines.

    Each block is the concatenation of its lines with leading and trailing
    whitespace stripped.  ``file`` is any iterable of lines.
    """
    block = []
    for line in lines(file):
        if line.strip():
            block.append(line)
        elif block:
            yield ''.join(block).strip()
            block = []


# ---------------------------------------------------------------------------
# markup.py
# ---------------------------------------------------------------------------

class Parser:
    """Read the data, apply the filters and rules, and drive the handler
    for every block."""

    def __init__(self, handler):
        self.handler = handler
        self.rules = []
        self.filters = []

    def addRule(self, rule):
        """Append ``rule`` to the list of rules tried for each block."""
        self.rules.append(rule)

    def addFilters(self, patten, name):
        """Register a regex filter.

        Every match of ``patten`` in a block is replaced by what the
        handler's ``sub(name)`` function returns for it.
        """
        def apply_filter(block, handler):
            return re.sub(patten, handler.sub(name), block)
        self.filters.append(apply_filter)

    def parse(self, file):
        """Convert ``file`` (an iterable of lines) through the handler."""
        self.handler.start('document')
        for block in blocks(file):
            for apply_filter in self.filters:
                block = apply_filter(block, self.handler)
            for rule in self.rules:
                if rule.condition(block):
                    # A true return value means the block is finished.
                    if rule.action(block, self.handler):
                        break
        self.handler.end('document')


class BasicTextParser(Parser):
    """Parser preloaded with the list, title, heading and paragraph rules
    and the emphasis, URL and mail filters."""

    def __init__(self, handler):
        # Fixed: the parameter used to be spelled 'hanler', so the body
        # silently picked up a module-level global named 'handler'.
        Parser.__init__(self, handler)
        self.addRule(ListRule())
        self.addRule(ListItemRule())
        self.addRule(TitleRule())
        self.addRule(HeadingRule())
        self.addRule(ParagraphRule())
        self.addFilters(r'\*(.+?)\*', 'emphasis')
        # Fixed: the range 'A-z' also matches [ \ ] ^ _ and backtick.
        self.addFilters(r'(http://[\.a-zA-Z/]+)', 'url')
        self.addFilters(r'([\.a-zA-Z/]+@[\.a-zA-Z]+[a-zA-Z]+)', 'mail')


# Input file used by the original script when no path is supplied.
DEFAULT_INPUT = r'D://python27/input.txt'


def main(argv=None):
    """Render a text file as HTML on standard output.

    ``argv`` is the list of command-line arguments (defaults to
    ``sys.argv[1:]``); its first element, if present, is the input path.
    Without it the original hard-coded path is used.
    """
    if argv is None:
        argv = sys.argv[1:]
    path = argv[0] if argv else DEFAULT_INPUT
    with open(path) as source:
        BasicTextParser(HTMLRenderer()).parse(source)


if __name__ == '__main__':
    main()