`
canofy
  • 浏览: 820977 次
  • 性别: Icon_minigender_1
  • 来自: 北京、四川
社区版块
存档分类
最新评论

xml的解析例子

阅读更多
下面是一个使用 Python 标准库 xml.sax 解析 XML(RSS 源)的例子:
#!/usr/bin/env python
#-*-coding:utf-8-*-
import xml.sax.handler
class TestHander(xml.sax.handler.ContentHandler):
    """SAX content handler that collects the text of selected elements.

    For every ``title``, ``link``, ``description`` and ``pubDate`` element
    that ends, the text buffered since the most recent start tag is prefixed
    with a label, passed through ``deleteCharacter`` and appended to
    ``content``.

    Attributes:
        mapping: Empty dict created in ``__init__``; not used by this class.
        content: Accumulated output. It is a byte string because
            ``deleteCharacter`` returns UTF-8 encoded data.
        buffer: Character data received since the last start tag.
    """

    # Element name -> label prepended to that element's text in ``content``.
    _LABELS = {
        "title": u"标题:",
        "link": u"链接地址:",
        "description": u"内容:",
        "pubDate": u"发布时间:",
    }

    def __init__(self):
        # Explicit base call rather than super(): on Python 2 ContentHandler
        # is an old-style class, for which super() does not work.
        xml.sax.handler.ContentHandler.__init__(self)
        self.mapping = {}
        self.content = b""
        # Created here so characters()/endElement() are safe to call even
        # before the first startElement().
        self.buffer = ""

    def startElement(self, name, attributes):
        """Start of an element: discard any previously buffered text."""
        self.buffer = ""

    def characters(self, data):
        """Append a chunk of character data, followed by a newline.

        The parser may deliver one text node in several chunks; each chunk
        gets its own trailing newline.
        """
        self.buffer += data + "\n"

    def endElement(self, name):
        """End of an element: record the buffered text if *name* is tracked."""
        label = self._LABELS.get(name)
        if label is not None:
            self.content += deleteCharacter(label + self.buffer)
    
def deleteCharacter(string):
    """Strip a few HTML remnants from *string* and return UTF-8 bytes.

    The substitutions are applied in order:
    ``&nbsp;`` and ``nbsp;`` are removed, ``<br />`` and ``<br/>`` become a
    newline, and ``<p>`` / ``</p>`` are removed.

    Args:
        string: Text (unicode) to clean.

    Returns:
        The cleaned text encoded as UTF-8 (``str`` on Python 2, ``bytes`` on
        Python 3).
    """
    # Order matters: "&nbsp;" must be handled before the bare "nbsp;",
    # otherwise a stray "&" would be left behind.
    replacements = (
        (u"&nbsp;", u""),
        (u"nbsp;", u""),
        (u"<br />", u"\n"),
        (u"<br/>", u"\n"),
        (u"<p>", u""),
        (u"</p>", u""),
    )
    text = string
    for old, new in replacements:
        text = text.replace(old, new)
    # Encode once, after all text processing. All patterns are ASCII, so the
    # result is identical to replacing inside the already-encoded bytes, but
    # this form also works on Python 3 where bytes.replace() rejects str.
    return text.encode("utf-8")

if __name__=="__main__":
    # Script entry point: parse the RSS feed and dump the collected text
    # to a file.
    import io
    import xml.sax

    parser = xml.sax.make_parser()
    hander = TestHander()
    parser.setContentHandler(hander)
    parser.parse("http://blog.sina.com.cn/rss/soundfragment.xml")
    # To parse a local copy of the feed instead, pass its path to
    # parser.parse(), e.g. "c:\\rss.xml".

    # Write the result to a file.
    file_path = "c:\\wt.txt"
    text = hander.content
    # The handler accumulates UTF-8 encoded bytes; decode them so the data
    # can be written through a text-mode file on both Python 2 and 3.
    if isinstance(text, bytes):
        text = text.decode("utf-8")
    # io.open replaces the Python 2-only file() builtin, and the with-block
    # closes the file even if write() raises.
    with io.open(file_path, "w", encoding="utf-8") as outfile:
        outfile.write(text)
    
分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics