{"id":1041,"date":"2023-03-24T09:47:07","date_gmt":"2023-03-24T01:47:07","guid":{"rendered":""},"modified":"2023-03-24T09:47:07","modified_gmt":"2023-03-24T01:47:07","slug":"Python XML","status":"publish","type":"post","link":"https:\/\/bianchenghao6.com\/1041.html","title":{"rendered":"Python XML"},"content":{"rendered":"
\n
\u641e\u61c2Python\u64cd\u4f5c\u5e38\u7528\u7684\u7528\u6cd5<\/span>\n <\/div>\n 1\u3001\u5bf9\u5927\u578b\u6587\u4ef6\u8fdb\u884c\u5904\u7406\uff1b<\/span> characters(content)\uff1a<\/b>\u9047\u5230\u4e0b\u4e00\u4e2a\u6807\u7b7e\u4e4b\u524d\uff0ccontent \u7684\u503c\u4e3a\u8fd9\u4e9b\u5b57\u7b26\u4e32\u3002\u9047\u5230\u884c\u7ed3\u675f\u7b26\u4e4b\u524d\uff0c\u5b58\u5728\u5b57\u7b26\uff0ccontent \u7684\u503c\u4e3a\u8fd9\u4e9b\u5b57\u7b26\u4e32\u3002<\/span> parser_list - \u53ef\u9009\u53c2\u6570\uff0c\u89e3\u6790\u5668\u5217\u8868<\/span> <\/p>\n xmlfile - xml\u6587\u4ef6\u540d<\/span> xmlstring - xml\u5b57\u7b26\u4e32<\/span> <\/body>\u4ec0\u4e48\u662f XML\uff1f<\/h2>\n
\n
XML \u662f\u4e00\u5957\u5b9a\u4e49\u8bed\u4e49\u6807\u8bb0\u7684\u89c4\u5219\uff0c\u8fd9\u4e9b\u6807\u8bb0\u5c06\u6587\u6863\u5206\u6210\u8bb8\u591a\u90e8\u4ef6\u5e76\u5bf9\u8fd9\u4e9b\u90e8\u4ef6\u52a0\u4ee5\u6807\u8bc6\u3002
\n
\u5b83\u4e5f\u662f\u5143\u6807\u8bb0\u8bed\u8a00\uff0c\u5373\u5b9a\u4e49\u4e86\u7528\u4e8e\u5b9a\u4e49\u5176\u4ed6\u4e0e\u7279\u5b9a\u9886\u57df\u6709\u5173\u7684\u3001\u8bed\u4e49\u7684\u3001\u7ed3\u6784\u5316\u7684\u6807\u8bb0\u8bed\u8a00\u7684\u53e5\u6cd5\u8bed\u8a00\u3002\n <\/div>\nPython\u5bf9XML \u7684\u89e3\u6790<\/h2>\n
\n
Python \u6709\u4e09\u79cd\u65b9\u6cd5\u89e3\u6790 XML\uff0cSAX\uff0cDOM\uff0c\u4ee5\u53ca ElementTree:\n <\/div>\n1.SAX (simple API for XML )<\/h3>\n
2.DOM(Document Object Model)<\/h3>\n
Python \u4f7f\u7528 SAX \u89e3\u6790 xml<\/h3>\n
\n
\u5229\u7528 SAX \u89e3\u6790 XML \u6587\u6863\u7275\u6d89\u5230\u4e24\u4e2a\u90e8\u5206: \u89e3\u6790\u5668\u548c\u4e8b\u4ef6\u5904\u7406\u5668\u3002
\n
\u89e3\u6790\u5668\u8d1f\u8d23\u8bfb\u53d6 XML \u6587\u6863\uff0c\u5e76\u5411\u4e8b\u4ef6\u5904\u7406\u5668\u53d1\u9001\u4e8b\u4ef6\uff0c\u5982\u5143\u7d20\u5f00\u59cb\u8ddf\u5143\u7d20\u7ed3\u675f\u4e8b\u4ef6\u3002
\n
\u800c\u4e8b\u4ef6\u5904\u7406\u5668\u5219\u8d1f\u8d23\u5bf9\u4e8b\u4ef6\u4f5c\u51fa\u54cd\u5e94\uff0c\u5bf9\u4f20\u9012\u7684 XML \u6570\u636e\u8fdb\u884c\u5904\u7406\u3002\n <\/div>\n
\n 2\u3001\u53ea\u9700\u8981\u6587\u4ef6\u7684\u90e8\u5206\u5185\u5bb9\uff0c\u6216\u8005\u53ea\u9700\u4ece\u6587\u4ef6\u4e2d\u5f97\u5230\u7279\u5b9a\u4fe1\u606f\u3002<\/span>
\n 3\u3001\u60f3\u5efa\u7acb\u81ea\u5df1\u7684\u5bf9\u8c61\u6a21\u578b\u7684\u65f6\u5019\u3002<\/span>
\n \u5728 Python \u4e2d\u4f7f\u7528 sax \u65b9\u5f0f\u5904\u7406 xml \u8981\u5148\u5f15\u5165 xml.sax \u4e2d\u7684 parse \u51fd\u6570\uff0c\u8fd8\u6709 xml.sax.handler \u4e2d\u7684 ContentHandler\u3002<\/span> <\/p>\nContentHandler \u7c7b\u65b9\u6cd5\u4ecb\u7ecd<\/h3>\n
\n startDocument()\uff1a<\/b>\u6587\u6863\u542f\u52a8\u7684\u65f6\u5019\u8c03\u7528\u3002<\/span>
\n endDocument()\uff1a<\/b>\u6587\u6863\u7ed3\u675f\u7684\u65f6\u5019\u8c03\u7528\u3002<\/span>
\n startElement(name, attrs)\uff1a<\/b>\u9047\u5230XML\u5f00\u59cb\u6807\u7b7e\u65f6\u8c03\u7528\uff0cname \u662f\u6807\u7b7e\u7684\u540d\u5b57\uff0cattrs \u662f\u6807\u7b7e\u7684\u5c5e\u6027\u503c\u5b57\u5178\u3002<\/span>
\n endElement(name)\uff1a<\/b>\u9047\u5230XML\u7ed3\u675f\u6807\u7b7e\u65f6\u8c03\u7528\u3002<\/span> <\/p>\nmake_parser \u65b9\u6cd5<\/h3>\n
xml.sax.<\/span>make_parser( [parser_list] )<\/span>
<\/span><\/code><\/pre>\nparse \u65b9\u6cd5<\/h3>\n
xml.sax.<\/span>parse(xmlfile, contenthandler[, errorhandler])<\/span>
<\/span><\/code><\/pre>\n
\n contenthandler - \u5fc5\u987b\u662f\u4e00\u4e2a ContentHandler \u7684\u5bf9\u8c61<\/span>
\n errorhandler - \u5982\u679c\u6307\u5b9a\u8be5\u53c2\u6570\uff0cerrorhandler \u5fc5\u987b\u662f\u4e00\u4e2a SAX ErrorHandler \u5bf9\u8c61<\/span> <\/p>\nparseString \u65b9\u6cd5<\/h3>\n
xml.sax.<\/span>parseString(xmlstring, contenthandler[, errorhandler])<\/span>
<\/span><\/code><\/pre>\n
\n contenthandler - \u5fc5\u987b\u662f\u4e00\u4e2a ContentHandler \u7684\u5bf9\u8c61<\/span>
\n errorhandler - \u5982\u679c\u6307\u5b9a\u8be5\u53c2\u6570\uff0cerrorhandler \u5fc5\u987b\u662f\u4e00\u4e2a SAX ErrorHandler\u5bf9\u8c61<\/span> <\/p>\nPython\u89e3\u6790XML\u5b9e\u4f8b<\/h2>\n
<?xml version=\"1.0\" encoding=\"utf-8\"?><\/span>
<collection shelf=\"New Arrivals\"<\/span>><\/span>
<people title=\"Jack\"<\/span>><\/span>
<name><\/span>Jack<\/name><\/span>
<age><\/span>18<\/age><\/span>
<sex><\/span>Man<\/sex><\/span>
<address><\/span>China<\/address><\/span>
<\/people><\/span>
<people title=\"Tom\"<\/span>><\/span>
<name><\/span>Tom<\/name><\/span>
<age><\/span>25<\/age><\/span>
<sex><\/span>Woman<\/sex><\/span>
<address><\/span>American<\/address><\/span>
<\/people><\/span>
<\/collection><\/span> <\/span><\/code><\/pre>\n # -*- coding=utf-8 -*-
import xml.sax<\/span>
class <\/span>XmlHandler(xml.sax.ContentHandler):<\/span>
def<\/span> __init__(self):
self.CurrentData<\/span> = \"\"
self.name<\/span> = \"\"
self.age<\/span> = 0
self.sex<\/span> = \"\"
self.address<\/span> = \"\"
def<\/span> startElement(self, tag, attributes):
self.CurrentData<\/span> = tag
if tag<\/span> == \"people\":
print(\"Title:\", attributes[\"title\"]<\/span>)
def<\/span> endElement(self, tag):
if self.CurrentData<\/span> == \"name\":
print(\"name:\", self.name<\/span>)
elif self.CurrentData<\/span> == \"age\":
print(\"age:\", self.age<\/span>)
elif self.CurrentData<\/span> == \"sex\":
print(\"sex:\", self.sex<\/span>)
elif self.CurrentData<\/span> == \"address\":
print(\"address:\", self.address<\/span>)
self.<\/span>CurrentData = \"\"
def<\/span> characters(self, content):
if self.CurrentData<\/span> == \"name\":
self.name = content<\/span>
elif self.CurrentData<\/span> == \"age\":
self.age = content<\/span>
elif self.CurrentData<\/span> == \"sex\":
self.sex = content<\/span>
elif self.CurrentData<\/span> == \"address\":
self.address = content<\/span>
if<\/span> ( __name__ == \"__main__\"):
# \u521b\u5efa\u4e00\u4e2a XMLReader<\/span>
parser<\/span> = xml.sax.make_parser()
# \u5173\u95ed\u547d\u540d\u7a7a\u95f4<\/span>
parser.<\/span>setFeature(xml.sax.handler.feature_namespaces, 0)
# \u91cd\u5199 ContentHandler<\/span>
Handler<\/span> = XmlHandler()
parser.<\/span>setContentHandler(Handler)
parser.<\/span>parse(\"people.xml\")<\/span><\/code><\/pre>\nTitle: Jack
name: Jack
age: 18
sex: Man
address: China
Title: Tom
name: Tom
age: 25
sex: Woman
address: American<\/span><\/code><\/pre>\n<\/p><\/div>\n\u4f7f\u7528xml.dom\u89e3\u6790xml<\/h2>\n
from<\/span> xml.dom.minidom import parse<\/span>
import<\/span> xml.dom.minidom<\/span>
# \u4f7f\u7528minidom\u89e3\u6790\u5668\u6253\u5f00 XML \u6587\u6863<\/span>
DOMTree = xml.dom.minidom.parse(\"people.xml\")<\/span>
collection = DOMTree.documentElement<\/span>
if collection.hasAttribute(\"shelf\"):<\/span>
print(\"Root element : %s\" % collection.getAttribute(\"shelf\")<\/span>)
# \u5728\u96c6\u5408\u4e2d\u83b7\u53d6\u6240\u6709\u4eba\u5458<\/span>
list = collection.getElementsByTagName(\"people\")<\/span>
# \u6253\u5370\u6bcf\u4e2a\u4eba\u5458\u7684\u8be6\u7ec6\u4fe1\u606f<\/span>
for<\/span> people in list:
if people.hasAttribute(\"title\"):<\/span>
print(\"Title: %s\" % people.getAttribute(\"title\")<\/span>)
name = people.getElementsByTagName('name')[0]<\/span>
print(\"name: %s\" % name.childNodes[0].data<\/span>)
age = people.getElementsByTagName('age')[0]<\/span>
print(\"age: %s\" % age.childNodes[0].data<\/span>)
sex = people.getElementsByTagName('sex')[0]<\/span>
print(\"sex: %s\" % sex.childNodes[0].data<\/span>)
address = people.getElementsByTagName('address')[0]<\/span>
print(\"address: %s\" % address.childNodes[0].data<\/span>)
<\/span><\/code><\/pre>\nRoot element : New Arrivals
Title: Jack
name: Jack
age: 18
sex: Man
address: China
Title: Tom
name: Tom
age: 25
sex: Woman
address: American <\/span><\/code><\/pre>\n<\/p><\/div>\n
\n<\/html><\/p>\n","protected":false},"excerpt":{"rendered":"Python XMLzh-cn","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[126],"tags":[],"class_list":["post-1041","post","type-post","status-publish","format-standard","hentry","category-python3"],"_links":{"self":[{"href":"https:\/\/bianchenghao6.com\/wp-json\/wp\/v2\/posts\/1041"}],"collection":[{"href":"https:\/\/bianchenghao6.com\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/bianchenghao6.com\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/bianchenghao6.com\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/bianchenghao6.com\/wp-json\/wp\/v2\/comments?post=1041"}],"version-history":[{"count":0,"href":"https:\/\/bianchenghao6.com\/wp-json\/wp\/v2\/posts\/1041\/revisions"}],"wp:attachment":[{"href":"https:\/\/bianchenghao6.com\/wp-json\/wp\/v2\/media?parent=1041"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/bianchenghao6.com\/wp-json\/wp\/v2\/categories?post=1041"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/bianchenghao6.com\/wp-json\/wp\/v2\/tags?post=1041"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}