{"id":1560,"date":"2023-03-25T11:26:10","date_gmt":"2023-03-25T03:26:10","guid":{"rendered":""},"modified":"2023-03-25T11:26:10","modified_gmt":"2023-03-25T03:26:10","slug":"Python\u722c\u866bPyQuery","status":"publish","type":"post","link":"https:\/\/bianchenghao6.com\/1560.html","title":{"rendered":"Python\u722c\u866bPyQuery"},"content":{"rendered":"
\n
Python\u722c\u866bPyQuery\u8be6\u7ec6\u6559\u7a0b<\/span>\n <\/div>\n PyQuery \u662f Python \u4eff\u7167 jQuery \u7684\u4e25\u683c\u5b9e\u73b0\u3002\u8bed\u6cd5\u4e0e jQuery \u51e0\u4e4e\u5b8c\u5168\u76f8\u540c\uff0cpyquery \u53ef\u8ba9\u4f60\u7528 jQuery \u7684\u8bed\u6cd5\u6765\u5bf9 xml \u8fdb\u884c\u64cd\u4f5c\u3002\u8fd9\u548c jQuery \u5341\u5206\u7c7b\u4f3c\u3002\u5982\u679c\u5229\u7528 lxml\uff0cpyquery \u5bf9 xml \u548c html \u7684\u5904\u7406\u5c06\u66f4\u5feb\u3002<\/p>\n pyquery\u73af\u5883\u642d\u5efa\u53ca\u914d\u7f6e\u8be6\u7ec6\u8bf4\u660e\u8bf7\u70b9\u51fb\uff1ahttps:\/\/www.lidihuo.com\/python\/python-spider-install-pyquery.html<\/p>\n \u5728\u8fd9\u91cc\u4ecb\u7ecd\u56db\u79cd\u521d\u59cb\u5316\u65b9\u5f0f\uff0c\u5177\u4f53\u5982\u4e0b\uff1a<\/p>\n \u8fd0\u884c\u7ed3\u679c<\/p>\n \u5728\u8fd9\u91cc\u6211\u4eec\u6ce8\u610f\u5230\u4e86\u4e00\u70b9\uff0cPyQuery \u521d\u59cb\u5316\u4e4b\u540e\uff0c\u8fd4\u56de\u7c7b\u578b\u662f PyQuery\uff0c\u5229\u7528\u4e86\u9009\u62e9\u5668\u7b5b\u9009\u4e00\u6b21\u4e4b\u540e\uff0c\u8fd4\u56de\u7ed3\u679c\u7684\u7c7b\u578b\u4f9d\u7136\u8fd8\u662f PyQuery\uff0c\u8fd9\u548c jQuery \u5982\u51fa\u4e00\u8f99\u3002<\/p>\n \u4f60\u53ef\u4ee5\u5b8c\u5168\u6309\u7167 jQuery \u7684\u8bed\u6cd5\u6765\u8fdb\u884c PyQuery \u7684\u64cd\u4f5c\u3002<\/p>\n \u8fd0\u884c\u7ed3\u679c<\/p>\n \u8fd0\u884c\u7ed3\u679c\uff1a<\/p>\n \u56e0\u6b64\u6267\u884c\u4e0a\u8ff0\u64cd\u4f5c\u4e4b\u540e\uff0cp \u672c\u8eab\u4e5f\u53d1\u751f\u4e86\u53d8\u5316\u3002<\/p>\n \u8fd0\u884c\u7ed3\u679c<\/p>\n \u904d\u5386\u7528\u5230 items \u65b9\u6cd5\u8fd4\u56de\u5bf9\u8c61\u5217\u8868\uff0c\u6216\u8005\u7528 lambda<\/p>\n <\/body>PyQuery<\/h2>\n
pyquery\u73af\u5883\u642d\u5efa\u53ca\u914d\u7f6e<\/h2>\n
\u521d\u59cb\u5316<\/h2>\n
# Filename : example.py<\/span>
# Copyright : 2020 By Lidihuo<\/span>
# Author by : www.lidihuo.com<\/span>
# Date : 2020-08-21<\/span>
# \u76f4\u63a5\u5b57\u7b26\u4e32\uff0cpq \u53c2\u6570\u53ef\u4ee5\u76f4\u63a5\u4f20\u5165 HTML \u4ee3\u7801\uff0cdoc \u73b0\u5728\u5c31\u76f8\u5f53\u4e8e jQuery \u91cc\u9762\u7684 $ \u7b26\u53f7\u4e86\u3002
<\/span> from <\/span>pyquery import <\/span>PyQuery as <\/span>pq
doc = pq(\"<html><\/html>\"<\/span><\/span>)
# lxml.etree\uff0c\u53ef\u4ee5\u9996\u5148\u7528 lxml \u7684 etree \u5904\u7406\u4e00\u4e0b\u4ee3\u7801\u3002
<\/span> from <\/span>lxml import <\/span>etree
doc = pq(etree.fromstring<\/span>(\"<html><\/html>\"))
# \u76f4\u63a5\u4f20URL\uff0c\u8fd9\u91cc\u5c31\u50cf\u76f4\u63a5\u8bf7\u6c42\u4e86\u4e00\u4e2a\u7f51\u9875\u4e00\u6837\uff0c\u5f97\u5230 HTML \u4ee3\u7801\u3002
<\/span> from <\/span>pyquery import <\/span>PyQuery as <\/span>pq
doc = pq('http:\/\/www.baidu.com'<\/span>)
# \u4f20\u6587\u4ef6\uff0c\u53ef\u4ee5\u76f4\u63a5\u4f20\u67d0\u4e2a\u8def\u5f84\u7684\u6587\u4ef6\u540d\u3002
<\/span> from <\/span>pyquery import <\/span>PyQuery as <\/span>pq
doc = pq(filename='example.html'<\/span>)
<\/span><\/code><\/pre>\n<\/p><\/div>\n\u5b9e\u4f8b\u6f14\u793a<\/h2>\n
<div> \n
<ul> \n
<li class=\"item-0\">first item<\/li> \n
<li class=\"item-1\"><a href=\"link2.html\">second item<\/a><\/li> \n
<li class=\"item-0 active\"><a href=\"link3.html\"><span class=\"bold\">third item<\/span><\/a><\/li> \n
<li class=\"item-1 active\"><a href=\"link4.html\">fourth item<\/a><\/li> \n
<li class=\"item-0\"><a href=\"link5.html\">fifth item<\/a><\/li> \n
<\/ul> \n
<\/div><\/code><\/pre>\n<\/p><\/div>\n # Filename : example.py<\/span>
# Copyright : 2020 By Lidihuo<\/span>
# Author by : www.lidihuo.com<\/span>
# Date : 2020-08-21<\/span>
from <\/span>pyquery import <\/span>PyQuery as <\/span>pq
doc = pq(filename='example.html'<\/span>)
print doc.html<\/span>()
print type(doc)
li = doc('li'<\/span>)
print type(li)
print li.text<\/span>()
<\/span><\/code><\/pre>\n<\/p><\/div>\n<div> \n
<ul> \n
<li class=\"item-0\">first item<\/li> \n
<li class=\"item-1\"><a href=\"link2.html\">second item<\/a><\/li> \n
<li class=\"item-0 active\"><a href=\"link3.html\"><span class=\"bold\">third item<\/span><\/a><\/li> \n
<li class=\"item-1 active\"><a href=\"link4.html\">fourth item<\/a><\/li> \n
<li class=\"item-0\"><a href=\"link5.html\">fifth item<\/a><\/li> \n
<\/ul> \n
<\/div> \n
< \nclass <\/span> \n'pyquery.pyquery.PyQuery'<\/span><\/span>> \n
< \nclass <\/span>'pyquery.pyquery.PyQuery'> \n
first item second item third item fourth item fifth item \n
<\/code><\/pre>\n<\/p><\/div>\n\u5c5e\u6027\u64cd\u4f5c<\/h2>\n
from pyquery import PyQuery as pq \n
p = pq('<p id=\"example\" class=\"example\"><\/p>')('p') \n
print p.attr(\"id\") \n
print p.attr(\"id\", \"plop\") \n
print p.attr(\"id\", \"example\")<\/code><\/pre>\n<\/p><\/div>\nexample<br \/> <p id=\"plop\" class=\"example\"\/> \n
<p id=\"example\" class=\"example\"\/><\/code><\/pre>\n<\/p><\/div>\nfrom pyquery import PyQuery as pq \n
p = pq('<p id=\"example\" class=\"example\"><\/p>')('p') \n
print p.addClass('beauty') \n
print p.removeClass('example') \n
print p.css('font-size', '16px') \n
print p.css({'background-color': 'yellow'})<\/code><\/pre>\n<\/p><\/div>\n<p id=\"example\" class=\"example beauty\"\/> \n
<p id=\"example\" class=\"beauty\"\/> \n
<p id=\"example\" class=\"beauty\" style=\"font-size: 16px\"\/> \n
<p id=\"example\" class=\"beauty\" style=\"font-size: 16px; background-color: yellow\"\/><\/code><\/pre>\n<\/p><\/div>\nDOM\u64cd\u4f5c<\/h2>\n
from pyquery import PyQuery as pq \n
p = pq('<p id=\"example\" class=\"example\"><\/p>')('p') \n
print p.append(' check out <a href=\"http:\/\/xxxxx.com\"><span>reddit<\/span><\/a>') \n
print p.prepend('Oh yes!') \n
d = pq('<div class=\"wrap\"><div id=\"test\"><a href=\"http:\/\/xxxxx.com\">Germy<\/a><\/div><\/div>') \n
p.prependTo(d('#test')) \n
print p \n
print d \n
d.empty() \n
print d<\/code><\/pre>\n<\/p><\/div>\n<p id=\"example\" class=\"example\"> check out <a href=\"http:\/\/xxxxx.com\"><span>reddit<\/span><\/a><\/p> \n
<p id=\"example\" class=\"example\">Oh yes! check out <a href=\"http:\/\/xxxxx.com\"><span>reddit<\/span><\/a><\/p> \n
<p id=\"example\" class=\"example\">Oh yes! check out <a href=\"http:\/\/xxxxx.com\"><span>reddit<\/span><\/a><\/p> \n
<div class=\"wrap\"><div id=\"test\"><p id=\"example\" class=\"example\">Oh yes! check out <a href=\"http:\/\/xxxxx.com\"><span>reddit<\/span><\/a><\/p><a href=\"http:\/\/xxxxx.com\">Germy<\/a><\/div><\/div> <div class=\"wrap\"\/><\/code><\/pre>\n<\/p><\/div>\n\u904d\u5386<\/h2>\n
# Filename : example.py<\/span>
# Copyright : 2020 By Lidihuo<\/span>
# Author by : www.lidihuo.com<\/span>
# Date : 2020-08-21<\/span>
from <\/span>pyquery import <\/span>PyQuery as <\/span>pq
doc = pq(filename='example.html'<\/span>)
lis = doc('li'<\/span>)
for <\/span>li in <\/span>lis.items<\/span>():
print li.html<\/span>()
print lis.each<\/span>(lambda e: e)
<\/span><\/code><\/pre>\n<\/p><\/div>\n\u7f51\u9875\u8bf7\u6c42<\/h2>\n
# Filename : example.py<\/span>
# Copyright : 2020 By Lidihuo<\/span>
# Author by : www.lidihuo.com<\/span>
# Date : 2020-08-21<\/span>
from <\/span>pyquery import <\/span>PyQuery as <\/span>pq
print(pq('https:\/\/www.lidihuo.com\/'<\/span>, headers={'user-agent'<\/span>: 'pyquery'<\/span>}))
print(pq('https:\/\/www.lidihuo.com'<\/span>, {'foo'<\/span>: 'bar'<\/span>}, method='post'<\/span>, verify=True))
<\/span><\/code><\/pre>\n<\/p><\/div>\n
\n<\/html><\/p>\n","protected":false},"excerpt":{"rendered":"Python\u722c\u866bPyQueryzh-cn","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[145],"tags":[],"class_list":["post-1560","post","type-post","status-publish","format-standard","hentry","category-pythonpcjc"],"_links":{"self":[{"href":"https:\/\/bianchenghao6.com\/wp-json\/wp\/v2\/posts\/1560"}],"collection":[{"href":"https:\/\/bianchenghao6.com\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/bianchenghao6.com\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/bianchenghao6.com\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/bianchenghao6.com\/wp-json\/wp\/v2\/comments?post=1560"}],"version-history":[{"count":0,"href":"https:\/\/bianchenghao6.com\/wp-json\/wp\/v2\/posts\/1560\/revisions"}],"wp:attachment":[{"href":"https:\/\/bianchenghao6.com\/wp-json\/wp\/v2\/media?parent=1560"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/bianchenghao6.com\/wp-json\/wp\/v2\/categories?post=1560"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/bianchenghao6.com\/wp-json\/wp\/v2\/tags?post=1560"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}