# 第四次作业 (Assignment 4)

import json

import requests
import xlwt
from lxml import etree

# Rows collected by get_info(); each row is a list of cell values for one
# vocabulary entry.
info_list = []

# HTTP headers sent with every request to the dictionary site.
# NOTE(review): the Cookie and Authorization values are hard-coded session
# data / credentials — consider loading them from configuration instead.
# NOTE(review): the User-Agent ends with "Query String Parameters view source
# view URL encoded", which looks like text copied along from the browser's
# developer tools — confirm whether it should be trimmed.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36 Query String Parameters view source view URL encoded',
    'Cookie': '__utmt=1; __utma=212754963.192174688.1557973843.1559182595.1560477585.3; __utmb=212754963.5.10.1560477585; __utmc=212754963; __utmz=212754963.1557973843.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); PageState=%7B%22dictionary%22%3A%22uk%22%2C%22lookup%22%3A%22%22%2C%22advboxopen%22%3Afalse%2C%22hideOffensiveWords%22%3Afalse%2C%22showMore%22%3Afalse%2C%22c%22%3A%7B%22301%22%3Atrue%7D%2C%22pageSize%22%3A%2240%22%2C%22q%22%3A%22%22%7D',
    'Host': 'vocabulary.englishprofile.org',
    'Referer': 'http://vocabulary.englishprofile.org/dictionary/search/uk/?c=301&c=301&pageSize=40&q=&wl=301&p=3',
    'Authorization': 'Basic ZW5nbGlzaHByb2ZpbGU6dm9jYWJ1bGFyeQ==',
}

# Sample search-results URL (page 4 of the listing).
url = 'http://vocabulary.englishprofile.org/dictionary/search/uk/?c=301&c=301&c=301&pageSize=40&q=&wl=301&p=4'

def get_info(url):
    """Fetch one search-results page and collect its vocabulary entries.

    Requests ``url`` with the module-level ``headers``, parses the response
    body as HTML and, for every ``<li>`` below ``div.search-block``, appends
    one row ``[base, pos, gw, base]`` to the module-level ``info_list``.

    Args:
        url: Address of the search-results page to scrape.

    Returns:
        None. Rows are accumulated in ``info_list``.
    """

    def _first_text(nodes):
        """Return the text of the first matched element, or '' if none."""
        return nodes[0].text if nodes else ''

    # A timeout keeps an unresponsive server from blocking the script forever.
    res = requests.get(url, headers=headers, timeout=30)
    html = etree.HTML(res.text)
    if html is None:
        # Defensive: no element tree was produced, so there is nothing to read.
        return

    vocabluary_infos = html.xpath('//div[@class="search-block"]//li')
    for info in vocabluary_infos:
        # Browser-copied XPath of one entry, kept for reference:
        # //*[@id="search-results"]/div[3]/ul/li[1]/a/span
        base = info.xpath('a/span/span[@class="base"]')
        pos = info.xpath('a/span/span[@class="pos"]')
        gw = info.xpath('a/span/span[@class="gw"]')

        if not base:
            # A list item without a headword span is not a vocabulary entry.
            continue

        headword = _first_text(base)
        # Append to the shared list; plain assignment would only rebind a
        # local name and leave the module-level list empty.
        # NOTE(review): the headword is stored twice (first and fourth cell),
        # as in the original row layout — confirm the fourth cell is intended.
        info_list.append(
            [headword, _first_text(pos), _first_text(gw), headword]
        )

#

#

#

if __name__ == '__main__':
    # Script entry point: scrape the listing pages and dump the collected
    # rows into a single-sheet workbook.
    book = xlwt.Workbook(encoding='utf-8')
    sheet = book.add_sheet('Sheet1')

    # Column titles go into row 0.
    header = ['vocabulary', 'pos', 'gw']
    for col, title in enumerate(header):
        sheet.write(0, col, title)

    # range(1, 2) yields only page 1; widen the range to scrape more pages.
    urls = ['http://vocabulary.englishprofile.org/dictionary/search/uk/?c=301&c=301&c=301&pageSize=40&q=&wl=301&p={}'.format(str(i)) for i in range(1, 2)]
    for page_url in urls:
        get_info(page_url)

    # Data rows start at row 1, directly below the header row.
    for row_idx, row in enumerate(info_list, start=1):
        for col_idx, data in enumerate(row):
            sheet.write(row_idx, col_idx, data)

    # NOTE(review): xlwt writes the legacy .xls format, so the .xlsx extension
    # here may cause spreadsheet applications to reject the file — confirm
    # and consider saving as .xls.
    book.save('E:/python/第五讲/test.xlsx')

# 「点点赞赏,手留余香」

# 还没有人赞赏,快来当第一个赞赏的人吧!