Downloading images with Scrapy

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import scrapy
# import codecs
import os
from bingproxy import BingProxy
class ImagesSpider(scrapy.Spider):
    """Spider that downloads every image URL listed in a text file.

    URLs are read one per line from ``url_list_path``. Each response body is
    written unchanged into ``dir_path``; the file name is the last path
    segment of the response URL with ``.png`` appended (the suffix is added
    unconditionally, whatever the real image format is).
    """

    name = "images"

    # Directory that receives the downloaded files.
    dir_path = "huaban_bingproxy_big_images"

    # Text file holding the URLs to fetch, one per line.
    url_list_path = "processing_threading_huaban_big_images_all_urls_part3.txt"

    # Runs while the class body is executed, so the output directory exists
    # before any callback tries to write into it.
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)

    start_urls = []
    bingProxy = BingProxy()

    def start_requests(self):
        """Yield one GET request per non-blank line of ``url_list_path``.

        Yields:
            scrapy.Request: a request for each URL, handled by ``parse``.
        """
        with open(self.url_list_path) as url_list:
            for line in url_list:
                url = line.strip()
                # str.strip() never returns None, so a truthiness test is
                # enough to skip blank lines.
                if not url:
                    continue
                # NOTE(review): an earlier variant requested
                # self.bingProxy.get_proxy_url(url) and carried the original
                # URL in ``meta``; presumably that routes the download
                # through the proxy -- confirm before re-enabling.
                yield scrapy.Request(url=url, callback=self.parse, method="GET")

    def parse(self, response):
        """Write the response body to a file inside ``dir_path``.

        Args:
            response: the downloaded response; ``response.url`` provides the
                file name and ``response.body`` the bytes to store.
        """
        # Strip a trailing "/" first so a URL ending in a slash does not
        # collapse to the bare name ".png" and overwrite earlier downloads.
        file_name = response.url.rstrip("/").split("/")[-1] + ".png"
        path = os.path.join(self.dir_path, file_name)
        with open(path, "wb") as f:
            f.write(response.body)

你或许想:《去原作者写文章的地方》

「点点赞赏,手留余香」

    还没有人赞赏,快来当第一个赞赏的人吧!
0 条回复 A 作者 M 管理员
    所有的伟大,都源于一个勇敢的开始!
欢迎您,新朋友,感谢参与互动!欢迎您 {{author}},您在本站有{{commentsCount}}条评论