|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
ubuntu 16.04优麒麟
python 2.7
用scrapy框架。。。。。刚学爬虫三天
写个爬虫爬学校新闻,并且想将其导入mysql数据库中,其中前面大半过程都以解决,最后写入数据库不知为何写不进去
用了大量的print来检查。。。
execute后面的print语句通通不执行,不报ERROR
如果变成这样
纯字符串参数
结果
貌似是执行了(后面的print语句都执行了),然而数据库中并没有添加新数据!!!
最关键的是所有conn.execute('select ******')的语句都从数据库里拿到了数据(证明数据库连接没问题),并且能够print出来,然而所有conn.execute('insert******')的语句全部不执行?!?!
研究了好几个小时,弄得整个人都要崩溃了,还是觉得莫名奇妙
ps:本人水平:python没咋学,这次是有任务强行要求做爬虫,之前照着别人的代码改一改已经做好了一个能够正常导入mysql数据库的爬虫,结果(这是第二个)就卡在了这里。。。
最后附上pipeline.py部分的代码,如果有人想要其它部分“爬虫主体/setting.py"亦可
# -*- coding: utf-8 -*-
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html
from scrapy import signals
import json
import codecs
from twisted.enterprise import adbapi
import MySQLdb
import MySQLdb.cursors
class HitwhNewsInfoPipeline(object):
def __init__(self):
self.file = codecs.open('news_info.json', 'w', encoding='utf-8')
def process_item(self, item, spider):
line = json.dumps(dict(item), ensure_ascii=False) + "\n"
self.file.write(line)
return item
def spider_closed(self, spider):
self.file.close()
class HitwhNewsInfoMySQLPipeline(object):
print "21312321321321312312312333333333312312"
def __init__(self, dbpool):
self.dbpool = dbpool
@classmethod
def from_settings(cls, settings):
dbargs = dict(
host=settings['MYSQL_HOST'],
db=settings['MYSQL_DBNAME'],
user=settings['MYSQL_USER'],
passwd=settings['MYSQL_PASSWD'],
charset='utf8',
cursorclass = MySQLdb.cursors.DictCursor,
use_unicode= True,
)
dbpool = adbapi.ConnectionPool('MySQLdb', **dbargs)
return cls(dbpool)
def process_item(self, item, spider):
d = self.dbpool.runInteraction(self._do_upinsert, item, spider)
d.addErrback(self._handle_error, item, spider)
d.addBoth(lambda _: item)
return d
def _do_upinsert(self, conn, item, spider):
print "21312321321321312312312333333333312312"
conn.execute("select 1 from hitwh_news where title =%s",(item['title'],))
ret = conn.fetchone()
print ret
try:
conn.execute('insert into hitwh_news_info(id,title,info,img) values(1,"23323","123", "234");')
except MySQLdb.Error,e:
print "######################ERROR######################"
# if ret:
# conn.execute(" update hitwh_news_info set info = %s,img = %s where title = %s",(item['info'],item['img'],item['title']))
# print "@@@@@@@@@@@@@@@@@@@@#################%%%%%%%%%%%%%%%"
# else:
print "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
conn.execute("select id from hitwh_news where title = %s;",(item['title'],))
rets = conn.fetchone()
print rets
print item['title']
conn.execute(' insert into hitwh_news_info(id,title,info,img) values(1,%s, "123", "234")',(item['title']))
# conn.commit()
def _handle_error(self, failue, item, spider):
log.err(failure)
感谢各位了,,实在是想不出为啥才求解的。。![](static/image/smiley/ARU/aru-1x-1_036.png) |
|