|  | 
 
| 
py的代码:
x
马上注册,结交更多好友,享用更多功能^_^您需要 登录 才可以下载或查看,没有账号?立即注册  
 
 
 # -*- coding:utf-8 -*-
 import urllib2,re,MySQLdb
 
 urla = 'http://www.quanshu.net/book/9/9055/'  #这个是盗墓笔记
 
 def getlist():
 html = urllib2.urlopen(urla).read()
 html = html.decode('gb2312').encode('utf-8')
 reg = re.compile(r'<li><a href="(.*?)" title="(.*?)">(.*?)</a>')
 urls = re.findall(reg,html)
 return urls
 
 
 def getcontent(url):
 html = urllib2.urlopen(url).read()
 html = html.decode('gbk').encode('utf-8')
 reg = re.compile(r'style5\(\);</script>(.*?)<script type="text/javascript">')
 content = re.findall(reg,html)
 return content
 
 class Sql(object):
 conn = MySQLdb.connect(
 host = 'localhost',
 port = 3306,
 user = 'root',
 passwd = 'passwd',
 db = "xiaoshuo",
 charset = "utf8",
 )
 
 
 def adddata(self,title,content):
 cur = self.conn.cursor()
 cur.execute("insert into books values(NULL,'%s','%s')" %(title,content))
 cur.close()
 self.conn.commit()
 mysql = Sql()
 
 
 for i in getlist():
 print '正在爬去%s' %i[1]
 title = i[1]
 content = getcontent(urla + i[0])
 print '正在插入数据库 %s' %i[1]
 mysql.adddata(title,content)
 break
 
 
 
 
 报错:
 
 
 
 Python 2.7.12 (v2.7.12:d33e0cf91556, Jun 27 2016, 15:24:40) [MSC v.1500 64 bit (AMD64)] on win32
 Type "copyright", "credits" or "license()" for more information.
 >>>
 ==== RESTART: C:\Users\Administrator\Desktop\shujuwajue\全书网小说爬取.py ====
 正在爬去国庆贺文,非盗墓笔记,免费奉送。,共6035字
 正在插入数据库 国庆贺文,非盗墓笔记,免费奉送。,共6035字
 
 Traceback (most recent call last):
 File "C:\Users\Administrator\Desktop\shujuwajue\全书网小说爬取.py", line 45, in <module>
 mysql.adddata(title,content)
 File "C:\Users\Administrator\Desktop\shujuwajue\全书网小说爬取.py", line 34, in adddata
 cur.execute("insert into books values(NULL,'%s','%s')" %(title,content))
 File "build\bdist.win-amd64\egg\MySQLdb\cursors.py", line 205, in execute
 self.errorhandler(self, exc, value)
 File "build\bdist.win-amd64\egg\MySQLdb\connections.py", line 36, in defaulterrorhandler
 raise errorclass, errorvalue
 ProgrammingError: (1064, "You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near ';   \\xe6\\x88\\x91\\xe6\\x8e\\xa5\\xe4\\xb8\\x8b\\xe6\\x9d\\xa5\\xe8\\xa6\\x81\\' at line 1")
 >>>
 
 
 
 
 
 
 
 
 
 
 
 
import urllib2,re,MySQLdb 之后 加上这个试试reload(sys)
 sys.setdefaultencoding('utf8')
 | 
 |