您当前的位置:首页 > 计算机 > 编程开发 > Python

python爬虫10--文件存储之关系型数据库存储

时间:05-19来源:作者:点击数:

关系型数据库基于关系模型,而关系模型通过二维表存储,关系型数据库有:SQLite、MySQL、Oracle、SQL Server、DB2等。

MySQL数据库存储

1.连接数据库

import pymysql
#用connect()方法申明一个mysql连接对象db
db = pymysql.connect(host='localhost',user='root',password='rr123456',port=3306)
#获得mysql操作游标,利用游标来执行sql语句
cursor = db.cursor()
#新建数据库,并调用execute()方法执行
cursor.execute('CREATE DATABASE spider DEFAULT CHARACTER SET utf8')
#关闭数据库
db.close()

2.创建表

import pymysql
db = pymysql.connect(host='localhost',user='root',password='rr123456',port=3306,db='spider')
cursor = db.cursor()
#创建表
sql = 'CREATE TABLE IF NOT EXISTS user (id VARCHAR(255) NOT NULL,name VARCHAR(255) NOT NULL,age INT NOT NULL,PRIMARY KEY (id))'
cursor.execute(sql)
db.close()

3.插入数据

import pymysql
db = pymysql.connect(host='localhost',user='root',password='rr123456',port=3306,db='spider')
cursor = db.cursor()
sql = 'INSERT INTO user(id,name,age) values("001","xxx",18)'
try:
    cursor.execute(sql)
    db.commit()    #真正将语句提交到数据执行的方法,对数据库插入、更新、删除都需调用
except:
    db.rollback()
db.close()

动态插入:

import pymysql
db = pymysql.connect(host='localhost',user='root',password='rr123456',port=3306,db='spider')
cursor = db.cursor()
data = {
    "id":"002",
    "name":'ss',
    "age":20
}
table = 'user'
keys = ','.join(data.keys())
values = ','.join(['%s']*len(data))
sql = 'INSERT INTO {table}({keys}) values({values})'.format(table=table,keys=keys,values=values)
try:
    if cursor.execute(sql,tuple(data.values())):
        print('成功')
        db.commit()
except:
    print('失败')
    db.rollback()
db.close()

4.更新数据

import pymysql
db = pymysql.connect(host='localhost',user='root',password='rr123456',port=3306,db='spider')
cursor = db.cursor()
sql = 'UPDATE  user SET age=25 WHERE id="001"'
try:
    cursor.execute(sql)
    db.commit()
except:
    db.rollback()
db.close()

动态更新并去重,如果数据存在,则更新,如果数据不存在则插入:

import pymysql
db = pymysql.connect(host='localhost',user='root',password='rr123456',port=3306,db='spider')
cursor = db.cursor()
data = {
    "id":"002",
    "name":'ss',
    "age":40
}
table = 'user'
keys = ','.join(data.keys())
values = ','.join(['%s']*len(data))
sql = 'INSERT INTO {table}({keys}) values({values}) ON DUPLICATE KEY UPDATE'.format(table=table,keys=keys,values=values)
update = ','.join([" {key}=%s".format(key=key) for key in data])
sql += update
try:
    if cursor.execute(sql,tuple(data.values())*2):
        print('成功')
        db.commit()
except:
    print('失败')
    db.rollback()
db.close()

5.删除

import pymysql
db = pymysql.connect(host='localhost',user='root',password='rr123456',port=3306,db='spider')
cursor = db.cursor()
table = 'user'
cond = 'age > 30'
sql = 'DELETE FROM {table} WHERE {cond}'.format(table=table,cond=cond)
try:
    cursor.execute(sql)
    db.commit()
except:
    db.rollback()
db.close()

6.查询

import pymysql
db = pymysql.connect(host='localhost',user='root',password='rr123456',port=3306,db='spider')
cursor = db.cursor()
table = 'user'
cond = 'age > 20'
sql = 'SELECT * FROM {table} WHERE {cond}'.format(table=table,cond=cond)
try:
    cursor.execute(sql)
    print(cursor.rowcount)    #获取查询条数
    print(cursor.fetchone())   #查询一条,以元组形式展示结果
    print(cursor.fetchall())   #查询所有,不含上面已查询的,fetchall()内部实现偏移指针用来指向查询结果,获取一条后,指针指向下一条,以二元组形式全部展示结果。若数量大开销高,可用while加fetchone()代替,循环一次,指针偏移一次
except:
    db.rollback()
db.close()

7.案例

#练习:爬取豆瓣电影TOP250,电影名称,评分,推荐语信息并保存到MySql中
from pyquery import PyQuery as pq
import requests
import pymysql

url = 'https://movie.douban.com/top250'
headers = {
    'User-Agent': 'Mozilla/5.0(Windows NT 6.1;Win64;x64)AppleWebKit/537.36(KHTML,like Gecko)Chrome/79.0.3945.88 Safari/537.36'
}
res = requests.get(url=url,headers=headers)
doc = pq(res.text)
items = doc('.info').items()
list_all = []
for item in items:
    movies_dict = {}
    name = pq(item.find('.hd').html()).find('span:first-child').text()   #此处的find找到所有符合条件的
    score = pq(item.find('.star').html()).find('span:nth-child(2)').text()
    comment = item.find('.quote').text()
    movies_dict['name'] = name
    movies_dict['score'] = score
    movies_dict['comment'] = comment
    list_all.append(movies_dict)

db = pymysql.connect(host='localhost',user='root',password='rr123456',port=3306,db='spider')
cursor = db.cursor()
sql = 'CREATE TABLE movies(name VARCHAR(255) NOT NULL,score FLOAT(6,1),comment VARCHAR(255) NOT NULL)'
cursor.execute(sql)
for movie in list_all:
    keys = ','.join(movie.keys())
    values = ','.join(['%s']*len(movie))
    sql = 'INSERT INTO movies({keys}) values({values})'.format(keys=keys,values=values)
    try:
        if cursor.execute(sql,tuple(movie.values())):
            print('成功')
            db.commit()
    except:
        print('失败')
        db.rollback()
db.close()
方便获取更多学习、工作、生活信息请关注本站微信公众号城东书院 微信服务号城东书院 微信订阅号
推荐内容
相关内容
栏目更新
栏目热门