# Python crawler example: fetching translations from the Youdao online translator
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2018/3/4 20:45
# @Author : hyang
# @Site :
# @File : scrapy_youdao.py
# @Software: PyCharm
import hashlib as hasher
import json
import random
import ssl
import time

import requests
import urllib3
# Work around "<urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate
# verify failed>" errors seen in some environments by installing an
# unverified context factory as the default HTTPS context.
# NOTE(review): this switches off TLS certificate verification process-wide
# for everything that goes through ssl._create_default_https_context (a
# private API). Every URL defined in this script is plain http://, so confirm
# that the override is still needed before keeping it.
ssl._create_default_https_context = ssl._create_unverified_context
urllib3.disable_warnings()  # silence urllib3 warnings
# Landing page; requested first so that the session cookies can be collected.
start_url = 'http://fanyi.youdao.com/'
# Translation endpoint that receives the signed form POST.
post_url = 'http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule'

# Browser-like headers sent along with the translation POST.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.108 Safari/537.36",
    "Referer": "http://fanyi.youdao.com/",
}

# Build the signature fields that the site's JavaScript would generate.
def get_JSKey(r_word, secret="ebSeFb%=XZ%T[KZ)c(sy!"):
    """Return the ``salt``/``sign`` pair for a translation request.

    The salt is the current time in milliseconds plus a random integer in
    0..9. The sign is the hexadecimal MD5 digest of the UTF-8 encoding of
    ``"fanyideskweb" + r_word + str(salt) + secret``.

    Args:
        r_word: Text to translate; must be a ``str`` because it is
            concatenated into the signed string.
        secret: Trailing secret mixed into the signature. It defaults to the
            value this script has always used; pass another one if the
            server-side key differs (presumably it is copied from the site's
            JavaScript, so it may change over time).

    Returns:
        dict: ``{"salt": <int>, "sign": <32-character hex string>}``.
    """
    salt = int(time.time() * 1000) + random.randint(0, 9)
    to_sign = "fanyideskweb" + r_word + str(salt) + secret
    sign = hasher.md5(to_sign.encode('utf-8')).hexdigest()
    return {"salt": salt, "sign": sign}

def get_cookies(url, timeout=10):
    """Send a GET request to *url* and return the cookies of the response.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on an unresponsive
            host.

    Returns:
        The ``cookies`` attribute of the ``requests`` response.

    Raises:
        requests.exceptions.RequestException: If the connection fails or the
            timeout expires.
    """
    response = requests.get(url, timeout=timeout)
    return response.cookies

def get_content(r_word, url, cookies, js_key, timeout=10):
    """POST a translation request for *r_word*, then print and return the reply.

    Args:
        r_word: Text to translate.
        url: Endpoint that receives the form POST.
        cookies: Cookies to send with the request.
        js_key: Mapping with the ``"salt"`` and ``"sign"`` values to submit.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        The response body decoded from JSON. It is also printed, as before;
        returning it lets callers use the result programmatically.

    Raises:
        requests.exceptions.RequestException: If the request fails or times
            out.
        ValueError: If the response body is not valid JSON.
    """
    post_data = {
        "i": r_word,
        "from": "AUTO",
        "to": "AUTO",
        "smartresult": "dict",
        "client": "fanyideskweb",
        "salt": js_key["salt"],  # salt
        "sign": js_key["sign"],  # sign
        "doctype": "json",
        "version": "2.1",
        "keyfrom": "fanyi.web",
        "action": "FY_BY_REALTIME",
        "typoResult": "false",
    }
    response = requests.post(
        url,
        headers=headers,
        data=post_data,
        cookies=cookies,
        timeout=timeout,
    )
    result = response.json()  # parsed object, not a JSON string
    print(result)
    return result


if __name__ == '__main__':
    # Excerpt of the site's JavaScript file fanyi.min.js, kept for reference.
    # It shows how the salt (timestamp in milliseconds plus a random digit)
    # and the sign (MD5 of client + text + salt + secret) are derived.
    #
    #   t.asyRequest = function(e) {
    #       var t = e.i,
    #           i = "" + ((new Date).getTime() + parseInt(10 * Math.random(), 10)),
    #           o = n.md5("fanyideskweb" + t + i + "ebSeFb%=XZ%T[KZ)c(sy!");
    #       r && r.abort(),
    #       r = n.ajax({
    #           type: "POST",
    #           contentType: "application/x-www-form-urlencoded; charset=UTF-8",
    #           url: "/bbk/translate_m.do",
    #           data: {
    #               i: e.i,
    #               client: "fanyideskweb",
    #               salt: i,
    #               sign: o,
    #               tgt: e.tgt,
    #               from: e.from,
    #               to: e.to,
    #               doctype: "json",
    #               version: "3.0",
    #               cache: !0
    #           },
    r_word = input("please input the word you want to translate : ")
    cookies = get_cookies(start_url)  # fetch the cookies from the landing page
    print('cookies=>', cookies)
    js_key = get_JSKey(r_word)
    print("js_key=>", js_key)
    get_content(r_word, post_url, cookies, js_key)  # POST the request and print the JSON reply