步骤索引
爬取网址 :https://eth.btc.com/home
首页(输入代币代号)
数据页(点击翻页按钮测试请求:F12-Headers-Response)
获取到Header URL
获取到Response并分析格式
接下来便可通过以上请求头Header以及返回值Response获取并解析结果
爬取网址 :https://eth.btc.com/home
首页(输入代币代号)
数据页(点击翻页按钮测试请求:F12-Headers-Response)
获取到Header URL
Request URL:
https://explorer-web.api.btc.com/v1/eth/tokentxns/0x4f878c0852722b0976a955d68b376e4cd4ae99e5?page=2&size=10
Request Method: GET Status Code: 200 Remote Address:
18.141.83.207:443 Referrer Policy: no-referrer-when-downgrade
获取到Response并分析格式
Out[164]: {'err_no': 0, 'data': {'page': 1, 'pagesize': 10,
'total_count': 113881, 'list': [
{'id': 1,
'tx_hash': '0xa7c1873e0b070614f107e73ffaece6e66df8029267326c05b4366068cb896fdd',
'block_height': 10443343,
'created_ts': 1594538711,
'time_in_sec': 1569898,
'sender_hash': '0xd81133299dc33329ab6cdf93434662dd0ae21810',
'receiver_hash': '0x2317e431b3766abd24e770ba81c503371f4e5a8b',
'amount': '1000000000',
'token_hash': '0x4f878c0852722b0976a955d68b376e4cd4ae99e5',
'token_name': 'WaykiChain',
'token_decimal': 8,
'unit_name': 'WICC',
'sender_name': '',
'receiver_name': '',
'token_url': None,
'token_icon_url': None,
'token_found': True},
{'id': 2,
'tx_hash': '0xe80e0048134155af74916e3e31054a783be3ba8cf6ea7a3b8fe6d8df2f53ed93',
'block_height': 10366778,
'created_ts': 1593515753,
'time_in_sec': 2592856,
'sender_hash': '0xfdaccb5ecd9ddaf3ec2d7a49bc19ef0b21217760',
'receiver_hash': '0xa15768d846b631154d2394a17e42eaefae26eaf3',
'amount': '100000000000',
'token_hash': '0x4f878c0852722b0976a955d68b376e4cd4ae99e5',
'token_name': 'WaykiChain',
'token_decimal': 8,
'unit_name': 'WICC',
'sender_name': '',
'receiver_name': '',
'token_url': None,
'token_icon_url': None,
'token_found': True}
…
…省略重复部分…
…
]},
'err_msg': None}
接下来便可通过以上请求头Header以及返回值Response获取并解析结果
# -*- coding: utf-8 -*-
"""
Created on Thu Jul 30 14:30:46 2020
@author: Songyou Han
"""
import requests
import pymysql
import json
# Target MySQL table for the scraped token transfers.
table = 'reptitles_of_ethereum_brower_wicc'
# Scraper part.
# Head and tail of the block-explorer API URL; the page number is spliced
# in between them for every request.
url_head = "https://explorer-web.api.btc.com/v1/eth/tokentxns/0x4f878c0852722b0976a955d68b376e4cd4ae99e5?page="
url_foot = "&size=500"


def quote_identifier(name):
    """Return *name* quoted as a MySQL identifier.

    Embedded backticks are doubled, so a column name taken from the remote
    JSON payload cannot terminate the identifier and inject SQL.
    """
    return "`" + str(name).replace("`", "``") + "`"


def build_upsert_sql(table_name, columns):
    """Build a parameterised ``INSERT ... ON DUPLICATE KEY UPDATE`` statement.

    Args:
        table_name: Name of the table to insert into.
        columns: Iterable of column names, in the order the values are bound.

    Returns:
        The SQL text. It contains ``2 * len(columns)`` ``%s`` placeholders:
        one set for the INSERT values and a second set, in the same order,
        for the UPDATE assignments.

    Raises:
        ValueError: If *columns* is empty (the statement would be invalid).
    """
    quoted = [quote_identifier(column) for column in columns]
    if not quoted:
        raise ValueError("at least one column is required")
    placeholders = ','.join(['%s'] * len(quoted))
    assignments = ','.join(" {col} = %s".format(col=col) for col in quoted)
    return 'INSERT INTO {table}({keys}) VALUES ({values}) ON DUPLICATE KEY UPDATE{update}'.format(
        table=quote_identifier(table_name),
        keys=','.join(quoted),
        values=placeholders,
        update=assignments,
    )


def fetch_page(page, timeout=30):
    """Download one page of transfers and return its list of records.

    Args:
        page: 1-based page number to request.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The ``data.list`` array of the JSON response (an empty list when the
        API returns nothing for that field).

    Raises:
        requests.HTTPError: If the server answers with an error status.
        RuntimeError: If the JSON payload reports a non-zero ``err_no``.
    """
    url = url_head + str(page) + url_foot
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    payload = json.loads(response.text)  # decode the JSON text
    if payload.get("err_no"):
        raise RuntimeError(
            "API error on page %d: %s" % (page, payload.get("err_msg"))
        )
    return payload["data"]["list"] or []


def store_records(database, cursor, records, page):
    """Upsert every record of one page, committing row by row.

    Records are written last-to-first, matching the original ``list.pop()``
    order. A row that fails is rolled back and reported, and the loop moves
    on to the next row.
    """
    row = 1  # counter of successfully written rows on this page
    for record in reversed(records):
        if not record:
            continue  # an empty record would yield an invalid statement
        sql = build_upsert_sql(table, record.keys())
        # The values are bound twice: once for INSERT, once for UPDATE.
        params = tuple(record.values()) * 2
        try:
            cursor.execute(sql, params)
            database.commit()
        except Exception as exc:
            # Best effort per row, but no longer a bare ``except`` that would
            # also swallow KeyboardInterrupt/SystemExit and hide the cause.
            print('Failed', exc)
            database.rollback()
        else:
            print('Successful insert cloumn ' + str(page) + '.' + str(row))
            row += 1


def main(pages=200):
    """Scrape *pages* pages of WICC token transfers into the local database.

    Args:
        pages: Number of pages to fetch, starting from page 1.
    """
    # Open the connection to the local scraper database. Keyword arguments
    # are used because PyMySQL 1.0+ no longer accepts positional ones.
    # NOTE(review): credentials are hard-coded; consider loading them from
    # the environment or a config file.
    database = pymysql.connect(
        host="localhost",
        user="root",
        password="root",
        database="reptitles",
        charset='utf8',
    )
    try:
        cursor = database.cursor()
        try:
            for page in range(1, pages + 1):
                records = fetch_page(page)
                print("get data of page %d" % page)
                print("=====================================================")
                # Insert the page's data into the database.
                store_records(database, cursor, records, page)
            print("https://eth.btc.com/home :WICC以太坊代币:转账数据爬取完毕!!!")
        finally:
            cursor.close()
    finally:
        # Close the database connection even when a request or query failed.
        database.close()


if __name__ == "__main__":
    main()
(责任编辑:)
|