0%

PyQt5-百度迁徙数据爬取

记录百度迁徙数据爬取与 PyQt5 表格制作的笔记。

bdqx

代码记录

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
import requests, json, sys, time, html, re
from jsonpath import jsonpath
from PyQt5.QtWidgets import *
from PyQt5.QtCore import *
from PyQt5.uic import loadUi
import arrow


def isLeapYear(years):
    """Return the number of days in the Gregorian year ``years``.

    Despite the name, the result is a day count rather than a boolean:
    366 when ``years`` is a leap year, 365 otherwise.

    :param years: the year as an ``int``, e.g. ``2018``
    :return: 366 for a leap year, 365 for a common year
    :raises AssertionError: if ``years`` is not an ``int``
    """
    # Raised explicitly (instead of using an ``assert`` statement) so the
    # check still runs under ``python -O``; the exception type is unchanged.
    if not isinstance(years, int):
        raise AssertionError("请输入整数年,如 2018")

    # Leap year: divisible by 4 but not by 100, or divisible by 400.
    is_leap = (years % 4 == 0 and years % 100 != 0) or years % 400 == 0
    return 366 if is_leap else 365


def getAllDayPerYear(years):
    """Return every calendar date of one year as ``YYYY-MM-DD`` strings.

    :param years: the year, as an ``int`` or a string accepted by ``int()``
        (for example ``2020`` or ``"2020"``)
    :return: list of date strings in ascending order, from January 1st to
        December 31st (365 or 366 entries)
    :raises ValueError: if ``years`` cannot be converted to a valid year
    """
    # Imported locally so this function does not depend on the file's
    # top-level import list.
    from datetime import date, timedelta

    year = int(years)
    one_day = timedelta(days=1)

    all_date_list = []
    current = date(year, 1, 1)
    # Walk forward one day at a time until the calendar rolls over into the
    # next year; leap years are handled by ``date`` itself.
    while current.year == year:
        # ``date.isoformat()`` yields zero-padded ``YYYY-MM-DD``.
        all_date_list.append(current.isoformat())
        # ``date.max`` is December 31st of year 9999: stop there instead of
        # overflowing when stepping past it.
        if current == date.max:
            break
        current += one_day
    return all_date_list


class MainWindow(QMainWindow):
    """Main window of the Baidu migration viewer.

    The widgets are loaded from ``bd.ui`` and reached through ``self.ui``.
    The table lists one row per city/region (name, id, and a fetch button);
    fetching fills the move-in list (``city_qr``) and the move-out list
    (``city_qc``) for the selected row and the date chosen in ``date_comb``.
    """

    # Custom signal declared on the main window. The original author's note
    # says it is meant for signalling a worker thread; nothing in this class
    # emits or connects it.
    signal_m = pyqtSignal()

    def __init__(self, *args, **kwargs):
        """Load the UI file, configure the window and populate the widgets."""
        super(MainWindow, self).__init__(*args, **kwargs)
        # All widgets are accessed through self.ui from here on.
        self.ui = loadUi('bd.ui')
        # Stretch the columns so the table always fills the available width.
        self.ui.tableWidget.horizontalHeader().setSectionResizeMode(QHeaderView.Stretch)

        # The window title carries the start-up time.
        self.ui.setWindowTitle('百度迁徙 启动时间:' + str(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())) + ' by:包子')
        # Opacity ranges from 0 (fully transparent) to 1 (opaque).
        self.ui.setWindowOpacity(1)

        self.initUi()

    def initUi(self):
        """Fill the date combo box and the city table, and wire the signals."""
        # Take a single time snapshot so the year and the full date cannot
        # disagree if the clock crosses midnight between two calls.
        now = time.localtime()
        self.t = time.strftime('%Y', now)
        self.t_1 = time.strftime('%Y-%m-%d', now)
        self._date_time = getAllDayPerYear(self.t)

        # Offer every day of the current year before today, newest first.
        # The dates are zero-padded ISO strings, so string comparison orders
        # them chronologically.
        earlier_days = [day for day in self._date_time if day < self.t_1]
        earlier_days.reverse()
        self.ui.date_comb.addItems(earlier_days)

        # "name|id" pairs, comma separated. Several names are aliases that
        # share one id.
        # NOTE(review): "三亚" appears twice (460200 and 460300) — confirm
        # whether the second entry was meant to be a different city.
        city_table = "北京|110000,天津|120000,兴安盟|152200,巢湖|340181,定安|469021,屯昌|469022,澄迈|469023,临高|469024,海东地区|630200," \
                     "香港|810000,澳门|820000,昌都|540300,昌都地区|540300,山南|540500,山南地区|540500,日喀则|540200,日喀则地区|540200,那曲|540600," \
                     "那曲地区|540600,林芝|540400,林芝地区|540400,吐鲁番|650400,吐鲁番地区|650400,铜仁|520600,铜仁地区|520600,毕节|520500," \
                     "毕节地区|520500,广西|450000,广西壮族自治区|450000,内蒙古|150000,内蒙古自治区|150000,宁夏|640000,宁夏回族自治区|640000,新疆|650000," \
                     "新疆维吾尔自治区|650000,西藏|540000,西藏自治区|540000,石家庄|130100,唐山|130200,秦皇岛|130300,邯郸|130400,邢台|130500," \
                     "保定|130600,张家口|130700,承德|130800,沧州|130900,廊坊|131000,衡水|131100,太原|140100,大同|140200,阳泉|140300," \
                     "长治|140400,晋城|140500,朔州|140600,晋中|140700,运城|140800,忻州|140900,临汾|141000,吕梁|141100,呼和浩特|150100," \
                     "包头|150200,乌海|150300,赤峰|150400,通辽|150500,鄂尔多斯|150600,呼伦贝尔|150700,巴彦淖尔|150800,乌兰察布|150900,沈阳|210100," \
                     "大连|210200,鞍山|210300,抚顺|210400,本溪|210500,丹东|210600,锦州|210700,营口|210800,阜新|210900,辽阳|211000," \
                     "盘锦|211100,铁岭|211200,朝阳|211300,葫芦岛|211400,长春|220100,吉林市|220200,四平|220300,辽源|220400,通化|220500," \
                     "白山|220600,松原|220700,白城|220800,哈尔滨|230100,齐齐哈尔|230200,鸡西|230300,鹤岗|230400,双鸭山|230500,大庆|230600," \
                     "伊春|230700,佳木斯|230800,七台河|230900,牡丹江|231000,黑河|231100,绥化|231200,上海|310000,南京|320100,无锡|320200," \
                     "徐州|320300,常州|320400,苏州|320500,南通|320600,连云港|320700,淮安|320800,盐城|320900,扬州|321000,镇江|321100," \
                     "泰州|321200,宿迁|321300,浙江|330000,杭州|330100,宁波|330200,温州|330300,嘉兴|330400,湖州|330500,绍兴|330600," \
                     "金华|330700,衢州|330800,舟山|330900,台州|331000,丽水|331100,合肥|340100,芜湖|340200,蚌埠|340300,淮南|340400," \
                     "马鞍山|340500,淮北|340600,铜陵|340700,安庆|340800,黄山|341000,滁州|341100,阜阳|341200,宿州|341300,六安|341500," \
                     "亳州|341600,池州|341700,宣城|341800,福州|350100,厦门|350200,莆田|350300,三明|350400,泉州|350500,漳州|350600," \
                     "南平|350700,龙岩|350800,宁德|350900,南昌|360100,景德镇|360200,萍乡|360300,九江|360400,新余|360500,鹰潭|360600," \
                     "赣州|360700,吉安|360800,宜春|360900,抚州|361000,上饶|361100,济南|370100,莱芜|370100,青岛|370200,淄博|370300," \
                     "枣庄|370400,东营|370500,烟台|370600,潍坊|370700,济宁|370800,泰安|370900,威海|371000,日照|371100,临沂|371300," \
                     "德州|371400,聊城|371500,滨州|371600,菏泽|371700,郑州|410100,开封|410200,洛阳|410300,平顶山|410400,安阳|410500," \
                     "鹤壁|410600,新乡|410700,焦作|410800,濮阳|410900,许昌|411000,漯河|411100,三门峡|411200,南阳|411300,商丘|411400," \
                     "信阳|411500,周口|411600,驻马店|411700,武汉|420100,黄石|420200,十堰|420300,宜昌|420500,襄阳|420600,鄂州|420700," \
                     "荆门|420800,孝感|420900,荆州|421000,黄冈|421100,咸宁|421200,随州|421300,仙桃|429004,潜江|429005,天门|429006," \
                     "长沙|430100,株洲|430200,湘潭|430300,衡阳|430400,邵阳|430500,岳阳|430600,常德|430700,张家界|430800,益阳|430900," \
                     "郴州|431000,永州|431100,怀化|431200,娄底|431300,广州|440100,韶关|440200,深圳|440300,珠海|440400,汕头|440500," \
                     "佛山|440600,江门|440700,湛江|440800,茂名|440900,肇庆|441200,惠州|441300,梅州|441400,汕尾|441500,河源|441600," \
                     "阳江|441700,清远|441800,东莞|441900,济源|419001,中山|442000,潮州|445100,揭阳|445200,云浮|445300,南宁|450100," \
                     "柳州|450200,桂林|450300,梧州|450400,北海|450500,防城港|450600,钦州|450700,贵港|450800,玉林|450900,百色|451000," \
                     "贺州|451100,河池|451200,来宾|451300,崇左|451400,海口|460100,三亚|460200,三亚|460300,五指山|469001,琼海|469002," \
                     "儋州|460400,文昌|469005,万宁|469006,东方|469007,重庆|500000,成都|510100,自贡|510300,攀枝花|510400,泸州|510500," \
                     "德阳|510600,绵阳|510700,广元|510800,遂宁|510900,内江|511000,乐山|511100,南充|511300,眉山|511400,宜宾|511500," \
                     "广安|511600,达州|511700,雅安|511800,巴中|511900,资阳|512000,贵阳|520100,六盘水|520200,遵义|520300,安顺|520400," \
                     "昆明|530100,曲靖|530300,玉溪|530400,保山|530500,昭通|530600,丽江|530700,临沧|530900,普洱|530800,拉萨|540100," \
                     "西安|610100,铜川|610200,宝鸡|610300,咸阳|610400,渭南|610500,延安|610600,汉中|610700,榆林|610800,安康|610900," \
                     "商洛|611000,兰州|620100,嘉峪关|620200,金昌|620300,白银|620400,天水|620500,武威|620600,张掖|620700,平凉|620800," \
                     "酒泉|620900,庆阳|621000,定西|621100,陇南|621200,西宁|630100,银川|640100,石嘴山|640200,吴忠|640300,固原|640400," \
                     "中卫|640500,乌鲁木齐|650100,克拉玛依|650200,石河子|659001,阿拉尔|659002,图木舒克|659003,五家渠|659004,北屯|659005," \
                     "铁门关|659006,双河|659007,可克达拉|659008,昆玉|659009,恩施|422800,恩施土家族苗族自治州|422800,延边|222400,延边朝鲜族自治州|222400," \
                     "神农架地区|429021,神农架林区|429021,湘西州|433100,湘西土家族苗族自治州|433100,大兴安岭地区|232700,白沙县|469025,白沙黎族自治县|469025," \
                     "昌江黎族自治县|469026,乐东黎族自治县|469027,陵水黎族自治县|469028,保亭黎族苗族自治县|469029,琼中黎族苗族自治县|469030,阿坝州|513200," \
                     "阿坝藏族羌族自治州|513200,甘孜州|513300,甘孜藏族自治州|513300,凉山州|513400,凉山彝族自治州|513400,黔西南布依族苗族自治州|522300," \
                     "黔东南苗族侗族自治州|522600,黔南布依族苗族自治州|522700,楚雄州|532300,楚雄彝族自治州|532300,红河州|532500,红河哈尼族彝族自治州|532500," \
                     "文山|532600,文山壮族苗族自治州|532600,西双版纳傣族自治州|532800,大理州|532900,大理白族自治州|532900,德宏州|533100," \
                     "德宏傣族景颇族自治州|533100,怒江州|533300,怒江傈僳族自治州|533300,迪庆州|533400,迪庆藏族自治州|533400,阿里地区|542500,临夏州|622900," \
                     "临夏回族自治州|622900,甘南州|623000,甘南藏族自治州|623000,海北州|632200,海北藏族自治州|632200,黄南州|632300,黄南藏族自治州|632300," \
                     "海南州|632500,海南藏族自治州|632500,果洛州|632600,果洛藏族自治州|632600,玉树州|632700,玉树藏族自治州|632700,海西州|632800," \
                     "海西蒙古族藏族自治州|632800,昌吉州|652300,昌吉回族自治州|652300,博尔塔拉州|652700,博尔塔拉蒙古自治州|652700,巴音郭楞蒙古自治州|652800," \
                     "哈密|650500,哈密地区|650500,阿克苏地区|652900,克孜勒苏州|653000,克孜勒苏柯尔克孜自治州|653000,伊犁州|654000,伊犁哈萨克自治州|654000," \
                     "喀什地区|653100,和田地区|653200,塔城地区|654200,阿勒泰地区|654300,锡林郭勒盟|152500,阿拉善盟|152900,安徽|340000,福建|350000," \
                     "甘肃|620000,广东|440000,贵州|520000,海南|460000,河北|130000,黑龙江|230000,河南|410000,湖北|420000,湖南|430000," \
                     "江苏|320000,江西|360000,吉林|220000,辽宁|210000,青海|630000,山东|370000,山西|140000,陕西|610000,四川|510000,云南|530000 "

        # One shared timer for the whole window (previously a new QTimer was
        # created for every table row). Single-shot, so one click triggers
        # exactly one fetch even if the fetch fails part-way.
        self.timer = QTimer(self)
        self.timer.setSingleShot(True)
        self.timer.timeout.connect(self.get_qr)

        table = self.ui.tableWidget
        # Rows are inserted at the top, so walk the list backwards to end up
        # with the table in the same order as the list above.
        for entry in reversed(city_table.split(',')):
            name, code = entry.strip().split('|')

            # A row has to exist before its cells can be filled.
            table.insertRow(0)
            table.setItem(0, 0, QTableWidgetItem(name.strip()))  # city name
            table.setItem(0, 1, QTableWidgetItem(code))  # city id

            # Per-row button that starts the fetch.
            self.b1 = QPushButton('获取迁徙数据')
            table.setCellWidget(0, 2, self.b1)
            self.b1.clicked.connect(self.timing)

        self.ui.btn.clicked.connect(self.find_tab)

    def find_tab(self):
        """Select and scroll to the first cell whose text equals the search box."""
        text = self.ui.lineEdit.text()
        matches = self.ui.tableWidget.findItems(text, Qt.MatchExactly)
        if not matches:
            # Nothing matched: leave the table as it is instead of raising
            # IndexError inside a Qt slot.
            return
        item = matches[0]
        # Highlight the matching cell.
        item.setSelected(True)
        # Bring the matching row to the top of the visible area.
        self.ui.tableWidget.scrollToItem(item, QAbstractItemView.PositionAtTop)

    def timing(self):
        """Schedule a fetch one second from now for the row that was clicked."""
        # Clicking a widget embedded in a cell does not change the table's
        # current row, so derive the row from the button that sent the signal.
        button = self.sender()
        if isinstance(button, QWidget):
            index = self.ui.tableWidget.indexAt(button.pos())
            if index.isValid():
                self.ui.tableWidget.setCurrentCell(index.row(), index.column())
        self.timer.start(1000)

    def get_qr(self):
        """Fetch and display move-in and move-out rankings for the current row.

        The date comes from ``date_comb``. When a ranking cannot be obtained,
        its list shows a short placeholder message instead.
        """
        # Stop first so a failure further down cannot leave the timer firing.
        self.timer.stop()
        self.ui.city_qr.clear()
        self.ui.city_qc.clear()

        row = self.ui.tableWidget.currentRow()  # -1 when no row is current
        id_item = self.ui.tableWidget.item(row, 1) if row >= 0 else None
        if id_item is None:
            self.ui.city_qr.addItems(['啥也没获取到'])
            self.ui.city_qc.addItems(['没获取到 - -'])
            return

        city_code = id_item.text()
        # The combo box holds YYYY-MM-DD; the URL takes the date without dashes.
        date_text = str(self.ui.date_comb.currentText()).replace("-", "")

        # Move-in ranking, then move-out ranking.
        self._show_rank(self.ui.city_qr, city_code, date_text, 'move_in', '啥也没获取到')
        self._show_rank(self.ui.city_qc, city_code, date_text, 'move_out', '没获取到 - -')

    def _show_rank(self, list_widget, city_code, date_text, move_type, fallback):
        """Fill ``list_widget`` with one ranking, or with ``fallback`` on failure."""
        try:
            lines = self._fetch_rank(city_code, date_text, move_type)
        except (requests.RequestException, ValueError, TypeError, IndexError):
            # Network error, malformed response, or no ranking data.
            lines = [fallback]
        list_widget.addItems(lines)

    def _fetch_rank(self, city_code, date_text, move_type):
        """Download one ranking and return it as display strings.

        :param city_code: id sent as the ``id`` query parameter
        :param date_text: date sent as the ``date`` query parameter (``YYYYMMDD``)
        :param move_type: ``'move_in'`` or ``'move_out'``
        :return: list of ``"<province> <city> 比例:<value>%"`` strings
        :raises requests.RequestException: on network or HTTP errors
        :raises ValueError: if the response is not JSONP or has no ranking data
        """
        url = ('http://huiyan.baidu.com/migration/cityrank.jsonp'
               f'?dt=city&id={city_code}&type={move_type}&date={date_text}')
        # NOTE: this is a synchronous request made from a timer slot, so the
        # UI is unresponsive until it returns; the timeout bounds that wait.
        response = requests.get(url, timeout=10)
        response.raise_for_status()

        # Unescape the response *text*; the Response object itself is not a
        # string and cannot be passed to html.unescape.
        body = html.unescape(response.text)
        # The payload is JSONP, i.e. JSON wrapped in "name( ... )". Match
        # greedily up to the last ")" so parentheses inside the JSON are kept.
        wrapped = re.search(r'\((.*)\)', body, re.S)
        if wrapped is None:
            raise ValueError('response is not JSONP')
        data = json.loads(wrapped.group(1))

        cities = jsonpath(data, '$..city_name')
        provinces = jsonpath(data, '$..province_name')
        ratios = jsonpath(data, '$..value')
        # jsonpath() yields a falsy value when a path matches nothing.
        if not (cities and provinces and ratios):
            raise ValueError('no ranking data in response')

        return [
            f'{province} {city} 比例:{ratio}%'
            for province, city, ratio in zip(provinces, cities, ratios)
        ]


if __name__ == '__main__':
    # Script entry point: create the Qt application, build the main window
    # and show the widget tree that was loaded into ``ui``.
    application = QApplication(sys.argv)
    window = MainWindow()
    window.ui.show()
    # exec_() blocks until the event loop ends; its return value becomes the
    # process exit status.
    exit_status = application.exec_()
    sys.exit(exit_status)