七仔的博客

七仔的博客GithubPages分博

0%

Python3爬取全国航班信息

可以先查询全国各个城市飞回家的航班有哪些,找出每个城市最便宜的一班,再看看坐火车去那个城市要多久,这样组合一下可以便宜很多

Python3爬取全国航班信息

起因

快过年了,要回家过年啊。。但是机票太贵,买不起啊。。。

想法

可以先查询全国各个城市飞回家的航班有哪些,找出每个城市最便宜的一班,再看看坐火车去那个城市要多久,这样组合一下可以便宜很多

说做就做

找了个现成的爬虫修改,原爬虫链接:python爬虫——爬取携程机票信息

结果源代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import json

import requests

# City dictionary: maps the three-letter city code used as dcity/acity in the
# search request to the Chinese city name sent alongside it.
# NOTE(review): this literal repeats the keys "DAX", "SWA", "LUM", "JHG" and
# "YTY". Python keeps only the last value for a repeated key, so the effective
# names are 达县, 揭阳, 芒市, 西双版纳 and 扬州, while 达州, 潮州, 德宏, 景洪 and 泰州
# never appear as values. Confirm which name is intended for each code.
cityDict={"YIE":"阿尔山","AKU":"阿克苏","RHT":"阿拉善右旗","AXF":"阿拉善左旗","AAT":"阿勒泰","NGQ":"阿里","MFM":"澳门"
,"AQG":"安庆","AVA":"安顺","AOG":"鞍山","RLK":"巴彦淖尔","AEB":"百色","BAV":"包头","BSD":"保山","BHY":"北海","BJS":"北京"
,"DBC":"白城","NBS":"白山","BFJ":"毕节","BPL":"博乐","CKG":"重庆","BPX":"昌都","CGD":"常德","CZX":"常州"
,"CHG":"朝阳","CTU":"成都","JUH":"池州","CIF":"赤峰","SWA":"潮州","CGQ":"长春","CSX":"长沙","CIH":"长治","CDE":"承德"
,"CWJ":"沧源","DAX":"达州","DLU":"大理","DLC":"大连","DQA":"大庆","DAT":"大同","DDG":"丹东","DCY":"稻城","DOY":"东营"
,"DNH":"敦煌","DAX":"达县","LUM":"德宏","EJN":"额济纳旗","DSN":"鄂尔多斯","ENH":"恩施","ERL":"二连浩特","FUO":"佛山"
,"FOC":"福州","FYJ":"抚远","FUG":"阜阳","KOW":"赣州","GOQ":"格尔木","GYU":"固原","GYS":"广元","CAN":"广州","KWE":"贵阳"
,"KWL":"桂林","HRB":"哈尔滨","HMI":"哈密","HAK":"海口","HLD":"海拉尔","HDG":"邯郸","HZG":"汉中","HGH":"杭州","HFE":"合肥"
,"HTN":"和田","HEK":"黑河","HET":"呼和浩特","HIA":"淮安","HJJ":"怀化","TXN":"黄山","HUZ":"惠州","JXA":"鸡西","TNA":"济南"
,"JNG":"济宁","JGD":"加格达奇","JMU":"佳木斯","JGN":"嘉峪关","SWA":"揭阳","JIC":"金昌","KNH":"金门","JNZ":"锦州"
,"CYI":"嘉义","JHG":"景洪","JSJ":"建三江","JJN":"晋江","JGS":"井冈山","JDZ":"景德镇","JIU":"九江","JZH":"九寨沟","KHG":"喀什"
,"KJH":"凯里","KGT":"康定","KRY":"克拉玛依","KCA":"库车","KRL":"库尔勒","KMG":"昆明","LXA":"拉萨","LHW":"兰州","HZH":"黎平"
,"LJG":"丽江","LLB":"荔波","LYG":"连云港","LPF":"六盘水","LFQ":"临汾","LZY":"林芝","LNJ":"临沧","LYI":"临沂","LZH":"柳州"
,"LZO":"泸州","LYA":"洛阳","LLV":"吕梁","JMJ":"澜沧","LCX":"龙岩","NZH":"满洲里","LUM":"芒市","MXZ":"梅州","MIG":"绵阳"
,"OHE":"漠河","MDG":"牡丹江","MFK":"马祖" ,"KHN":"南昌","NAO":"南充","NKG":"南京","NNG":"南宁","NTG":"南通","NNY":"南阳"
,"NGB":"宁波","NLH":"宁蒗","PZI":"攀枝花","SYM":"普洱","NDG":"齐齐哈尔","JIQ":"黔江","IQM":"且末","BPE":"秦皇岛","TAO":"青岛"
,"IQN":"庆阳","JUZ":"衢州","RKZ":"日喀则","RIZ":"日照","SYX":"三亚","XMN":"厦门","SHA":"上海","SZX":"深圳","HPG":"神农架"
,"SHE":"沈阳","SJW":"石家庄","TCG":"塔城","HYN":"台州","TYN":"太原","YTY":"泰州","TVS":"唐山","TCZ":"腾冲","TSN":"天津"
,"THQ":"天水","TGO":"通辽","TEN":"铜仁","TLQ":"吐鲁番","WXN":"万州","WEH":"威海","WEF":"潍坊","WNZ":"温州","WNH":"文山"
,"WUA":"乌海","HLH":"乌兰浩特","URC":"乌鲁木齐","WUX":"无锡","WUZ":"梧州","WUH":"武汉","WUS":"武夷山","SIA":"西安","XIC":"西昌"
,"XNN":"西宁","JHG":"西双版纳","XIL":"锡林浩特","DIG":"香格里拉(迪庆)","XFN":"襄阳","ACX":"兴义","XUZ":"徐州","HKG":"香港"
,"YNT":"烟台","ENY":"延安","YNJ":"延吉","YNZ":"盐城","YTY":"扬州","LDS":"伊春","YIN":"伊宁","YBP":"宜宾","YIH":"宜昌"
,"YIC":"宜春","YIW":"义乌","INC":"银川","LLF":"永州","UYN":"榆林","YUS":"玉树","YCU":"运城","ZHA":"湛江","DYG":"张家界"
,"ZQZ":"张家口","YZY":"张掖","ZAT":"昭通","CGO":"郑州","ZHY":"中卫","HSN":"舟山","ZUH":"珠海","WMT":"遵义(茅台)","ZYI":"遵义(新舟)"}
# Departure cities whose flights are filtered out of the printed results
excludeCitys = [
    '乌鲁木齐',
    '克拉玛依',
    '吐鲁番',
    '哈密',
    '昌吉',
    '阜康',
    '米泉',
    '博乐',
    '库尔勒',
    '阿克苏',
    '阿图什',
    '喀什',
    '和田',
    '奎屯',
    '伊宁',
    '塔城',
    '乌苏',
    '阿勒泰',
    '库车',
]

# Endpoint the flight search request is POSTed to
url = "https://flights.ctrip.com/itinerary/api/12808/products"

# HTTP headers sent with every search request
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:68.0) Gecko/20100101 Firefox/68.0",
    "Referer": "https://flights.ctrip.com/itinerary/oneway/bjs-sha?date=2019-07-18",
    "Content-Type": "application/json",
    # Cookies can be added here
}


# Get the cheapest flight of the day from one city to another
def getCheapFlight(dcity, acity, date, timeout=10):
    """Return the cheapest single-leg flight from ``dcity`` to ``acity`` on ``date``.

    Posts a one-way search to the module-level ``url`` and scans the
    ``routeList`` of the response. Only routes with exactly one leg are
    considered, and a flight's price is read from the first cabin of that leg.
    Routes with missing flight, cabin or price data are skipped instead of
    raising.

    Args:
        dcity: Departure city code; must be a key of ``cityDict``.
        acity: Arrival city code; must be a key of ``cityDict``.
        date: Departure date string, passed to the API unchanged.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A dict with the keys ``airlineName``, ``flightNumber``,
        ``departureDate``, ``arrivalDate``, ``departureCityName``,
        ``departureAirportName``, ``arrivalCityName``, ``arrivalAirportName``
        and ``price``, or an empty dict when the response holds no usable
        route.

    Raises:
        KeyError: If ``dcity`` or ``acity`` is not in ``cityDict``.
        requests.RequestException: If the HTTP request fails or times out.
        ValueError: If the response body is not valid JSON.
    """
    request_payload = {
        "flightWay": "Oneway",
        "classType": "ALL",
        "hasChild": False,
        "hasBaby": False,
        "searchIndex": 1,
        "airportParams": [
            # NOTE(review): dcityid/acityid are fixed values here, not looked
            # up per city -- confirm the API does not depend on them.
            {"dcity": dcity, "acity": acity, "dcityname": cityDict[dcity], "acityname": cityDict[acity],
             "date": date, "dcityid": 1, "acityid": 2}
        ]
    }
    # POST the search; the timeout keeps one stalled connection from hanging
    # a scan that issues one request per city.
    response = requests.post(url, data=json.dumps(request_payload), headers=headers,
                             timeout=timeout).text
    # "data" and "routeList" may be missing or null when nothing is found
    data = json.loads(response).get('data') or {}
    routeList = data.get('routeList') or []

    cheapFlight = {}
    for route in routeList:
        # Only routes made of exactly one leg are considered
        legs = route.get('legs') or []
        if len(legs) != 1:
            continue
        flight = legs[0].get('flight')
        cabins = legs[0].get('cabins')
        if not flight or not cabins:
            continue
        # The price is taken from the first cabin listed for the leg
        price = (cabins[0].get('price') or {}).get('price')
        if price is None:
            continue
        # Keep the first flight seen at the lowest price (strict comparison)
        if cheapFlight and price >= cheapFlight['price']:
            continue
        departureAirportInfo = flight.get('departureAirportInfo') or {}
        arrivalAirportInfo = flight.get('arrivalAirportInfo') or {}
        cheapFlight = {
            'airlineName': flight.get('airlineName'),  # airline
            'flightNumber': flight.get('flightNumber'),  # flight number
            'departureDate': flight.get('departureDate'),  # departure time
            'arrivalDate': flight.get('arrivalDate'),  # arrival time
            'departureCityName': departureAirportInfo.get('cityName'),  # departure city
            'departureAirportName': departureAirportInfo.get('airportName'),  # departure airport
            'arrivalCityName': arrivalAirportInfo.get('cityName'),  # arrival city
            'arrivalAirportName': arrivalAirportInfo.get('airportName'),  # arrival airport
            'price': price,  # ticket price
        }
    return cheapFlight


# Entry point: scan every city in cityDict for its cheapest flight to endCity
if __name__ == "__main__":
    import sys

    # Destination city code (a key of cityDict)
    endCity = 'URC'
    # Flight date
    flightDate = '2020-01-20'
    # Flights priced above this value are not printed
    maxPrice = 1000

    for city in cityDict:
        # Searching from the destination to itself cannot yield a useful flight
        if city == endCity:
            continue
        try:
            cityFlight = getCheapFlight(city, endCity, flightDate)
        except (requests.RequestException, ValueError) as err:
            # One failed or unparsable response should not abort the whole
            # scan; report it on stderr and move on to the next city.
            print("skip %s: %s" % (city, err), file=sys.stderr)
            continue
        # No flight found for this city
        if not cityFlight:
            continue
        # Price filter
        if cityFlight['price'] > maxPrice:
            continue
        # Drop flights departing from an excluded city
        if cityFlight['departureCityName'] in excludeCitys:
            continue
        print(cityFlight)


此为博主副博客,留言请去主博客,转载请注明出处:https://www.baby7blog.com/myBlog/59.html

欢迎关注我的其它发布渠道