1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
| import requests import datetime import json import re
# Base URL of the blog. Every API request and every sitemap entry built in
# this file is prefixed with it.
domain_name = "https://www.baby7blog.com"
# Directory the sitemap files are written into. It is joined to the file name
# by plain string concatenation, so the trailing slash is required.
project_path = "/usr/share/nginx/blog-ui/"
# Opening part of the two generated documents: the header without a namespace
# goes into sitemap.xml, the one with the sitemaps.org namespace goes into
# sitemap-google.xml. create_sitemap() appends the closing </urlset> itself.
# NOTE(review): the single spaces inside these literals may originally have
# been line breaks -- confirm against the deployed copy.
baidu_header = '<?xml version="1.0" encoding="utf-8"?> <urlset> '
google_header = '<?xml version="1.0" encoding="UTF-8"?> <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"> '
# strftime patterns for <lastmod>. The Google pattern ends in a literal
# "+00:00", i.e. the offset is fixed text, not derived from the datetime.
baidu_datetime = '%Y-%m-%d'
google_datetime = '%Y-%m-%dT%H:%M:%S+00:00'
# %-format template for one <url> entry; the two %s slots are filled with
# (location, last-modified string) in that order.
item_url = ''' <url> <loc>%s</loc> <lastmod>%s</lastmod> <changefreq>daily</changefreq> <priority>1.0</priority> </url>
'''
def get_blog_list(timeout=10):
    """Return the site root URL followed by one page URL per blog post.

    The post listing is fetched from the ``/blog/blog/page`` endpoint in a
    single request (``size=99999``) and each record's ``id`` is turned into
    ``<domain_name>/myBlog/<id>.html``.

    Args:
        timeout: Seconds to wait for the server. Without a timeout
            ``requests`` blocks indefinitely on an unresponsive host.

    Returns:
        A list of URL strings; the first element is always ``domain_name``.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            4xx/5xx response.
        KeyError: If the response JSON lacks ``data.records`` or a record
            lacks ``id``.
    """
    url = domain_name + '/blog/blog/page?current=1&size=99999'
    response = requests.get(url=url, timeout=timeout)
    # Fail on an error status instead of trying to parse an error page as
    # the expected payload.
    response.raise_for_status()
    blog_item_list = response.json()['data']['records']
    return [domain_name] + [
        domain_name + '/myBlog/' + str(blog['id']) + '.html'
        for blog in blog_item_list
    ]
def get_blog_menu_list():
    """Return the listing-page URL of every hard-coded menu category.

    Each entry of the built-in menu table becomes
    ``<domain_name>/list?id=[1,1000,<id>]&type=menu&title=<title>``; one
    extra fixed entry (id ``[1,1114]``) is appended at the end.

    Returns:
        A list of URL strings, in table order followed by the extra entry.
    """
    menus = [
        {"id": "1100", "title": "前端技术"},
        {"id": "1101", "title": "后端技术"},
        {"id": "1103", "title": "人工智能"},
        {"id": "1104", "title": "爬虫相关"},
        {"id": "1107", "title": "桌面技术"},
        {"id": "1108", "title": "硬件相关"},
        {"id": "1109", "title": "分享"},
        {"id": "1113", "title": "服务器相关"},
    ]
    links = [
        domain_name + '/list?id=[1,1000,' + str(menu['id'])
        + ']&type=menu&title=' + str(menu['title'])
        for menu in menus
    ]
    # This menu sits directly under the root, so its id path has no 1000 level.
    links.append(domain_name + '/list?id=[1,1114]&type=menu&title=魔镜')
    return links
def get_blog_label_list(timeout=10):
    """Return the listing-page URL of every blog label.

    The labels are fetched from the ``/blog/label/page`` endpoint in a single
    request (``size=99999``) and each record becomes
    ``<domain_name>/list?id=<id>&type=label&title=<label>``.

    Args:
        timeout: Seconds to wait for the server. Without a timeout
            ``requests`` blocks indefinitely on an unresponsive host.

    Returns:
        A list of URL strings, one per label record, in response order.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            4xx/5xx response.
        KeyError: If the response JSON lacks ``data.records`` or a record
            lacks ``id`` / ``label``.
    """
    url = domain_name + '/blog/label/page?current=1&size=99999'
    response = requests.get(url=url, timeout=timeout)
    # Fail on an error status instead of trying to parse an error page as
    # the expected payload.
    response.raise_for_status()
    blog_label_list = response.json()['data']['records']
    return [
        domain_name + '/list?id=' + str(blog['id'])
        + '&type=label&title=' + str(blog['label'])
        for blog in blog_label_list
    ]
def create_sitemap(url_list):
    """Write ``sitemap.xml`` and ``sitemap-google.xml`` for ``url_list``.

    Both files are created (or overwritten) inside ``project_path``. They
    contain one ``<url>`` entry per input URL and differ only in their
    header and in the ``<lastmod>`` format (``baidu_datetime`` vs.
    ``google_datetime``).

    Args:
        url_list: Iterable of absolute URL strings, unescaped.

    Raises:
        OSError: If either output file cannot be opened or written.
    """
    # Local import: the function brings its own dependency into scope.
    from xml.sax.saxutils import escape

    # One timestamp for the whole run, so every entry agrees. The Google
    # format ends in a literal "+00:00", so it must be rendered from UTC; the
    # date-only format keeps using the machine's local date.
    utc_now = datetime.datetime.now(datetime.timezone.utc)
    baidu_times = utc_now.astimezone().strftime(baidu_datetime)
    google_times = utc_now.strftime(google_datetime)

    # The listing URLs carry "&" query separators, which are illegal as raw
    # text inside <loc>; escape the five XML-reserved characters.
    quote_entities = {'"': "&quot;", "'": "&apos;"}

    baidu_items = []
    google_items = []
    for url in url_list:
        safe_url = escape(url, quote_entities)
        baidu_items.append(item_url % (safe_url, baidu_times))
        google_items.append(item_url % (safe_url, google_times))

    baidu_word_text = baidu_header + "".join(baidu_items) + "</urlset>"
    google_word_text = google_header + "".join(google_items) + "</urlset>"

    # Context managers close the files even if a write fails.
    with open(project_path + "sitemap.xml", 'w', encoding='utf-8') as file:
        file.write(baidu_word_text)
    with open(project_path + "sitemap-google.xml", 'w', encoding='utf-8') as file:
        file.write(google_word_text)
if __name__ == '__main__':
    # Gather every URL for the sitemap -- posts first, then menu pages, then
    # label pages -- and write both sitemap files.
    blog_list = [
        *get_blog_list(),
        *get_blog_menu_list(),
        *get_blog_label_list(),
    ]
    create_sitemap(blog_list)
|