"""Export per-user edit counts from MCBBS Wiki to an Excel workbook."""

__author__ = "QWERTY_52_38"
__version__ = "0.1"
import json
import time
import typing
from urllib.parse import quote

import bs4
import openpyxl
import requests
from requests.exceptions import *
# Download the wiki's user-list special page (title 特殊:用户列表), asking for up
# to 5000 entries in one request. No further result pages are fetched.
not_success = True
user_list_html = ""
while not_success:
    try:
        # The timeout keeps a stalled connection from blocking the script
        # forever; a timed-out attempt raises a RequestException subclass and
        # is therefore retried like any other network failure.
        user_list_html = requests.get(
            "https://mcbbs-wiki.cn/index.php?title=%E7%89%B9%E6%AE%8A:%E7%94"
            "%A8%E6%88%B7%E5%88%97%E8%A1%A8&offset=&limit=5000",
            timeout=30,
        ).text
    except requests.RequestException:
        # Pause before the next attempt so an outage is not a busy loop.
        time.sleep(1)
    else:
        not_success = False
print("Successfully get HTML file.")
soup = bs4.BeautifulSoup(user_list_html, 'html.parser')
print("Successfully create soup object.")
# Collect the <bdi> text of the first link in every <li> of the first <ul>
# under #mw-content-text; these strings are used as the user names below.
user_name_li = [
    entry.a.bdi.string
    for entry in soup.find(id="mw-content-text").find_all("ul")[0].find_all("li")
]
print("Successfully find all user_li.")
def get_table_from_user(user_str: str, *, timeout: float = 30.0, retry_delay: float = 1.0):
    """Download a user's edit-count page and return its statistics table.

    The page requested is the special page 特殊:编辑计数 with the user name as
    sub-page. The request is repeated until it succeeds.

    Args:
        user_str: User name as displayed on the wiki (not URL-encoded).
        timeout: Seconds to wait on the server before one attempt is given up
            and retried.
        retry_delay: Seconds to sleep after a failed attempt.

    Returns:
        The first ``<table>`` inside the second ``<td>`` of the second ``<tr>``
        found under the first table of the element with id ``editcount``.

    Raises:
        AttributeError: If the page has no ``editcount`` element or no table
            inside it.
        IndexError: If the expected row or cell is missing.
    """
    # Percent-encode the name so characters such as "?", "#" or "%" stay part
    # of the path instead of being read as URL syntax. "/" is left as is.
    url = (
        "https://mcbbs-wiki.cn/wiki/%E7%89%B9%E6%AE%8A:%E7%BC%96%E8%BE%91%E8%AE%A1%E6%95"
        "%B0/" + quote(user_str)
    )
    while True:
        try:
            edit_html = requests.get(url, timeout=timeout).text
        except requests.RequestException:
            # Network failure or timeout: wait briefly, then try again.
            time.sleep(retry_delay)
        else:
            break
    form = bs4.BeautifulSoup(edit_html, 'html.parser').find(id="editcount")
    print(user_str + " : Successful!")
    return form.table.find_all("tr")[1].find_all("td")[1].table
def get_edit(table) -> typing.List[int]:
    """Read the edit counters out of an edit-count statistics table.

    Args:
        table: Parsed ``<table>`` element whose first row carries the total in
            its second ``<th>`` and whose remaining rows carry a namespace
            label and a count in their first two ``<td>`` cells.

    Returns:
        Nine integers in this order: all, main, template, category, help,
        file, talk, MCBBS_Wiki, edit score. All nine are zero when the table
        has exactly one row.
    """
    # Slots: all, main, template, category, help, file, talk, MCBBS_Wiki, edit score
    counts = [0] * 9
    rows = table.find_all("tr")
    if len(rows) == 1:
        return counts

    def _to_int(text):
        # Numbers on the page use "," as thousands separator.
        return int(text.replace(",", ""))

    # Namespace labels that map to exactly one slot.
    slot_by_label = {
        "(主)": 1,         # main
        "模板": 2,         # template
        "分类": 3,         # category
        "帮助": 4,         # help
        "文件": 5,         # file
        "MCBBS Wiki": 7,
        "MCBBS_Wiki": 7,
    }

    counts[0] = _to_int(rows[0].find_all("th")[1].string)
    # The first <b> in the table's grandparent holds the edit score after a
    # 7-character prefix (presumably a label -- confirm against the page).
    counts[-1] = _to_int(table.parent.parent.b.string[7:])

    for row in rows[1:]:
        label = row.find_all("td")[0].string
        amount = _to_int(row.find_all("td")[1].string)
        slot = slot_by_label.get(label)
        if slot is not None:
            counts[slot] = amount
        elif "讨论" in label:
            # Every label containing "讨论" (talk) is summed into one slot.
            counts[6] += amount
    return counts
def find_uid(user_str: str, *, timeout: float = 30.0, retry_delay: float = 1.0) -> int:
    """Look up the numeric user ID of a wiki account through ``api.php``.

    Sends ``action=query&list=users`` with the name in ``ususers`` and repeats
    the request until it succeeds.

    Args:
        user_str: User name as displayed on the wiki (not URL-encoded).
        timeout: Seconds to wait on the server before one attempt is given up
            and retried.
        retry_delay: Seconds to sleep after a failed attempt.

    Returns:
        The ``userid`` value of the first entry in ``query.users``.

    Raises:
        KeyError, IndexError: If the response lacks ``query.users[0].userid``
            (presumably the case for unknown names -- confirm against the API).
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    # Passing the name through ``params`` lets requests percent-encode it, so
    # characters such as "&", "#" or "+" cannot corrupt the query string.
    query = {
        "action": "query",
        "format": "json",
        "list": "users",
        "usprop": "",
        "ususers": user_str,
    }
    while True:
        try:
            user_json = requests.get(
                "https://mcbbs-wiki.cn/api.php", params=query, timeout=timeout
            ).text
        except requests.RequestException:
            # Network failure or timeout: wait briefly, then try again.
            time.sleep(retry_delay)
        else:
            break
    return json.loads(user_json)["query"]["users"][0]["userid"]
# One row per user: UID, user name, then the nine counters from get_edit().
user_edit_li = [
    [find_uid(user_name), user_name] + get_edit(get_table_from_user(user_name))
    for user_name in user_name_li
]
print("Successfully create user_edit_li.")
book = openpyxl.Workbook()
# The new sheet is inserted at index 0, i.e. in front of any sheet the
# workbook already contains.
sheet = book.create_sheet('工作表名称', 0)
titles = ["UID", "用户名", "编辑总数", "(主)", "模板", "分类", "帮助", "文件", "讨论", "MCBBS Wiki", "编辑积分"]
# Header goes on row 1; openpyxl rows and columns are 1-based.
for col, title in enumerate(titles, start=1):
    sheet.cell(1, col, value=title)
print("Successfully write titles.")
# Data rows start on row 2, directly below the header.
for row, record in enumerate(user_edit_li, start=2):
    for column, value in enumerate(record, start=1):
        sheet.cell(row, column, value=value)
print("Successfully write data.")
book.save('mcbbswiki-edit-20210128.xlsx')
print("End!")
复制代码 |