{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 授業評価アンケートのデータ収集用スクリプト\n",
"[調査と解析班](https://r.st.ie.u-ryukyu.ac.jp/assessment/)で収集している、知能除法コース専門科目の2021年度前期科目を対象に自由記述欄のデータを収集。"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" % Total % Received % Xferd Average Speed Time Time Time Current\n",
" Dload Upload Total Spent Left Speed\n",
"100 2455 100 2455 0 0 11135 0 --:--:-- --:--:-- --:--:-- 11418\n"
]
}
],
"source": [
"!curl -O https://ie.u-ryukyu.ac.jp/~tnal/2022/dm/static/r_assesment_list.csv"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"import pandas as pd\n",
"from bs4 import BeautifulSoup\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" title | \n",
" grade | \n",
" required | \n",
" year | \n",
" url | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 工業数学Ⅰ | \n",
" 1 | \n",
" True | \n",
" 2021 | \n",
" https://r.st.ie.u-ryukyu.ac.jp/assessment/2021... | \n",
"
\n",
" \n",
" 1 | \n",
" 技術者の倫理 | \n",
" 1 | \n",
" True | \n",
" 2021 | \n",
" https://r.st.ie.u-ryukyu.ac.jp/assessment/2021... | \n",
"
\n",
" \n",
" 2 | \n",
" 工学基礎演習 | \n",
" 1 | \n",
" True | \n",
" 2021 | \n",
" https://r.st.ie.u-ryukyu.ac.jp/assessment/2021... | \n",
"
\n",
" \n",
" 3 | \n",
" プログラミングⅠ | \n",
" 1 | \n",
" True | \n",
" 2021 | \n",
" https://r.st.ie.u-ryukyu.ac.jp/assessment/2021... | \n",
"
\n",
" \n",
" 4 | \n",
" 基礎数学Ⅰ | \n",
" 1 | \n",
" False | \n",
" 2021 | \n",
" https://r.st.ie.u-ryukyu.ac.jp/assessment/2021... | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" title grade required year \\\n",
"0 工業数学Ⅰ 1 True 2021 \n",
"1 技術者の倫理 1 True 2021 \n",
"2 工学基礎演習 1 True 2021 \n",
"3 プログラミングⅠ 1 True 2021 \n",
"4 基礎数学Ⅰ 1 False 2021 \n",
"\n",
" url \n",
"0 https://r.st.ie.u-ryukyu.ac.jp/assessment/2021... \n",
"1 https://r.st.ie.u-ryukyu.ac.jp/assessment/2021... \n",
"2 https://r.st.ie.u-ryukyu.ac.jp/assessment/2021... \n",
"3 https://r.st.ie.u-ryukyu.ac.jp/assessment/2021... \n",
"4 https://r.st.ie.u-ryukyu.ac.jp/assessment/2021... "
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"source_file = 'r_assesment_list.csv'\n",
"assesment_columns = ['title', 'grade', 'required', 'year', 'url']\n",
"df = pd.read_csv(source_file, names=assesment_columns)\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'grade': 3, 'required': False, 'year': 2021, 'url': 'https://r.st.ie.u-ryukyu.ac.jp/assessment/2021a/result/makepage.php?kamoku=i334'}\n"
]
}
],
"source": [
"lectures = {}\n",
"for items in df.itertuples():\n",
" title = items[1]\n",
" grade = items[2]\n",
" required = items[3]\n",
" year = items[4]\n",
" url = items[5]\n",
" lectures[title] = {'grade':grade, 'required':required, 'year':year, 'url':url}\n",
"\n",
"print(lectures['データマイニング'])"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# urlから '&ex=1'を削除した状態でのセレクタ\n",
"# Q21-(1), (2), (3), (4), Q22\n",
"selectors = {'Q21 (1)':'body > div:nth-child(44) > ul:nth-child(1)',\n",
" 'Q21 (2)': 'body > div:nth-child(47) > ul:nth-child(1)',\n",
" 'Q21 (3)': 'body > div:nth-child(50) > ul:nth-child(1)',\n",
" 'Q21 (4)': 'body > div:nth-child(53) > ul:nth-child(1)',\n",
" 'Q22': 'body > div:nth-child(56) > ul:nth-child(1)'}"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" title | \n",
" grade | \n",
" required | \n",
" q_id | \n",
" comment | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 工業数学Ⅰ | \n",
" 1 | \n",
" True | \n",
" Q21 (1) | \n",
" 特になし | \n",
"
\n",
" \n",
" 1 | \n",
" 工業数学Ⅰ | \n",
" 1 | \n",
" True | \n",
" Q21 (2) | \n",
" 正直わかりずらい。むだに間があるし。 | \n",
"
\n",
" \n",
" 2 | \n",
" 工業数学Ⅰ | \n",
" 1 | \n",
" True | \n",
" Q21 (2) | \n",
" 例題を取り入れて理解しやすくしてほしい。 | \n",
"
\n",
" \n",
" 3 | \n",
" 工業数学Ⅰ | \n",
" 1 | \n",
" True | \n",
" Q21 (2) | \n",
" 特になし | \n",
"
\n",
" \n",
" 4 | \n",
" 工業数学Ⅰ | \n",
" 1 | \n",
" True | \n",
" Q21 (2) | \n",
" スライドに書く文字をもう少しわかりやすくして欲しいです。 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" title grade required q_id comment\n",
"0 工業数学Ⅰ 1 True Q21 (1) 特になし\n",
"1 工業数学Ⅰ 1 True Q21 (2) 正直わかりずらい。むだに間があるし。\n",
"2 工業数学Ⅰ 1 True Q21 (2) 例題を取り入れて理解しやすくしてほしい。\n",
"3 工業数学Ⅰ 1 True Q21 (2) 特になし\n",
"4 工業数学Ⅰ 1 True Q21 (2) スライドに書く文字をもう少しわかりやすくして欲しいです。"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def get_comments(lectures, selectors):\n",
" \"\"\"授業コメントを収集\n",
" Returns dict:\n",
" {授業名: {'q_id': ['コメント1', 'コメント2']},\n",
" 授業名: {'q_id': ['コメント1', 'コメント2']},,,}\n",
" \"\"\"\n",
" result = {}\n",
" for lec_name, items in lectures.items():\n",
" #print(lec_name, items['grade'])\n",
" r = requests.get(items['url'])\n",
" r.encoding = r.apparent_encoding\n",
" soup = BeautifulSoup(r.text, 'html.parser')\n",
"\n",
" comments = {}\n",
" for q_id, selector in selectors.items():\n",
" elements = soup.select(selector)\n",
" #print(elements, '<=', items['url'], q_id, selector)\n",
" if len(elements) != 0:\n",
" for li in elements[0].find_all('li'):\n",
" if q_id in comments:\n",
" comments[q_id].append(li.text.rstrip())\n",
" else:\n",
" comments[q_id] = [li.text.rstrip()]\n",
" result[lec_name] = comments\n",
" return result\n",
"\n",
"def comment_to_dataframe(lectures, all_comments):\n",
" \"\"\"扱いやすいように変換\n",
" Returns pd.DataFrame:\n",
" columns = ['title', 'grade', 'required', 'q_id', 'comment']\n",
" \"\"\"\n",
" tables = []\n",
" for title, items in lectures.items():\n",
" grade = items['grade']\n",
" required = items['required']\n",
" for q_id, comments in all_comments[title].items():\n",
" for comment in comments:\n",
" if len(comment) != 0:\n",
" tables.append([title, grade, required, q_id, comment])\n",
"\n",
" columns_name = ['title', 'grade', 'required', 'q_id', 'comment']\n",
" df = pd.DataFrame(tables, columns=columns_name)\n",
" return df\n",
"\n",
"\n",
"all_comments = get_comments(lectures, selectors)\n",
"df = comment_to_dataframe(lectures, all_comments)\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"df.to_pickle('./corpus/r_assesment.pkl')\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"interpreter": {
"hash": "880b2a8c90f9e6beae80b56829e3f671fedd58b6d14887184ddce26124cedfbd"
},
"kernelspec": {
"display_name": "Python 3.8.9 64-bit",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.9"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}