python 爬蟲程式 -2 加入interface視窗介面
加入python的interface視窗介面
這裡要使用tkinter函式庫
import tkinter as tk
# Event handler fired when the button is pressed.
def clickon():
    """Copy the entry's text into ``outputtext``, twice, one copy per line."""
    typed = inputtext.get()
    # Same text on two lines, separated by a newline.
    outputtext['text'] = '\n'.join((typed, typed))
# Build the window and its widgets, then hand control to the Tk event loop.
win = tk.Tk()
win.title("My First Tk GUI")  # window title

# Row 0: a static label followed by the text entry.
label = tk.Label(win, text="Hello World!")
label.grid(row=0, column=0)
inputtext = tk.Entry(win)
inputtext.grid(row=0, column=1, columnspan=20)

# Row 1: the button; its text and click handler are given to the constructor.
button = tk.Button(win, text='ok', command=clickon)
button.grid(row=1, column=1)

# Row 2: the label that clickon() writes into.
outputtext = tk.Label(win, text="output information")
outputtext.grid(row=2, column=1)

win.mainloop()
GUI test
建立的物件的屬性有兩種寫法
#第一種
button = tk.Button(win)#, text="OK" #建立 button
button['text'] = 'ok'
button['command'] = clickon #button event事件
#第二種
button = tk.Button(win, text = "OK", command = clickon) #建立 button
一次建立多個物件
用陣列儲存
在物件屬性可以設定
- text文字:string格式
- height高度, width寬度:只能輸入int的數字
- anchor(靠左、靠右、置中) = "n", "ne", "e", "se", "s", "sw", "w", "nw", and also "center".
- variable值
在layout可以設定
- row, column
- sticky(對齊) = "N","W","S","E"
# Titles shown next to each checkbutton, and each checkbutton's initial state.
all_headlist = ['職務名稱', '公司名稱', '學歷', '地區', '應徵人數']
default_check = [1, 1, 0, 0, 1]

# Widgets and their Tk variables, keyed by the title's index.
checktext = {}
checkb = {}
var = {}
for i, (title, checked) in enumerate(zip(all_headlist, default_check)):
    grid_row = i + 4  # the checkbutton rows start at grid row 4
    var[i] = tk.IntVar(value=checked)
    checkb[i] = tk.Checkbutton(win, text=title + ':', height=2, anchor='nw', variable=var[i])
    checkb[i].grid(row=grid_row, column=0, sticky="W")
    # Empty label beside the checkbutton, one grid column to the right.
    checktext[i] = tk.Label(win, width=50, anchor='sw')
    checktext[i].grid(row=grid_row, column=1, sticky="W")
GUI checkbutton
讀檔方式
當讀取單一直行,以下是只顯示每一列的'職務名稱'跟'學歷'
# Print only the '職務名稱' and '學歷' columns of every data row.
csv_path = './' + filename + filetyoe
with open(csv_path, 'r') as oldfile:
    # DictReader takes the first line as the header and yields one dict per row.
    for record in csv.DictReader(oldfile):
        print(record['職務名稱'], record['學歷'])
# The with-statement has already closed the file; no explicit close() is needed.
完整程式碼
功能說明:
輸入要匯出的檔案名稱,以及搜尋的資料,按下search,等待資料抓取完成,會存入你所設定的名稱
按下output my choose,匯出所選擇的資訊,存入你設定的名稱+_choose
按下load file to choose,讀取你設定的名稱的檔案,選擇完要匯出的資訊後再存入你設定的名稱+_choose
import tkinter as tk
import csv
import time
import load_104_newpeopleV1_0
# Forwarded as the ``test`` keyword argument of the scraper call in clickto_search().
test = False

# Example search URL, kept for reference:
# search_url =
# https://www.104.com.tw/area/freshman/search?keyword=%E6%A9%9F%E5%99%A8%E4%BA%BA&area=6001001000,6001002000&jobcategory=2007000000&industry=&page=1&sortField=APPEAR_DATE&sortMode=DESC

# File names are built as './' + filename + filetyoe.
filename = '104_output'
filetyoe = '.csv'

# Column titles; the index into this list identifies a column everywhere else.
all_headlist = [
    # first group (5 entries)
    '職務名稱', '公司名稱', '學歷', '地區', '應徵人數',
    # second group (21 entries)
    '工作說明', '職務類別', '工作待遇', '工作性質', '上班地點', '管理責任', '出差外派',
    '上班時段', '休假制度', '可上班日', '需求人數', '接受身份', '工作經歷', '學歷要求',
    '科系要求', '語文條件', '擅長工具', '工作技能', '具備駕照', '其他條件', '公司福利',
    # third group (11 entries)
    '產業類別', '產業描述', '員工', '資本額', '聯絡人', '公司地址', '電話', '傳真',
    '公司網址', '公司簡介', '主要商品/服務項目',
]

# Initial checked state (1 = checked) for the column at the same index above.
default_check = [
    1, 1, 1, 1, 1,  # 5
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,  # 21
    1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1,  # 11
]

# Filled in later: preview labels and checkbuttons keyed by column index,
# and the list of column indices chosen for export.
checktext = {}
checkb = {}
newfile_title = []
# Remove one character (e.g. a stray double quote) from a string before it is shown.
def deltwocom(text, number):
    """Return ``text`` with the character at index ``number`` removed.

    ``number`` follows normal list indexing, so negative values count from
    the end. Raises IndexError when ``number`` is out of range for ``text``.
    """
    chars = list(text)
    del chars[number]
    return ''.join(chars)
# Load the first data row of a CSV file and preview it beside the checkbuttons.
def load_example(filename):
    """Show a preview of the first data row of ``./<filename><filetyoe>``.

    The file is parsed with the ``csv`` module, so quoted fields containing
    commas, quotes or line breaks are read as single values. The first row is
    treated as the header and skipped; the second row supplies the previews.
    Every label in ``checktext`` is updated: the label at index ``i`` shows
    field ``i`` of that row, cut to 25 characters plus ``'...'`` when it is
    25 characters or longer, or is cleared when the row has no such field
    (short row, or no data row at all).

    Args:
        filename: File name without directory or extension.

    Raises:
        OSError: If the file cannot be opened.
    """
    max_chars = 25  # longest preview shown before truncating

    # newline='' lets the csv module handle line breaks inside quoted fields.
    # NOTE(review): the encoding is left at the platform default, as before;
    # confirm it matches what the scraper writes.
    with open('./' + filename + filetyoe, 'r', newline='') as csvfile:
        reader = csv.reader(csvfile)
        next(reader, None)       # header row, not displayed
        info = next(reader, [])  # first data row, or nothing if the file ends

    for i, label in checktext.items():
        value = info[i] if i < len(info) else ''
        if len(value) >= max_chars:
            label['text'] = value[:max_chars] + '...'
        else:
            label['text'] = value
# Write the selected columns to the "_choose" file.
def creatfile(filename):
    """Copy the selected columns of ``./<filename><filetyoe>`` to ``./<filename>_choose<filetyoe>``.

    The columns are the entries of ``all_headlist`` whose indices are listed
    in ``newfile_title``, in that order. The output starts with a header row
    and every field is wrapped in double quotes; quotes inside a field are
    escaped by the ``csv`` module instead of being written raw.

    Args:
        filename: File name without directory or extension.

    Raises:
        ValueError: If ``newfile_title`` is empty. Raised before any file is
            opened, so an existing output file is not truncated.
        OSError: If either file cannot be opened.
        KeyError: If a selected column is not present in the source header.
    """
    if not newfile_title:
        raise ValueError('no columns selected for export')

    columns = [all_headlist[index] for index in newfile_title]
    source_path = './' + filename + filetyoe
    target_path = './' + filename + '_choose' + filetyoe

    # Both files are closed by the with-statement; no explicit close() needed.
    with open(source_path, 'r') as oldfile, open(target_path, 'w') as newfile:
        reader = csv.DictReader(oldfile)
        # lineterminator='\n' keeps the one-'\n'-per-row layout the file had
        # when rows were written by hand in text mode.
        writer = csv.writer(newfile, quoting=csv.QUOTE_ALL, lineterminator='\n')
        writer.writerow(columns)
        for row in reader:
            # DictReader fills fields missing from a short row with None.
            writer.writerow(['' if row[name] is None else row[name] for name in columns])
# (button) Fetch the results for the entered search URL and save them to the output file.
def clickto_search():
    """Run the scraper for the URL in ``inputurl`` and preview the result.

    The output path is built from the file name typed into ``input_fname``,
    the same source the other two button handlers use. When the scraper
    returns, its result is shown in the status label and the first data row
    of the new file is previewed via ``load_example``.
    """
    url = inputurl.get()
    filename = input_fname.get()

    outputtext['text'] = '抓取資料中...'
    # Redraw now: the scraper call below blocks the Tk event loop, so the
    # status text would otherwise not appear until the handler returns.
    # (A plain time.sleep() here would only delay the fetch; it does not
    # give Tk a chance to repaint.)
    outputtext.update_idletasks()

    res = load_104_newpeopleV1_0.load_104_newpeople_main(
        url, './' + filename + filetyoe, test=test)
    # str() keeps the message working if the scraper returns a non-string.
    outputtext['text'] = '完成' + str(res) + '\toutput file name is ' + filename + filetyoe
    load_example(filename)
# (button) Load an existing file so its columns can be chosen.
def load_to_choose():
    """Preview the file named in ``input_fname`` and report progress.

    The status label shows a "loading" message, then either a "done" message
    or, when the file cannot be opened, a failure message with the error text.
    """
    filename = input_fname.get()
    shown_name = filename + filetyoe

    outputtext['text'] = '載入 ' + shown_name + ' 資料'
    # Redraw now so the interim message is visible while the file is read.
    outputtext.update_idletasks()

    try:
        load_example(filename)
    except OSError as err:
        # Report the problem in the window instead of leaving the
        # "loading" message up with only a console traceback.
        outputtext['text'] = '載入 ' + shown_name + ' 失敗: ' + str(err)
        return
    outputtext['text'] = '載入 ' + shown_name + ' 資料完成'
# (button) Export the checked columns to the "_choose" file.
def output_choose():
    """Write the checked columns of the file named in ``input_fname``.

    ``newfile_title`` is rebuilt from the current checkbutton states on every
    call, so repeated exports do not accumulate duplicate columns. When no
    column is checked, a hint is shown and nothing is written.
    """
    filename = input_fname.get()
    target_name = filename + '_choose' + filetyoe

    # Replace the contents in place so creatfile() sees the same list object.
    newfile_title[:] = [i for i in range(len(all_headlist)) if var[i].get()]
    if not newfile_title:
        outputtext['text'] = '請至少勾選一個欄位'
        return

    # Show the name of the file that is actually written.
    outputtext['text'] = '存入 ' + target_name + ' 中...'
    outputtext.update_idletasks()  # redraw before the blocking file work
    creatfile(filename)
    outputtext['text'] = target_name + '儲存完成'
# ---- Main window ----
win = tk.Tk()
win.title("load_104_newpeople_job")

# Row 0: output file name, pre-filled with a default value.
label_fname = tk.Label(win, text="匯出的檔案名稱: ")
label_fname.grid(row=0, column=0)
v1 = tk.StringVar(win, value='104_output')
input_fname = tk.Entry(win, width=100, textvariable=v1)
input_fname.grid(row=0, column=1, columnspan=101, sticky="W")

# Row 1: search URL, pre-filled with an example query.
labelurl = tk.Label(win, text="輸入 search url : ")
labelurl.grid(row=1, column=0)
v2 = tk.StringVar(win, value='https://www.104.com.tw/area/freshman/search?keyword=%E6%A9%9F%E5%99%A8%E4%BA%BA&area=6001001000,6001002000&jobcategory=2007000000&industry=&page=1&sortField=APPEAR_DATE&sortMode=DESC')
inputurl = tk.Entry(win, width=100, textvariable=v2)
inputurl.grid(row=1, column=1, columnspan=101, sticky="W")

# Row 2: the three action buttons, each wired to its handler.
button = tk.Button(win, text='search', command=clickto_search)
button.grid(row=2, column=0)
buttontest = tk.Button(win, text='load file to choose', command=load_to_choose)
buttontest.grid(row=2, column=1)
buttonget = tk.Button(win, text='output my choose', command=output_choose)
buttonget.grid(row=2, column=2)

# Status area in grid column 5: a caption and the label the handlers update.
outputtitle = tk.Label(win, text="工作狀態")
outputtitle.grid(row=0, column=5)
outputtext = tk.Label(win, text="output information")
outputtext.grid(row=1, column=5)

# One checkbutton plus one preview label per column title, laid out in three
# side-by-side groups that all start at grid row 3.
var = {}
for i, title in enumerate(all_headlist):
    if i <= 4:
        row, col = i + 3, 0        # indices 0-4
    elif i < 26:
        row, col = i - 5 + 3, 2    # indices 5-25
    else:
        row, col = i - 26 + 3, 4   # indices 26 and up
    var[i] = tk.IntVar(value=default_check[i])
    checkb[i] = tk.Checkbutton(win, text=title + ':', height=2, anchor='nw', variable=var[i])
    checkb[i].grid(row=row, column=col, sticky="W")
    # The preview label sits one grid column to the right of its checkbutton.
    checktext[i] = tk.Label(win, width=50, anchor='sw')
    checktext[i].grid(row=row, column=col + 1, sticky="W")

win.mainloop()
GUI



留言
張貼留言