python 爬蟲程式 -2 加入interface視窗介面
加入 Python 的 interface 視窗介面
# This example uses the tkinter library.
import tkinter as tk


def clickon():
    """Button callback: copy the entry text into the output label, twice.

    The label ends up showing the typed text on two lines.
    """
    typed = inputtext.get()
    outputtext['text'] = typed + '\n' + typed


win = tk.Tk()
win.title("My First Tk GUI")  # window title

# Static caption in the top-left cell.
label = tk.Label(win, text="Hello World!")
label.grid(row=0, column=0)

# Single-line text box the user types into.
inputtext = tk.Entry(win)
inputtext.grid(row=0, column=1, columnspan=20)

# Button whose options are assigned by key after construction.
button = tk.Button(win)
button['text'] = 'ok'
button['command'] = clickon  # run clickon() when pressed
button.grid(row=1, column=1)

# Label that clickon() writes its result into.
outputtext = tk.Label(win, text="output information")
outputtext.grid(row=2, column=1)

win.mainloop()
GUI test
# A widget's options can be written in two ways.

# Way 1: create the widget first, then assign each option by key.
button = tk.Button(win)
button['text'] = 'ok'
button['command'] = clickon  # callback run when the button is pressed

# Way 2: pass the options as keyword arguments to the constructor.
button = tk.Button(win, text="OK", command=clickon)
一次建立多個物件
用字典(dict)依索引儲存各個物件。在物件屬性可以設定:
- text文字:string格式
- height高度, width寬度:只能輸入int的數字
- anchor(靠左、靠右、置中) = "n", "ne", "e", "se", "s", "sw", "w", "nw", and also "center".
- variable值
在layout可以設定
- row, column
- sticky(對齊) = "N","W","S","E"
all_headlist = ['職務名稱', '公司名稱', '學歷', '地區', '應徵人數']
default_check = [1, 1, 0, 0, 1]  # initial state of each checkbutton

# Widgets and their variables are kept in dicts keyed by column index.
checktext = {}
checkb = {}
var = {}

for i, (head, checked) in enumerate(zip(all_headlist, default_check)):
    grid_row = i + 4

    # IntVar holding the state of this checkbutton.
    var[i] = tk.IntVar(value=checked)

    checkb[i] = tk.Checkbutton(
        win,
        text=head + ':',
        height=2,
        anchor='nw',
        variable=var[i],
    )
    checkb[i].grid(row=grid_row, column=0, sticky="W")

    # Empty label beside the checkbutton, to be filled in later.
    checktext[i] = tk.Label(win, width=50, anchor='sw')
    checktext[i].grid(row=grid_row, column=1, sticky="W")
GUI checkbutton
讀檔方式
# Reading individual columns: print only the '職務名稱' and '學歷' fields
# of every row.
# newline='' is what the csv module asks for, so that line breaks inside
# quoted fields are parsed by the reader instead of being translated.
with open('./' + filename + filetyoe, 'r', newline='') as oldfile:
    reader = csv.DictReader(oldfile)
    for rows in reader:
        print(rows['職務名稱'], rows['學歷'])
# No explicit close(): the with-statement closes the file on exit.
完整程式碼
"""GUI front end for the 104 job-search scraper.

How to use:

* Enter the output file name and the search URL, press "search" and wait
  for the scrape to finish; the result is saved under the name you set.
* Press "output my choose" to export only the ticked columns; they are
  saved as <name>_choose.csv.
* Press "load file to choose" to load the file with the name you set, tick
  the columns you want, then export them to <name>_choose.csv.
"""
import csv
import tkinter as tk

import load_104_newpeopleV1_0

test = False

# Default search URL pre-filled in the URL entry box.
DEFAULT_SEARCH_URL = (
    'https://www.104.com.tw/area/freshman/search'
    '?keyword=%E6%A9%9F%E5%99%A8%E4%BA%BA'
    '&area=6001001000,6001002000'
    '&jobcategory=2007000000'
    '&industry=&page=1&sortField=APPEAR_DATE&sortMode=DESC'
)

# Column headers of the full output file, in file order.
all_headlist = [
    '職務名稱', '公司名稱', '學歷', '地區', '應徵人數',
    '工作說明', '職務類別', '工作待遇', '工作性質', '上班地點', '管理責任',
    '出差外派', '上班時段', '休假制度', '可上班日', '需求人數', '接受身份',
    '工作經歷', '學歷要求', '科系要求', '語文條件', '擅長工具', '工作技能',
    '具備駕照', '其他條件', '公司福利',
    '產業類別', '產業描述', '員工', '資本額', '聯絡人', '公司地址', '電話',
    '傳真', '公司網址', '公司簡介', '主要商品/服務項目',
]

# Initial checkbutton state per column (1 = ticked), same order as
# all_headlist.
default_check = [
    1, 1, 1, 1, 1,                                                    # 5
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,    # 21
    1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1,                                  # 11
]

filename = '104_output'
filetyoe = '.csv'

# Longest preview text shown beside a checkbutton before it is cut.
MAX_PREVIEW_LEN = 25

checktext = {}      # index -> preview Label
checkb = {}         # index -> Checkbutton
var = {}            # index -> IntVar behind the Checkbutton
newfile_title = []  # indexes (into all_headlist) of the columns to export


def deltwocom(text, number):
    """Return *text* with the character at index *number* removed.

    Kept so existing callers keep working; the functions in this script
    now rely on the csv module for quote handling instead.

    Raises:
        IndexError: if *number* is outside the string.
    """
    tmp = list(text)
    tmp[number] = ''
    return ''.join(tmp)


def load_example(filename):
    """Show the first data row of ./<filename>.csv beside the checkbuttons.

    The file is parsed with the csv module, so quoted fields that contain
    commas, quotes or line breaks are read correctly. Values are matched to
    the checkbuttons by header name, which also works for files that hold
    only some of the columns; columns missing from the file are shown empty.
    """
    with open('./' + filename + filetyoe, 'r', newline='') as csvfile:
        reader = csv.reader(csvfile)
        title = next(reader, [])
        info = next(reader, [])

    preview = dict(zip(title, info))
    if not any(head in preview for head in all_headlist):
        # No header matched: fall back to matching by column position.
        preview = dict(zip(all_headlist, info))

    for i, head in enumerate(all_headlist):
        # Keep the preview on a single line.
        value = preview.get(head, '').replace('\r', ' ').replace('\n', ' ')
        if len(value) > MAX_PREVIEW_LEN:
            value = value[:MAX_PREVIEW_LEN] + '...'
        checktext[i]['text'] = value


def creatfile(filename):
    """Write the columns listed in ``newfile_title`` to ./<filename>_choose.csv.

    Reads ./<filename>.csv and writes every field quoted. Also prints the
    '職務名稱' and '學歷' fields of each row to stdout as progress output.

    Raises:
        ValueError: if ``newfile_title`` is empty.
        OSError: if the source cannot be read or the target cannot be
            written.
    """
    columns = [all_headlist[i] for i in newfile_title]
    if not columns:
        raise ValueError('no columns selected for export')

    source = './' + filename + filetyoe
    target = './' + filename + '_choose' + filetyoe
    with open(source, 'r', newline='') as oldfile, \
            open(target, 'w', newline='') as newfile:
        reader = csv.DictReader(oldfile)
        # extrasaction='ignore' drops the columns that were not selected;
        # restval='' fills in a selected column the source does not have.
        writer = csv.DictWriter(
            newfile,
            fieldnames=columns,
            quoting=csv.QUOTE_ALL,
            extrasaction='ignore',
            restval='',
        )
        writer.writeheader()
        for rows in reader:
            print(rows.get('職務名稱'), rows.get('學歷'))
            writer.writerow(rows)


def clickto_search():
    """(button) Scrape the search URL and save the full result file.

    The result is saved under the name typed in the file-name entry, then
    its first row is previewed.
    """
    url = inputurl.get()
    fname = input_fname.get()
    outputtext['text'] = '抓取資料中...'
    # The scrape blocks the event loop; repaint now so the status shows.
    win.update_idletasks()
    res = load_104_newpeopleV1_0.load_104_newpeople_main(
        url, './' + fname + filetyoe, test=test)
    outputtext['text'] = f'完成{res}\toutput file name is {fname}{filetyoe}'
    load_example(fname)


def load_to_choose():
    """(button) Load the named file and preview its first row."""
    fname = input_fname.get()
    outputtext['text'] = '載入 ' + fname + filetyoe + ' 資料'
    win.update_idletasks()
    try:
        load_example(fname)
    except OSError as err:
        outputtext['text'] = f'檔案錯誤: {err}'
        return
    outputtext['text'] = '載入 ' + fname + filetyoe + ' 資料完成'


def output_choose():
    """(button) Export the ticked columns to <name>_choose.csv."""
    fname = input_fname.get()
    target = fname + '_choose' + filetyoe

    # Rebuild the selection on every click so repeated exports do not
    # accumulate duplicate columns.
    newfile_title[:] = [i for i in range(len(all_headlist)) if var[i].get()]
    if not newfile_title:
        outputtext['text'] = '請至少勾選一個欄位'
        return

    outputtext['text'] = '存入 ' + target + ' 中...'
    win.update_idletasks()
    try:
        creatfile(fname)
    except OSError as err:
        outputtext['text'] = f'檔案錯誤: {err}'
        return
    outputtext['text'] = target + '儲存完成'


def _grid_slot(index):
    """Return (row, first column) of the grid cell pair for column *index*.

    Indexes 0-4 go in the left group, 5-25 in the middle group and 26 and
    above in the right group; every group starts at grid row 3.
    """
    if index >= 26:
        return index - 26 + 3, 4
    if index <= 4:
        return index + 3, 0
    return index - 5 + 3, 2


win = tk.Tk()
win.title("load_104_newpeople_job")

# Row 0: output file name.
label_fname = tk.Label(win, text="匯出的檔案名稱: ")
label_fname.grid(row=0, column=0)
v1 = tk.StringVar(win, value=filename)  # default text of the entry box
input_fname = tk.Entry(win, width=100, textvariable=v1)
input_fname.grid(row=0, column=1, columnspan=101, sticky="W")

# Row 1: search URL.
labelurl = tk.Label(win, text="輸入 search url : ")
labelurl.grid(row=1, column=0)
v2 = tk.StringVar(win, value=DEFAULT_SEARCH_URL)
inputurl = tk.Entry(win, width=100, textvariable=v2)
inputurl.grid(row=1, column=1, columnspan=101, sticky="W")

# Row 2: action buttons.
button = tk.Button(win, text='search', command=clickto_search)
button.grid(row=2, column=0)
buttontest = tk.Button(win, text='load file to choose', command=load_to_choose)
buttontest.grid(row=2, column=1)
buttonget = tk.Button(win, text='output my choose', command=output_choose)
buttonget.grid(row=2, column=2)

# Status area.
outputtitle = tk.Label(win, text="工作狀態")
outputtitle.grid(row=0, column=5)
outputtext = tk.Label(win, text="output information")
outputtext.grid(row=1, column=5)

# One checkbutton plus one preview label per column.
for i, head in enumerate(all_headlist):
    row, col = _grid_slot(i)
    var[i] = tk.IntVar(value=default_check[i])
    checkb[i] = tk.Checkbutton(
        win, text=head + ':', height=2, anchor='nw', variable=var[i])
    checkb[i].grid(row=row, column=0 + col, sticky="W")
    checktext[i] = tk.Label(win, width=50, anchor='sw')
    checktext[i].grid(row=row, column=1 + col, sticky="W")

win.mainloop()
GUI
留言
張貼留言