python 爬蟲程式 -2 加入interface視窗介面
加入 Python 的 interface 視窗介面
這裡要使用tkinter函式庫
import tkinter as tk


def clickon():
    """Handle a click on the button.

    Copies the text currently typed in the entry box into the output label,
    twice, separated by a newline.
    """
    outputtext['text'] = inputtext.get() + '\n' + inputtext.get()


win = tk.Tk()
win.title("My First Tk GUI")  # window title

label = tk.Label(win, text="Hello World!")  # static caption
label.grid(row=0, column=0)  # position it with the grid layout manager

inputtext = tk.Entry(win)  # single-line text entry box
inputtext.grid(row=0, column=1, columnspan=20)

# Options are assigned after construction here by indexing the widget;
# they could equally be passed to the constructor as keyword arguments.
button = tk.Button(win)
button['text'] = 'ok'
button['command'] = clickon  # callback invoked when the button is clicked
button.grid(row=1, column=1)

outputtext = tk.Label(win, text="output information")
outputtext.grid(row=2, column=1)

win.mainloop()
GUI test
建立的物件的屬性有兩種寫法:
# 第一種:先建立物件,再逐一設定屬性
button = tk.Button(win)  # 建立 button
button['text'] = 'ok'
button['command'] = clickon  # button event 事件
# 第二種:建立物件時直接以參數設定屬性
button = tk.Button(win, text = "OK", command = clickon)  # 建立 button
一次建立多個物件
用陣列儲存(範例中以索引為鍵的 dict 實作)。
在物件屬性可以設定
- text文字:string格式
- height高度, width寬度:只能輸入int的數字
- anchor(靠左、靠右、置中) = "n", "ne", "e", "se", "s", "sw", "w", "nw", and also "center".
- variable值
在layout可以設定
- row, column
- sticky(對齊) = "N","W","S","E"
# Build one check button plus one (initially empty) label per column title.
all_headlist = ['職務名稱', '公司名稱', '學歷', '地區', '應徵人數']
default_check = [1, 1, 0, 0, 1]  # initial state of each check button (1 = checked)
checktext = {}  # index -> Label placed to the right of the check button
checkb = {}     # index -> Checkbutton
var = {}        # index -> IntVar holding the check button's state
for i, head in enumerate(all_headlist):
    var[i] = tk.IntVar(value=default_check[i])
    checkb[i] = tk.Checkbutton(win, text=head + ':', height=2, anchor='nw', variable=var[i])
    checkb[i].grid(row=i + 4, column=0, sticky="W")
    checktext[i] = tk.Label(win, width=50, anchor='sw')
    checktext[i].grid(row=i + 4, column=1, sticky="W")
GUI checkbutton
讀檔方式
當只需要讀取特定的直行(欄位)時,以下範例只顯示每一列的'職務名稱'跟'學歷'(with 區塊結束時會自動關閉檔案,不需要另外呼叫 oldfile.close()):
with open('./' + filename + filetyoe, 'r') as oldfile:
    reader = csv.DictReader(oldfile)
    for rows in reader:
        print(rows['職務名稱'], rows['學歷'])
完整程式碼
功能說明:
1. 輸入要匯出的檔案名稱,以及搜尋的資料,按下 search,等待資料抓取完成,會存入你所設定的名稱。
2. 按下 output my choose,匯出所選擇的資訊,存入「你設定的名稱 + _choose」。
3. 按下 load file to choose,讀取你設定的名稱的檔案,選擇完要匯出的資訊後,再存入「你設定的名稱 + _choose」。
- import tkinter as tk
- import tkinter as tk
- import csv
- import time
- import load_104_newpeopleV1_0
# Forwarded as the ``test`` keyword argument of the scraper call.
test = False

# Example search URL:
# https://www.104.com.tw/area/freshman/search?keyword=%E6%A9%9F%E5%99%A8%E4%BA%BA&area=6001001000,6001002000&jobcategory=2007000000&industry=&page=1&sortField=APPEAR_DATE&sortMode=DESC

# Column titles of the scraped CSV; one check button is built per entry.
all_headlist = [
    '職務名稱', '公司名稱', '學歷', '地區', '應徵人數',
    '工作說明', '職務類別', '工作待遇', '工作性質', '上班地點', '管理責任', '出差外派', '上班時段', '休假制度', '可上班日', '需求人數', '接受身份', '工作經歷', '學歷要求', '科系要求', '語文條件', '擅長工具', '工作技能', '具備駕照', '其他條件', '公司福利',
    '產業類別', '產業描述', '員工', '資本額', '聯絡人', '公司地址', '電話', '傳真', '公司網址', '公司簡介', '主要商品/服務項目',
]
checktext = {}  # index -> preview Label, filled in when the window is built
filename = '104_output'  # default base name of the CSV file (no extension)
filetyoe = '.csv'  # file extension; the misspelt name is what the functions reference
checkb = {}  # index -> Checkbutton, filled in when the window is built
newfile_title = []  # indexes into all_headlist of the columns chosen for export

# Initial state of each check button (1 = checked), aligned with all_headlist.
default_check = [
    1, 1, 1, 1, 1,  # first 5 columns
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,  # next 21 columns
    1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1,  # last 11 columns
]
def deltwocom(text, number):
    """Return *text* with the single character at index *number* removed.

    Args:
        text: the source string.
        number: index of the character to drop; negative indexes count
            from the end, as with normal sequence indexing.

    Returns:
        A new string one character shorter than *text*.

    Raises:
        IndexError: if *number* is outside the string.
    """
    chars = list(text)
    chars[number] = ''  # blank the slot instead of deleting so indexes stay valid
    return ''.join(chars)
def load_example(filename):
    """Preview the first data row of a CSV file in the ``checktext`` labels.

    Opens ``./<filename><filetyoe>``, skips the header row and writes field
    ``i`` of the first data row into ``checktext[i]['text']``. Values longer
    than 25 characters are cut to 25 characters followed by ``'...'``.
    Labels that have no matching field (short row, or no data row at all)
    are cleared.

    The file is parsed with the ``csv`` module so that fields containing
    commas, doubled quotes or line breaks are read correctly.

    Args:
        filename: base name of the file, without directory or extension.

    Raises:
        FileNotFoundError: if the file does not exist.
    """
    max_chars = 25  # longest value shown in full

    # newline='' lets the csv module handle line breaks inside quoted fields.
    with open('./' + filename + filetyoe, 'r', newline='') as csvfile:
        reader = csv.reader(csvfile)
        next(reader, None)       # header row, not displayed
        info = next(reader, [])  # first data row, or [] when there is none

    for i, label in checktext.items():
        value = info[i] if i < len(info) else ''
        if len(value) > max_chars:
            value = value[:max_chars] + '...'
        label['text'] = value
def creatfile(filename):
    """Export the selected columns of a CSV file to ``<filename>_choose``.

    Reads ``./<filename><filetyoe>`` by header name and writes
    ``./<filename>_choose<filetyoe>`` containing only the columns whose
    indexes (into ``all_headlist``) are listed in ``newfile_title``, in that
    order. Every field is written double-quoted, with embedded quotes
    escaped by doubling, and each row ends with ``'\\n'``.

    Args:
        filename: base name of the input file, without directory or extension.

    Raises:
        ValueError: if ``newfile_title`` is empty (no column selected).
        FileNotFoundError: if the input file does not exist.
        KeyError: if a selected column is missing from the input header.
    """
    columns = [all_headlist[index] for index in newfile_title]
    if not columns:
        raise ValueError('no columns selected for export')

    source_path = './' + filename + filetyoe
    target_path = './' + filename + '_choose' + filetyoe

    # newline='' is what the csv module expects for both reading and writing;
    # it keeps line breaks inside quoted fields intact.
    with open(source_path, 'r', newline='') as oldfile, \
            open(target_path, 'w', newline='') as newfile:
        reader = csv.DictReader(oldfile)
        writer = csv.writer(newfile, quoting=csv.QUOTE_ALL, lineterminator='\n')
        writer.writerow(columns)  # header line
        for row in reader:
            writer.writerow([row[column] for column in columns])
def clickto_search():
    """Callback of the "search" button: scrape the URL and save the result.

    Reads the search URL and the output file name from the two entry boxes,
    runs the scraper so that it writes ``./<name><filetyoe>``, reports the
    outcome in the status label and previews the first data row.

    The file name is taken from the entry box (not from the module-level
    default) so that all three buttons work on the file the user named.
    """
    url = inputurl.get()
    filename = input_fname.get()

    outputtext['text'] = '抓取資料中...'
    # The scraper call below blocks the Tk event loop; flush pending redraws
    # first so the status text is actually painted before the wait starts.
    outputtext.update_idletasks()

    # NOTE(review): the return value is concatenated into the status message,
    # so it is assumed to be a str -- confirm against the scraper module.
    res = load_104_newpeopleV1_0.load_104_newpeople_main(
        url, './' + filename + filetyoe, test=test)

    outputtext['text'] = '完成' + res + '\toutput file name is ' + filename + filetyoe
    load_example(filename)
def load_to_choose():
    """Callback of the "load file to choose" button.

    Previews the first data row of the CSV file named in the file-name entry
    box and reports progress in the status label. A missing file is reported
    in the status label instead of leaving the "loading" text in place.
    """
    filename = input_fname.get()

    outputtext['text'] = '載入 ' + filename + filetyoe + ' 資料'
    outputtext.update_idletasks()  # paint the status before the file is read

    try:
        load_example(filename)
    except FileNotFoundError:
        outputtext['text'] = '找不到檔案 ' + filename + filetyoe
        return

    outputtext['text'] = '載入 ' + filename + filetyoe + ' 資料完成'
def output_choose():
    """Callback of the "output my choose" button.

    Collects the indexes of the ticked check buttons into ``newfile_title``
    and exports those columns of the file named in the file-name entry box
    to ``<name>_choose<filetyoe>``.
    """
    filename = input_fname.get()
    target = filename + '_choose' + filetyoe

    outputtext['text'] = '存入 ' + target + ' 中...'
    outputtext.update_idletasks()  # paint the status before the export starts

    # Rebuild the selection in place: the list is shared at module level, and
    # appending to it on every click would duplicate columns chosen earlier.
    newfile_title[:] = [i for i in range(len(all_headlist)) if var[i].get()]

    if not newfile_title:
        # Nothing ticked: there is no column to export.
        outputtext['text'] = '請至少勾選一個欄位'
        return

    creatfile(filename)
    outputtext['text'] = target + '儲存完成'
# ---- Build the main window -------------------------------------------------
win = tk.Tk()
win.title("load_104_newpeople_job")

# Row 0: name of the CSV file to write / read (without extension).
label_fname = tk.Label(win, text="匯出的檔案名稱: ")
label_fname.grid(row=0, column=0)
v1 = tk.StringVar(win, value='104_output')  # default content of the entry box
input_fname = tk.Entry(win, width=100, textvariable=v1)
input_fname.grid(row=0, column=1, columnspan=101, sticky="W")

# Row 1: search URL handed to the scraper.
labelurl = tk.Label(win, text="輸入 search url : ")
labelurl.grid(row=1, column=0)
v2 = tk.StringVar(win, value='https://www.104.com.tw/area/freshman/search?keyword=%E6%A9%9F%E5%99%A8%E4%BA%BA&area=6001001000,6001002000&jobcategory=2007000000&industry=&page=1&sortField=APPEAR_DATE&sortMode=DESC')
inputurl = tk.Entry(win, width=100, textvariable=v2)
inputurl.grid(row=1, column=1, columnspan=101, sticky="W")

# Row 2: the three action buttons.
button = tk.Button(win, text='search', command=clickto_search)
button.grid(row=2, column=0)

buttontest = tk.Button(win, text='load file to choose', command=load_to_choose)
buttontest.grid(row=2, column=1)

buttonget = tk.Button(win, text='output my choose', command=output_choose)
buttonget.grid(row=2, column=2)

# Status display, updated by the button callbacks.
outputtitle = tk.Label(win, text="工作狀態")
outputtitle.grid(row=0, column=5)
outputtext = tk.Label(win, text="output information")
outputtext.grid(row=1, column=5)

# One check button and one preview label per column title, laid out in three
# side-by-side groups starting at grid row 3:
#   indexes 0-4   -> grid columns 0/1
#   indexes 5-25  -> grid columns 2/3
#   indexes 26+   -> grid columns 4/5
var = {}  # index -> IntVar holding the check button's state
for i, head in enumerate(all_headlist):
    if i >= 26:
        row, col = i - 26 + 3, 4
    elif i <= 4:
        row, col = i + 3, 0
    else:
        row, col = i - 5 + 3, 2
    var[i] = tk.IntVar(value=default_check[i])
    checkb[i] = tk.Checkbutton(win, text=head + ':', height=2, anchor='nw', variable=var[i])
    checkb[i].grid(row=row, column=0 + col, sticky="W")
    checktext[i] = tk.Label(win, width=50, anchor='sw')
    checktext[i].grid(row=row, column=1 + col, sticky="W")

win.mainloop()
GUI
留言
張貼留言