"""Render a page with Selenium-driven PhantomJS and print parts of its HTML.

The script loads ``testurl``, waits for a progress element to disappear,
strips emoji from the page source, parses it with BeautifulSoup and prints
the ``bpk-button`` buttons and the ``day-list`` list it finds.
"""
import asyncio
import re
import sys
import time
from urllib import request, error, parse
from urllib.parse import urlencode

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

testurl = "SomeURL"

# Location of the PhantomJS executable used by get_Intro() by default.
_PHANTOMJS_PATH = (
    'D:/lib/phantomjs-2.1.1-windows/phantomjs-2.1.1-windows/bin/phantomjs'
)

# User-agent string reported by the headless browser.
_USER_AGENT = (
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/53 "
    "(KHTML, like Gecko) Chrome/15.0.87"
)

# Compiled once at import time instead of on every remove_emoji() call.
_EMOJI_PATTERN = re.compile(
    "["
    u"\U0001F600-\U0001F64F"  # emoticons
    u"\U0001F300-\U0001F5FF"  # symbols & pictographs
    u"\U0001F680-\U0001F6FF"  # transport & map symbols
    u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
    "]+",
    flags=re.UNICODE,
)


def remove_emoji(target_string):
    """Return ``target_string`` with runs of emoji code points removed.

    The removed ranges are the four listed in ``_EMOJI_PATTERN``. The actual
    substitution (and its error handling) is delegated to ``remove_string``.
    """
    return remove_string(_EMOJI_PATTERN, target_string)


def remove_string(target_pattern, target_string):
    """Remove every match of ``target_pattern`` from ``target_string``.

    Args:
        target_pattern: A regex pattern (string or compiled) passed to
            ``re.sub``.
        target_string: The text to clean.

    Returns:
        The cleaned text, or ``target_string`` unchanged if the substitution
        raised (the error is printed, not propagated).
    """
    result = target_string
    try:
        result = re.sub(target_pattern, '', target_string)
    except Exception as msg:
        # Best-effort by design: report and fall back to the input.
        # str() is required here; concatenating the exception object
        # directly raised TypeError inside the handler.
        print("emoji:" + str(msg))
    return result


# Reference list of expected_conditions names (originally kept in a bare
# string literal); any of them can be used to build a waitCondition:
#   title_is
#   title_contains
#   presence_of_element_located
#   visibility_of_element_located
#   visibility_of
#   presence_of_all_elements_located
#   text_to_be_present_in_element
#   text_to_be_present_in_element_value
#   frame_to_be_available_and_switch_to_it
#   invisibility_of_element_located
#   element_to_be_clickable
#   staleness_of
#   element_to_be_selected
#   element_located_to_be_selected
#   element_selection_state_to_be
#   element_located_selection_state_to_be
#   alert_is_present


def get_Intro(
    url='http://google.com',
    waitCondition=EC.invisibility_of_element_located(
        (By.CLASS_NAME, 'day-list-progress')
    ),
    *,
    timeout=20,
    settle_seconds=15,
    phantomjs_path=_PHANTOMJS_PATH,
):
    """Open ``url`` in PhantomJS, wait for the page to settle, return the driver.

    Args:
        url: Address to load.
        waitCondition: Callable handed to ``WebDriverWait.until``; by default
            it waits until the ``day-list-progress`` element is invisible.
        timeout: Maximum number of seconds ``WebDriverWait`` waits for
            ``waitCondition``.
        settle_seconds: Unconditional extra sleep after the wait succeeds.
        phantomjs_path: Path of the PhantomJS executable.

    Returns:
        The live driver. The caller owns it and must call ``quit()``.

    Raises:
        Whatever ``driver.get`` or ``WebDriverWait.until`` raises (Selenium
        documents ``TimeoutException`` for an expired wait). The driver is
        quit before the exception propagates.
    """
    # NOTE(review): webdriver.PhantomJS is deprecated in newer Selenium
    # releases -- confirm the installed Selenium version still provides it.
    dcap = dict(DesiredCapabilities.PHANTOMJS)  # copy; never mutate the shared dict
    dcap["phantomjs.page.settings.userAgent"] = _USER_AGENT
    driver = webdriver.PhantomJS(phantomjs_path, desired_capabilities=dcap)
    try:
        print('CralwStart:' + url)
        # driver.implicitly_wait(1)  # seconds
        driver.get(url)
        WebDriverWait(driver, timeout).until(waitCondition)
        time.sleep(settle_seconds)  # wait 15 seconds by default
    except BaseException:
        # Do not leak the PhantomJS process when loading or waiting fails
        # (BaseException so Ctrl-C during the sleep is covered too).
        driver.quit()
        raise
    return driver


driver = get_Intro(
    url=testurl,
    waitCondition=EC.invisibility_of_element_located(
        (By.CLASS_NAME, 'day-list-progress')
    ),
)
try:
    html = remove_emoji(driver.page_source)
    print('DownCompleted size:' + str(len(html)))

    soup = BeautifulSoup(html, 'html.parser')
    strminpriceList = "day-list"
    eleMinPriceList = soup.find('ul', class_=strminpriceList)
    btnsStep1List = soup.find_all('button', class_='bpk-button')
    print(btnsStep1List)

    # soup.find() returns None when nothing matches; report that instead of
    # failing with AttributeError on .prettify().
    if eleMinPriceList is None:
        print('day-list element not found', file=sys.stderr)
    else:
        print(eleMinPriceList.prettify())
finally:
    # Always shut the browser down, even if parsing or printing fails.
    driver.quit()


# Disabled code, never executed (originally kept in a bare string literal):
#
# @asyncio.coroutine
# def periodic():
#     while True:
#         print('periodic')
#         yield from asyncio.sleep(1)
#
# def stop():
#     task.cancel()
#
# task = asyncio.Task(periodic())
# loop = asyncio.get_event_loop()
# loop.call_later(5, stop)
# try:
#     loop.run_until_complete(task)
# except asyncio.CancelledError:
#     pass
# except KeyboardInterrupt:
#     pass