import sys
import time
import re
from urllib import request, error, parse
from urllib.parse import urlencode
from bs4 import BeautifulSoup
import re

import asyncio
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities


testurl = "SomeURL"


def remove_emoji(target_string):
    """Strip emoji characters from ``target_string``.

    A single character class is built from four Unicode ranges (emoticons,
    symbols & pictographs, transport & map symbols, regional-indicator
    flags); runs of one or more such characters are removed.  The actual
    substitution is delegated to ``remove_string``.

    Args:
        target_string: Text to clean.

    Returns:
        Whatever ``remove_string`` returns for the compiled emoji pattern
        and ``target_string``.
    """
    char_class = (
        "["
        "\U0001F600-\U0001F64F"  # emoticons
        "\U0001F300-\U0001F5FF"  # symbols & pictographs
        "\U0001F680-\U0001F6FF"  # transport & map symbols
        "\U0001F1E0-\U0001F1FF"  # flags (iOS)
        "]+"
    )
    matcher = re.compile(char_class, flags=re.UNICODE)
    return remove_string(matcher, target_string)

def remove_string(target_pattern, target_string):
    """Return ``target_string`` with every match of ``target_pattern`` removed.

    This is a best-effort helper: if the substitution fails (for example an
    invalid pattern, or a ``target_string`` that is not text), the error is
    printed and the input is handed back untouched instead of raising.

    Args:
        target_pattern: Regular expression, either a pattern string or a
            compiled pattern object, as accepted by ``re.sub``.
        target_string: Text to remove matches from.

    Returns:
        The text with all matches deleted, or ``target_string`` unchanged
        when the substitution raised.
    """
    try:
        return re.sub(target_pattern, '', target_string)
    except Exception as msg:
        # The exception must be converted with str(): concatenating the
        # exception object itself raises TypeError inside this handler,
        # which defeated the fallback below.
        print("emoji:" + str(msg))
        return target_string


'''
title_is
title_contains
presence_of_element_located
visibility_of_element_located
visibility_of
presence_of_all_elements_located
text_to_be_present_in_element
text_to_be_present_in_element_value
frame_to_be_available_and_switch_to_it
invisibility_of_element_located
element_to_be_clickable
staleness_of
element_to_be_selected
element_located_to_be_selected
element_selection_state_to_be
element_located_selection_state_to_be
alert_is_present
'''

def get_Intro( url='http://google.com' , waitCondition = EC.invisibility_of_element_located((By.CLASS_NAME, 'day-list-progress')) ):
    """Open ``url`` in a PhantomJS browser, wait for it to settle, return the driver.

    The browser is started with a custom Chrome-like user-agent string.
    After navigation the function waits up to 20 seconds for
    ``waitCondition`` and then sleeps a further fixed 15 seconds before
    returning.

    Args:
        url: Address to load.
        waitCondition: Selenium expected-condition callable passed to
            ``WebDriverWait.until``.  The default waits for the element
            with class ``day-list-progress`` to become invisible.

    Returns:
        The live ``webdriver.PhantomJS`` instance with the page loaded.
        The caller owns it and must call ``quit()`` when done.

    Raises:
        Any exception raised while navigating or waiting (e.g. when the
        wait condition is not met in time) is propagated after the browser
        has been shut down.
    """
    dcap = dict(DesiredCapabilities.PHANTOMJS)
    dcap["phantomjs.page.settings.userAgent"] = (
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/53 "
        "(KHTML, like Gecko) Chrome/15.0.87"
    )
    # NOTE(review): the PhantomJS binary location is machine-specific.
    driver = webdriver.PhantomJS('D:/lib/phantomjs-2.1.1-windows/phantomjs-2.1.1-windows/bin/phantomjs',desired_capabilities=dcap)

    try:
        print('CralwStart:'+url)
        driver.get(url)
        WebDriverWait(driver, 20).until(waitCondition)
        # Wait 15 seconds after the condition is met -- presumably to let
        # late-running page scripts finish rendering; confirm it is needed.
        time.sleep(15)
    except BaseException:
        # Do not leave an orphaned PhantomJS process behind when loading
        # fails: the caller never receives the driver, so nobody else
        # could quit it.
        driver.quit()
        raise
    return driver

# Script body: load the test page, strip emoji from its HTML, parse it and
# dump the 'bpk-button' buttons and the 'day-list' <ul>.
driver = get_Intro(url=testurl,waitCondition = EC.invisibility_of_element_located((By.CLASS_NAME, 'day-list-progress')))
try:
    html = remove_emoji(driver.page_source)
    print('DownCompleted size:'+ str( len(html) ) )
    soup = BeautifulSoup(html, 'html.parser')
    strminpriceList = "day-list"

    # Class name comes from strminpriceList (same value as the literal
    # that was previously hard-coded here).
    eleMinPriceList = soup.find('ul',class_=strminpriceList)

    btnsStep1List = soup.find_all('button',class_='bpk-button')

    print(btnsStep1List)

    # Placeholder value, unused in the visible file; kept so the module's
    # global names stay unchanged.
    xxxxx=999999

    # find() yields None when no matching <ul> exists; calling .prettify()
    # on it would raise AttributeError.
    if eleMinPriceList is None:
        print('NotFound:' + strminpriceList)
    else:
        print(eleMinPriceList.prettify() )
finally:
    # Always shut the browser down, even if parsing or printing failed.
    driver.quit()

'''
@asyncio.coroutine
def periodic():
    while True:
        print('periodic')
        yield from asyncio.sleep(1)

def stop():
    task.cancel()

task = asyncio.Task(periodic())
loop = asyncio.get_event_loop()
loop.call_later(5, stop)

try:
    loop.run_until_complete(task)
except asyncio.CancelledError:
    pass
except KeyboardInterrupt:
    pass
'''
# No labels