# Scrapes the economic calendar from ru.investing.com with Selenium and
# parses the rendered table into a pandas DataFrame with BeautifulSoup.
import time

import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.service import Service


def f_write(driver):
    """Dump the current page source to a file (debugging helper)."""
    with open(r'C:\BinOptions\exemple_pars_tomorrow.txt', 'w', encoding='utf-8') as f:
        f.write(driver.page_source)


def investingPars(path_file=None, dateDict=None):
    """Load the economic calendar, optionally restrict it to a date range
    ({'startDate': 'dd/mm/yyyy', 'endDate': 'dd/mm/yyyy'}), and return the
    parsed table as a DataFrame; also write it to path_file as CSV if given."""
    op = webdriver.FirefoxOptions()
    # op.add_argument("--headless")
    url = 'https://ru.investing.com/economic-calendar/'
    EXE_PATH = r'C:\Users\Redsandy\Downloads\geckodriver.exe'
    # Selenium 4: the driver path is passed through a Service object instead
    # of the removed executable_path argument.
    driver = webdriver.Firefox(service=Service(EXE_PATH), options=op)
    page_source = None
    try:
        driver.get(url)
        if dateDict is not None:
            # Open the date picker and apply the requested range.
            driver.find_element(By.ID, 'datePickerToggleBtn').click()
            driver.find_element(By.ID, 'startDate').clear()
            driver.find_element(By.ID, 'startDate').send_keys(dateDict['startDate'])
            driver.find_element(By.ID, 'endDate').clear()
            driver.find_element(By.ID, 'endDate').send_keys(dateDict['endDate'])
            driver.find_element(By.ID, 'applyBtn').click()
            time.sleep(2)
        # Scroll to the bottom so lazily loaded rows are rendered.
        driver.execute_script('window.scrollTo(0, document.body.scrollHeight)')
        time.sleep(2)
        page_source = driver.page_source
    except Exception as ex:
        print(ex)
    finally:
        driver.quit()
    if page_source is None:
        return None
    return getDF(page_source, path_file)


def getTime(strok):
    """Return the event time from a table cell; when the cell text carries an
    extra marker, the full time is kept in the 'title' attribute."""
    if len(strok.text.split()) == 2:
        return strok['title'].split()[-1]
    return strok.text


def getValat(strok):
    """Count the importance icons in a cell and format them as e.g. '2/3'."""
    inc = 0
    for i in strok.find_all('i'):
        if i['class'][0] == 'grayFullBullishIcon':
            inc += 1
    return str(inc) + '/3'


def getUrl(strok):
    """Build an absolute URL from the first link in the event cell."""
    baseUrl = 'https://ru.investing.com'
    return baseUrl + strok.find_all('a', href=True)[0]['href']


def getDF(doc, path_file):
    """Parse the economic-calendar table out of the page source."""
    soup = BeautifulSoup(doc, 'html.parser')
    tabl = soup.find('table', {'id': 'economicCalendarData'}).find('tbody')
    tdList = [stats.find_all('td') for stats in tabl.find_all('tr')]
    # Russian column headers: Time, Currency, Importance, Event, URL,
    # Actual, Forecast, Previous, Date.
    col_names = ['Время', 'Валюта', 'Важность', 'Событие', 'URL',
                 'Факт.', 'Прогноз', 'Пред.', 'Date']
    rows = []
    colDate = None
    for cells in tdList:
        if cells[0]['class'][0] == 'theDay':
            # A single-cell separator row holding the date; drop the
            # trailing weekday abbreviation.
            colDate = cells[0].text[:-3]
        else:
            rows.append({
                col_names[0]: cells[0].text,
                col_names[1]: cells[1].text.strip(),
                col_names[2]: getValat(cells[2]),
                col_names[3]: cells[3].text.strip(),
                col_names[4]: getUrl(cells[3]),
                col_names[5]: cells[4].text.strip(),
                col_names[6]: cells[5].text.strip(),
                col_names[7]: cells[6].text.strip(),
                col_names[8]: colDate,
            })
    # DataFrame.append was removed in pandas 2.0; build from a list of dicts.
    df = pd.DataFrame(rows, columns=col_names)
    if path_file is not None:
        df.to_csv(path_file, index=False, encoding='utf-8-sig')
    return df
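

# Usage sketch (not part of the original script): the output path below is a
# hypothetical example, and the dates are illustrative; they must use the
# dd/mm/yyyy format the site's date picker expects.
if __name__ == '__main__':
    calendar_df = investingPars(
        path_file=r'C:\BinOptions\calendar.csv',  # hypothetical output path
        dateDict={'startDate': '30/03/2022', 'endDate': '31/03/2022'},
    )
    if calendar_df is not None:
        print(calendar_df.head())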