"""Scrape the ru.investing.com economic calendar into a pandas DataFrame.

Uses Selenium (Firefox + geckodriver) to render the calendar page — optionally
for a user-supplied date range — then parses the rendered HTML table with
BeautifulSoup and returns the rows as a DataFrame (optionally saved to CSV).
"""

from selenium import webdriver
import time
import pandas as pd
from bs4 import BeautifulSoup

# Local geckodriver binary used to launch Firefox.
EXE_PATH = r'C:\\Users\\Redsandy\\Downloads\\geckodriver.exe'


def f_write(driver):
    """Dump the driver's current page source to a fixed debug file (UTF-8).

    Debug helper only; the hard-coded path is Windows-specific.
    """
    with open(r'C:\\BinOptions\\exemple_pars_tomorrow.txt', 'w', encoding="utf-8") as f:
        f.write(driver.page_source)


def investingPars(path_file=None, dateDict=None):
    """Load the economic-calendar page and return it parsed as a DataFrame.

    Parameters
    ----------
    path_file : str or None
        If given, the parsed DataFrame is also written to this CSV path.
    dateDict : dict or None
        Optional ``{'startDate': ..., 'endDate': ...}`` range typed into the
        page's date picker before scraping (format as the site expects,
        e.g. ``"31/03/2022"``).

    Returns
    -------
    pandas.DataFrame or None
        Parsed calendar rows, or None if the page could not be loaded.
    """
    op = webdriver.FirefoxOptions()
    # op.add_argument("--headless")
    url = 'https://ru.investing.com/economic-calendar/'
    driver = webdriver.Firefox(executable_path=EXE_PATH, options=op)

    # Stays None unless the page loads; the original referenced an unassigned
    # local here after an exception, raising NameError instead of failing cleanly.
    page_source = None
    try:
        driver.get(url=url)
        if dateDict is not None:
            # Open the date picker, type the requested range, and apply it.
            driver.find_element_by_id('datePickerToggleBtn').click()
            driver.find_element_by_id("startDate").clear()
            driver.find_element_by_id("startDate").send_keys(dateDict['startDate'])
            driver.find_element_by_id("endDate").clear()
            driver.find_element_by_id("endDate").send_keys(dateDict['endDate'])
            driver.find_element_by_id('applyBtn').click()
            time.sleep(2)
        # Scroll to the bottom so lazily-rendered rows are present in the DOM.
        driver.execute_script("window.scrollTo(0,document.body.scrollHeight)")
        time.sleep(2)
        page_source = driver.page_source
    except Exception as ex:
        # Best-effort scrape: report and fall through with page_source = None.
        print(ex)
    finally:
        # quit() closes every window and ends the session; the original's
        # extra close() before it was redundant.
        driver.quit()

    if page_source is None:
        return None
    return getDF(page_source, path_file)


def getTime(strok):
    """Extract the event time from a time-cell.

    If the cell's visible text is two words, the real time is the last word
    of the cell's ``title`` attribute; otherwise the cell text itself is used.
    """
    if len(strok.text.split()) == 2:
        return strok['title'].split()[-1]
    return strok.text


def getValat(strok):
    """Return event importance as ``'N/3'``.

    N counts the ``<i class="grayFullBullishIcon">`` icons inside the cell
    (the site renders importance as up to three bull icons).
    """
    inc = sum(1 for i in strok.find_all('i') if i['class'][0] == 'grayFullBullishIcon')
    return str(inc) + "/3"


def getUrl(strok):
    """Return the absolute URL of the first link inside the given cell."""
    baseUrl = 'https://ru.investing.com'
    return baseUrl + strok.find_all('a', href=True)[0]['href']


def getDF(doc, path_file):
    """Parse rendered calendar HTML into a DataFrame, optionally saving a CSV.

    Parameters
    ----------
    doc : str
        Page source containing the ``economicCalendarData`` table.
    path_file : str or None
        If given, the DataFrame is written there as UTF-8-BOM CSV.

    Returns
    -------
    pandas.DataFrame
        One row per calendar event; day-header rows set the ``Date`` column
        for the events that follow them.
    """
    soup = BeautifulSoup(doc, 'html.parser')
    tabl = soup.find("table", {"id": "economicCalendarData"}).find('tbody')
    # One entry per <tr>: the list of its <td> cells.
    tdList = [stats.find_all('td') for stats in tabl.find_all('tr')]

    col_names = ['Время', 'Валюта', 'Важность', 'Событие', 'URL', 'Факт.', 'Прогноз', 'Пред.', 'Date']

    rows = []
    # Date of the current day-header; None until the first 'theDay' row
    # (the original crashed with NameError if an event row came first).
    colDate = None
    for tds in tdList:
        if tds[0]["class"][0] == 'theDay':
            # Day-separator row: keep its date, dropping the 3-char weekday suffix.
            colDate = tds[0].text[:-3]
        else:
            rows.append({
                col_names[0]: tds[0].text,
                col_names[1]: tds[1].text.strip(),
                col_names[2]: getValat(tds[2]),
                col_names[3]: tds[3].text.strip(),
                col_names[4]: getUrl(tds[3]),
                col_names[5]: tds[4].text.strip(),
                col_names[6]: tds[5].text.strip(),
                col_names[7]: tds[6].text.strip(),
                col_names[8]: colDate,
            })

    # Build the frame in one shot: DataFrame.append was removed in pandas 2.0
    # and the per-row append was quadratic anyway.
    df = pd.DataFrame(rows, columns=col_names)

    if path_file is not None:
        df.to_csv(path_file, index=False, encoding='utf-8-sig')
    return df