Create InvestCal.py

2022-04-04 12:06:32 +03:00 · 2022-04-04 12:06:32 +03:00 · 00ee8ba939
commit 00ee8ba939
parent f63e57e140
1 changed files with 120 additions and 0 deletions
--- a/News/InvestCal.py
+++ b/News/InvestCal.py
@ -0,0 +1,120 @@
+from selenium import webdriver
+import time
+import pandas as pd
+import requests 
+from bs4 import BeautifulSoup 
+import nest_asyncio
+import asyncio
+import requests_html as rh
+import datetime 
+import pandas as pd
+import json
+
+
+def f_write(driver, path=r'C:\\BinOptions\\exemple_pars_tomorrow.txt'):
+    """Dump the driver's current page source to a UTF-8 text file.
+
+    Args:
+        driver: Object exposing a ``page_source`` string attribute
+            (a Selenium WebDriver elsewhere in this file).
+        path: Destination file. Defaults to the location that used to be
+            hard-coded, so existing one-argument calls behave the same.
+    """
+    with open(path, 'w', encoding="utf-8") as f:
+        f.write(driver.page_source)
+
+
+
+def investingPars(path_file=None,dateDict=None):
+    """Open the investing.com economic calendar in Firefox and parse it.
+
+    Args:
+        path_file: Optional CSV path forwarded to ``getDF``; when given,
+            the parsed table is also written there.
+        dateDict: Optional dict with ``'startDate'`` and ``'endDate'``
+            strings typed into the page's date-picker fields.
+            NOTE(review): the expected date format is not visible here
+            (a removed example used "31/03/2022") -- confirm.
+
+    Returns:
+        Whatever ``getDF`` returns for the loaded page, or ``None`` when
+        the page could not be loaded (the error is printed).
+    """
+    op=webdriver.FirefoxOptions()
+    # Headless mode is intentionally left off; enable it with
+    # op.add_argument("--headless") if no browser window is wanted.
+    url='https://ru.investing.com/economic-calendar/'
+    EXE_PATH = r'C:\\Users\\Redsandy\\Downloads\\geckodriver.exe'
+    driver = webdriver.Firefox(executable_path=EXE_PATH,options=op)
+
+    # Stays None if anything in the try block fails, so the code after
+    # the try never touches an unbound name.
+    page_source=None
+    try:
+        driver.get(url=url)
+
+        if dateDict is not None:
+            driver.find_element_by_id('datePickerToggleBtn').click()
+            driver.find_element_by_id("startDate").clear()
+            driver.find_element_by_id("startDate").send_keys(dateDict['startDate'])
+            driver.find_element_by_id("endDate").clear()
+            driver.find_element_by_id("endDate").send_keys(dateDict['endDate'])
+            driver.find_element_by_id('applyBtn').click()
+        # Fixed pauses around the scroll-to-bottom give the page time to
+        # render before the HTML is captured.
+        time.sleep(2)
+        driver.execute_script("window.scrollTo(0,document.body.scrollHeight)")
+        time.sleep(2)
+        page_source=driver.page_source
+
+    except Exception as ex:
+        print(ex)
+    finally:
+        # quit() alone ends the session; a preceding close() could raise
+        # and skip quit(), leaving the browser process behind.
+        driver.quit()
+
+    if page_source is None:
+        return None
+    return getDF(page_source,path_file)
+
+
+
+def getTime(strok):
+    """Extract a time string from a table cell.
+
+    When the cell text is exactly two whitespace-separated tokens, the
+    last token of the cell's ``title`` attribute is returned; otherwise
+    the cell text is returned unchanged.
+    """
+    tokens = strok.text.split()
+    if len(tokens) != 2:
+        return strok.text
+    return strok['title'].split()[-1]
+def getValat(strok):
+    """Return the importance rating of a cell as the string ``"N/3"``.
+
+    N is the number of ``<i>`` elements inside ``strok`` whose first CSS
+    class is ``grayFullBullishIcon``. ``<i>`` elements with a missing or
+    empty class attribute are skipped instead of raising.
+    """
+    inc=0
+    for icon in strok.find_all('i'):
+        classes = icon.get('class') or []
+        if classes and classes[0]=='grayFullBullishIcon':
+            inc += 1
+    return str(inc)+"/3"
+def getUrl(strok):
+    """Return the absolute URL of the first link inside a cell.
+
+    The ``href`` of the first ``<a>`` carrying an ``href`` attribute is
+    appended to the site's base URL. Returns ``None`` when the cell has
+    no such link, instead of raising IndexError.
+    """
+    baseUrl='https://ru.investing.com'
+    link = strok.find('a', href=True)
+    if link is None:
+        return None
+    return baseUrl+link['href']
+def getDF(doc,path_file):
+    """Parse the economic-calendar table from page HTML into a DataFrame.
+
+    The ``<tbody>`` of the table with id ``economicCalendarData`` is read
+    row by row. A row whose first cell has the class ``theDay`` sets the
+    date (cell text minus its last three characters) for the rows that
+    follow; every other non-empty row becomes one record.
+
+    Args:
+        doc: HTML source of the calendar page.
+        path_file: If not ``None``, the result is also written to this
+            path as CSV (``utf-8-sig``, no index column).
+
+    Returns:
+        DataFrame with the columns listed in ``col_names``; empty when
+        the table has no data rows.
+
+    Raises:
+        ValueError: the calendar table (or its body) is not in ``doc``.
+    """
+    col_names=['Время', 'Валюта', 'Важность','Событие','URL', 'Факт.','Прогноз','Пред.','Date']
+
+    soup = BeautifulSoup(doc, 'html.parser')
+    table = soup.find("table", {"id": "economicCalendarData"})
+    tabl = table.find('tbody') if table is not None else None
+    if tabl is None:
+        raise ValueError('economicCalendarData table body not found in the page')
+
+    def cellText(cells, idx):
+        # Missing trailing cells yield '' instead of an IndexError.
+        return cells[idx].text.strip() if idx < len(cells) else ''
+
+    records=[]
+    colDate=None  # rows seen before any 'theDay' row get no date
+    for stats in tabl.find_all('tr'):
+        cells = stats.find_all('td')
+        if not cells:
+            continue
+        firstClass = cells[0].get('class') or ['']
+        if firstClass[0]=='theDay':
+            colDate = cells[0].text[:-3]
+            continue
+        # NOTE(review): rows with fewer than seven cells are presumably
+        # holiday/all-day entries -- confirm against the live page. They
+        # are kept, with the missing fields left empty.
+        try:
+            url = getUrl(cells[3]) if len(cells) > 3 else None
+        except IndexError:
+            # Event cell without a link.
+            url = None
+        records.append({
+            col_names[0]:cells[0].text,
+            col_names[1]:cellText(cells,1),
+            col_names[2]:getValat(cells[2]) if len(cells) > 2 else '',
+            col_names[3]:cellText(cells,3),
+            col_names[4]:url,
+            col_names[5]:cellText(cells,4),
+            col_names[6]:cellText(cells,5),
+            col_names[7]:cellText(cells,6),
+            col_names[8]:colDate
+        })
+
+    # Build the frame once from all records; DataFrame.append was removed
+    # in pandas 2.0 and re-copied the frame on every call.
+    df = pd.DataFrame(records, columns=col_names)
+    if path_file is not None:
+        df.to_csv(path_file, index=False, encoding='utf-8-sig')
+    return df
+