# marketTrade/market_trade/news/InvestCal.py

from selenium import webdriver
import time
import pandas as pd
import requests
from bs4 import BeautifulSoup
import nest_asyncio
import asyncio
import requests_html as rh
import datetime
import pandas as pd
import json


def f_write(driver, path_file=r'C:\\BinOptions\\exemple_pars_tomorrow.txt'):
    """Dump the driver's current page source to a UTF-8 text file.

    Args:
        driver: Any object exposing a ``page_source`` string attribute
            (in this module, a Selenium WebDriver).
        path_file: Destination file. Defaults to the debug dump location
            this function has always written to, so existing callers that
            pass only ``driver`` behave exactly as before.

    Raises:
        OSError: If the destination cannot be opened for writing.
    """
    with open(path_file, 'w', encoding="utf-8") as f:
        f.write(driver.page_source)


def investingPars(path_file=None,dateDict=None):
    """Load the ru.investing.com economic calendar in Firefox and parse it.

    Args:
        path_file: Optional CSV path forwarded to ``getDF``; ``None`` means
            no CSV is written.
        dateDict: Optional mapping with ``'startDate'`` and ``'endDate'``
            values that are typed into the page's date picker. ``None``
            leaves the site's default range untouched.
            NOTE(review): the expected format looks like ``dd/mm/yyyy``
            (an old commented-out example used "31/03/2022") -- confirm.

    Returns:
        The result of ``getDF`` for the loaded page, or ``None`` when the
        page could not be loaded (the error is printed).

    NOTE(review): the geckodriver location is hard-coded, and the
    ``executable_path`` / ``find_element_by_id`` calls are the Selenium 3
    API -- confirm the installed Selenium version still provides them.
    """
    op = webdriver.FirefoxOptions()
    # To run without a visible browser window: op.add_argument("--headless")
    url = 'https://ru.investing.com/economic-calendar/'
    EXE_PATH = r'C:\\Users\\Redsandy\\Downloads\\geckodriver.exe'
    driver = webdriver.Firefox(executable_path=EXE_PATH, options=op)

    # Stays None if anything below fails, so a failed load is reported as
    # ``None`` instead of an unrelated NameError after the real error.
    page_source = None
    try:
        driver.get(url=url)

        if dateDict is not None:
            driver.find_element_by_id('datePickerToggleBtn').click()
            driver.find_element_by_id("startDate").clear()
            driver.find_element_by_id("startDate").send_keys(dateDict['startDate'])
            driver.find_element_by_id("endDate").clear()
            driver.find_element_by_id("endDate").send_keys(dateDict['endDate'])
            driver.find_element_by_id('applyBtn').click()

        # Fixed pauses around the scroll to give the page time to update.
        time.sleep(2)
        driver.execute_script("window.scrollTo(0,document.body.scrollHeight)")
        time.sleep(2)

        # Capture the page first: a failing debug dump must not lose it.
        page_source = driver.page_source
        f_write(driver)
    except Exception as ex:
        print(ex)
    finally:
        # quit() alone closes every window and ends the session; a
        # preceding close() could raise and leave the driver process alive.
        driver.quit()

    if page_source is None:
        return None
    return getDF(page_source, path_file)


def getTime(strok):
    """Return the time string for a calendar cell.

    When the cell text consists of exactly two whitespace-separated words,
    the last word of the cell's ``title`` attribute is returned instead
    (presumably the text is then a countdown and the title carries the
    real time -- verify against the page markup). Otherwise the cell text
    is returned unchanged, surrounding whitespace included.
    """
    words = strok.text.split()
    if len(words) != 2:
        return strok.text
    title_words = strok['title'].split()
    return title_words[-1]
def getValat(strok):
    """Return the cell's importance rating as the string ``"<n>/3"``.

    ``n`` is the number of ``<i>`` elements inside the cell whose first CSS
    class is ``grayFullBullishIcon``.
    """
    filled = sum(
        1
        for icon in strok.find_all('i')
        if icon['class'][0] == 'grayFullBullishIcon'
    )
    return f"{filled}/3"
def getUrl(strok):
    """Return the absolute URL of the first link inside the cell.

    The ``href`` of the first ``<a href=...>`` element is appended to the
    ``https://ru.investing.com`` base. Raises ``IndexError`` when the cell
    contains no such link.
    """
    links = strok.find_all('a', href=True)
    first_link = links[0]
    return 'https://ru.investing.com' + first_link['href']
def getDF(doc,path_file):
    """Parse the economic-calendar table from page HTML into a DataFrame.

    Every ``<tr>`` in the body of the table with id ``economicCalendarData``
    is examined:

    * a row whose first cell has ``theDay`` as its first CSS class sets the
      date (the cell text minus its last three characters) used for the
      event rows that follow;
    * any other row is treated as an event and contributes one record built
      from its first seven cells.

    Rows that do not have the expected shape (too few cells, a missing
    attribute or link) and event rows that appear before any date row are
    skipped.

    Args:
        doc: HTML source of the calendar page.
        path_file: Optional path; when not ``None`` the result is also
            written there as CSV (``utf-8-sig``, no index column).

    Returns:
        A ``pandas.DataFrame`` with one row per parsed event; it is empty
        (but has all columns) when no event could be parsed.

    Raises:
        AttributeError: If the calendar table or its ``<tbody>`` is not
            present in ``doc``.
    """
    col_names=['Время', 'Валюта', 'Важность','Событие','URL', 'Факт.','Прогноз','Пред.','Date']

    soup = BeautifulSoup(doc, 'html.parser')
    tabl = soup.find("table", {"id": "economicCalendarData"}).find('tbody')

    # Records are collected in a list and turned into a DataFrame once:
    # DataFrame.append no longer exists in pandas 2.x and was quadratic.
    records = []
    colDate = None
    for stats in tabl.find_all('tr'):
        cells = stats.find_all('td')
        try:
            if cells[0]["class"][0] == 'theDay':
                colDate = cells[0].text[:-3]
                continue
            if colDate is None:
                # No date header seen yet, so the event cannot be dated.
                continue
            records.append({
                col_names[0]: getTime(cells[0]),
                col_names[1]: cells[1].text.strip(),
                col_names[2]: getValat(cells[2]),
                col_names[3]: cells[3].text.strip(),
                col_names[4]: getUrl(cells[3]),
                col_names[5]: cells[4].text.strip(),
                col_names[6]: cells[5].text.strip(),
                col_names[7]: cells[6].text.strip(),
                col_names[8]: colDate,
            })
        except (KeyError, IndexError):
            # Row without the expected cells/attributes: skip only that row.
            continue

    df = pd.DataFrame(records, columns=col_names)

    if path_file is not None:
        df.to_csv(path_file, index=False, encoding='utf-8-sig')

    return df