Stock/stock_price.py

85 lines
2.8 KiB
Python
Raw Permalink Normal View History

2022-05-25 03:16:39 +09:00
import pandas as pd
import bs4
import requests
import re
import multiprocessing as mp
import sqlite3
import datetime
def get_naver_finance_price(code, page=1, timeout=10):
    """Download one page of the Naver Finance daily-price listing as HTML text.

    Args:
        code: Stock code, sent as the ``code`` query parameter.
        page: Page number of the listing, sent as the ``page`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the crawl indefinitely.

    Returns:
        The response body as text.

    Raises:
        UserWarning: If the HTTP status code is not 200; the status code is
            the exception argument.
    """
    url = 'https://finance.naver.com/item/sise_day.nhn'
    # A browser-like User-Agent is sent with every request.
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36"}
    response = requests.get(
        url,
        params={'code': code, 'page': page},
        headers=headers,
        timeout=timeout,
    )
    if response.status_code != 200:
        raise UserWarning(response.status_code)
    return response.text
# Column headers of one daily-price row, in the order the cells are read.
stock_h = ['날짜','종가','전일비','시가','고가','저가','거래량']


def get_data(soup, date):
    """Collect daily-price rows from a parsed page until a cutoff date is hit.

    The text of every ``.tah`` element is read in document order, stripped of
    thousands separators, and grouped into rows of ``len(stock_h)`` cells.
    The first cell of each row is the date; its dots are replaced with dashes
    so it can be compared with an ISO 8601 date string.

    Args:
        soup: Parsed page. Only ``soup.select(".tah")`` is used, and every
            returned element must expose a ``text`` attribute.
        date: Cutoff as an ISO ``YYYY-MM-DD`` string, compared
            lexicographically. Scanning stops at the first row whose date is
            less than or equal to the cutoff. ``None`` disables the cutoff
            (e.g. when no previous update has been recorded).

    Returns:
        A ``(frame, reached_cutoff)`` tuple. ``frame`` is a DataFrame with
        ``stock_h`` columns holding the rows read before the cutoff (all
        values are strings). ``reached_cutoff`` is True when scanning stopped
        early, False when the cells ran out first.

    Raises:
        ValueError: If a row that should be kept has fewer cells than
            ``stock_h`` has columns.
    """
    cells = [cell.text.replace(',', '').strip() for cell in soup.select(".tah")]
    width = len(stock_h)
    rows = []
    reached_cutoff = False
    for start in range(0, len(cells), width):
        row = cells[start:start + width]
        # for ISO 8601
        row[0] = row[0].replace(".", "-")
        # A blank date cell compares <= any cutoff string, so it has always
        # ended the scan; keep that behavior when the cutoff is disabled too.
        if not row[0] or (date is not None and row[0] <= date):
            reached_cutoff = True
            break
        if len(row) != width:
            raise ValueError(
                f"expected {width} cells per row, got {len(row)}: {row!r}"
            )
        rows.append(row)
    # Build the frame once instead of concatenating one frame per row.
    return pd.DataFrame(rows, columns=stock_h), reached_cutoff
def get_last_page(soup):
    """Return the number of the last page of the listing as an int.

    The page number is read from the ``page=`` query parameter of the link
    selected by ``.pgRR a`` (presumably the "jump to last page" link). When
    that link is absent, the count of ``td a`` anchors is used instead.

    Args:
        soup: Parsed page exposing ``select_one`` and ``select``.

    Returns:
        The last page number. Always an ``int``, so both branches agree on
        the return type.

    Raises:
        ValueError: If the link is present but its href carries no
            ``page=<digits>`` parameter.
    """
    last_link = soup.select_one('.pgRR a')
    if last_link is None:
        return len(soup.select('td a'))
    href = last_link.attrs['href']
    # Require at least one digit so an empty "page=" cannot slip through.
    match = re.search(r"page=(\d+)", href)
    if match is None:
        raise ValueError(f"no page number found in pagination link: {href!r}")
    return int(match.group(1))
def croll_naver_page(code, page, date):
    """Fetch a single daily-price page and extract its rows.

    Args:
        code: Stock code passed to ``get_naver_finance_price``.
        page: Page number to download.
        date: Cutoff date forwarded unchanged to ``get_data``.

    Returns:
        Whatever ``get_data`` returns for the parsed page: a
        ``(DataFrame, bool)`` tuple.
    """
    parsed = bs4.BeautifulSoup(get_naver_finance_price(code, page), 'html.parser')
    return get_data(parsed, date)
def croll_naver_page_all(code, date) -> pd.DataFrame:
    """Crawl the daily-price pages of one stock until the cutoff date is hit.

    Page 1 is downloaded once and used both to discover the last page number
    and as the first page of data, so it is not requested a second time.
    Later pages are fetched in order until ``get_data`` reports that the
    cutoff was reached or the last page has been read.

    Args:
        code: Stock code to crawl.
        date: Cutoff date forwarded to ``get_data``.

    Returns:
        A DataFrame with all collected rows, re-indexed from 0. When the
        listing reports no pages, an empty DataFrame with ``stock_h`` columns
        is returned, so the result always matches the annotation.
    """
    first_soup = bs4.BeautifulSoup(get_naver_finance_price(code), 'html.parser')
    last = int(get_last_page(first_soup))
    if last < 1:
        return pd.DataFrame(columns=stock_h)

    frame, is_end = get_data(first_soup, date)
    frames = [frame]
    for pagenum in range(2, last + 1):
        if is_end:
            break
        frame, is_end = croll_naver_page(code, pagenum, date)
        frames.append(frame)
    return pd.concat(frames, ignore_index=True)
def toSqlPos(x, code):
    """Flatten one price row into a positional SQL parameter tuple.

    Args:
        x: Row-like object indexable by the Korean column headers
            (date, close, diff, open, high, low, volume).
        code: Stock code placed first in the tuple.

    Returns:
        An 8-tuple: ``code`` followed by the seven row values in header order.
    """
    columns = ("날짜", "종가", "전일비", "시가", "고가", "저가", "거래량")
    return (code, *(x[column] for column in columns))
if __name__ == '__main__':
    # Incremental crawl: for every stock in KRXCorp, fetch the daily prices
    # newer than its LastUpdate, insert them into STOCK, and stamp the stock
    # with today's date.
    db = sqlite3.connect("stock.db")
    try:
        today = datetime.date.today()
        today_iso = today.isoformat()
        krx_stock_rows = db.execute("""SELECT Code,LastUpdate From KRXCorp""").fetchall()
        total = len(krx_stock_rows)
        for i, (code, last_update) in enumerate(krx_stock_rows):
            print(f"{total}/{i}: code {code} : {last_update}")
            # Already refreshed today; nothing to fetch.
            if last_update == today_iso:
                continue
            d = croll_naver_page_all(code, last_update)
            cursor = db.cursor()
            if len(d) > 0:
                cursor.executemany(
                    "INSERT INTO STOCK (Code,Date,Close,Diff,Open,High,Low,Volume) VALUES (?,?,?,?,?,?,?,?)",
                    [toSqlPos(x, code) for _, x in d.iterrows()],
                )
            # NOTE(review): the UPDATE and the commit are run once per stock,
            # whether or not new rows were inserted — confirm this matches the
            # intended nesting.
            cursor.execute("""UPDATE KRXCorp Set LastUpdate = ? WHERE Code = ?""", (today_iso, code))
            db.commit()
    finally:
        # Release the database handle even if a crawl fails mid-run.
        db.close()