Skip to content

Commit

Permalink
✨ news sentiment classify OpenAI-davinci
Browse files Browse the repository at this point in the history
  • Loading branch information
Yonv1943 authored Feb 21, 2023
1 parent aa4bc08 commit 37f1064
Showing 1 changed file with 63 additions and 273 deletions.
336 changes: 63 additions & 273 deletions demo/shares_news_sentiment_classify.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,14 @@
import pandas as pd
import tushare as ts

import os
import openai


def get_news_from_tushare(api_key: str, data_path: str = 'finance_news_from_tushare.csv') -> str:
start_date = '2023-01-01'
end_date = '2023-01-02'
limit_line = 4
start_date = '2023-02-01'
end_date = '2023-02-02'
limit_line = 200
if_news_or_reports = False

if os.path.exists(data_path):
Expand All @@ -38,29 +41,25 @@ def get_news_from_tushare(api_key: str, data_path: str = 'finance_news_from_tush
"limit": limit_line,
"offset": 0
}, fields=[
"datetime"
"datetime",
"title",
"content",
])

df.to_csv(data_path)
print(df)

max_num_news = 4
max_len_title = 32
max_len_content = 64
data_str = "The finance news:\n"
for i in df.index:
max_len_content = 0
data_str = ""
for i in df.index[:max_num_news]:
row = df.iloc[i]
title = row['title'][1:max_len_title]
content = row['content'][:max_len_content]
data_str += f"TITLE: {title}...\tCONTENT: {content}\n"
data_str += f"{title}, {content}\n"
return data_str


# get_news_from_tushare(api_key='396edc19585416fbe4ec5115240821c07d435fac759f0ab95e95d816')
# exit()


def get_news_from_market_aux(api_key: str, data_path: str = 'finance_news_from_market_aux.txt'):
limit_line = 4

Expand All @@ -86,277 +85,68 @@ def get_news_from_market_aux(api_key: str, data_path: str = 'finance_news_from_m
data = conn.getresponse()
data = data.read().decode('utf-8')
data = json.loads(data)
data = json.dumps(data, indent=2)
with open(data_path, 'w') as f:
f.write(data)
f.write(json.dumps(data, indent=2))

assert isinstance(data, dict)

'''concert dict to string (Title: ... Content: ...)'''
max_num_news = 8
max_len_title = 32 * 4
max_len_content = 64 * 4
max_len_content = 0 * 4

data = data['data']

data_str = "The finance news:\n"
for item in data:
title = item['title'][1:max_len_title]
data_str = ""
for item in data[:max_num_news]:
title = item['title'][:max_len_title]
content = item['description'][:max_len_content]
data_str += f"TITLE: {title}...\tCONTENT: {content}\n"
print(data_str)
data_str += f"{title}, {content}\n"
return data_str


get_news_from_market_aux(api_key='XeR26870lukqSlfiPpb7r4KoxoUTK4WZj6In835C')
exit()
def get_result_from_openai_davinci(api_key: str, prompt_str: str):
max_tokens = 64

# openai.api_key =
openai.api_key = api_key # os.getenv("OPENAI_API_KEY")

import atexit
import base64
import json
import operator
import time
import uuid
import os
import shutil
from functools import reduce
from time import sleep
from typing import Optional
from playwright.sync_api import sync_playwright
from playwright._impl._api_structures import ProxySettings


class ChatGPT:
"""
A ChatGPT interface that uses Playwright to run a browser,
and interacts with that browser to communicate with ChatGPT in
order to provide an open API to ChatGPT.
"""

stream_div_id = "chatgpt-wrapper-conversation-stream-data"
eof_div_id = "chatgpt-wrapper-conversation-stream-data-eof"
session_div_id = "chatgpt-wrapper-session-data"

def __init__(self, headless: bool = True, browser="firefox", timeout=60, proxy: Optional[ProxySettings] = None):
self.play = sync_playwright().start()

try:
playbrowser = getattr(self.play, browser)
except Exception:
print(f"Browser {browser} is invalid, falling back on firefox")
playbrowser = self.play.firefox
try:
self.browser = playbrowser.launch_persistent_context(
user_data_dir="/tmp/playwright",
headless=headless,
proxy=proxy,
)
except Exception:
self.user_data_dir = f"/tmp/{str(uuid.uuid4())}"
shutil.copytree("/tmp/playwright", self.user_data_dir)
self.browser = playbrowser.launch_persistent_context(
user_data_dir=self.user_data_dir,
headless=headless,
proxy=proxy,
)

if len(self.browser.pages) > 0:
self.page = self.browser.pages[0]
else:
self.page = self.browser.new_page()
self._start_browser()
self.parent_message_id = str(uuid.uuid4())
self.conversation_id = None
self.session = None
self.timeout = timeout
atexit.register(self._cleanup)

def _start_browser(self):
self.page.goto("https://chat.openai.com/")

def _cleanup(self):
self.browser.close()
# remove the user data dir in case this is a second instance
if hasattr(self, "user_data_dir"):
shutil.rmtree(self.user_data_dir)
self.play.stop()

def refresh_session(self):
self.page.evaluate(
"""
const xhr = new XMLHttpRequest();
xhr.open('GET', 'https://chat.openai.com/api/auth/session');
xhr.onload = () => {
if(xhr.status == 200) {
var mydiv = document.createElement('DIV');
mydiv.id = "SESSION_DIV_ID"
mydiv.innerHTML = xhr.responseText;
document.body.appendChild(mydiv);
}
};
xhr.send();
""".replace(
"SESSION_DIV_ID", self.session_div_id
)
)

while True:
session_datas = self.page.query_selector_all(f"div#{self.session_div_id}")
if len(session_datas) > 0:
break
sleep(0.2)

session_data = json.loads(session_datas[0].inner_text())
self.session = session_data

self.page.evaluate(f"document.getElementById('{self.session_div_id}').remove()")

def _cleanup_divs(self):
self.page.evaluate(f"document.getElementById('{self.stream_div_id}').remove()")
self.page.evaluate(f"document.getElementById('{self.eof_div_id}').remove()")

def ask_stream(self, prompt: str):
if self.session is None:
self.refresh_session()

new_message_id = str(uuid.uuid4())

if "accessToken" not in self.session:
yield (
"Your ChatGPT session is not usable.\n"
"* Run this program with the `install` parameter and log in to ChatGPT.\n"
"* If you think you are already logged in, try running the `session` command."
)
return

request = {
"messages": [
{
"id": new_message_id,
"role": "user",
"content": {"content_type": "text", "parts": [prompt]},
}
],
"model": "text-davinci-002-render-sha",
"conversation_id": self.conversation_id,
"parent_message_id": self.parent_message_id,
"action": "next",
}

code = (
"""
const stream_div = document.createElement('DIV');
stream_div.id = "STREAM_DIV_ID";
document.body.appendChild(stream_div);
const xhr = new XMLHttpRequest();
xhr.open('POST', 'https://chat.openai.com/backend-api/conversation');
xhr.setRequestHeader('Accept', 'text/event-stream');
xhr.setRequestHeader('Content-Type', 'application/json');
xhr.setRequestHeader('Authorization', 'Bearer BEARER_TOKEN');
xhr.responseType = 'stream';
xhr.onreadystatechange = function() {
var newEvent;
if(xhr.readyState == 3 || xhr.readyState == 4) {
const newData = xhr.response.substr(xhr.seenBytes);
try {
const newEvents = newData.split(/\\n\\n/).reverse();
newEvents.shift();
if(newEvents[0] == "data: [DONE]") {
newEvents.shift();
}
if(newEvents.length > 0) {
newEvent = newEvents[0].substring(6);
// using XHR for eventstream sucks and occasionally ive seen incomplete
// json objects come through JSON.parse will throw if that happens, and
// that should just skip until we get a full response.
JSON.parse(newEvent);
}
} catch (err) {
console.log(err);
newEvent = undefined;
}
if(newEvent !== undefined) {
stream_div.innerHTML = btoa(newEvent);
xhr.seenBytes = xhr.responseText.length;
}
}
if(xhr.readyState == 4) {
const eof_div = document.createElement('DIV');
eof_div.id = "EOF_DIV_ID";
document.body.appendChild(eof_div);
}
};
xhr.send(JSON.stringify(REQUEST_JSON));
""".replace(
"BEARER_TOKEN", self.session["accessToken"]
)
.replace("REQUEST_JSON", json.dumps(request))
.replace("STREAM_DIV_ID", self.stream_div_id)
.replace("EOF_DIV_ID", self.eof_div_id)
)

self.page.evaluate(code)

last_event_msg = ""
start_time = time.time()
while True:
eof_datas = self.page.query_selector_all(f"div#{self.eof_div_id}")

conversation_datas = self.page.query_selector_all(
f"div#{self.stream_div_id}"
)
if len(conversation_datas) == 0:
continue

full_event_message = None

try:
event_raw = base64.b64decode(conversation_datas[0].inner_html())
if len(event_raw) > 0:
event = json.loads(event_raw)
if event is not None:
self.parent_message_id = event["message"]["id"]
self.conversation_id = event["conversation_id"]
full_event_message = "\n".join(
event["message"]["content"]["parts"]
)
except Exception:
yield (
"Failed to read response from ChatGPT. Tips:\n"
" * Try again. ChatGPT can be flaky.\n"
" * Use the `session` command to refresh your session, and then try again.\n"
" * Restart the program in the `install` mode and make sure you are logged in."
)
break

if full_event_message is not None:
chunk = full_event_message[len(last_event_msg):]
last_event_msg = full_event_message
yield chunk

# if we saw the eof signal, this was the last event we
# should process and we are done
if len(eof_datas) > 0 or (((time.time() - start_time) > self.timeout) and full_event_message is None):
break

sleep(0.2)

self._cleanup_divs()

def ask(self, message: str) -> str:
"""
Send a message to chatGPT and return the response.
Args:
message (str): The message to send.
Returns:
str: The response received from OpenAI.
"""
response = list(self.ask_stream(message))
return (
reduce(operator.add, response)
if len(response) > 0
else "Unusable response produced, maybe login session expired. Try 'pkill firefox' and 'chatgpt install'"
)

def new_conversation(self):
self.parent_message_id = str(uuid.uuid4())
self.conversation_id = None
response = openai.Completion.create(
model="text-davinci-003",
prompt=prompt_str,
temperature=0,
max_tokens=max_tokens,
top_p=1,
frequency_penalty=0,
presence_penalty=0
)
return response


API_KEY_Tushare = '' # https://www.tushare.pro/user/token
API_KEY_MarketAUX = '' # https://www.marketaux.com/account/dashboard
API_KEY_OpenAI = '' # https://platform.openai.com/account/api-keys


def run_news_in_chinese():
prompt_str = "读下方新闻,列举3个可能受影响的美国股票,分别简短地判断积极或消极:\n\n"
prompt_str += get_news_from_tushare(api_key=API_KEY_Tushare)
print(f"\n====\n{prompt_str}")

result_str = get_result_from_openai_davinci(api_key=API_KEY_OpenAI, prompt_str=prompt_str)
print(f"\n====\n{result_str}")


def run_news_in_english():
prompt_str = "Read following news and list 3 stocks that may be affected, " \
"briefly judge each one positively or negatively:\n\n"
prompt_str += get_news_from_market_aux(api_key=API_KEY_MarketAUX)
print(f"\n====\n{prompt_str}")

result_str = get_result_from_openai_davinci(api_key=API_KEY_OpenAI, prompt_str=prompt_str)
print(f"\n====\n{result_str}")


if __name__ == '__main__':
run_news_in_chinese()
run_news_in_english()

0 comments on commit 37f1064

Please sign in to comment.