Skip to content

Commit

Permalink
美股粉单,默认存贮改为csv,股票搜索接口
Browse files Browse the repository at this point in the history
  • Loading branch information
bbfamily committed Sep 9, 2017
1 parent 2f61cd1 commit 179a267
Show file tree
Hide file tree
Showing 41 changed files with 609 additions and 197 deletions.
17 changes: 15 additions & 2 deletions abupy/CoreBu/ABuEnv.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,8 +203,17 @@ class EMarketSubType(Enum):
US_N = 'NYSE'
"""美股纳斯达克NASDAQ"""
US_OQ = 'NASDAQ'
"""美股粉单市场"""
US_PINK = 'PINK'
"""美股OTCMKTS"""
US_OTC = 'OTCMKTS'
"""未上市"""
US_PREIPO = 'PREIPO'


"""港股hk"""
HK = 'hk'

"""上证交易所sh"""
SH = 'sh'
"""深圳交易所sz"""
Expand Down Expand Up @@ -335,8 +344,12 @@ class EDataCacheType(Enum):
E_DATA_CACHE_MONGODB = 2


"""默认金融时间序列数据缓存类型为HDF5,单机固态硬盘推荐HDF5,非固态硬盘使用CSV,否则量大后hdf5写入速度无法接受"""
g_data_cache_type = EDataCacheType.E_DATA_CACHE_HDF5
# """默认金融时间序列数据缓存类型为HDF5,单机固态硬盘推荐HDF5,非固态硬盘使用CSV,否则量大后hdf5写入速度无法接受"""
# g_data_cache_type = EDataCacheType.E_DATA_CACHE_HDF5
"""对外版本由于用户电脑性能,存储空间且winodws用户,python2用户多,所以更改默认存储类型为csv"""
g_data_cache_type = EDataCacheType.E_DATA_CACHE_CSV


"""csv模式下的存储路径"""
g_project_kl_df_data_csv = path.join(g_project_data_dir, 'csv')

Expand Down
2 changes: 1 addition & 1 deletion abupy/FactorBuyBu/ABuFactorBuyWD.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- encoding:utf-8 -*-
"""
买入择时示例因子:黄金分割线买入择时因子
买入择时示例因子:日胜率均值买入择时因子
"""

from __future__ import absolute_import
Expand Down
69 changes: 48 additions & 21 deletions abupy/MarketBu/ABuDataFeed.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from ..MarketBu.ABuDataParser import BDParser, TXParser, NTParser, SNUSParser
from ..MarketBu.ABuDataParser import SNFuturesParser, SNFuturesGBParser, HBTCParser
from ..UtilBu import ABuStrUtil, ABuDateUtil, ABuMd5
from ..UtilBu.ABuDTUtil import catch_error
from ..CoreBu.ABuDeprecated import AbuDeprecated
# noinspection PyUnresolvedReferences
from ..CoreBu.ABuFixes import xrange, range, filter
Expand All @@ -38,6 +39,49 @@ def random_from_list(array):
return array[random.randrange(0, len(array))]


@AbuDeprecated('only read old symbol db, miss update!!!')
def query_symbol_sub_market(symbol):
    """
    Look up the sub-market suffix of a symbol in the local symbol database
    (TXApi.K_SYMBOLS_DB). Deprecated: the database is only read, never updated.

    The lower-cased symbol is matched against the TXApi.K_DB_TABLE_SN column
    with the pattern '<symbol>.%'; the text after the first '.' of the first
    matching row is upper-cased and returned with a leading '.'.

    :param symbol: str, symbol code without a sub-market suffix
    :return: str, '.' + upper-cased suffix of the first hit, or '' when there
             is no hit or the hit cannot be parsed
    """
    conn = sqlite.connect(TXApi.K_SYMBOLS_DB)
    try:
        cur = conn.cursor()
        # Table/column identifiers cannot be bound, so only those module constants
        # are formatted into the statement; the caller supplied value is bound as
        # a parameter so it can never break out of the string literal.
        query = "select {} from {} where {} like ?".format(
            TXApi.K_DB_TABLE_SN, TXApi.K_DB_TABLE_NAME, TXApi.K_DB_TABLE_SN)
        cur.execute(query, ('{}.%'.format(symbol.lower()),))
        results = cur.fetchall()
    finally:
        # always release the connection, even when the query raises
        conn.close()

    sub_market = ''
    if results:
        try:
            if results[0][0].find('.') > 0:
                sub_market = '.' + results[0][0].split('.')[1].upper()
        except Exception:
            # best effort: an unparsable row is logged and '' is returned
            logging.info(results)
    return sub_market


@catch_error(return_val=None, log=False)
def query_symbol_from_pinyin(pinyin):
    """
    Query the local symbol database (TXApi.K_SYMBOLS_DB) for the stock whose
    pinyin column equals the lower-cased argument.

    NOTE(review): the catch_error decorator presumably swallows any exception
    and returns None instead - confirm against ABuDTUtil.

    :param pinyin: str, pinyin spelling used as the lookup key
    :return: str, stock code of the first hit with the leading 2 character
             market prefix and any '.xxx' sub-market suffix removed,
             or None when nothing matches
    """
    conn = sqlite.connect(TXApi.K_SYMBOLS_DB)
    try:
        cur = conn.cursor()
        # The table name is a module constant; the search text comes from the
        # caller, so it is bound as a parameter instead of being formatted in.
        query = "select stockCode from {} where pinyin=?".format(TXApi.K_DB_TABLE_NAME)
        cur.execute(query, (pinyin.lower(),))
        results = cur.fetchall()
    finally:
        # close the connection on both the success and the error path
        conn.close()

    if not results:
        return None

    code = results[0][0]
    # stored stock codes look like: sh111111, usabcd.n
    # -> drop the 2 character market prefix, and for codes carrying a
    #    sub-market suffix cut at the first '.'
    dot = code.find('.')
    end = len(code) if dot < 0 else dot
    return code[2:end]


class BDApi(StockBaseMarket, SupportMixin):
"""bd数据源,支持港股,美股,a股"""

Expand Down Expand Up @@ -132,26 +176,6 @@ def __init__(self, symbol):
# 设置数据源解析对象类
self.data_parser_cls = TXParser

@AbuDeprecated('only read old symbol db, miss update!!!')
def query_symbol_sub_market(self, symbol):
    """
    Look up the sub-market suffix of a symbol in the local symbol database
    (TXApi.K_SYMBOLS_DB). Deprecated: the database is only read, never updated.

    The lower-cased symbol is matched against the TXApi.K_DB_TABLE_SN column
    with the pattern '<symbol>.%'; the text after the first '.' of the first
    matching row is upper-cased and returned with a leading '.'.

    :param symbol: str, symbol code without a sub-market suffix
    :return: str, '.' + upper-cased suffix of the first hit, or '' when there
             is no hit or the hit cannot be parsed
    """
    conn = sqlite.connect(TXApi.K_SYMBOLS_DB)
    try:
        cur = conn.cursor()
        # Only the module constants naming the table/column are formatted into
        # the statement; the caller supplied value is bound as a parameter.
        query = "select {} from {} where {} like ?".format(
            TXApi.K_DB_TABLE_SN, TXApi.K_DB_TABLE_NAME, TXApi.K_DB_TABLE_SN)
        cur.execute(query, ('{}.%'.format(symbol.lower()),))
        results = cur.fetchall()
    finally:
        # always release the connection, even when the query raises
        conn.close()

    sub_market = ''
    if results:
        try:
            if results[0][0].find('.') > 0:
                sub_market = '.' + results[0][0].split('.')[1].upper()
        except Exception:
            # best effort: an unparsable row is logged and '' is returned
            logging.info(results)
    return sub_market

def kline(self, n_folds=2, start=None, end=None):
"""日k线接口"""
cuid = ABuStrUtil.create_random_with_num_low(40)
Expand All @@ -175,7 +199,10 @@ def kline(self, n_folds=2, start=None, end=None):
# 如果已经有.了说明是大盘,大盘不需要子市场,eg:us.IXIC
sub_market = ''
else:
sub_market_map = {EMarketSubType.US_N.value: 'n', EMarketSubType.US_OQ.value: 'oq'}
# 这里tx的source不支持US_PINK, US_OTC, US_PREIPO
sub_market_map = {EMarketSubType.US_N.value: 'n', EMarketSubType.US_PINK.value: 'n',
EMarketSubType.US_OTC.value: 'n', EMarketSubType.US_PREIPO.value: 'n',
EMarketSubType.US_OQ.value: 'oq'}
sub_market = '.{}'.format(sub_market_map[self._symbol.sub_market.value])
url = TXApi.K_NET_BASE % (
market, self._symbol.value + sub_market, days,
Expand Down
14 changes: 8 additions & 6 deletions abupy/MarketBu/ABuMarket.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,11 @@
"""在market_train_test_split函数中,切割的训练集交易symbol,本地序列化存储路径的基础路径名"""
K_MARKET_TRAIN_FN_BASE = os.path.join(ABuEnv.g_project_cache_dir, 'market_train_symbols')

# Fixed symbol lists returned by the _all_us_symbol/_all_cn_symbol/_all_hk_symbol
# helpers when ABuEnv._g_enable_example_env_ipython is enabled.
# TODO: read these from the sandbox database, otherwise every later change to
# the sandbox data has to be mirrored here by hand
K_SAND_BOX_US = ['usTSLA', 'usNOAH', 'usSFUN', 'usBIDU', 'usAAPL', 'usGOOG', 'usWUBA', 'usVIPS']
K_SAND_BOX_CN = ['002230', '300104', '300059', '601766', '600085', '600036', '600809', '000002', '002594', '002739']
K_SAND_BOX_HK = ['hk03333', 'hk00700', 'hk02333', 'hk01359', 'hk00656', 'hk03888', 'hk02318']


# noinspection PyUnresolvedReferences
class MarketMixin(object):
Expand Down Expand Up @@ -140,8 +145,7 @@ def _all_us_symbol(index=False):

# noinspection PyProtectedMember
if ABuEnv._g_enable_example_env_ipython:
# TODO 从沙盒数据库里读取,否则之后有变动还需要跟着改
return ['usTSLA', 'usNOAH', 'usSFUN', 'usBIDU', 'usAAPL', 'usGOOG', 'usWUBA', 'usVIPS']
return K_SAND_BOX_US
return AbuSymbolUS().all_symbol(index=index)


Expand All @@ -153,8 +157,7 @@ def _all_cn_symbol(index=False):
"""
# noinspection PyProtectedMember
if ABuEnv._g_enable_example_env_ipython:
# TODO 从沙盒数据库里读取,否则之后有变动还需要跟着改
return ['002230', '300104', '300059', '601766', '600085', '600036', '600809', '000002', '002594', '002739']
return K_SAND_BOX_CN
return AbuSymbolCN().all_symbol(index=index)


Expand All @@ -166,8 +169,7 @@ def _all_hk_symbol(index=False):
"""
# noinspection PyProtectedMember
if ABuEnv._g_enable_example_env_ipython:
# TODO 从沙盒数据库里读取,否则之后有变动还需要跟着改
return ['hk03333', 'hk00700', 'hk02333', 'hk01359', 'hk00656', 'hk03888', 'hk02318']
return K_SAND_BOX_HK
return AbuSymbolHK().all_symbol(index=index)


Expand Down
149 changes: 146 additions & 3 deletions abupy/MarketBu/ABuSymbol.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,22 @@
from __future__ import absolute_import
from __future__ import division

from fnmatch import fnmatch

import numpy as np

from ..CoreBu.ABuEnv import EMarketTargetType, EMarketSubType
from ..CoreBu.ABuFixes import six
from ..UtilBu.ABuLazyUtil import LazyFunc


# noinspection PyProtectedMember
def code_to_symbol(code):
def code_to_symbol(code, rs=True):
"""
解析code成Symbol,如果code中带有市场编码直接用该市场,否则进行查询所属市场信息,
如果最后也没有发现symbol所在的市场,会向外raise ValueError
:param code: str对象,代码 如:300104,sz300104,usTSLA
:param rs: 没有匹配上是否对外抛异常,默认True
:return: Symbol对象
"""
from ..MarketBu.ABuSymbolFutures import AbuFuturesCn, AbuFuturesGB
Expand Down Expand Up @@ -64,7 +69,8 @@ def code_to_symbol(code):
market = EMarketTargetType.E_MARKET_TARGET_HK
sub_market = EMarketSubType.HK
return Symbol(market, sub_market, code)
raise TypeError('cn symbol len = 6, hk symbol len = 5')
if rs:
raise TypeError('cn symbol len = 6, hk symbol len = 5')
elif code.isalpha() and code in Symbol.HK_INDEX:
# 全字母且匹配港股大盘'HSI', 'HSCEI', 'HSCCI'
market = EMarketTargetType.E_MARKET_TARGET_HK
Expand Down Expand Up @@ -101,7 +107,144 @@ def code_to_symbol(code):
market = EMarketTargetType.E_MARKET_TARGET_FUTURES_CN
return Symbol(market, sub_market, futures_code)

raise ValueError('arg code :{} format dt support'.format(code))
if rs:
raise ValueError('arg code :{} format dt support'.format(code))


def __search(market_df, search_match, search_code, search_result, match_key='co_name'):
    """
    Run one search against a single market table and write every hit into
    search_result (mutated in place) as {symbol: market_df[match_key] value}.

    Steps:
    1. exact match of search_code against the symbol column
    2. only when step 1 found nothing: resolve search_code as pinyin and
       exact match the resolved symbol code
    3. always: wildcard match of search_match against the match_key column,
       compared case-insensitively on every platform

    :param market_df: pd.DataFrame with a 'symbol' column and a match_key column
    :param search_match: str, fnmatch style pattern, eg: '*gold*'
    :param search_code: str, symbol code to match exactly ('' when unknown)
    :param search_result: dict collecting the hits, updated in place
    :param match_key: name of the descriptive column to match and to report,
                      default 'co_name'
    """

    def _match_exact_code(_match_code):
        """Record the first row whose symbol equals _match_code, return True on a hit"""
        hit_df = market_df[market_df.symbol == _match_code]
        if hit_df.empty:
            return False
        search_result[hit_df.symbol.values[0]] = hit_df[match_key].values[0]
        return True

    def _match_pinyin(_match_code):
        """Resolve _match_code as pinyin and record the symbol it stands for"""
        from ..MarketBu.ABuDataFeed import query_symbol_from_pinyin
        pinyin_symbol = query_symbol_from_pinyin(_match_code)
        if pinyin_symbol is None:
            return
        # the code found via pinyin has to be normalised into a queryable symbol code
        search_symbol = code_to_symbol(pinyin_symbol, rs=False)
        if search_symbol is None:
            return
        _match_exact_code(search_symbol.symbol_code)

    def _match_fuzzy_name(_search_match):
        """Record every row whose match_key text matches the wildcard pattern"""
        # fnmatch() only folds case where os.path.normcase does (eg: windows),
        # lower-casing both sides gives the same result on every platform
        pattern = _search_match.lower()
        hit_mask = market_df[match_key].apply(lambda name: fnmatch(name.lower(), pattern))
        hit_df = market_df[hit_mask]
        search_result.update(zip(hit_df.symbol, hit_df[match_key]))

    # exact symbol match first, pinyin lookup only as the fallback
    if not _match_exact_code(search_code):
        _match_pinyin(search_code)
    # the fuzzy match on company / product names is always executed
    _match_fuzzy_name(search_match)


def _us_search(search_match, search_code, search_result):
    """Keyword search over the US stock symbol table, hits are written into search_result"""
    from ..MarketBu.ABuSymbolStock import AbuSymbolUS
    us_df = AbuSymbolUS().df
    __search(us_df, search_match, search_code, search_result)


def _cn_search(search_match, search_code, search_result):
    """Keyword search over the China A-share symbol table, hits are written into search_result"""
    from ..MarketBu.ABuSymbolStock import AbuSymbolCN
    cn_df = AbuSymbolCN().df
    __search(cn_df, search_match, search_code, search_result)


def _hk_search(search_match, search_code, search_result):
    """Keyword search over the Hong Kong stock symbol table, hits are written into search_result"""
    from ..MarketBu.ABuSymbolStock import AbuSymbolHK
    hk_df = AbuSymbolHK().df
    __search(hk_df, search_match, search_code, search_result)


def _fcn_search(search_match, search_code, search_result):
    """Keyword search over domestic (CN) futures, matched on the 'product' column"""
    from ..MarketBu.ABuSymbolFutures import AbuFuturesCn
    futures_df = AbuFuturesCn().futures_cn_df
    __search(futures_df, search_match, search_code, search_result, match_key='product')


def _fgb_search(search_match, search_code, search_result):
    """Keyword search over international futures, matched on the 'product' column"""
    from ..MarketBu.ABuSymbolFutures import AbuFuturesGB
    futures_df = AbuFuturesGB().futures_gb_df
    __search(futures_df, search_match, search_code, search_result, match_key='product')


# TODO 币类匹配统一标准规范
def _tc_search(search_match, search_code, search_result):
    """
    Keyword search over the supported coins: a coin is written into
    search_result when its name matches the search_match wildcard pattern
    or its code equals search_code.
    """
    coins = (('btc', '比特币'), ('ltc', '莱特币'))
    for coin_code, coin_name in coins:
        name_hit = fnmatch(coin_name, search_match)
        if name_hit or coin_code == search_code:
            search_result[coin_code] = coin_name


def search_to_symbol_dict(search):
    """
    Public symbol search entry point: exact symbol code match, pinyin match
    and wildcard match on company / product names, run over the coin,
    China A-share, US, Hong Kong, domestic futures and international
    futures tables.

    The lower-cased search text is tried as a whole first; as long as nothing
    has been found the last character is dropped and the search is repeated,
    until there is a hit or the text is used up.

    eg:
        in:
            search_to_symbol_dict('黄金')
        out:
            {'002155': '湖南黄金',
             '600489': '中金黄金',
             '600547': '山东黄金',
             '600766': '园城黄金',
             '600988': '赤峰黄金',
             'ABX': '巴里克黄金',
             'AU0': '黄金',
             'DGL': '黄金基金-PowerShares',
             'DGLD': '黄金3X做空-VelocityShares',
             'DGP': '黄金2X做多-DB',
             'DGZ': '黄金做空-PowerShares',
             'DZZ': '黄金2X做空-DB',
             'EGO': '埃尔拉多黄金公司',
             'GC': '纽约黄金',
             'GEUR': 'Gartman欧元黄金ETF-AdvisorShares ',
             'GLD': '黄金ETF-SPDR',
             'GLL': '黄金2X做空-ProShares',
             'GYEN': 'Gartman日元黄金ETF-AdvisorShares',
             'HMY': '哈莫尼黄金',
             'IAU': '黄金ETF-iShares',
             'KGC': '金罗斯黄金',
             'LIHR': '利希尔黄金',
             'PRME': '全球黄金地段房地产ETF-First Trust Heitman',
             'RGLD': '皇家黄金',
             'UGL': '黄金2x做多-ProShares',
             'UGLD': '黄金3X做多-VelocityShares'}
    :param search: eg:'黄金', '58'
    :return: symbol dict, {symbol code: company or product name}
    """
    # every market searcher shares the signature (search_match, search_code, search_result);
    # the hk searcher is part of the list so that Hong Kong stocks can be found as well
    market_searchers = (_tc_search, _cn_search, _us_search, _hk_search, _fcn_search, _fgb_search)

    search_symbol_dict = {}
    search = search.lower()
    while not search_symbol_dict and search:
        # wildcard pattern used for the fuzzy name match
        search_match = '*{}*'.format(search)
        # symbol code used for the exact match and the pinyin match,
        # rs=False: no exception when the text is not a valid code
        search_symbol = code_to_symbol(search, rs=False)
        search_code = '' if search_symbol is None else search_symbol.symbol_code
        # shrink the search text by one character for the next round
        search = search[:-1]
        # search every market in turn
        for market_search in market_searchers:
            market_search(search_match, search_code, search_symbol_dict)
    return search_symbol_dict


class Symbol(object):
Expand Down
Loading

0 comments on commit 179a267

Please sign in to comment.