diff --git a/mlbgame/data.py b/mlbgame/data.py index eab9ab4..59547ad 100644 --- a/mlbgame/data.py +++ b/mlbgame/data.py @@ -51,6 +51,16 @@ def get_box_score(game_id): except HTTPError: raise ValueError('Could not find a game with that id.') +def get_raw_box_score(game_id): + """Return the raw box score file of a game with matching id.""" + year, month, day = get_date_from_game_id(game_id) + try: + return urlopen(GAME_URL.format(year, month, day, + game_id, + 'rawboxscore.xml')) + except HTTPError: + raise ValueError('Could not find a game with that id.') + def get_game_events(game_id): """Return the game events file of a game with matching id.""" diff --git a/mlbgame/stats.py b/mlbgame/stats.py index f8c61c5..0f7501e 100644 --- a/mlbgame/stats.py +++ b/mlbgame/stats.py @@ -8,83 +8,147 @@ import lxml.etree as etree - def __player_stats_info(data, name): home = [] away = [] for y in data: - # loops through pitchers + # loops through pitchers and batters for x in y.findall(name): stats = {} # loop through and save stats for i in x.attrib: stats[i] = x.attrib[i] # apply to correct list - if y.attrib['team_flag'] == 'home': - home.append(stats) - elif not home: - away.append(stats) + home.append(stats) if y.attrib['team_flag'] == 'home' else away.append(stats) return (home, away) +def __raw_player_stats_info(data): + home_pitchers = [] + away_pitchers = [] + home_batters = [] + away_batters = [] + + for team in data.findall('team'): + home_flag = team.attrib['team_flag'] == 'home' + pitching = team.find('pitching') + for pitcher in pitching.findall('pitcher'): + stats = {} + for i in pitcher.attrib: + stats[i] = pitcher.attrib[i] + home_pitchers.append(stats) if home_flag else away_pitchers.append(stats) + + batting = team.find('batting') + for batter in batting.findall('batter'): + stats = {} + for i in batter.attrib: + stats[i] = batter.attrib[i] + home_batters.append(stats) if home_flag else away_batters.append(stats) + home = { + 'pitchers': home_pitchers, + 'batters': home_batters + } + + away = { + 'pitchers': away_pitchers, + 'batters': away_batters + } + return (home, away) def player_stats(game_id): - """Return dictionary of individual stats of a game with matching id.""" + """Return dictionary of individual stats of a game with matching id. + + The additional pitching/batting is mostly the same stats, except it contains + some useful stats such as groundouts/flyouts per pitcher (go/ao). MLB decided + to have two box score files, thus we return the data from both. + """ # get data from data module - data = mlbgame.data.get_box_score(game_id) + box_score = mlbgame.data.get_box_score(game_id) + raw_box_score = mlbgame.data.get_raw_box_score(game_id) # parse XML - parsed = etree.parse(data) - root = parsed.getroot() + box_score_tree = etree.parse(box_score).getroot() + raw_box_score_tree = etree.parse(raw_box_score).getroot() # get pitching and batting info - pitching = root.findall('pitching') - batting = root.findall('batting') + pitching = box_score_tree.findall('pitching') + batting = box_score_tree.findall('batting') # get parsed stats pitching_info = __player_stats_info(pitching, 'pitcher') batting_info = __player_stats_info(batting, 'batter') + # get parsed additional stats + additional_stats = __raw_player_stats_info(raw_box_score_tree) + addl_home_pitching = additional_stats[0]['pitchers'] + addl_home_batting = additional_stats[0]['batters'] + addl_away_pitching = additional_stats[1]['pitchers'] + addl_away_batting = additional_stats[1]['batters'] + output = { 'home_pitching': pitching_info[0], 'away_pitching': pitching_info[1], 'home_batting': batting_info[0], - 'away_batting': batting_info[1] + 'away_batting': batting_info[1], + 'home_additional_pitching': addl_home_pitching, + 'away_additional_pitching': addl_away_pitching, + 'home_additional_batting': addl_home_batting, + 'away_additional_batting': addl_away_batting } return output - -def team_stats(game_id): - """Return team stats of a game with matching id.""" - # get data from data module - data = mlbgame.data.get_box_score(game_id) - # parse XML - parsed = etree.parse(data) - root = parsed.getroot() - # get pitching and batting ingo - pitching = root.findall('pitching') - batting = root.findall('batting') - # dictionary for output - output = {} - # loop through pitching info - for x in pitching: +def __team_stats_info(data, output, output_key): + for x in data: stats = {} # loop through stats and save for y in x.attrib: stats[y] = x.attrib[y] # apply to correct team if x.attrib['team_flag'] == 'home': - output['home_pitching'] = stats + # Example: 'home_batting' when output_key is 'batting' + output['home_' + output_key] = stats elif x.attrib['team_flag'] == 'away': - output['away_pitching'] = stats - # loop through pitching info - for x in batting: + output['away_' + output_key] = stats + return output + +def __raw_team_stats_info(data, output): + for team in data.findall('team'): + home_flag = team.attrib['team_flag'] == 'home' + pitching = team.find('pitching') stats = {} - # loop through stats and save - for y in x.attrib: - stats[y] = x.attrib[y] - # apply to correct team - if x.attrib['team_flag'] == 'home': - output['home_batting'] = stats - elif x.attrib['team_flag'] == 'away': - output['away_batting'] = stats + for stat in pitching.attrib: + stats[stat] = pitching.attrib[stat] + if home_flag: + output['home_additional_pitching'] = stats + else: + output['away_additional_pitching'] = stats + + stats = {} + batting = team.find('batting') + for stat in batting.attrib: + stats[stat] = batting.attrib[stat] + if home_flag: + output['home_additional_batting'] = stats + else: + output['away_additional_batting'] = stats return output +def team_stats(game_id): + """Return team stats of a game with matching id. + + The additional pitching/batting is mostly the same stats. MLB decided + to have two box score files, thus we return the data from both. + """ + # get data from data module + box_score = mlbgame.data.get_box_score(game_id) + raw_box_score = mlbgame.data.get_raw_box_score(game_id) + # parse XML + box_score_tree = etree.parse(box_score).getroot() + raw_box_score_tree = etree.parse(raw_box_score).getroot() + # get pitching and batting ingo + pitching = box_score_tree.findall('pitching') + batting = box_score_tree.findall('batting') + # dictionary for output + output = {} + output = __team_stats_info(pitching, output, 'pitching') + output = __team_stats_info(batting, output, 'batting') + output = __raw_team_stats_info(raw_box_score_tree, output) + return output class Stats(object): """Hold stats information for a game. @@ -95,6 +159,10 @@ class Stats(object): game_id home_batting home_pitching + away_additional_pitching + away_additional_batting + home_additional_pitching + home_additional_batting """ def __init__(self, data, game_id, player): @@ -104,7 +172,8 @@ def __init__(self, data, game_id, player): """ self.game_id = game_id output = {'home_pitching': [], 'away_pitching': [], 'home_batting': [], - 'away_batting': []} + 'away_batting': [], 'home_additional_pitching': [], 'home_additional_batting': [], + 'away_additional_pitching': [], 'away_additional_batting': []} for y in data: # create objects for all data if player: @@ -118,7 +187,10 @@ def __init__(self, data, game_id, player): self.away_pitching = output['away_pitching'] self.home_batting = output['home_batting'] self.away_batting = output['away_batting'] - + self.home_additional_pitching = output['home_additional_pitching'] + self.away_additional_pitching = output['away_additional_pitching'] + self.home_additional_batting = output['home_additional_batting'] + self.away_additional_batting = output['away_additional_batting'] class PlayerStats(mlbgame.object.Object): """Holds stats information for a player.