Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/stevenj/KiCost into steve…
Browse files Browse the repository at this point in the history
…nj-master

Conflicts:
	kicost/__main__.py
	kicost/kicost.py

Add code to calculate costs for different schematic variants by including only those parts whose variant-qualified field labels (e.g., 'kicost.<variant>:<field_name>') match a regular expression passed as an argument.
  • Loading branch information
Dave Vandenbout committed Jan 26, 2016
2 parents d50bb9c + 912774b commit bfc559b
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 41 deletions.
17 changes: 11 additions & 6 deletions kicost/__main__.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
# MIT license
#
#
# Copyright (C) 2015 by XESS Corporation
#
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
Expand Down Expand Up @@ -60,6 +60,11 @@ def main():
type=str,
metavar='file.xlsx',
help='Generated cost spreadsheet.')
parser.add_argument('-var', '--variant',
nargs='?',
type=str,
default='',
help='schematic variant name filter')
parser.add_argument('-w', '--overwrite',
action='store_true',
help='Allow overwriting of an existing spreadsheet.')
Expand Down Expand Up @@ -123,9 +128,9 @@ def main():
num_processes = args.num_processes

kicost(in_file=args.input, out_filename=args.output,
ignore_fields=args.ignore_fields, num_processes=num_processes)
ignore_fields=args.ignore_fields, variant=args.variant,
num_processes=num_processes)


###############################################################################
# Main entrypoint.
###############################################################################
Expand Down
91 changes: 56 additions & 35 deletions kicost/kicost.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,11 +103,11 @@
DEBUG_OBSESSIVE = logging.DEBUG-2


def kicost(in_file, out_filename, ignore_fields, num_processes):
def kicost(in_file, out_filename, ignore_fields, variant, num_processes):
'''Take a schematic input file and create an output file with a cost spreadsheet in xlsx format.'''

# Get groups of identical parts.
parts = get_part_groups(in_file, ignore_fields)
parts = get_part_groups(in_file, ignore_fields, variant)

# Create an HTML page containing all the local part information.
local_part_html = create_local_part_html(parts)
Expand Down Expand Up @@ -137,7 +137,7 @@ def kicost(in_file, out_filename, ignore_fields, num_processes):
parts[id].qty_avail = qty_avail

# Create the part pricing spreadsheet.
create_spreadsheet(parts, out_filename)
create_spreadsheet(parts, out_filename, variant)

# Print component groups for debugging purposes.
if logger.isEnabledFor(DEBUG_DETAILED):
Expand All @@ -158,34 +158,48 @@ def kicost(in_file, out_filename, ignore_fields, num_processes):
pass
print()


class IdenticalComponents(object):
    '''Temporary class for storing part group information.

    The class defines no attributes or methods of its own; code that
    uses it attaches whatever attributes it needs to each instance.
    '''

def get_part_groups(in_file, ignore_fields):
def get_part_groups(in_file, ignore_fields, variant):
'''Get groups of identical parts from an XML file and return them as a dictionary.'''

ign_fields = [str(f.lower()) for f in ignore_fields]

def extract_fields(part, variant=''):
    '''Extract XML fields from the part in a library or schematic.

    Field names are lower-cased before any comparison. A field whose name
    is in the enclosing ignore list is skipped. A field whose name has no
    separator is stored under that name as a global value. Any other field
    is kept only if its name is in the 'kicost' namespace, optionally
    qualified by a variant: 'kicost:<name>' or 'kicost.<variant>:<name>'.
    Fields in any other namespace are dropped.

    Args:
        part: Parsed XML element for a library part or schematic
            component. It is searched for a 'fields' child containing
            'field' elements that carry a 'name' attribute.
        variant: Regular expression (matched case-insensitively) naming
            the variant(s) whose 'kicost.<variant>:' fields are accepted.
            Unqualified 'kicost:' fields are always accepted.

    Returns:
        A dict mapping field names to their string values. The dict is
        empty if the part has no 'fields' element.
    '''

    # Build the pattern once, outside the loop. The variant is wrapped in
    # its own non-capturing group so that an alternation inside it (e.g.
    # 'a|b') stays confined to the variant name instead of splitting the
    # surrounding '\.<variant>' expression. The raw string keeps '\.' from
    # being treated as an (invalid) string escape sequence.
    key_re = r'kicost(\.(?:{}))?:(?P<name>.*)'.format(variant)

    fields = {}
    try:
        for f in part.find('fields').find_all('field'):
            # Store the name and value for each kicost-related field.
            name = str(f['name'].lower())  # Ignore case of field name.
            if name in ign_fields:
                continue  # Ignore fields in the ignore list.

            if SEPRTR not in name:
                # No separator, so get global field value.
                fields[name] = str(f.string)
                continue

            # Look for fields that start with 'kicost' and possibly a
            # dot-separated variant name. Anything else is in a
            # non-kicost namespace and is skipped.
            mtch = re.match(key_re, name, flags=re.IGNORECASE)
            if not mtch:
                continue

            # The field name is whatever follows the leading 'kicost'
            # and optional variant.
            fld_nm = mtch.group('name')

            # If the field isn't a manufacturer's part number or a
            # distributor's catalog number, and it doesn't already start
            # with a distributor name and separator, assign it to the
            # 'local' distributor.
            if (fld_nm != 'manf#'
                    and fld_nm[:-1] not in distributors
                    and SEPRTR not in fld_nm):
                fld_nm = 'local:' + fld_nm
            fields[fld_nm] = str(f.string)

    except AttributeError:
        pass  # No fields found for this part.
    return fields
Expand All @@ -201,7 +215,7 @@ def extract_fields(part):
for p in root.find('libparts').find_all('libpart'):

# Get the values for the fields in each library part (if any).
fields = extract_fields(p)
fields = extract_fields(p, variant)

# Store the field dict under the key made from the
# concatenation of the library and part names.
Expand Down Expand Up @@ -241,10 +255,10 @@ def extract_fields(part):
except AttributeError:
pass

# Get the values for any other kicost-related fields in the part
# Get the values for any other kicost-related fields in the part
# (if any) from the schematic. These will override any field values
# from the part library.
fields.update(extract_fields(c))
fields.update(extract_fields(c, variant))

# Store the fields for the part using the reference identifier as the key.
components[str(c['ref'])] = fields
Expand Down Expand Up @@ -316,7 +330,7 @@ def extract_fields(part):
if components[ref].get('manf#') == manf_num:
sub_group.refs.append(ref)
new_component_groups.append(sub_group)

# Now get the values of all fields within the members of a group.
# These will become the field values for ALL members of that group.
for grp in new_component_groups:
Expand All @@ -338,10 +352,10 @@ def extract_fields(part):
# Now return a list of the groups without their hash keys.
return list(new_component_groups.values())


def create_local_part_html(parts):
'''Create HTML page containing info for local (non-webscraped) parts.'''

global distributors

logger.log(DEBUG_OVERVIEW, 'Create HTML page for parts with custom pricing...')
Expand All @@ -353,7 +367,7 @@ def create_local_part_html(parts):
# Find the manufacturer's part number if it exists.
pn = p.fields.get('manf#') # Returns None if no manf# field.

# Find the various distributors for this part by
# Find the various distributors for this part by
# looking for leading fields terminated by SEPRTR.
for key in p.fields:
try:
Expand All @@ -380,7 +394,7 @@ def make_random_catalog_number(p):
hash_fields = {k: p.fields[k] for k in p.fields}
hash_fields['dist'] = dist
return '#{0:08X}'.format(abs(hash(tuple(sorted(hash_fields.items())))))

cat_num = cat_num or pn or make_random_catalog_number(p)
p.fields[dist+':cat#'] = cat_num # Store generated cat#.
with tag('div', klass=dist+SEPRTR+cat_num):
Expand All @@ -402,14 +416,21 @@ def make_random_catalog_number(p):
return html


def create_spreadsheet(parts, spreadsheet_filename):
def create_spreadsheet(parts, spreadsheet_filename, variant):
'''Create a spreadsheet using the info for the parts (including their HTML trees).'''

logger.log(DEBUG_OVERVIEW, 'Create spreadsheet...')

DEFAULT_BUILD_QTY = 100 # Default value for number of boards to build.
WORKSHEET_NAME = 'KiCost' # Default name for part-pricing worksheet.

if len(variant) > 0:
# Append an indication of the variant to the worksheet title.
# Remove any special characters that might be illegal in a
# worksheet name since the variant might be a regular expression.
WORKSHEET_NAME = WORKSHEET_NAME + '.' + re.sub(
'[\[\]\\\/\|\?\*\:\(\)]','_',variant)

# Create spreadsheet file.
with xlsxwriter.Workbook(spreadsheet_filename) as workbook:

Expand Down Expand Up @@ -1331,7 +1352,7 @@ def get_local_qty_avail(html_tree):
return int(re.sub('[^0-9]', '', qty_str))
except ValueError:
return 0


def get_user_agent():
# The default user_agent_list comprises chrome, IE, firefox, Mozilla, opera, netscape.
Expand Down Expand Up @@ -1365,7 +1386,7 @@ def FakeBrowser(url):
req.add_header('User-agent', get_user_agent())
return req


class PartHtmlError(Exception):
'''Exception for failed retrieval of an HTML parse tree for a part.'''
pass
Expand Down Expand Up @@ -1650,15 +1671,15 @@ def get_newark_part_html_tree(dist, pn, url=None, descend=2):

# I don't know what happened here, so give up.
raise PartHtmlError


def get_local_part_html_tree(dist, pn, url=None):
'''Extract the HTML tree from the HTML page for local parts.'''

# Extract the HTML tree from the local part HTML page.
html = local_part_html
tree = BeautifulSoup(html, 'lxml')

try:
# Find the DIV in the tree for the given part and distributor.
class_ = dist + SEPRTR + pn
Expand Down Expand Up @@ -1686,7 +1707,7 @@ def get_part_html_tree(part, dist, distributor_dict, local_html):
# Get function name for getting the HTML tree for this part from this distributor.
function = distributor_dict[dist]['function']
get_dist_part_html_tree = THIS_MODULE['get_{}_part_html_tree'.format(function)]

try:
# Search for part information using one of the following:
# 1) the distributor's catalog number.
Expand All @@ -1704,23 +1725,23 @@ def get_part_html_tree(part, dist, distributor_dict, local_html):
# If no HTML page was found, then return a tree for an empty page.
return BeautifulSoup('<html></html>', 'lxml'), ''


def scrape_part(args):
'''Scrape the data for a part from each distributor website or local HTML.'''

id, part, distributor_dict, local_html = args # Unpack the arguments.

# Create dictionaries for the various items of part data from each distributor.
url = {}
part_num = {}
price_tiers = {}
qty_avail = {}

# Scrape the part data from each distributor website or the local HTML.
for d in distributor_dict:
# Get the HTML tree for the part.
html_tree, url[d] = get_part_html_tree(part, d, distributor_dict, local_html)

# Get the function names for getting the part data from the HTML tree.
function = distributor_dict[d]['function']
get_dist_price_tiers = THIS_MODULE['get_{}_price_tiers'.format(function)]
Expand All @@ -1731,6 +1752,6 @@ def scrape_part(args):
part_num[d] = get_dist_part_num(html_tree)
qty_avail[d] = get_dist_qty_avail(html_tree)
price_tiers[d] = get_dist_price_tiers(html_tree)

# Return the part data.
return id, url, part_num, price_tiers, qty_avail

0 comments on commit bfc559b

Please sign in to comment.