-
Notifications
You must be signed in to change notification settings - Fork 1
/
convert-mol.py
executable file
·126 lines (88 loc) · 3.53 KB
/
convert-mol.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
#!/usr/bin/env python3
"""
Extract bonds from .mol file for building an atoms_list file for Blacomcalc
Author: (c) Matteo Paolieri, University of Cologne, Dec 2020
Version: 1.2
License: MIT
Docs: https://github.com/mtplr/blacomcalc
"""
import argparse
import os
def change_btype(bt):
    """Translate a numeric bond order into its one-letter code.

    Mapping: 1 -> 's', 2 -> 'd', 3 -> 't'.

    Args:
        bt: Bond order, anything ``int()`` accepts (typically a digit string).

    Returns:
        The letter for orders 1-3. For every other value a message is
        printed and the value itself is returned: as an ``int`` when the
        conversion succeeded, otherwise exactly as it was passed in.
    """
    try:
        bt = int(bt)
    except Exception as err:
        # Conversion failure is only reported; the unconverted value
        # falls through to the checks below.
        print(f'Error with bond type conversion occurred: {err}')

    for order, letter in ((1, 's'), (2, 'd'), (3, 't')):
        if bt == order:
            return letter

    if bt == 4:
        print('Found a bond order 4 ("aromaticity"). Not supported. Please check your bond types.')
    else:
        print('Problem with the bond type conversion.')
    return bt
def _parse_bond_line(line):
    """Return the (atom1, atom2, bond_type) digit strings of a bond line, or None.

    The values are taken from fixed 3-character columns (0-2, 3-5, 6-8)
    instead of whitespace splitting: .mol files allow up to 999 atoms, and
    a 3-digit atom number fills its column and touches the next field.
    None is returned when any of the three columns is missing, empty or
    contains something other than digits and spaces.
    """
    fields = tuple(line[start:start + 3].replace(' ', '') for start in (0, 3, 6))
    if all(field.isdigit() for field in fields):
        return fields
    return None


def extract(molfile, n):
    """Write the bonds of a .mol file as a ``#M``-delimited bonds list.

    The bond block is taken to start at the first line that splits into
    exactly 7 whitespace-separated fields and parses as a bond line, and
    to end at the first following line that does not parse as a bond line
    (e.g. ``M  END``, other property lines, or a blank line).

    The output is written to ``bonds-<name>.txt`` in the same directory as
    ``molfile``, where ``<name>`` is the input file name without its
    extension. It starts with a ``#M`` line, then holds one
    ``atom1 atom2 letter`` line per bond (letter from ``change_btype``),
    with another ``#M`` line after every ``n``-th bond.

    Args:
        molfile: Path of the input .mol file.
        n: Number of bond lines between two ``#M`` delimiters.

    Raises:
        OSError: If the input cannot be read or the output cannot be written.
        ValueError: If the file contains no recognisable bond line.
        ZeroDivisionError: If ``n`` is 0.
    """
    # A single read is enough; the lines are scanned in memory afterwards.
    with open(molfile, 'r') as f:
        lines = f.readlines()

    # Locate the first bond line. Distinguishing "not found" from "found at
    # index 0" keeps the header from being parsed as bonds.
    start_line = next(
        (idx for idx, line in enumerate(lines)
         if len(line.split()) == 7 and _parse_bond_line(line) is not None),
        None,
    )
    if start_line is None:
        raise ValueError(f'No bond line with 7 fields found in {molfile}.')

    # Collect bonds until the block ends, rather than assuming that exactly
    # one trailing line follows it.
    bonds = []
    for line in lines[start_line:]:
        parsed = _parse_bond_line(line)
        if parsed is None:
            break
        bonds.append(parsed)

    # Build the output name from the file name only, so a directory part or
    # an extension of a different length cannot corrupt it.
    folder, filename = os.path.split(molfile)
    stem = os.path.splitext(filename)[0]
    out_path = os.path.join(folder, f'bonds-{stem}.txt')

    with open(out_path, 'w') as f:
        print('#M', file=f)
        for count, (a1, a2, bt) in enumerate(bonds, start=1):
            print(f'{a1} {a2} {change_btype(bt)}', file=f)
            if count % n == 0:  # close a group after every n-th bond
                print('#M', file=f)
def valid_file(param):
    """Check that a command-line path has a ``.mol`` extension.

    Used as an argparse ``type`` callback. The check is advisory: a
    mismatch only prints a message and the path is still returned.

    Args:
        param: Path string given on the command line.

    Returns:
        ``param`` unchanged.
    """
    ext = os.path.splitext(param)[1]
    # Compare for equality. A substring test against '.mol' would also
    # accept '', '.m', '.mo', '.ol', ... without any message.
    if ext.lower() != '.mol':
        print(f'Error: file must have a .mol extension. Used {param} instead.')
    return param
def main(mol, n):
    """Run the extraction and report any failure on standard output.

    Args:
        mol: Path of the input .mol file, passed through to ``extract``.
        n: Group size, passed through to ``extract``.

    Returns:
        None. Exceptions raised by ``extract`` are not propagated; they are
        printed as ``Error: <message>``.
    """
    try:
        extract(mol, n)
    except Exception as err:  # top-level boundary: report instead of traceback
        message = f'Error: {err}'
        print(message)
if __name__ == "__main__":
    # Command-line entry point: parse the two positional arguments and
    # hand them to main().
    description = ('Extract bond information from .mol file and make it '
                   'readable for Blacomcalc atoms_list file.')
    atoms_help = ("Input: number of atoms of each molecule, e.g. "
                  "for 5 molecules of H2O: n = 3. The file will be splitted in "
                  "5 parts delimited by #M.")

    cli = argparse.ArgumentParser(description=description)
    # valid_file only checks the extension of the path it is given.
    cli.add_argument('mol_file', help="Input .mol file", type=valid_file)
    cli.add_argument('number_atoms', help=atoms_help, type=int)

    options = cli.parse_args()
    main(options.mol_file, options.number_atoms)