-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcommand_generator.py
116 lines (104 loc) · 5.22 KB
/
command_generator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
"""This script is purely to quickly get commands to use as input.
This generator has only been used on Windows and might not work on other platforms.
"""
from typing import Any
import pyperclip
import pprint
import commandline_helper as clh
def integer_input(txt: str, optional: bool = False):
    """Prompt with *txt* until the user enters a valid integer.

    When *optional* is true, an empty answer is accepted and returned
    unchanged (as the empty string) instead of being converted.

    Returns the parsed ``int``, or ``""`` for a skipped optional prompt.
    """
    while True:
        try:
            raw = input(txt)
            if optional and not raw:
                return raw
            return int(raw)
        except ValueError:
            # Not parseable as an integer: tell the user and ask again.
            print("not a number, try again.")
def float_input(txt: str, optional: bool = False):
    """Prompt with *txt* until the user enters a valid decimal number.

    When *optional* is true, an empty answer is accepted and returned
    unchanged (as the empty string) instead of being converted.

    Returns the parsed ``float``, or ``""`` for a skipped optional prompt.
    """
    while True:
        try:
            raw = input(txt)
            if optional and not raw:
                return raw
            return float(raw)
        except ValueError:
            # Not parseable as a float: tell the user and ask again.
            print("not a decimal number, try again.")
def list_input(choices: list[Any]):
    """Ask for a space-separated selection of options from *choices*.

    The prompt is repeated until every entered token is a member of
    *choices*. Previously an invalid token only printed a warning and the
    unvalidated list was returned anyway.

    Returns the list of chosen tokens, or ``False`` when the user presses
    enter (or enters only whitespace) to request the default.
    """
    while True:
        user_input = input("options, seperated by spaces (press enter to use default): ")
        # split() without an argument ignores repeated/leading/trailing spaces,
        # so "a  b" does not produce an empty (and therefore invalid) token.
        input_lst = user_input.split()
        if not input_lst:
            return False
        if all(x in choices for x in input_lst):
            return input_lst
        print("one of the inputs is not a valid input option")
def bool_input(txt: str, o: str):
    """Ask a yes/no question and turn the answer into a command-line flag.

    *txt* describes the action being enabled; *o* is the flag name without
    its leading dash. Any non-empty answer enables the flag.

    Returns ``"-<o> "`` (with trailing space) when enabled, else ``""``.
    """
    answer = input(f"to {txt}, enter anything, else press only enter: ")
    if answer:
        return f"-{o} "
    return answer
def optional(user_input, o: str):
    """Format an optional value as a ``-<o> <value> `` command-line fragment.

    An empty string means the user skipped the prompt, so nothing is
    emitted. The comparison is against ``""`` specifically (not general
    truthiness) so that numeric zero is still written out.
    """
    if user_input == "":
        return ""
    return f"-{o} {user_input} "
def ocr_parser():
    """Interactively collect the arguments of the ``ocr_parser`` subcommand.

    Asks, in order, for the ALTO folder, the page number and an optional
    csv output path, and returns the assembled subcommand string.
    """
    folder = input("path to folder (containing ALTO files): ")
    page = integer_input("page number: ")
    output_file = input("path to file to output csv to (for no output: press enter): ")
    # The -o option is only emitted when an output path was given.
    output_str = f"-o {output_file} " if output_file != "" else ""
    return f"ocr_parser {output_str}{folder} {page}"
def ocr_file_parser():
    """Placeholder: no prompts are implemented yet, so this returns None."""
    return None
def ocr_parser_page():
    """Placeholder: no prompts are implemented yet, so this returns None."""
    return None
def txt_parser():
    """Placeholder: no prompts are implemented yet, so this returns None."""
    return None
def csv_parser():
    """Placeholder: no prompts are implemented yet, so this returns None."""
    return None
def all_pages_mapping():
    """Interactively collect the arguments of the ``all_pages_mapping`` subcommand.

    Asks for the txt and ALTO paths, the first and last page number and,
    if any output is requested, the output directory, filename and whether
    multiple pages go into a single file. Returns the assembled subcommand
    string.
    """
    txt_f = input("path to txt file: ")
    alto_f = input("path to ALTO file: ")
    start = integer_input("first page number: ")
    end = integer_input("last page number: ")
    print("output options:\nfinal: store the final matchings\nocr_wl: store the ocr wordlist\ntr_wl: store the transcription wordlist\nwls: store both wordlists\nall: store all of the above\n")
    output = input("what to output (for no output: press enter): ")

    output_str = ""
    if output != "":
        # Follow-up questions are only relevant when something is stored.
        output_dir = input("directory to store the output file(s) in: ")
        output_fn = input("filename of the output file(s): ")
        output_sf = bool_input("store the output of multiple pages in one file", "sf")
        print("The default types are chosen for matching/wordlist output.")
        output_str = f"-o {output} -dir {output_dir} -fn {output_fn} {output_sf}"

    return f"all_pages_mapping {output_str}{txt_f} {alto_f} {start} {end}"
# Script body: ask which subcommand to build, collect the global options,
# then print the full command and copy it to the clipboard.

# Maps the short code typed by the user to the function that builds the subcommand.
subprocesses = {
    "o": ocr_parser,
    "of": ocr_file_parser,
    "op": ocr_parser_page,
    "t": txt_parser,
    "c": csv_parser,
    "a": all_pages_mapping,
}
print("Input codes for processes:")
pprint.pprint({k: v.__name__ for k, v in subprocesses.items()})
subprocess = input("process to execute: ")
if subprocess not in subprocesses:
    print("not one of the options")
    # Raise SystemExit directly: the `exit` helper is injected by the `site`
    # module and is not guaranteed to exist. A non-zero status marks the
    # invalid choice as a failure.
    raise SystemExit(1)
subcommand = subprocesses[subprocess]()

print("options to print to console, default is only updates:")
pprint.pprint(clh.PrintTypes.get_all_choices_with_expl())
print_options = list_input(clh.PrintTypes.get_all_choices())
# list_input returns False when the default is requested; otherwise every
# chosen option becomes its own "-p <option> " fragment.
print_options_str = "".join(f"-p {option} " for option in print_options) if print_options else ""

print("Some available similarity sets:\ninput/configs/basic_sim_sets.txt (default)\ninput/configs/extended_sim_sets.txt\ninput/configs/ultimate_sim_sets.txt")
simset = input("file containing the similarity sets (press enter to use default): ")
simset_str = "" if simset == "" else f"-sise {simset} "

icap = bool_input("ignore differences in capitalization when matching", "icap")
ch = bool_input("make the difference between two strings 1 if only one of them is a number", "ch")
ei = bool_input("store extra information from the ALTO file", "ei")
cmw = optional(integer_input("the maximum amount of words that can be grouped together into one word (pressing enter gives default 2): ", True), "cmw")
ma = optional(integer_input("the number of matchings to execute (pressing enter gives default 1): ", True), "ma")
inma = optional(float_input("upper threshold for valid matches (pressing enter gives default 0.4): ", True), "inma")
# The three options below were previously all emitted as "-inma" (copy-paste
# slip), so they clashed with the match threshold instead of setting their own
# value. The flag names now follow the variable-name pattern used by every
# option above.
# NOTE(review): confirm -insa/-inja/-incr against main.py's argument parser.
insa = optional(float_input("upper threshold for valid split matches (pressing enter gives default 0.5): ", True), "insa")
inja = optional(float_input("upper threshold for valid join matches (pressing enter gives default 0.5): ", True), "inja")
incr = optional(float_input("the number to increase the threshold with after each matching (pressing enter gives default 0.0): ", True), "incr")

command = f"python main.py {print_options_str}{simset_str}{icap}{ch}{ei}{cmw}{ma}{inma}{insa}{inja}{incr}{subcommand}"
print(command)
pyperclip.copy(command)