Module sentspace.package_lexical
Expand source code
from pathlib import Path
import argparse
import pickle
import pandas as pd
def main(**kwargs):
    """Package one column of a CSV file as a pickled word -> norm lookup table.

    Options are parsed from the command line (``sys.argv``): the positional
    ``input_file`` plus ``--word_column``, ``--norm_column``, ``--norm_name``
    and ``--output_directory``.  The resulting ``dict`` is pickled to
    ``<output_directory>/<norm_name>.pkl``; ``norm_name`` falls back to
    ``norm_column`` when it is not supplied.  The output directory is
    created if it does not exist.

    Rows whose word cell is empty are skipped.  When a word occurs on more
    than one row, the value from the last such row is kept.

    Args:
        **kwargs: Accepted for call compatibility and ignored; every option
            is taken from the command line.

    Raises:
        KeyError: If the word column or the norm column is not a column of
            the input file.
    """
    parser = argparse.ArgumentParser("sentspace")
    parser.add_argument(
        "input_file",
        type=str,
        help="Path to input file to package as a lexical feature e.g., example/example.csv",
    )
    parser.add_argument(
        "-word",
        "--word_column",
        default="Word",
        help="Column we should extract the word from (default: 'Word')",
    )
    parser.add_argument(
        "-norm",
        "--norm_column",
        required=True,
        help="Column we should extract the norm from",
    )
    parser.add_argument(
        "-name",
        "--norm_name",
        required=False,
        help="Name of the feature to package. By default, the same as `norm_column`",
    )
    parser.add_argument(
        "-out",
        "--output_directory",
        default="~/.cache/sentspace/",
        help="Where should we output the `norm_name.pkl` packaged file?",
    )
    args = parser.parse_args()
    print(args)

    # Route the word column through a plain `str` converter so that it is
    # kept as raw text.  With the default parsing, real words such as
    # "null", "NA" or "nan" are treated as missing values and would end up
    # as a NaN key instead of a usable entry.
    df = pd.read_csv(args.input_file, converters={args.word_column: str})

    # Fail with a message that names the file and its columns, rather than
    # a bare KeyError carrying only the column name.
    missing = [
        column
        for column in (args.word_column, args.norm_column)
        if column not in df.columns
    ]
    if missing:
        raise KeyError(
            f"column(s) {missing} not found in {args.input_file}; "
            f"available columns: {list(df.columns)}"
        )

    # An empty word cannot be looked up, so such rows are left out.
    mapping = {
        word: norm
        for word, norm in zip(df[args.word_column], df[args.norm_column])
        if word
    }

    name = args.norm_name or args.norm_column
    out = Path(args.output_directory).expanduser().resolve()
    out.mkdir(parents=True, exist_ok=True)
    with (out / f"{name}.pkl").open("wb") as f:
        pickle.dump(mapping, f)
# Run the packaging command only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Functions
def main(**kwargs)
-
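Packages one column of a CSV file as a pickled word-to-norm lookup table (`<norm_name>.pkl`), using the input file, column names and output directory given as command-line arguments.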
Expand source code
def main(**kwargs):
    """Package one column of a CSV file as a pickled word -> norm lookup table.

    Options are parsed from the command line (``sys.argv``): the positional
    ``input_file`` plus ``--word_column``, ``--norm_column``, ``--norm_name``
    and ``--output_directory``.  The resulting ``dict`` is pickled to
    ``<output_directory>/<norm_name>.pkl``; ``norm_name`` falls back to
    ``norm_column`` when it is not supplied.  The output directory is
    created if it does not exist.

    Rows whose word cell is empty are skipped.  When a word occurs on more
    than one row, the value from the last such row is kept.

    Args:
        **kwargs: Accepted for call compatibility and ignored; every option
            is taken from the command line.

    Raises:
        KeyError: If the word column or the norm column is not a column of
            the input file.
    """
    parser = argparse.ArgumentParser("sentspace")
    parser.add_argument(
        "input_file",
        type=str,
        help="Path to input file to package as a lexical feature e.g., example/example.csv",
    )
    parser.add_argument(
        "-word",
        "--word_column",
        default="Word",
        help="Column we should extract the word from (default: 'Word')",
    )
    parser.add_argument(
        "-norm",
        "--norm_column",
        required=True,
        help="Column we should extract the norm from",
    )
    parser.add_argument(
        "-name",
        "--norm_name",
        required=False,
        help="Name of the feature to package. By default, the same as `norm_column`",
    )
    parser.add_argument(
        "-out",
        "--output_directory",
        default="~/.cache/sentspace/",
        help="Where should we output the `norm_name.pkl` packaged file?",
    )
    args = parser.parse_args()
    print(args)

    # Route the word column through a plain `str` converter so that it is
    # kept as raw text.  With the default parsing, real words such as
    # "null", "NA" or "nan" are treated as missing values and would end up
    # as a NaN key instead of a usable entry.
    df = pd.read_csv(args.input_file, converters={args.word_column: str})

    # Fail with a message that names the file and its columns, rather than
    # a bare KeyError carrying only the column name.
    missing = [
        column
        for column in (args.word_column, args.norm_column)
        if column not in df.columns
    ]
    if missing:
        raise KeyError(
            f"column(s) {missing} not found in {args.input_file}; "
            f"available columns: {list(df.columns)}"
        )

    # An empty word cannot be looked up, so such rows are left out.
    mapping = {
        word: norm
        for word, norm in zip(df[args.word_column], df[args.norm_column])
        if word
    }

    name = args.norm_name or args.norm_column
    out = Path(args.output_directory).expanduser().resolve()
    out.mkdir(parents=True, exist_ok=True)
    with (out / f"{name}.pkl").open("wb") as f:
        pickle.dump(mapping, f)