Module sentspace.lexical
import os
from pathlib import Path
from typing import List
import copy
import sentspace.utils
from sentspace.lexical import utils
from sentspace.utils import io, text
from sentspace.utils.caching import cache_to_disk, cache_to_mem
from sentspace.utils.resources import feat_rename_dict
def get_features(sentence: sentspace.Sentence.Sentence, lock=None) -> List[dict]:
    # io.log(f'computing lexical features for `{sentence}`')
    # if lock: lock.acquire()
    databases = utils.load_databases(features="all")
    # if lock: lock.release()

    # lexical features, looked up per token from the loaded databases
    features_from_database = utils.get_all_features(sentence, databases)

    # rename keys in features_from_database if they exist in feat_rename_dict
    for key in features_from_database.copy():
        if key in feat_rename_dict:
            features_from_database[feat_rename_dict[key]] = features_from_database.pop(key)

    # return a list of token-level features, one dict per token
    accumulator = []
    for i, token in enumerate(sentence.tokens):
        db_features_slice = {
            feature: features_from_database[feature][i]
            for feature in features_from_database
        }
        accumulator.append({
            "index": sentence.uid,
            "sentence": str(sentence),
            "token": token,
            "lemma": sentence.lemmas[i],
            "tag": sentence.pos_tags[i],
            "content_word": sentence.content_words[i],
            **db_features_slice,
        })
    return accumulator
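For orientation, the return value is a list with one dict per token. A sketch of a single element follows; the concrete values and the database feature keys are illustrative, since they depend on what utils.load_databases provides:

{
    "index": 0,                     # sentence.uid
    "sentence": "The dog ran.",
    "token": "dog",
    "lemma": "dog",
    "tag": "NN",
    "content_word": True,
    # ...plus one entry per database feature, with keys renamed via feat_rename_dict
}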
Sub-modules
sentspace.lexical.sanity_checks
sentspace.lexical.utils
Functions
def get_features(sentence: Sentence, lock=None) -> List[dict]
Compute token-level lexical features for sentence: returns a list with one dict per token, combining the token, its lemma, POS tag, and content-word flag with every feature looked up from the lexical databases (keys renamed via feat_rename_dict).
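A minimal usage sketch. Note the Sentence constructor signature is not shown in this module, so building one directly from a raw string is an assumption:

from sentspace.Sentence import Sentence
from sentspace.lexical import get_features

# assumed constructor -- the actual Sentence API may differ
sentence = Sentence("The quick brown fox jumps over the lazy dog.")

token_features = get_features(sentence)  # one dict per token
for row in token_features:
    print(row["token"], row["tag"], row["content_word"])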