Module sentspace.lexical
import os
from pathlib import Path
from typing import List
import copy
import sentspace.utils
from sentspace.lexical import utils
from sentspace.utils import io, text
from sentspace.utils.caching import cache_to_disk, cache_to_mem
from sentspace.utils.resources import feat_rename_dict
def get_features(sentence: sentspace.Sentence.Sentence, lock=None) -> List[dict]:
    # io.log(f'computing lexical features for `{sentence}`')
    # if lock: lock.acquire()
    databases = utils.load_databases(features="all")
    # if lock: lock.release()

    # lexical features, looked up per token from the loaded databases
    features_from_database = utils.get_all_features(sentence, databases)

    # rename keys in features_from_database if they exist in feat_rename_dict
    for key in features_from_database.copy():
        if key in feat_rename_dict:
            features_from_database[feat_rename_dict[key]] = features_from_database.pop(key)

    # return a list of token-level features, one dict per token
    accumulator = []
    for i, token in enumerate(sentence.tokens):
        db_features_slice = {
            feature: features_from_database[feature][i]
            for feature in features_from_database
        }
        accumulator.append({
            "index": sentence.uid,
            "sentence": str(sentence),
            "token": token,
            "lemma": sentence.lemmas[i],
            "tag": sentence.pos_tags[i],
            "content_word": sentence.content_words[i],
            **db_features_slice,
        })
    return accumulator
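For orientation, the return value is a list with one dict per token. A sketch of a single element follows; the concrete values and the database feature keys are illustrative, since they depend on what utils.load_databases provides:

{
    "index": 0,                     # sentence.uid
    "sentence": "The dog ran.",
    "token": "dog",
    "lemma": "dog",
    "tag": "NN",
    "content_word": True,
    # ...plus one entry per database feature, with keys renamed via feat_rename_dict
}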
Sub-modules
sentspace.lexical.sanity_checks
sentspace.lexical.utils
Functions
def get_features(sentence: Sentence, lock=None) -> List[dict]
Compute token-level lexical features for sentence: returns a list with one dict per token, combining the token, its lemma, POS tag, and content-word flag with every feature looked up from the lexical databases (keys renamed via feat_rename_dict).
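A minimal usage sketch. Note the Sentence constructor signature is not shown in this module, so building one directly from a raw string is an assumption:

from sentspace.Sentence import Sentence
from sentspace.lexical import get_features

# assumed constructor -- the actual Sentence API may differ
sentence = Sentence("The quick brown fox jumps over the lazy dog.")

token_features = get_features(sentence)  # one dict per token
for row in token_features:
    print(row["token"], row["tag"], row["content_word"])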