Source code for pyscaffold.identification

"""Internal library for manipulating, creating and dealing with names, or more generally
identifiers.
"""

import keyword
import re
from typing import Callable, Iterable, List, TypeVar

from .exceptions import InvalidIdentifier

T = TypeVar("T")


def is_valid_identifier(string: str) -> bool:
    """Check if string is a valid package name

    A valid name starts with an ASCII letter or underscore, continues with ASCII
    letters, digits or underscores only, and is not a reserved Python keyword.

    Args:
        string: package name

    Returns:
        True if string is valid package name else False
    """
    # ``fullmatch`` is used instead of ``match`` + ``$`` because ``$`` also matches
    # right before a trailing newline, which would wrongly accept e.g. ``"foo\n"``.
    if not re.fullmatch("[_A-Za-z][_a-zA-Z0-9]*", string):
        return False
    # Reserved words (``class``, ``def``, ...) cannot be used as names.
    return not keyword.iskeyword(string)
def make_valid_identifier(string: str) -> str:
    """Try to make a valid package name identifier from a string

    The input is converted to ``str``, stripped of surrounding whitespace, dashes
    and spaces are turned into underscores, every remaining character that is not
    an ASCII letter, digit or underscore is dropped, and the result is lower-cased.

    Args:
        string: invalid package name

    Returns:
        Valid package name as string

    Raises:
        :obj:`InvalidIdentifier`: raised if identifier can not be converted
    """
    candidate = str(string).strip()
    # Dashes and spaces are the separators that get mapped to underscores
    candidate = candidate.replace("-", "_").replace(" ", "_")
    # Everything else that cannot be part of an identifier is simply removed
    candidate = re.sub("[^_a-zA-Z0-9]", "", candidate).lower()

    if not is_valid_identifier(candidate):
        raise InvalidIdentifier("String cannot be converted to a valid identifier.")
    return candidate
# from https://en.wikibooks.org/, Creative Commons Attribution-ShareAlike 3.0
def levenshtein(s1: str, s2: str) -> int:
    """Calculate the Levenshtein distance between two strings

    Only two rows of the edit-distance table are kept at any time: the previously
    completed row and the row currently being filled.

    Args:
        s1: first string
        s2: second string

    Returns:
        Distance between s1 and s2
    """
    # The longer string drives the outer loop, so each row has the shorter length
    longer, shorter = (s1, s2) if len(s1) >= len(s2) else (s2, s1)

    if not shorter:
        # Turning a string into the empty one costs one deletion per character
        return len(longer)

    prev_row = list(range(len(shorter) + 1))
    for row_no, ch_long in enumerate(longer, start=1):
        row = [row_no]
        for col, ch_short in enumerate(shorter):
            cost_insert = prev_row[col + 1] + 1
            cost_delete = row[col] + 1
            # A substitution is free when both characters already agree
            cost_subst = prev_row[col] + (ch_long != ch_short)
            row.append(min(cost_insert, cost_delete, cost_subst))
        prev_row = row

    return prev_row[-1]
def dasherize(word: str) -> str:
    """Replace underscores with dashes in the string.

    Example::

        >>> dasherize("foo_bar")
        'foo-bar'

    Args:
        word: input word

    Returns:
        input word with underscores replaced by dashes
    """
    # Splitting on "_" and re-joining with "-" swaps every single occurrence
    pieces = word.split("_")
    return "-".join(pieces)
# Splits on runs of non-word characters (which are discarded) and on chunks that
# start with an upper-case letter (which are kept thanks to the capturing group).
CAMEL_CASE_SPLITTER = re.compile(r"\W+|([A-Z][^A-Z\W]*)")


def underscore(word: str) -> str:
    """Convert CamelCasedStrings or dasherized-strings into underscore_strings.

    Example::

        >>> underscore("FooBar-foo")
        'foo_bar_foo'

    Args:
        word: input word

    Returns:
        lower-cased input word with its pieces joined by underscores
    """
    pieces = CAMEL_CASE_SPLITTER.split(word)
    # The split yields ``None`` (separator matched without the group) and empty
    # strings (adjacent matches); ``filter(None, ...)`` drops both.
    return "_".join(filter(None, pieces)).lower()
def deterministic_name(obj):
    """Private API that returns a string that can be used to deterministically
    deduplicate and sort sequences of objects.

    The string has the form ``<module>.<qualified name>``. When ``obj`` has no
    ``__module__`` attribute, ``".."`` is used in its place; when it has no
    ``__qualname__`` attribute, the one from its class is used.
    """
    class_qualname = obj.__class__.__qualname__
    module = getattr(obj, "__module__", "..")
    qualname = getattr(obj, "__qualname__", class_qualname)
    return f"{module}.{qualname}"
def deterministic_sort(sequence: Iterable[T]) -> List[T]:
    """Private API that orders a sequence of objects lexicographically
    (by :obj:`deterministic_name`), removing duplicates, which is needed for
    determinism.

    The main purpose of this function is to deterministically sort a sequence of
    PyScaffold extensions (it will also sort internal extensions before external:
    "pyscaffold.*" < "pyscaffoldext.*").
    """
    by_name = {}
    for item in sequence:
        # Objects sharing a name overwrite each other, so only the last one stays
        by_name[deterministic_name(item)] = item
    return [by_name[name] for name in sorted(by_name)]
def get_id(function: Callable) -> str:
    """Given a function, calculate its identifier.

    An identifier is a string in the format ``<module name>:<function name>``,
    similarly to the convention used for setuptools entry points.

    Note:
        This function does not return a Python 3 ``__qualname__`` equivalent.
        If the function is nested inside another function or class, the parent
        name is ignored.

    Args:
        function: function object

    Returns:
        the identifier string built from ``__module__`` and ``__name__``
    """
    module_name = function.__module__
    function_name = function.__name__
    return f"{module_name}:{function_name}"