diff options
author | Jeffery To <jeffery.to@gmail.com> | 2020-05-10 19:14:25 +0800 |
---|---|---|
committer | Jeffery To <jeffery.to@gmail.com> | 2020-05-14 23:38:24 +0800 |
commit | d4d78c2511177758a8910dab76ebbc576bbf3df8 (patch) | |
tree | 693d296075b997f9ea662de5611d7d1244a27725 /lang/python/python3-find-stdlib-depends.sh | |
parent | 83d36065d5e678f3309375f5d07188f960a6850f (diff) |
python3: Add script to help find standard library dependencies
This adds a script that searches a Python package's source code to find
imports for separately-packaged standard library modules.
The script can be run by calling make with the configure target and
"PY3=stdlib V=s" arguments, e.g.
make package/python3-lxml/configure PY3=stdlib V=s
This also updates the readme with instructions on how to call this script,
and adds more information on Python package dependencies in general.
Signed-off-by: Jeffery To <jeffery.to@gmail.com>
Diffstat (limited to 'lang/python/python3-find-stdlib-depends.sh')
-rw-r--r-- | lang/python/python3-find-stdlib-depends.sh | 434 |
1 files changed, 434 insertions, 0 deletions
#!/bin/sh
#
# lang/python/python3-find-stdlib-depends.sh
#
# Search Python source code for imports of standard library modules that are
# listed in the packages data below, and print the packages whose modules
# were found as suggested dependencies.
#
# Requires GNU grep (looked up as "ggrep" first, then as "grep"); the search
# regex relies on the GNU extensions \b, \s and \S.

# Packages data
#
# Each non-blank line that does not start with "#" has the form:
#   <package>: <module> [<module>...]
#
# Notes:
# * python3-codecs: Also contains codecs for CJK encodings but we don't
#   have a good way to check for uses
# * python3-ncurses: The standard library module is named "curses" (there
#   is no module named "ncurses")
# * python3-openssl: Don't include hashlib as it supports several
#   standard algorithms without requiring OpenSSL

packages="
python3-asyncio: asyncio
python3-cgi: cgi
python3-cgitb: cgitb
python3-codecs: unicodedata
python3-ctypes: ctypes
python3-dbm: dbm
python3-decimal: decimal
python3-distutils: distutils
python3-email: email
python3-gdbm: dbm.gnu
python3-logging: logging
python3-lzma: lzma
python3-multiprocessing: multiprocessing
python3-ncurses: curses
python3-openssl: ssl
python3-pydoc: doctest pydoc
python3-sqlite3: sqlite3
python3-unittest: unittest
python3-urllib: urllib
python3-xml: xml xmlrpc
"


# Constants

stdin_name="<stdin>"

# grep options used only when searching a directory tree; split on
# whitespace when passed to grep
grep_dir_filters="
-Ir
--include=*.py
--exclude=setup.py
--exclude=test_*.py
--exclude=*_test.py
--exclude-dir=test
--exclude-dir=tests
--exclude-dir=ipkg-*
--exclude-dir=.pkgdir
"

log_level_notice=5
log_level_info=6
log_level_debug=7

# /usr/include/sysexits.h
ex_usage=64
ex_noinput=66
ex_unavailable=69
ex_software=70

newline="
"
oifs="$IFS"

# literal ESC character, used to recognise color sequences in grep output
# without relying on the GNU sed "\x1b" escape
esc=$(printf '\033')


# Defaults

grep_output_default_max_count=3
grep_output_default_color_when="auto"
grep_output_default_line_prefix="-HnT --label=$stdin_name"
grep_output_default_context_num=1

log_level_default="$log_level_info"


# Global variables

log_level=
grep=
grep_output_options=
grep_output_description=
stdin=
output_name=
is_first_search=


# Logging

# Print all arguments, joined by spaces, on one line to standard output.
log() {
    printf '%s\n' "$*"
}

can_log_notice() {
    [ "$log_level" -ge "$log_level_notice" ]
}

can_log_info() {
    [ "$log_level" -ge "$log_level_info" ]
}

can_log_debug() {
    [ "$log_level" -ge "$log_level_debug" ]
}

log_notice() {
    if can_log_notice; then
        log "$@"
    fi
}

log_info() {
    if can_log_info; then
        log "$@"
    fi
}

log_debug() {
    if can_log_debug; then
        log "$@"
    fi
}

# Print an error message to standard error, regardless of log level.
log_error() {
    printf '%s\n' "Error: $*" >&2
}


# Helpers

# Succeed if $1 is a non-empty string consisting only of decimal digits.
is_uint() {
    case "$1" in
        ''|*[!0-9]*)
            return 1
            ;;
    esac
    return 0
}


# Usage

# Print the help text to standard output.
usage() {
    cat <<EOF
Usage: ${0##*/} [OPTION]... [FILE]...
Search for imports of certain Python standard libraries in each FILE,
generate a list of suggested package dependencies.

With no FILE, or when FILE is -, read standard input.

Options:
  -c WHEN   use color in output;
            WHEN is 'always', 'never', or 'auto' (default: '$grep_output_default_color_when')
  -h        display this help text and exit
  -m NUM    show max NUM matches per package per file (default: $grep_output_default_max_count);
            use 0 to show all matches
  -n NAME   when one or no FILE is given, use NAME instead of FILE in
            displayed information
  -q        show suggested dependencies only
  -v        show verbose output (also show all matches)
  -x NUM    show NUM lines of context (default: $grep_output_default_context_num)

EOF
}


# Imports search

# Split one line of packages data ("<package>: <modules>") into its parts.
#
# $1: the packages data line
#
# Sets the variables "package" and "modules", which are declared local in
# search_path(). Exits with ex_software if the line does not have exactly
# two non-empty, colon-separated fields.
get_package_modules() {
    local line="$1"
    local field
    local field_num=0
    local IFS=:

    for field in $line; do
        # strip leading and trailing whitespace
        field="${field#"${field%%[! ]*}"}"
        field="${field%"${field##*[! ]}"}"

        # set variables in search_path()
        if [ "$field_num" -eq 0 ]; then
            package="$field"
            field_num=1
        elif [ "$field_num" -eq 1 ]; then
            modules="$field"
            field_num=2
        else
            # too many fields
            field_num=3
        fi
    done

    if [ "$field_num" -ne 2 ] || [ -z "$package" ] || [ -z "$modules" ]; then
        log_error "invalid package data \"$line\""
        exit "$ex_software"
    fi
}

# Search one path for imports of the modules belonging to one package.
#
# $1: path to search (a directory path must end with "/")
# $2: non-empty if the path is a directory
# $3: non-empty if the global "stdin" holds the text to search
# $4: package name
# $5: whitespace-separated module names
#
# When imports are found, the package name is appended to the variable
# "suggested", which is declared local in search_path().
search_path_for_modules() {
    local path="$1"
    local path_is_dir="$2"
    local path_is_stdin="$3"
    local package="$4"
    local modules="$5"
    local modules_regex
    local regex
    local escaped_path
    local remove_dir_prefix=
    local grep_results
    local grep_status
    local IFS="$oifs"

    log_debug " Looking for modules in $package ($modules)"

    # "a.b c" -> "a\.b|c"; POSIX character classes keep this working with
    # non-GNU sed
    modules_regex=$(printf '%s' "$modules" | sed -e 's/\./\\./g' -e 's/[[:space:]][[:space:]]*/|/g')

    # Match "import <module>" and "from <module>[.<submodule>] import".
    # The "from" form only allows a dotted suffix after the module name, so
    # that e.g. "from email_validator import ..." is not taken for "email".
    regex="\b(import\s+($modules_regex)|from\s+($modules_regex)(\.\S*)?\s+import)\b"

    if [ -n "$path_is_dir" ]; then
        # sed script that strips the searched directory from the start of
        # each output line, keeping any leading color sequences
        escaped_path=$(printf '%s' "$path" | sed -e 's/[][\.*^$]/\\&/g')
        remove_dir_prefix="s|^\(\(${esc}\[[0-9;]*[mK]\)*\)${escaped_path}|\1|"
        grep_results=$($grep $grep_output_options $grep_dir_filters -E "$regex" "$path")
        grep_status=$?

    elif [ -n "$path_is_stdin" ]; then
        grep_results=$(printf '%s\n' "$stdin" | $grep $grep_output_options -E "$regex")
        grep_status=$?

    else
        grep_results=$($grep $grep_output_options -E "$regex" "$path")
        grep_status=$?
    fi

    if [ "$grep_status" -ne 0 ]; then
        log_debug " No imports found"
        log_debug ""
        return 0
    fi

    log_info " Found imports for: $modules"

    if can_log_info; then
        if [ -n "$remove_dir_prefix" ]; then
            printf '%s\n' "$grep_results" | sed -e "$remove_dir_prefix" -e "s/^/ /"
        else
            printf '%s\n' "$grep_results" | sed -e "s/^/ /"
        fi
    fi

    log_info ""

    # set variable in search_path()
    suggested="$suggested
$package"
}

# Search one path for imports of every package in the packages data and
# print the suggested dependencies.
#
# $1: file or directory to search, or "-" for standard input
# $2: name to show for the path in the output
#
# Exits with ex_noinput if the path does not exist.
search_path() {
    local path="$1"
    local name="$2"
    local path_is_dir
    local path_is_stdin
    local package
    local modules
    local suggested
    local line
    local IFS="$newline"

    if [ "$path" = "-" ]; then
        path_is_stdin=1

    else
        if ! [ -e "$path" ]; then
            log_error "$path does not exist"
            exit "$ex_noinput"
        fi

        if [ -d "$path" ]; then
            # normalise to exactly one trailing slash
            path="${path%/}/"
            path_is_dir=1
        fi
    fi

    log_info ""
    log_info "Searching $name (showing $grep_output_description):"
    log_info ""

    if [ -n "$path_is_stdin" ]; then
        # standard input is searched once per package, so keep a copy
        stdin="$(cat)"
    fi

    for line in $packages; do
        # strip leading whitespace
        line="${line#"${line%%[! ]*}"}"

        # skip blank lines or comments (beginning with #)
        case "$line" in
            ''|'#'*)
                continue
                ;;
        esac

        package=
        modules=

        get_package_modules "$line"
        search_path_for_modules "$path" "$path_is_dir" "$path_is_stdin" "$package" "$modules"
    done

    log_notice "Suggested dependencies for $name:"

    if [ -z "$suggested" ]; then
        suggested="(none)"
    fi
    IFS="$oifs"
    for package in $suggested; do
        log_notice " $package"
    done

    log_notice ""
}


# Find GNU grep

if ggrep --version 2>&1 | grep -q GNU; then
    grep="ggrep"
elif grep --version 2>&1 | grep -q GNU; then
    grep="grep"
else
    log_error "cannot find GNU grep"
    exit "$ex_unavailable"
fi


# Process environment variables

case $PYTHON3_FIND_STDLIB_DEPENDS_LOG_LEVEL in
    notice)
        log_level="$log_level_notice"
        ;;
    info)
        log_level="$log_level_info"
        ;;
    debug)
        log_level="$log_level_debug"
        ;;
    *)
        log_level="$log_level_default"
        ;;
esac

grep_output_max_count="${PYTHON3_FIND_STDLIB_DEPENDS_MAX_COUNT:-$grep_output_default_max_count}"
grep_output_color_when="${PYTHON3_FIND_STDLIB_DEPENDS_COLOR_WHEN:-$grep_output_default_color_when}"
grep_output_line_prefix="${PYTHON3_FIND_STDLIB_DEPENDS_LINE_PREFIX:-$grep_output_default_line_prefix}"
grep_output_context_num="${PYTHON3_FIND_STDLIB_DEPENDS_CONTEXT_NUM:-$grep_output_default_context_num}"


# Process command line options

while getopts c:hm:n:qvx: OPT; do
    case $OPT in
        c)
            grep_output_color_when="$OPTARG"
            ;;
        h)
            usage
            exit 0
            ;;
        m)
            grep_output_max_count="$OPTARG"
            ;;
        n)
            output_name="$OPTARG"
            ;;
        q)
            log_level="$log_level_notice"
            ;;
        v)
            log_level="$log_level_debug"
            ;;
        x)
            grep_output_context_num="$OPTARG"
            ;;
        \?)
            usage
            exit "$ex_usage"
            ;;
    esac
done

shift $((OPTIND - 1))

# The counts are compared numerically below, so reject anything that is not
# a non-negative integer up front.
if ! is_uint "$grep_output_max_count"; then
    log_error "invalid max match count \"$grep_output_max_count\""
    exit "$ex_usage"
fi

if ! is_uint "$grep_output_context_num"; then
    log_error "invalid number of context lines \"$grep_output_context_num\""
    exit "$ex_usage"
fi


# Set up grep output options

if can_log_info; then
    if [ "$grep_output_color_when" = "auto" ]; then
        # grep output is captured before it is printed, so grep itself
        # cannot tell whether standard output is a terminal
        if [ -t 1 ]; then
            grep_output_color_when="always"
        else
            grep_output_color_when="never"
        fi
    fi

    if ! can_log_debug && [ "$grep_output_max_count" -gt 0 ]; then
        grep_output_options="-m $grep_output_max_count"

        if [ "$grep_output_max_count" -eq 1 ]; then
            grep_output_description="max 1 match per file"
        else
            grep_output_description="max $grep_output_max_count matches per file"
        fi

    else
        grep_output_description="all matches"
    fi

    if [ "$grep_output_context_num" -gt 0 ]; then
        grep_output_options="$grep_output_options -C $grep_output_context_num"

        if [ "$grep_output_context_num" -eq 1 ]; then
            grep_output_description="$grep_output_description, 1 line of context"
        else
            grep_output_description="$grep_output_description, $grep_output_context_num lines of context"
        fi
    fi

    grep_output_options="$grep_output_options --color=$grep_output_color_when $grep_output_line_prefix"

else
    # only the suggested dependencies are shown, so matches are not needed
    grep_output_options="-q"
fi


# Main

if [ "$#" -gt 0 ]; then
    is_first_search=1

    # -n NAME only applies when at most one FILE is given
    if [ "$#" -gt 1 ]; then
        output_name=
    fi

    for path; do
        if [ -z "$is_first_search" ]; then
            log_info "===="
        fi

        if [ -z "$output_name" ]; then
            if [ "$path" = "-" ]; then
                output_name="$stdin_name"
            else
                output_name="$path"
            fi
        fi

        search_path "$path" "$output_name"

        is_first_search=
        output_name=
    done

else
    search_path "-" "${output_name:-$stdin_name}"
fi