#!/usr/bin/env bash

# Print a summary of a git repository.

# Based on:
# https://github.com/tj/git-extras/blob/main/bin/git-summary
# https://stackoverflow.com/questions/1265040/how-to-count-total-lines-changed-by-a-specific-author-in-a-git-repository


# cd "$(git root)" || { echo "Can't cd to top level directory";exit 1; }

#
# parse the command line
#
# Sets the option flags below (each stays empty unless its switch is given)
# and collects every non-option argument, in order, into POSITIONAL_ARGS.
# Prints a message on stderr and exits with status 1 on an unknown option or
# when --output-style is given without a value.
#
parse_args () {
  PROJECT_FULL_PATH=
  SUMMARY_BY_LINE=
  DEDUP_BY_EMAIL=
  MERGES_ARG=
  OUTPUT_STYLE=
  POSITIONAL_ARGS=()

  # A while/shift loop is used instead of `for arg in "$@"`: the for loop
  # iterates over a snapshot of the arguments, so the extra shift needed to
  # consume the value of --output-style would desynchronise it from "$@".
  while [ $# -gt 0 ]; do
    case "$1" in
      --full-path)
        PROJECT_FULL_PATH=1
        ;;
      --line)
        SUMMARY_BY_LINE=1
        ;;
      --dedup-by-email)
        DEDUP_BY_EMAIL=1
        ;;
      --no-merges)
        MERGES_ARG="--no-merges"
        ;;
      --output-style)
        if [ $# -lt 2 ]; then
          >&2 echo "--output-style requires a value"
          exit 1
        fi
        OUTPUT_STYLE="$2"
        # drop the option name here; its value is dropped by the shift below
        shift
        ;;
      -*)
        >&2 echo "unknown argument $1 found"
        exit 1
        ;;
      *)
        POSITIONAL_ARGS+=("$1")
        ;;
    esac

    shift
  done
}

parse_args "$@"
# hand the non-option arguments back as the positional parameters
set -- "${POSITIONAL_ARGS[@]}"

# The --line report is built by a separate code path that reads neither
# DEDUP_BY_EMAIL nor MERGES_ARG, so reject those combinations up front
# instead of silently ignoring the option.
if [[ -n "$SUMMARY_BY_LINE" ]]; then
  if [[ -n "$DEDUP_BY_EMAIL" ]]; then
    >&2 echo "--dedup-by-email used with --line is not supported"
    exit 1
  fi

  if [[ -n "$MERGES_ARG" ]]; then
    >&2 echo "--no-merges used with --line is not supported"
    exit 1
  fi
fi

# What to summarise: with --line the remaining arguments are paths handed to
# `git ls-files`; otherwise they are joined into the revision to report on
# (default HEAD).
commit="HEAD"
if [[ -n "$SUMMARY_BY_LINE" ]]; then
  paths=("$@")
elif [[ $# -ne 0 ]]; then
  commit=$*
fi

# Display name of the project: the current directory name, or with
# --full-path the whole path with a leading $HOME abbreviated to "~".
if [[ -n "$PROJECT_FULL_PATH" ]]; then
  project=$PWD
  if [[ -n "${HOME:-}" ]]; then
    # Only abbreviate when $HOME is a real path prefix of $PWD, so that
    # /mnt/home/me or /home/me2 are left untouched for HOME=/home/me.
    case "$PWD" in
      "$HOME")
        project='~'
        ;;
      "$HOME"/*)
        project="~${PWD#"$HOME"}"
        ;;
    esac
  fi
else
  project=${PWD##*/}
fi

#
# get date for the given <commit>
#
# Prints the date part of the %ai value of every commit `git log` lists for
# <commit>, one per line. MERGES_ARG (e.g. --no-merges) is passed through.
#
commit_date () {
  # The format string starts with a space, so each output line starts with an
  # empty field and the date is field 2 for `cut`.
  # $1 can be empty: it is then left out entirely so git uses its default
  # instead of receiving an empty revision argument.
  # MERGES_ARG is deliberately unquoted so an empty value adds no argument.
  # shellcheck disable=SC2086
  git log $MERGES_ARG --pretty='format: %ai' ${1:+"$1"} | cut -d ' ' -f 2
}

#
# get active days for the given <commit>
#
# Prints the number of distinct dates reported by commit_date for <commit>;
# prints 0 when there are none.
#
active_days () {
  # NR in the END rule is the number of unique lines, and is 0 on empty input
  commit_date "$1" | sort -u | awk 'END { print NR }'
}

#
# get the commit total
#
# Prints the number of commits `git rev-list --count` reports for the
# revision given as $1, falling back to the global $commit when $1 is
# missing or empty. MERGES_ARG (e.g. --no-merges) is passed through.
#
commit_count () {
  # MERGES_ARG is deliberately unquoted so an empty value adds no argument.
  # shellcheck disable=SC2086
  git rev-list $MERGES_ARG --count "${1:-$commit}"
}

#
# total file count
#
# Prints the number of lines `git ls-files` outputs.
#
file_count () {
  # tr removes the padding some wc implementations put around the number
  git ls-files | wc -l | tr -d ' '
}

#
# remove duplicate authors who belong to the same email address
#
# Reads "<count> <name> <email>" lines (the shape of `git shortlog -s -e`
# output) on stdin and prints "<count>\t<name>" lines, highest count first.
# E-mail addresses are compared case-insensitively.
#
dedup_by_email () {
  # in:
  # 27 luo zexuan <LuoZexuan@xxx.com>
  # 7 罗泽轩 <luozexuan@xxx.com>
  # out:
  # 34 luo zexuan
  LC_ALL=C awk '
  {
    last_field = tolower($NF)
    if (last_field in emails) {
      # known address: credit the commits to it, keep the first name seen
      emails[last_field] += $1
    } else {
      email = last_field
      emails[email] = $1
      # set commits/email to empty
      $1=$NF=""
      sub(/^[[:space:]]+/, "", $0)
      sub(/[[:space:]]+$/, "", $0)
      name = $0
      if (name in names) {
        # when the same name is associated with existed email,
        # merge the previous email into the later one.
        emails[email] += emails[names[name]]
        emails[names[name]] = 0
      }
      names[name] = email
    }
  }
  END {
    for (name in names) {
      email = names[name]
      printf "%6d\t%s\n", emails[email], name
    }
  }' | sort -rn -k 1
}

#
# list authors
#
# Reads "<count> <name>" lines on stdin and prints them as an aligned table
# with a percentage column holding each line's share of the summed counts.
#
format_authors () {
  # a rare unicode character is used as separator to avoid conflicting with
  # author name. However, Linux column utility will escape tab if separator
  # specified, so we do unescaping after it.
  LC_ALL=C awk '
  { rows[NR] = $0; total += $0 }
  END {
    for (i = 1; i <= NR; ++i) {
      # guard the division: every count can be 0
      share = (total > 0) ? 100 * rows[i] / total : 0
      printf "%s♪%2.1f%%\n", rows[i], share
    }
  }
  ' | column -t -s♪ | sed 's/\\x09/\t/g'
}

#
# fetch repository age from oldest commit
#
# Prints the %ar value of the first commit listed by `git log --reverse`,
# with the word "ago" removed.
#
repository_age () {
  # --format overrides the preceding --pretty=oneline
  git log --reverse --pretty=oneline --format="%ar" | head -n 1 | LC_ALL=C sed 's/ago//'
}

#
# fetch repository age of the latest commit
#
# Prints the %ar value of the first commit `git log` lists.
#
last_active () {
  # --format overrides the preceding --pretty=oneline
  git log --pretty=oneline --format="%ar" -n 1
}

#
# list the last modified author for each line
#
# Reads file names from stdin, one per line. For every file that `file`
# describes as text, prints one author name per "author " line of
# `git blame --line-porcelain`.
#
single_file () {
  local path
  # IFS= keeps leading/trailing blanks that are part of a file name
  while IFS= read -r path; do
    # -b leaves the file name out of the description, so a name that merely
    # contains "text" cannot make a binary file look like text
    if [[ $(file -b -- "$path") = *text* ]]; then
      # blame errors are discarded on purpose: files that cannot be blamed
      # simply contribute no lines
      git blame --line-porcelain -- "$path" 2>/dev/null \
        | LC_ALL=C sed -n 's/^author //p'
    fi
  done
}

#
# print what `git rev-parse --abbrev-ref HEAD` reports for the current HEAD
#
current_branch_name () {
  git rev-parse --abbrev-ref HEAD
}

#
# list the author for all file
#
# Feeds the files `git ls-files` lists for the given paths (all arguments)
# to single_file, which prints one author name per line of text.
#
lines () {
  git ls-files -- "$@" | single_file
}

#
# get the number of the lines
#
# Prints how many lines `lines` outputs for the given paths.
#
line_count () {
  # tr removes the padding some wc implementations put around the number,
  # as file_count does
  lines "$@" | wc -l | tr -d ' '
}

#
# get the number of entries reported by `git status --porcelain`
#
uncommitted_changes_count () {
  # tr removes the padding some wc implementations put around the number,
  # as file_count does
  git status --porcelain | wc -l | tr -d ' '
}


# Field separator handed to `column -s` for the tabular output style.
# Hopefully this symbol is not used in branch names.
COLUMN_CMD_DELIMTER="¬"
# Text placed between two table cells: the separator plus a visible bar.
SP="$COLUMN_CMD_DELIMTER|"

#
# print the per-line summary (--line) in the style selected by OUTPUT_STYLE
#
# Globals read: OUTPUT_STYLE, SUMMARY_BY_LINE, project, paths, SP,
# COLUMN_CMD_DELIMTER.
#   tabular - header row plus one row, aligned by `column`
#   oneline - a single "<project> / lines: <n>" line
#   other   - multi-line report with the per-author line counts; printed
#             only when SUMMARY_BY_LINE is set
#
print_summary_by_line () {
  local tabular_headers

  if [ "$OUTPUT_STYLE" = "tabular" ]; then
    tabular_headers="# Repo $SP Lines"
    # printf '%s' keeps backslashes in the project name literal
    printf '%s\n%s\n' \
      "$tabular_headers" \
      "$project $SP $(line_count "${paths[@]}")" \
      | column -t -s "$COLUMN_CMD_DELIMTER"
  elif [ "$OUTPUT_STYLE" = "oneline" ]; then
    echo "$project / lines: $(line_count "${paths[@]}")"
  elif [ -n "$SUMMARY_BY_LINE" ]; then
    echo
    echo " project  : $project"
    echo " lines    : $(line_count "${paths[@]}")"
    echo " authors  :"
    lines "${paths[@]}" | sort | uniq -c | sort -rn | format_authors
  fi
}

#
# print the repository summary in the style selected by OUTPUT_STYLE
#
# Globals read: OUTPUT_STYLE, DEDUP_BY_EMAIL, MERGES_ARG, commit, project,
# SP, COLUMN_CMD_DELIMTER.
#   tabular - header row plus one row, aligned by `column`
#   oneline - everything on a single line
#   other   - multi-line report followed by the author list
#
print_summary () {
  local tabular_headers

  if [ "$OUTPUT_STYLE" = "tabular" ]; then
    tabular_headers="# Repo $SP Age $SP Last active $SP Active on $SP Commits $SP Uncommitted $SP Branch"
    # printf '%s' keeps backslashes in the project or branch name literal
    printf '%s\n%s\n' \
      "$tabular_headers" \
      "$project $SP $(repository_age) $SP $(last_active) $SP $(active_days "$commit") days $SP $(commit_count "$commit") $SP $(uncommitted_changes_count) $SP $(current_branch_name)" \
      | column -t -s "$COLUMN_CMD_DELIMTER"
  elif [ "$OUTPUT_STYLE" = "oneline" ]; then
    echo "$project / age: $(repository_age) / last active: $(last_active) / active on $(active_days "$commit") days / commits: $(commit_count "$commit") / uncommitted: $(uncommitted_changes_count) / branch: $(current_branch_name)"
  else
    echo
    echo " project     : $project"
    echo " repo age    : $(repository_age)"
    echo " branch:     : $(current_branch_name)"
    echo " last active : $(last_active)"
    echo " active on   : $(active_days "$commit") days"
    echo " commits     : $(commit_count "$commit")"

    # The file count doesn't support passing a git ref so ignore it if a ref is given
    if [ "$commit" = "HEAD" ]; then
      echo " files       : $(file_count)"
    fi
    echo " uncommitted : $(uncommitted_changes_count)"
    echo " authors     : "
    # MERGES_ARG is deliberately unquoted so an empty value adds no argument.
    if [ -n "$DEDUP_BY_EMAIL" ]; then
      # -e adds the e-mail address that dedup_by_email keys on
      # shellcheck disable=SC2086
      git shortlog $MERGES_ARG -n -s -e "$commit" | dedup_by_email | format_authors
    else
      # shellcheck disable=SC2086
      git shortlog $MERGES_ARG -n -s "$commit" | format_authors
    fi
  fi
}

# Entry point: --line selects the per-line authorship report, anything else
# the regular repository summary.
if [[ -n "$SUMMARY_BY_LINE" ]]; then
  print_summary_by_line
else
  print_summary
fi