#!/bin/bash
# Name: verify-digests.sh
# Title: Gentoo Linux release digest verification
# Author: Robin H Johnson <robbat2@gentoo.org>
# Copyright 2016 Gentoo Authors
# Distributed under the terms of the GNU General Public License v2
#
# Description:
# This script exists to help mirrors verify raw digests of release files, to
# detect possible disk and filesystem corruptions.  By design, it does NOT check
# GPG signatures.
#
# Usage:
# verify-digests.sh [FILES-OR-DIRECTORIES...]
#
# If passed a digest file:
# - it will be checked.
# If passed a non-digest file:
# - that immediate directory will be checked for all digest files.
# If passed a directory:
# - it and all subdirs will be checked for all digest files.
# If passed no arguments:
# - it will act like the directory '.' was passed.
#
# Return value:
# On success, exits zero.
# On failures, exits non-zero, and writes a file of errors to $TMPDIR.


# Take Gentoo digest files and convert to a plain BSD-format digest file.
# - strip any PGP signing
# - pass existing BSD-format digest
# - convert coreutils-format to BSD-format
#
# Input:  digest file contents on stdin.
# Output: one "ALGO (filename) = hexdigest" line per digest on stdout.
#
# The awk rules live in a single program text rather than in repeated
# "-e" options: "-e" is a gawk extension that other awk implementations
# reject, while a single program argument works everywhere.
transform_digest() {
	sed -E \
		-e '/BEGIN (PGP|GPG) SIGNED MESSAGE/,/^$/d' \
		-e '/BEGIN (PGP|GPG) SIGNATURE/,/END (PGP|GPG) SIGNATURE/d' \
	| awk '
		# A section header such as "# SHA512 HASH" names the algorithm for
		# the bare "hexdigest  filename" lines that follow it.
		/^# .* HASH$/ { hash = $2 }
		# Rewrite the upper-case BLAKE2 header names to mixed case.
		hash == "BLAKE2B" { hash = "BLAKE2b" }
		hash == "BLAKE2S" { hash = "BLAKE2s" }
		# coreutils format: only usable once a section header was seen.
		/^[[:xdigit:]]+[[:space:]]+.+/ {
			if (hash != "") {
				printf "%s (%s) = %s\n", hash, $2, $1
			}
		}
		# Already BSD format: pass through unchanged.
		/^([A-Z]+[0-9A-Za-z-]+) \(.*\) = [[:xdigit:]]+/ { print $0 }
		# "ALGO hexdigest filename" triples: reorder into BSD format.
		/^([A-Z]+[0-9A-Za-z-]+) [[:xdigit:]]+ [^[:space:]]+$/ {
			printf "%s (%s) = %s\n", $1, $3, $2
		}
	'
}

# Split the command line into two buckets:
# - DIGESTS_FIND: directory arguments, which are later handed to find
# - DIGESTS_ARGS: every other argument, kept verbatim (so you can pass
#   .asc files directly)
DIGESTS_ARGS=( )
DIGESTS_FIND=( )
for arg in "$@"; do
	if [[ -d "$arg" ]]; then
		DIGESTS_FIND+=( "$arg" )
		continue
	fi
	DIGESTS_ARGS+=( "$arg" )
done
# No arguments at all: behave as if the directory '.' had been given.
if (( $# == 0 )); then
	DIGESTS_FIND=( . )
fi

# Check if non-dir arguments were digest files or files that you want to get checked

# Succeed if path $1 looks like a digest file: either the path contains
# "DIGEST", or the file content has a "# <ALGO> HASH" section header or a
# BSD-style ") = <hexdigest>" line.
# The patterns are extended regular expressions (grep -E), so the literal
# parenthesis is escaped and "+" is left bare; a backslash-plus would match
# a literal plus sign and never recognise a BSD-format digest.
is_digest_file() {
	[[ "${1/DIGEST}" != "$1" ]] && return 0
	grep -sq -m 1 -E \
		-e '# ([A-Z]+[0-9A-Za-z-]+) HASH' \
		-e '\) = [[:xdigit:]]+' \
		-- "$1"
}

DIGESTS_ARGS2=( )
for f in "${DIGESTS_ARGS[@]}" ; do
	if is_digest_file "$f"; then
		DIGESTS_ARGS2+=( "$f" )
	else
		# Not a digest file itself: look for digest files next to it.
		d=$( dirname -- "$f" )
		DIGESTS_FIND2=( )
		# Read through process substitution, one path per line: an empty
		# result then gives an empty array (a here-string would produce one
		# empty element), and paths containing spaces stay intact.
		readarray -t DIGESTS_FIND2 < <(
			find "$d" -maxdepth 1 ! -type d \( -name '*.DIGESTS' -o -name '*.DIGESTS.asc' \) \
				| sort -u
		)
		DIGESTS_ARGS2+=( "${DIGESTS_FIND2[@]}" )
		DIGESTS_FIND2=( )
	fi
done
if [[ "${#DIGESTS_FIND[@]}" -gt 0 ]]; then
	# Replace the list of directories with the digest files found below them.
	search_roots=( "${DIGESTS_FIND[@]}" )
	readarray -t DIGESTS_FIND < <(
		find "${search_roots[@]}" ! -type d \( -name '*.DIGESTS' -o -name '*.DIGESTS.asc' \) \
			| sort -u
	)
fi
# merge all items
DIGESTS=( "${DIGESTS_ARGS2[@]}" "${DIGESTS_FIND[@]}" )


# Prefer signed digests where possible, but sometimes they were in the original
# .DIGESTS file, and other times there was a separate .asc file.
#
# select_digests rewrites the global DIGESTS array in place:
# - every entry is reduced to its base path (a literal ".asc" suffix removed)
# - duplicate base paths are collapsed
# - for each base path, "<base>.asc" is chosen when it exists, otherwise
#   "<base>" when that exists; paths where neither exists are dropped.
# Entries are handled as whole array elements, so paths containing spaces
# are not split apart.
select_digests() {
	local d
	local -a bases=( )
	for d in "${DIGESTS[@]}"; do
		# Ignore empty entries (e.g. left behind by a search that found nothing).
		[[ -n "$d" ]] || continue
		bases+=( "${d%.asc}" )
	done
	DIGESTS=( )
	(( ${#bases[@]} > 0 )) || return 0
	while IFS= read -r d; do
		if [[ -e "${d}.asc" ]]; then
			# split signed, or cleansigned with extension
			DIGESTS+=( "${d}.asc" )
		elif [[ -e "${d}" ]]; then
			# cleansigned, no extension
			DIGESTS+=( "${d}" )
		fi
	done < <(printf '%s\n' "${bases[@]}" | sort -u)
}
select_digests


# Setup storage for digest conversion & results
#
# tmp1     - scratch file holding the BSD-format conversion of a digest file
# tmp2     - scratch file holding the checker output for a digest file
# failures - report file that collects the output of failed checks
T=$(date -u +%Y%m%dT%H%M%SZ)
tmp1=$(mktemp --tmpdir) || exit 1
tmp2=$(mktemp --tmpdir) || { rm -f -- "$tmp1"; exit 1; }
failures=$(mktemp --tmpdir "gentoo-failures.$T.XXXXXXXXXX") \
	|| { rm -f -- "$tmp1" "$tmp2"; exit 1; }

# Remove the scratch files.  The report file is removed as well when it is
# empty and no failed digest was recorded, so that successful runs do not
# leave an empty gentoo-failures file behind.
cleanup() {
	rm -f -- "${tmp1}" "${tmp2}"
	if [[ ! -s "${failures}" && "${#failed_digests[@]}" -eq 0 ]]; then
		rm -f -- "${failures}"
	fi
}
trap cleanup EXIT
# A handler that merely returns lets the script carry on after the signal.
# Exit explicitly (128 + signal number) so the run stops; the EXIT trap then
# performs the cleanup.
trap 'exit 130' SIGINT
trap 'exit 143' SIGTERM

# Now check them

# Hash algorithms this script can verify, ordered by strength (strongest
# first).  The names are matched case-sensitively against the first column
# of the converted digest file, so BLAKE2 has to be spelled "BLAKE2b" (the
# form transform_digest emits), not "BLAKE2B".
HASH_PREFERENCE=( BLAKE2b SHA3-512 WHIRLPOOL SHA512 SHA384 SHA256 SHA224 )

# Succeed if hash name $2 appears as a whole line of the newline-separated
# list $1.
hash_listed() {
	[[ $'\n'"$1"$'\n' == *$'\n'"$2"$'\n'* ]]
}

# Print the name of the dedicated checksum tool for hash name $1:
# the lower-cased hash name followed by "sum", except for BLAKE2.
hash_tool_name() {
	local lower=${1,,}
	case "$lower" in
		# Special case, the tool name is different than the hash.
		blake2b|blake2s) printf '%s\n' 'b2sum' ;;
		*) printf '%s\n' "${lower}sum" ;;
	esac
}

failed_digests=()
# Iterate over whole array elements (sorted, de-duplicated) so that paths
# containing spaces are not split apart.
readarray -t digest_queue < <(printf '%s\n' "${DIGESTS[@]}" | sort -u)
for d in "${digest_queue[@]}"; do
	# An empty DIGESTS list still produces one blank line; skip it.
	[[ -n "$d" ]] || continue
	sleep 0.01
	echo -n "Checking digests from $d: "
	# Open (and truncate) $tmp1 before $d: if $d cannot be read, $tmp1 ends up
	# empty instead of still holding the previous file's digests.
	transform_digest >"$tmp1" < "$d"
	hashes=$(awk '{print $1}' "$tmp1" | sort -u)
	checked=0
	for h in "${HASH_PREFERENCE[@]}"; do
		hash_listed "$hashes" "$h" || continue
		cmd=$(hash_tool_name "$h")
		# Check we have the tooling to validate
		if ! command -v "$cmd" >/dev/null; then
			if command -v rhash >/dev/null; then
				cmd=rhash
			else
				echo "Could not find $cmd or rhash to verify ${h} hashes" 1>&2
				# Try the next-strongest hash present in the file.
				continue
			fi
		fi
		echo "using $h"
		# Run the tooling now.
		# TODO: if we assume rhash is available always, it could check all the hashes at once
		# but that means rewriting this loop of strength-ordering
		#
		# The checker runs in a subshell that changes into the digest file's
		# directory, so this shell's working directory never changes and
		# relative paths of later digest files keep resolving.
		grep -- "^$h " "$tmp1" \
			| ( CDPATH='' cd -- "$(dirname -- "$d")" && ionice -c 3 --ignore "${cmd}" -c - ) \
			| tee "$tmp2"
		# Status of the checker stage (the subshell), not of grep or tee.
		rc=${PIPESTATUS[1]}
		if [ "$rc" -ne 0 ]; then
			failed_digests+=("$d")
			cat "$tmp2" >> "$failures"
		fi
		checked=1
		break
	done
	if [[ $checked -eq 0 ]]; then
		echo " FAIL - no usable digest"
	fi
done

# Final report.
# With no failed digest files recorded the script ends successfully here.
[[ "${#failed_digests[@]}" -gt 0 ]] || exit 0

# Otherwise: a separator on stdout, then on stderr the list of failed digest
# files followed by the collected checker output, and a non-zero exit.
echo "----"
{
	echo "Failures detected in the following DIGESTS:"
	for f in "${failed_digests[@]}"; do
		echo "$f"
	done
	echo "----"
	echo "Complete output of failed DIGESTS, stored in $failures:"
	cat "$failures"
} 1>&2
exit 1