# $NetBSD: tzdata2netbsd,v 1.12 2018/03/24 01:54:48 kre Exp $

# For use by NetBSD developers when updating to new versions of tzdata.
#
# 0. Be in an up-to-date checkout of src/external/public-domain/tz
#    from NetBSD-current.
# 1. Make sure that you have Paul Eggert's 4K RSA public key in your
#    keyring (62AA7E34, eggert@cs.ucla.edu).  It is not required that it
#    be trusted.
# 2. Run this script.  You will be prompted for confirmation before
#    anything major (such as a cvs operation).  The tz versions can be
#    specified as args (new version first, and the previous second) if
#    needed to override the calculated values
# 3. If something fails, abort the script and fix it.
# 4. Re-run this script until you are happy.  It's designed to
#    be re-run over and over, and later runs will try not to
#    redo non-trivial work done by earlier runs.
#

# Shell (case) pattern that a tzdata version id such as 2018c must match.
VERS_PATTERN='2[0-9][0-9][0-9][a-z]'
# This needs to be updated twice every millennium to allow for the
# new millennium's years.
# First in the late xx90's sometime, allow the new one by changing the leading
# digit from a specific value to the class containing the current and
# following values (eg: in 2098 or so, change '2' to be '[23]').
# Then in the following early xx00's sometime, delete the class, and
# leave only the current value as valid (eg: in 3001 or 3002,
# change '[23]' to be just '3').
# Doing it this way helps guard against invalid specifications.
# We could automate this, but it is (IMO) not worth the cost, to avoid a
# twice a millennium edit requirement.
# A more significant (and harder) change will be needed in the late 9990's
# If this script lasts until then, send me a postcard, I'll be waiting for it!
# Things get easier again after that until the late 99990's (etc.)

# Note the pattern is used on both the old and new version specifiers,
# so it must be able to cope with the shift from one form (eg 2999g)
# to the new one (eg: 3000a) without failing (or the code that uses it
# below needs to be updated).

# Also note that the option of having a second alpha (1997aa or something)
# to handle years with much activity is handled below, the pattern does not
# need to match those.
# If that convention changes (as of date of writing, it has never been
# exercised) then code changes below will be required.

# Where the tz distribution is fetched from: host, directory on that
# host, and the sub-directory of it that holds the release files.
DIST_HOST=ftp.iana.org
DIST_PATH=tz
DIST_FILES=releases

# Editor used on the import log message; the user's own setting wins.
EDITOR=${EDITOR:-vi}

# All scratch files live below ${WORK_PFX}, in the directory the script
# is started from.  fail() is only defined further down the file, so a
# pwd failure is reported inline here rather than by calling it.
WORK_PFX=$(pwd)/update-work || {
	printf >&2 '%s\n' "Error detected:" "   Cannot obtain PWD" "Aborting."
	exit 1
}

# File in which the version being updated from is remembered between runs.
UPDATE_FROM=${WORK_PFX}/updating.from.version

# Print the command synopsis on standard error and exit with status 2.
usage()
{
	{
		printf '%s\n' "Usage: $0 [new-version-id [old-version-id]]"
		printf '%s\n' "     where a version-id is of the form YYYYx (eg: 2018c)"
		printf '%s\n' "     or '' for new-version-id (to specify only the old)"
	} >&2

	exit 2
}

# Report a fatal error on standard error and exit with status 1.
# All arguments are joined with single spaces to form the message.
fail()
{
	# "$*" joins on the first character of IFS; pin it to a space.
	local IFS=' '

	{
		printf '%s\n' 'Error detected:'
		printf '   %s\n' "$*"
		printf '%s\n' 'Aborting.'
	} >&2

	exit 1
}

# valid_vers which version
#
# Check that "version" looks like a tzdata version id: something that
# matches ${VERS_PATTERN}, optionally followed by one more lower case
# letter.  "which" (eg: old, new) is only used in the complaint written
# to standard error when the check fails.
# Returns 0 for an acceptable id, 1 otherwise.
valid_vers()
{
	case "$2" in
	${VERS_PATTERN})
		return 0
		;;
	${VERS_PATTERN}[a-z])
		return 0
		;;
	esac

	printf >&2 '%s: %s\n' \
	    "Bad form for $1 version specifier '$2'" \
	    "should (usually) be 'YYYYx'"
	return 1
}

# Determine the newest tzdata version available on ${DIST_HOST} and
# store it in the global NEWVER.
#
# A directory listing of ${DIST_PATH} saved as ${WORK_PFX}/iana-listing
# is reused when it is non-empty, newer than dist/CVS, and matched by
# "find -mtime -1"; otherwise a fresh listing is fetched with ftp.
# The version is taken from the listing line for the "tzdata-latest"
# symbolic link.  If no valid version results, the listing is removed
# and fail is called (which exits).
#
# The function body is one long list: && and || have equal precedence and
# associate left to right, so the checks that follow the ftp command are
# run after a successful use of the saved listing as well.
get_curvers()
{
	local LF=''
	local LIST=iana-listing
	# sed program (used with -n): on the first line that shows where
	# tzdata-latest points, keep only the link target, drop everything
	# from the first "." onward, strip a leading "releases/tzdata" and,
	# if that prefix was there, print what is left; then quit.
	# All other lines are deleted.
	local SED_SCRIPT='
		/tzdata-latest.*-> /{
			s/^.*-> //
			s/\..*$//
			s;^releases/tzdata;;p
			q
		}
		d'

	test -d "${WORK_PFX}" &&
		test -s "${WORK_PFX}/${LIST}" &&
		test "${WORK_PFX}/${LIST}" -nt dist/CVS &&
		LF=$(find "${WORK_PFX}" -name "${LIST}" -mtime -1 -print) &&
		test -n "${LF}" &&
		NEWVER=$(sed -n < "${LF}" "${SED_SCRIPT}") &&
		valid_vers new "${NEWVER}"				||

	ftp >/dev/null 2>&1 -ia "${DIST_HOST}" <<- EOF &&
					dir ${DIST_PATH} ${WORK_PFX}/${LIST}
					quit
					EOF
		test -s "${WORK_PFX}/${LIST}" &&
		NEWVER=$(sed -n < "${WORK_PFX}/${LIST}" "${SED_SCRIPT}") &&
		valid_vers new "${NEWVER}"				||

	{
		rm -f "${WORK_PFX}/${LIST}"
		fail "Cannot fetch current tzdata version from ${DIST_HOST}"
	}

	printf '%s\n' "Updating from ${OLDVER} to ${NEWVER}"
}

# Work out OLDVER (the version being updated from) and NEWVER (the
# version being updated to) from the command line arguments.
#
#   $1  new version id; when omitted or '' (and NEWVER is not already
#       set) get_curvers is called to look it up
#   $2  old version id; when omitted it is read from ${UPDATE_FROM} if
#       that file exists, otherwise from dist/version
#
# Leaves via usage (exit 2) or fail (exit 1) on bad input, exits 0 when
# both versions are the same, and asks for confirmation when the update
# looks like a reversion (exit 1 if it is not confirmed).
# On success OLDVER has been saved in ${UPDATE_FROM} and 0 is returned.
argparse()
{
	local OVF

	# Look for a remembered old version that is stale (find -mtime +2)
	if OVF=$(find "${WORK_PFX}" -name "${UPDATE_FROM##*/}" -mtime +2 -print)
	then
		# delete anything old
		test -n "${OVF}" && rm -f "${OVF}"
	fi

	# First pass over the arguments: settle OLDVER
	case "$#" in
	0|1)
		# once we have obtained OLDVER once, never guess it again.
		test -f "${UPDATE_FROM}" && OLDVER=$(cat "${UPDATE_FROM}") ||

		OLDVER=$(cat dist/version) || {
			printf >&2 '%s\n'  \
			    "Cannot determine current installed version"  \
			    "Specify it on the command line."  \
			    ""
			usage
		}

		valid_vers old "${OLDVER}" ||
			fail "Calculated bad OLDVER, give as 2nd arg"
		;;

	2)	valid_vers old "$2" && OLDVER="$2" || usage
		;;

	*)	usage
		;;
	esac

	# Second pass: settle NEWVER.  "N:" (argument count followed by an
	# empty first argument) means no new version was given.
	case "$#:$1" in
	0: | 1: | 2: )
		;;
	1:?*|2:?*)	
		valid_vers new "$1" && NEWVER="$1" || usage
		;;
	*)	usage
		;;
	esac

	test -z "${NEWVER}" && get_curvers

	test "${NEWVER}" = "${OLDVER}" && {
		printf '%s\n' \
		     "New and old versions both ${NEWVER}: nothing to do"
		exit 0

	}

	# Remember the starting point, so that later runs do not have to
	# work it out again (see the 0|1 case above).
	printf '%s\n' "${OLDVER}" > "${UPDATE_FROM}" ||
	    fail "Unable to preserve old version ${OLDVER} in ${UPDATE_FROM}"

	# NEWVER is accepted as the later one when it is longer than OLDVER,
	# or when it sorts after OLDVER as a string; anything else is
	# treated as a possible reversion and needs confirmation.
	# NOTE(review): a shorter NEWVER that sorts later (eg: 2018b when
	# OLDVER is 2018aa) passes the string test and is not questioned.
	test "${#NEWVER}" -gt "${#OLDVER}" ||
		test "${NEWVER}" '>' "${OLDVER}" ||
		{
			local reply

			printf '%s\n' \
			    "Update would revert ${OLDVER} to ${NEWVER}"
			read -p "Is reversion intended? " reply
			case "${reply}" in
			[Yy]*)	;;
			*)	printf '%s\n' OK. Aborted.
				# forget the old version saved just above
				rm -f "${UPDATE_FROM}"
				exit 1
				;;
			esac
		}

	return 0
}

# Derive, from OLDVER, NEWVER, WORK_PFX and the DIST_* settings, all the
# global names used by the later steps: CVS tags, download URLs,
# directories, and the files kept in the per-version work directory.
# Finally record the old CVS tag in ${WORK_PFX}/updating_from.
setup_versions()
{
	# CVS tags are the branch tag followed by the upper-cased version id
	OLDVER_UC=$( echo "${OLDVER}" | tr '[a-z]' '[A-Z]' )
	NEWVER_UC=$( echo "${NEWVER}" | tr '[a-z]' '[A-Z]' )
	CVSBRANCHTAG='TZDATA'
	CVSOLDTAG="${CVSBRANCHTAG}${OLDVER_UC}"
	CVSNEWTAG="${CVSBRANCHTAG}${NEWVER_UC}"
	GITHUBTAG=${NEWVER}

	# Where the release, its signature, and the NEWS file come from
	DISTURL="ftp://${DIST_HOST}/${DIST_PATH}/${DIST_FILES}/tzdata${NEWVER}.tar.gz"
	SIGURL="${DISTURL}.asc"
	NEWSURL="https://github.com/eggert/tz/raw/${GITHUBTAG}/NEWS"

	# REPODIR is relative to the NetBSD CVS repo;
	# TZDISTDIR should be .../external/public-domain/tz/dist
	REPODIR='src/external/public-domain/tz/dist'
	TZDISTDIR="$(pwd)/dist"
	WORKDIR="${WORK_PFX}/${NEWVER}"
	EXTRACTDIR="${WORKDIR}/extract"

	# Downloaded files, named after the last component of their URL
	DISTFILE="${WORKDIR}/${DISTURL##*/}"
	SIGFILE="${DISTFILE}.asc"
	NEWSFILE="${WORKDIR}/NEWS"

	# Files generated while working
	PGPVERIFYLOG="${WORKDIR}/pgpverify.log"
	NEWSTRIMFILE="${NEWSFILE}.trimmed"
	IMPORTMSGFILE="${WORKDIR}/import.msg"
	MERGSMSGFILE="${WORKDIR}/merge.msg"

	# Marker files, one for each cvs step that has been completed
	IMPORTDONEFILE="${WORKDIR}/import.done"
	MERGEDONEFILE="${WORKDIR}/merge.done"
	COMMITMERGEDONEFILE="${WORKDIR}/commitmerge.done"

	printf '%s\n' "${CVSOLDTAG}" > "${WORK_PFX}/updating_from"
}

# DOIT command [args...]
#
# Show the command (and the current directory), then ask whether to run
# it.  An answer starting with y runs the command and returns its status;
# an answer starting with n skips the command; an answer starting with q,
# or anything else, returns 1.
DOIT()
{
	local reply

	echo "In directory $(pwd)"
	echo "ABOUT TO DO:" "$(shell_quote "$@")"
	read -p "Really do it? [yes/no/quit] " reply

	case "${reply}" in
	[yY]*)
		echo "REALLY DOING IT NOW..."
		"$@"
		;;
	[nN]*)
		echo "NOT REALLY DOING THE ABOVE COMMAND"
		;;
	[qQ]*)
		echo "Aborting"
		return 1
		;;
	*)
		echo "Huh?"
		return 1
		;;
	esac
}

# shell_quote args...
#
# Write the args on one line, separated by single spaces, each quoted
# where needed so that the line is safe to hand back to the shell.
# Typical use is to collect the output:
#    quotedlist="$(shell_quote args...)"
# and later eval it inside double quotes, like this:
#    eval "set -- $quotedlist"
# or like this:
#    eval "\$command $quotedlist \$filename"
#
# The function body is a subshell, so the LC_COLLATE setting made here
# does not reach the caller.
#
shell_quote()
(
	local word quoted
	local line=''

	# so [a-zA-Z0-9] works in ASCII
	LC_COLLATE=C
	export LC_COLLATE

	for word in "$@"
	do
		case "${word}" in
		*[!-./a-zA-Z0-9]*)
			# Something other than - . / letters and digits is
			# present: single-quote the whole word.
			# Each embedded ' becomes '\'', a ' is put before the
			# first line and after the last line, and then
			# needless '' pairs are dropped: at the very start,
			# at the very end, and inside the '\'''\'' runs
			# produced by adjacent quotes in the input.
			quoted="$(printf '%s\n' "${word}" | \
			    ${SED:-sed} -e "s/'/'\\\\''/g" \
				-e "1s/^/'/" -e "\$s/\$/'/" \
				-e "1s/^''//" -e "\$s/''\$//" \
				-e "s/'''/'/g"
				)"
			;;
		?*)
			# Not empty, and nothing unsafe in it: leave it as
			# it is, which is easier to read.
			quoted=${word}
			;;
		*)
			# The empty string has to be written explicitly.
			quoted="''"
			;;
		esac
		line="${line:+${line} }${quoted}"
	done

	printf '%s\n' "${line}"
)

# Check that the script is being run from the right place: a directory
# that contains CVS/, dist/ (with zone.tab in it) and the tzdata2netbsd
# script itself, and that has no CVS/Tag file (that is, a checkout of
# HEAD rather than of a branch).
# Returns 0 if all is well; otherwise writes a message to standard error
# and returns 1.
validate_pwd()
{
	local P

	# Assigned separately from "local": the status of "local P=$(pwd)"
	# is that of local itself, which would hide a failing pwd.
	P="$(pwd)" || {
		printf >&2 '%s\n' "Cannot determine the current directory"
		return 1
	}

	test -d "${P}" &&
	    test -d "${P}/CVS" &&
	    test -d "${P}/dist" &&
	    test -f "${P}/dist/zone.tab" &&
	    test -f "${P}/tzdata2netbsd" || {
		printf >&2 '%s\n' "Please change to the correct directory"
		return 1
	}

	test -f "${P}/CVS/Tag" && {

		# Here (for local use only) if needed for private branch work
		# insert tests for the value of $(cat "${P}/CVS/Tag") and
		# allow your private branch tag to pass. Eg:

		#	case "$(cat "${P}/CVS/Tag")" in
		#	my-branch-name)	return 0;;
		#	esac

		# Do not commit a version of this script modified that way,
		# (not even on the private branch) - keep it as a local
		# modified file.  (This script will not commit it.)

		printf >&2 '%s\n' \
		    "This script should be run in a checkout of HEAD only"
		return 1
	}

	return 0
}

# Make sure CVSROOT is set: keep a value that is already there,
# otherwise take it from ./CVS/Root.
# Returns 0 with CVSROOT non-empty, or complains on standard error and
# returns 1.
findcvsroot()
{
	[ -n "${CVSROOT}" ] && return 0

	# main runs with "set -e"; a bare assignment whose command
	# substitution fails would end the script right here, before the
	# message below could be printed.
	CVSROOT="$( cat ./CVS/Root )" || CVSROOT=''

	[ -n "${CVSROOT}" ] && return 0
	echo >&2 "Failed to set CVSROOT value"
	return 1
}

# Create ${WORK_PFX} (and any missing parents); calls fail if that
# cannot be done.
mkworkpfx()
{
	if ! mkdir -p "${WORK_PFX}"
	then
		fail "Unable to make missing ${WORK_PFX}"
	fi
}
# Create ${WORKDIR} (and any missing parents); calls fail if that
# cannot be done.
mkworkdir()
{
	if ! mkdir -p "${WORKDIR}"
	then
		fail "Unable to make missing ${WORKDIR}"
	fi
}

# Download whichever of the distribution file, its signature, and the
# NEWS file is not yet present in the work directory.
# A download that fails is reported through fail.
fetch()
{
	if ! [ -f "${DISTFILE}" ]; then
		ftp -o "${DISTFILE}" "${DISTURL}" ||
			fail "fetch of ${DISTFILE} failed"
	fi

	if ! [ -f "${SIGFILE}" ]; then
		ftp -o "${SIGFILE}" "${SIGURL}" ||
			fail "fetch of ${SIGFILE} failed"
	fi

	if ! [ -f "${NEWSFILE}" ]; then
		ftp -o "${NEWSFILE}" "${NEWSURL}" ||
			fail "fetch of ${NEWSFILE} failed"
	fi
}

# Verify the signature of the distribution file with gpg.
# What gpg says (stdout and stderr), followed by a line giving its exit
# status, is shown and also saved in ${PGPVERIFYLOG}.
# Returns 1, naming the first expectation that was not met, unless the
# log has a line matching each of the regexps listed at the end.
checksig()
{
	{
		gpg --verify "${SIGFILE}" "${DISTFILE}"
		echo gpg exit status $?
	} 2>&1 | tee "${PGPVERIFYLOG}"

	# Each line of the here-document is an extended regexp that some
	# whole line of the log has to match.
	while read line
	do
		grep -E -q -e "^${line}\$" "${PGPVERIFYLOG}" && continue

		echo >&2 "Failed to verify signature: ${line}"
		return 1
	done <<'EOF'
gpg: Signature made .* using RSA key ID (62AA7E34|44AD418C)
gpg: Good signature from "Paul Eggert <eggert@cs.ucla.edu>"
gpg exit status 0
EOF
}

# Unpack the distribution file into ${EXTRACTDIR}, unless a zone.tab
# is already present there.
extract()
{
	if ! [ -f "${EXTRACTDIR}/zone.tab" ]
	then
		mkdir -p "${EXTRACTDIR}"
		tar -z -xf "${DISTFILE}" -C "${EXTRACTDIR}"
	fi
}

# Put a copy of the fetched NEWS file into the extracted tree, unless
# there is a NEWS file in it already.
addnews()
{
	if ! [ -f "${EXTRACTDIR}/NEWS" ]
	then
		cp -p "${NEWSFILE}" "${EXTRACTDIR}"/NEWS
	fi
}

# Find the relevant part of the NEWS file for all releases between
# OLDVER and NEWVER, and save them to NEWSTRIMFILE.
#
trimnews()
{
	[ -s "${NEWSTRIMFILE}" ] && return
	awk -v oldver="${OLDVER}" -v newver="${NEWVER}" \
	    '
		BEGIN {inrange = 0}
		/^Release [0-9]+[a-z]+ - .*/ {
			# "Release <version> - <date>"
			# Note: must handle transition from 2018z to 2018aa
			# Assumptions: OLDVER and NEWVER have been sanitized,
			# and format of NEWS file does not alter (and
			# contains valid data)
			inrange = ((length($2) > length(oldver) || \
					$2 > oldver) && \
				(length($2) < newver || $2 <= newver))
		}
		// { if (inrange) print; }
	    ' \
		<"${NEWSFILE}" >"${NEWSTRIMFILE}"
	echo "tzdata-${NEWVER}" > ${TZDISTDIR}/TZDATA_VERSION
}

# Create IMPORTMSGFILE from NEWSTRIMFILE, by ignoring some sections,
# keeping only the first sentence from paragraphs in other sections,
# and changing the format.
#
# The result should be edited by hand before performing a cvs commit.
# A message to that effect is inserted at the beginning of the file.
#
# Nothing is done if IMPORTMSGFILE already exists and is not empty.
#
# What the awk program does with the trimmed NEWS text:
#  - a line starting with "Release" ("Release <version> - <date>") gives
#    a "Summary of changes in tzdata<version> (<date>):" heading;
#  - a "  Changes affecting ..." heading that mentions "time" or "data"
#    opens a section whose paragraphs are summarised: the first line
#    after a blank line gets a bullet, following lines are kept (indented)
#    only until a sentence has been completed;
#  - a heading that mentions "documentation" or "commentary" is itself
#    turned into one bullet, and the paragraphs below it are dropped;
#  - paragraphs below any other "  Changes affecting ..." heading are
#    dropped.
# The program uses gensub(), which is an extension (not in POSIX awk).
#
mkimportmsg()
{
	[ -s "${IMPORTMSGFILE}" ] && return
	# The marker lines come first; editimportmsg and cvsimport look for
	# lines starting with EDIT to tell whether the file was edited.
	{ cat <<EOF
EDIT ME: Edit this file and then delete the lines marked "EDIT ME".
EDIT ME: This file will be used as a log message for the "cvs commit" that
EDIT ME: imports tzdata${NEWVER}.  The initial contents of this file were
EDIT ME: generated from ${NEWSFILE}.
EDIT ME: 
EOF
	awk -v oldver="${OLDVER}" -v newver="${NEWVER}" \
	    -v disturl="${DISTURL}" -v newsurl="${NEWSURL}" \
	    '
		BEGIN {
			bullet = "  * ";
			indent = "    ";
			blankline = 0;
			goodsection = 0;
			havesentence = 0;
			print "Import tzdata"newver" from "disturl;
			#print "and NEWS file from "newsurl;
		}
		/^Release/ {
			# "Release <version> - <date>"
			ver = $2;
			date = gensub(".* - ", "", 1, $0);
			print "";
			print "Summary of changes in tzdata"ver \
				" ("date"):";
		}
		/^$/ { blankline = 1; havesentence = 0; }
		/^  Changes affecting/ { goodsection = 0; }
		/^  Changes affecting.*time/ { goodsection = 1; }
		/^  Changes affecting.*data/ { goodsection = 1; }
		/^  Changes affecting.*documentation/ || \
		/^  Changes affecting.*commentary/ {
			t = gensub("^ *", "", 1, $0);
			t = gensub("\\.*$", ".", 1, t);
			print bullet t;
			goodsection = 0;
		}
		/^    .*/ && goodsection {
			# In a paragraph in a "good" section.
			# Ignore leading spaces, and ignore anything
			# after the first sentence.
			# First line of paragraph gets a bullet.
			t = gensub("^ *", "", 1, $0);
			t = gensub("\\. .*", ".", 1, t);
			if (blankline) print bullet t;
			else if (! havesentence) print indent t;
			havesentence = (havesentence || (t ~ "\\.$"));
		}
		/./ { blankline = 0; }
	    ' \
		<"${NEWSTRIMFILE}"
	} >"${IMPORTMSGFILE}"
}

# Let the user edit the import log message, unless that has been done
# already (the file is not empty and no line of it starts with EDIT).
editimportmsg()
{
	if [ -s "${IMPORTMSGFILE}" ]
	then
		# no marker line left: file has already been edited
		grep -q '^EDIT' "${IMPORTMSGFILE}" || return 0
	fi

	# NEWSFILE is handed to the editor as well as IMPORTMSGFILE, so
	# that it is easy to consult the former while editing the latter.
	${EDITOR} "${IMPORTMSGFILE}" "${NEWSFILE}"
}

# Import the extracted distribution into CVS (through DOIT, so only after
# confirmation), using the edited IMPORTMSGFILE as the log message.
# IMPORTDONEFILE is created once that has succeeded; if it already exists
# nothing is done.  Returns 1 without importing when the message file is
# missing, empty, or still has lines starting with EDIT.
cvsimport()
{
	if [ -e "${IMPORTDONEFILE}" ]; then
		printf >&2 '%s\n' "The CVS import has already been performed."
		return 0
	fi

	if [ -s "${IMPORTMSGFILE}" ] && ! grep -q '^EDIT' "${IMPORTMSGFILE}"
	then
		: # the message is there, and has been edited
	else
		printf >&2 '%s\n' \
		    "The message file ${IMPORTMSGFILE}" \
		    "has not been properly edited." \
		    "Not performing cvs import."
		return 1
	fi

	# The import is run from inside the extracted tree; the subshell
	# keeps that change of directory away from the rest of the script.
	(
		cd "${EXTRACTDIR}" &&
		    DOIT cvs -d "${CVSROOT}" import \
			-m "$(cat "${IMPORTMSGFILE}")" \
			"${REPODIR}" "${CVSBRANCHTAG}" "${CVSNEWTAG}"
	) && touch "${IMPORTDONEFILE}"
}

# Merge the changes between the old and the new import into the checked
# out sources (through DOIT, so only after confirmation).
# Changes directory to ${TZDISTDIR} (exits 1 if that fails).
# MERGEDONEFILE is created after a successful merge; if it exists
# already, the merge is not repeated.
cvsmerge()
{

	cd "${TZDISTDIR}" || exit 1

	if ! [ -e "${MERGEDONEFILE}" ]
	then
		DOIT cvs -d "${CVSROOT}" update \
		    -j"${CVSOLDTAG}" -j"${CVSNEWTAG}" &&
			touch "${MERGEDONEFILE}"
		return
	fi

	printf >&2 '%s\n' "The CVS merge has already been performed."
	return 0
}

# Look for files in ${TZDISTDIR} that seem to contain merge conflict
# markers.  If there are any, their names are listed, the user is told
# to resolve the conflicts, and 1 is returned; otherwise 0 is returned.
# Changes directory to ${TZDISTDIR} (exits 1 if that fails).
resolveconflicts()
{
	cd "${TZDISTDIR}" || exit 1

	# grep -l prints the name of each file with a matching line, and
	# fails when there is no such file.
	grep -l '^[<=>][<=>][<=>]' * || return 0

	printf '%s\n' \
	    "There appear to be conflicts in the files listed above." \
	    "Resolve conflicts, then re-run this script."
	return 1
}

# Commit the result of the merge (through DOIT, so only after
# confirmation).  Refuses, returning 1, while any file in ${TZDISTDIR}
# still seems to contain conflict markers.  COMMITMERGEDONEFILE is
# created after a successful commit; if it exists already, the commit is
# not repeated.  Changes directory to ${TZDISTDIR} (exits 1 on failure).
cvscommitmerge()
{
	cd "${TZDISTDIR}" || exit 1

	# grep -l lists (on standard output) the files that have a match
	grep -l '^[<=>][<=>][<=>]' * && {
		printf >&2 '%s\n' \
		    "There still appear to be conflicts in the files listed above." \
		    "Not performing cvs commit."
		return 1
	}

	[ -e "${COMMITMERGEDONEFILE}" ] && {
		printf >&2 '%s\n' \
		    "The CVS commmit (of the merge result) has already been performed."
		return 0
	}

	DOIT cvs -d "${CVSROOT}" commit -m "Merge tzdata${NEWVER}" &&
		touch "${COMMITMERGEDONEFILE}"
}

# Print the list of things that remain to be done by hand.
extra()
{
	printf '%s\n' \
	    "Also do the following:" \
	    " * Edit and commit  src/doc/3RDPARTY" \
	    " * Edit and commit  src/doc/CHANGES" \
	    " * Edit and commit  src/distrib/sets/lists/base/mi" \
	    " *      if the set of installed files altered." \
	    " * Submit pullup requests for all active release branches." \
	    " * rm -rf ${WORK_PFX}  (optional)" \
	    " * Verify that" \
	    "  ${UPDATE_FROM}" \
	    " * no longer exists."
}

# Run the whole update, one step after the other.  "set -e" makes the
# script stop at the first step that fails.  The arguments (the optional
# new and old version ids) are handed to argparse.
main()
{
	local step

	set -e

	# Checks and preparation that do not need the versions
	for step in validate_pwd findcvsroot mkworkpfx
	do
		"${step}"
	done

	argparse "$@"

	# Everything else, in the order in which it has to happen
	for step in setup_versions mkworkdir fetch checksig extract \
	    addnews trimnews mkimportmsg editimportmsg cvsimport \
	    cvsmerge resolveconflicts cvscommitmerge
	do
		"${step}"
	done

	# All done: the remembered old version is no longer wanted
	rm -f "${UPDATE_FROM}"
	extra
}

# Everything above only defines things; this starts the actual work,
# passing on the command line arguments.
main "$@"