1#!/bin/sh
2# Copyright (C) 2016 and later: Unicode, Inc. and others.
3# License & terms of use: http://www.unicode.org/copyright.html
4# Copyright (C) 2001-2010, International Business Machines
5#   Corporation and others.  All Rights Reserved.
6#
7# Authors:
8# Ami Fixler
9# Steven R. Loomis
10# George Rhoten
11#
12# Shell script to unpax ICU and convert the files to an EBCDIC codepage.
13# After extracting to EBCDIC, binary files are re-extracted without the
14# EBCDIC conversion, thus restoring them to original codepage.
15#
16# Set the following variable to the list of binary file suffixes (extensions)
17
# ICU-specific binary files
#****************************************************************************
# Suffixes (extensions) of archive entries that are treated as binary.  The
# list is assembled in two steps only to keep the lines short; the result is
# one space-separated string.
binary_suffixes='brk BRK bin BIN res RES cnv CNV dat DAT icu ICU'
binary_suffixes="$binary_suffixes spp SPP xml XML nrm NRM utf16be UTF16BE"

# Space-separated glob patterns naming the ICU data directories.  Each
# backslash-newline inside the double quotes is a line continuation, and no
# pathname expansion takes place in an assignment, so the patterns are stored
# unexpanded.
# NOTE(review): not referenced in the visible part of this script - confirm
# whether anything still uses it.
data_files="icu/source/data/brkitr/* \
icu/source/data/locales/* \
icu/source/data/coll/* \
icu/source/data/rbnf/* \
icu/source/data/mappings/* \
icu/source/data/misc/* \
icu/source/data/translit/* \
icu/source/data/unidata/* \
icu/source/test/testdata/*"
22
#****************************************************************************
# Function:     usage
# Description:  Writes a one-line hint showing how to invoke this script,
#               with $0 standing in for the script name
# Input:        None
# Output:       The hint, on standard output
#****************************************************************************
usage()
{
    printf '%s\n' "Enter archive filename as a parameter: $0 icu-archive.tar"
}
33
#****************************************************************************
# First make sure we have at least one argument and that it names a file we
# can read.  Either failure prints the usage hint and ends the script with a
# non-zero status so that a caller can detect the error.
#****************************************************************************

# check for no arguments
if [ $# -eq 0 ]; then
    usage
    exit 1
fi
tar_file=$1
# Quoted on purpose: unquoted, an empty argument collapses the test to
# "[ ! -r ]", which is false and silently skips this check, and a path
# containing blanks would be split into several words.
if [ ! -r "$tar_file" ]; then
    echo "$tar_file does not exist or cannot be read."
    usage
    exit 1
fi
49
echo ""
echo "Extracting from $tar_file ..."
echo ""
# Extract every entry while converting it from ISO8859-1 to EBCDIC (IBM-1047).
# "-o setfiletag" is a z/OS pax option; presumably it tags each extracted file
# with its codeset - confirm against the z/OS pax documentation.
# The archive name is quoted so a path containing blanks stays one argument.
pax -rvf "$tar_file" -o to=IBM-1047,from=ISO8859-1 -o setfiletag
55
#****************************************************************************
# Every file was just extracted as EBCDIC (IBM-1047).  Work out which text
# files must instead keep their original ISO8859-1 (CCSID 819) bytes and add
# them to the binary file list.  That list is processed further down to
# restore those files without conversion.
#   - ASCII mode:  all .txt/.ucm files are treated as binary (by suffix).
#   - Otherwise:   use the prepared list icu/as_is/bomlist.txt when present,
#                  else inspect the first bytes of every .txt file under
#                  ./icu for a converted UTF-8 BOM.
#****************************************************************************

#****************************************************************************
# Function:     ascii_strings_enabled
# Description:  Tests whether ICU_ENABLE_ASCII_STRINGS is set to 1.  An unset
#               or empty variable counts as 0, so the numeric comparison
#               never receives an empty operand (which would make the test
#               utility print an error).
# Input:        None (reads ICU_ENABLE_ASCII_STRINGS)
# Output:       Returns 0 when ASCII mode is requested, non-zero otherwise
#****************************************************************************
ascii_strings_enabled()
{
    [ "${ICU_ENABLE_ASCII_STRINGS:-0}" -eq 1 ]
}

#****************************************************************************
# Function:     has_converted_utf8_bom
# Description:  Dumps the first three bytes of a file as hex and compares
#               them with "57 8b ab", the byte sequence this script treats
#               as a UTF-8 BOM after the EBCDIC conversion.
# Input:        $1 - path of the file to inspect
# Output:       Returns 0 when the file starts with those bytes, non-zero
#               otherwise.  Sets the global variable bom8 as a side effect.
#****************************************************************************
has_converted_utf8_bom()
{
    # od prints an offset column followed by the bytes; squeeze the blanks,
    # keep fields 2-4 (the three bytes) and normalise the hex to lower case.
    bom8=$(head -c 3 "$1" |
        od -t x1 |
        head -n 1 |
        sed 's/  */ /g' |
        cut -f2-4 -d ' ' |
        tr 'A-Z' 'a-z')
    [ "$bom8" = "57 8b ab" ]
}

echo ""
echo "Determining binary files by BOM ..."
echo ""

# When building in ASCII mode, text files are converted as ASCII
if ascii_strings_enabled; then
    binary_suffixes="$binary_suffixes txt TXT ucm UCM"
elif [ -f icu/as_is/bomlist.txt ]; then
    echo 'Using icu/as_is/bomlist.txt'
    binary_files=$(cat icu/as_is/bomlist.txt)
else
    echo "Analyzing files .."
    # The find output is split on whitespace on purpose: $binary_files is a
    # whitespace-separated list, so names are handled one word at a time.
    for file in $(find ./icu \( -name \*.txt -print \) | sed -e 's/^\.\///'); do
        # Find a converted UTF-8 BOM
        if has_converted_utf8_bom "$file"; then
            binary_files="$binary_files $file"
        fi
    done
fi
89
#****************************************************************************
# Function:     has_binary_suffix
# Description:  Tests whether an archive entry is a file whose name ends in
#               one of the suffixes listed in $binary_suffixes.  The suffix
#               is everything after the LAST dot, so names containing any
#               number of dots (e.g. NormalizationTest-3.2.0.txt) are
#               handled without repeated stripping.
# Input:        $1 - entry name as listed by pax
# Output:       Returns 0 on a match, 1 otherwise.  Sets the global
#               variables suf and j as a side effect.
#****************************************************************************
has_binary_suffix()
{
    case $1 in
    */) return 1 ;;     # then this entry is a directory
    *.*) ;;             # then this entry has a dot in the filename
    *) return 1 ;;      # then this entry does not have a dot in it
    esac
    # Computed once per entry instead of once per candidate suffix.
    suf=${1##*.}
    for j in $binary_suffixes
    do
        if [ "$suf" = "$j" ]; then
            return 0
        fi
    done
    return 1
}

echo "Looking for binary suffixes.."

# The pax listing is split on whitespace on purpose: $binary_files is a
# whitespace-separated list.  The archive name itself is quoted so that a
# path containing blanks is passed to pax as a single argument.
for i in $(pax -f "$tar_file" 2>/dev/null)
do
    if has_binary_suffix "$i"; then
        binary_files="$binary_files $i"
    fi
done
114
#****************************************************************************
# Function:     restore_binary_files
# Description:  When $binary_files is non-empty, deletes the files it names,
#               extracts them again from $tar_file without the codepage
#               conversion options, and tags them as binary with chtag.
# Input:        None (reads $tar_file and $binary_files)
# Output:       Progress messages on standard output
#****************************************************************************
restore_binary_files()
{
    # now see if a re-extract of binary files is necessary
    if [ -z "$binary_files" ]; then
        echo ""
        echo "There are no binary files to restore."
    else
        echo "Restoring binary files ..."
        echo ""
        # $binary_files stays unquoted: it is a whitespace-separated list and
        # every word must become its own argument.  The archive name is
        # quoted so that a path containing blanks is passed as one argument.
        rm $binary_files
        pax -rvf "$tar_file" $binary_files
        # Tag the files as binary for proper interaction with the
        # _BPXK_AUTOCVT environment setting
        chtag -b $binary_files
    fi
}

restore_binary_files
echo ""
echo "$0 has completed extracting ICU from $tar_file."
130