BaseTools/Scripts: Add ConvertUni.py script

This script uses python codecs to convert .uni string files between
the utf-16 and utf-8 formats.

The advantages of utf-8 data:
 * Generally smaller files
 * More commonly supported by editors
 * Not treated as binary data in patch files

The script was tested on MdePkg with both python 2.7 and python 3.4.
It was able to convert all MdePkg .uni files between utf-8 and utf-16
multiple times, always producing the same files for each format.

v2:
 * Rename ConvertUtf16ToUtf8.py to ConvertUni.py
 * Also support utf-8 to utf-16 conversion (with --utf-16)

Contributed-under: TianoCore Contribution Agreement 1.0
Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Jaben Carsey <jaben.carsey@intel.com>
Reviewed-by: Michael Kinney <michael.d.kinney@intel.com>
Reviewed-by: Yonghong Zhu <yonghong.zhu@intel.com>

git-svn-id: https://svn.code.sf.net/p/edk2/code/trunk/edk2@19247 6f19259b-4bc3-4df7-8a09-765794883524
This commit is contained in:
Jordan Justen 2015-12-15 04:50:50 +00:00 committed by jljusten
parent 3b567f08f9
commit 3f45c1375d
1 changed files with 137 additions and 0 deletions

137
BaseTools/Scripts/ConvertUni.py Executable file
View File

@ -0,0 +1,137 @@
## @file
# Convert .uni string files between the utf-16 and utf-8 formats
#
# Copyright (c) 2015, Intel Corporation. All rights reserved.<BR>
#
# This program and the accompanying materials are licensed and made
# available under the terms and conditions of the BSD License which
# accompanies this distribution. The full text of the license may be
# found at http://opensource.org/licenses/bsd-license.php
#
# THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS"
# BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER
# EXPRESS OR IMPLIED.
#
from __future__ import print_function
VersionNumber = '0.1'
__copyright__ = "Copyright (c) 2015, Intel Corporation All rights reserved."
import argparse
import codecs
import os
import sys
try:
from io import StringIO
except ImportError:
from StringIO import StringIO
class ConvertOneArg:
    """Convert the .uni file(s) named by one command line argument.

    The argument may be a single file or a directory.  A directory is
    walked with os.walk and every file whose name ends in '.uni' is
    converted in place.  The direction is selected by ``utf8``: True
    converts utf-16 to utf-8, False converts utf-8 to utf-16.

    All work happens in the constructor.  Afterwards ``ok`` is True when
    every file was converted (or was already in the requested encoding),
    and False when the path does not exist or a file could not be decoded.
    """

    def __init__(self, utf8, source):
        """Convert ``source`` and record the overall result in ``self.ok``.

        utf8   -- True to produce utf-8 files, False to produce utf-16.
        source -- path of a .uni file, or of a directory to search.
        """
        self.utf8 = utf8
        self.source = source
        self.ok = True

        if not os.path.exists(source):
            # Report the reason; otherwise the only symptom is the exit code.
            print("%s: no such file or directory" % source, file=sys.stderr)
            self.ok = False
        elif os.path.isdir(source):
            self.ok = self._convert_tree(source)
        else:
            self.ok = self.convert_one_file(source)

    def _convert_tree(self, top):
        """Convert every '.uni' file below ``top``, stopping at the first failure."""
        for (root, _dirs, files) in os.walk(top):
            for filename in files:
                if not filename.endswith('.uni'):
                    continue
                if not self.convert_one_file(os.path.join(root, filename)):
                    return False
        return True

    def convert_one_file(self, source):
        """Convert one file in place.

        Returns True when the file was rewritten in the new encoding or was
        already in it (judged by the presence of a UTF-16 byte order mark).
        Returns False, leaving the file untouched, when its content is not
        valid in the encoding it is expected to be in.
        """
        if self.utf8:
            new_enc, old_enc, decoder = 'utf-8', 'utf-16', 'utf-16'
        else:
            # 'utf-8-sig' decodes plain utf-8 and also strips an optional
            # UTF-8 BOM, so the BOM is not carried into the utf-16 output
            # as a stray U+FEFF character after the UTF-16 BOM.
            new_enc, old_enc, decoder = 'utf-16', 'utf-8', 'utf-8-sig'

        #
        # Read file
        #
        with open(source, mode='rb') as f:
            file_content = f.read()

        #
        # Detect UTF-16 Byte Order Mark at beginning of file.
        #
        bom = file_content.startswith((codecs.BOM_UTF16_BE,
                                       codecs.BOM_UTF16_LE))
        if bom != self.utf8:
            print("%s: already %s" % (source, new_enc))
            return True

        #
        # Decode the old data and encode the new data.  Errors are strict:
        # the result overwrites the original file, so silently dropping
        # undecodable bytes would destroy data.
        #
        try:
            new_content = file_content.decode(decoder).encode(new_enc)
        except UnicodeError as err:
            print("%s: not valid %s, file left unchanged (%s)" %
                  (source, old_enc, err), file=sys.stderr)
            return False

        #
        # Write converted data back to file
        #
        with open(source, mode='wb') as f:
            f.write(new_content)

        print(source + ": converted, size", len(file_content), '=>', len(new_content))
        return True
class ConvertUniApp:
    """Command line driver: converts .uni files between utf-16 and utf-8.

    Constructing the object parses the command line, processes every
    'source' argument in order, and stores the exit status in ``retval``
    (0 when every argument succeeded, -1 otherwise).
    """

    def __init__(self):
        self.parse_options()
        self.ok = True
        for item in self.args.source:
            self.process_one_arg(item)
        self.retval = 0 if self.ok else -1

    def process_one_arg(self, arg):
        """Convert one file or directory and fold its result into self.ok."""
        # Build the converter first so the argument is always processed,
        # even when an earlier argument has already failed.
        converted = ConvertOneArg(self.utf8, arg)
        self.ok = self.ok & converted.ok

    def parse_options(self):
        """Parse sys.argv into self.args and derive self.utf8 from it."""
        version_text = '%(prog)s ' + VersionNumber
        direction_flags = (
            ("--utf-8", "Convert from utf-16 to utf-8 [default]"),
            ("--utf-16", "Convert from utf-8 to utf-16"),
        )

        parser = argparse.ArgumentParser(description=__copyright__)
        parser.add_argument('--version', action='version',
                            version=version_text)
        parser.add_argument('source', nargs='+',
                            help='[uni file | directory]')

        # The two direction switches cannot be combined.
        direction = parser.add_mutually_exclusive_group()
        for flag, help_text in direction_flags:
            direction.add_argument(flag, action="store_true", help=help_text)

        self.args = parser.parse_args()
        # utf-8 output is the default; only --utf-16 selects the other way.
        self.utf8 = not self.args.utf_16
# Script entry point: run the converter and exit with its status code.
if __name__ == "__main__":
    app = ConvertUniApp()
    sys.exit(app.retval)