audk/BaseTools/Tests/CheckUnicodeSourceFiles.py
Michael D Kinney 2e351cbe8e BaseTools: Replace BSD License with BSD+Patent License
https://bugzilla.tianocore.org/show_bug.cgi?id=1373

Replace BSD 2-Clause License with BSD+Patent License.  This change is
based on the following emails:

  https://lists.01.org/pipermail/edk2-devel/2019-February/036260.html
  https://lists.01.org/pipermail/edk2-devel/2018-October/030385.html

RFCs with detailed process for the license change:

  V3: https://lists.01.org/pipermail/edk2-devel/2019-March/038116.html
  V2: https://lists.01.org/pipermail/edk2-devel/2019-March/037669.html
  V1: https://lists.01.org/pipermail/edk2-devel/2019-March/037500.html

Contributed-under: TianoCore Contribution Agreement 1.1
Signed-off-by: Michael D Kinney <michael.d.kinney@intel.com>
Reviewed-by: Bob Feng <bob.c.feng@intel.com>
2019-04-09 09:10:20 -07:00

176 lines
5.6 KiB
Python

## @file
# Unit tests for AutoGen.UniClassObject
#
# Copyright (c) 2015, Intel Corporation. All rights reserved.<BR>
#
# SPDX-License-Identifier: BSD-2-Clause-Patent
#
##
# Import Modules
#
import os
import unittest
import codecs
import TestTools
from Common.Misc import PathClass
import AutoGen.UniClassObject as BtUni
from Common import EdkLogger
EdkLogger.InitializeForUnitTest()
class Tests(TestTools.BaseToolsTest):
SampleData = u'''
#langdef en-US "English"
#string STR_A #language en-US "STR_A for en-US"
'''
def EncodeToFile(self, encoding, string=None):
if string is None:
string = self.SampleData
if encoding is not None:
data = codecs.encode(string, encoding)
else:
data = string
path = 'input.uni'
self.WriteTmpFile(path, data)
return PathClass(self.GetTmpFilePath(path))
def ErrorFailure(self, error, encoding, shouldPass):
msg = error + ' should '
if shouldPass:
msg += 'not '
msg += 'be generated for '
msg += '%s data in a .uni file' % encoding
self.fail(msg)
def UnicodeErrorFailure(self, encoding, shouldPass):
self.ErrorFailure('UnicodeError', encoding, shouldPass)
def EdkErrorFailure(self, encoding, shouldPass):
self.ErrorFailure('EdkLogger.FatalError', encoding, shouldPass)
def CheckFile(self, encoding, shouldPass, string=None):
path = self.EncodeToFile(encoding, string)
try:
BtUni.UniFileClassObject([path])
if shouldPass:
return
except UnicodeError:
if not shouldPass:
return
else:
self.UnicodeErrorFailure(encoding, shouldPass)
except EdkLogger.FatalError:
if not shouldPass:
return
else:
self.EdkErrorFailure(encoding, shouldPass)
except Exception:
pass
self.EdkErrorFailure(encoding, shouldPass)
def testUtf16InUniFile(self):
self.CheckFile('utf_16', shouldPass=True)
def testSupplementaryPlaneUnicodeCharInUtf16File(self):
#
# Supplementary Plane characters can exist in UTF-16 files,
# but they are not valid UCS-2 characters.
#
# This test makes sure that BaseTools rejects these characters
# if seen in a .uni file.
#
data = u'''
#langdef en-US "English"
#string STR_A #language en-US "CodePoint (\U00010300) > 0xFFFF"
'''
self.CheckFile('utf_16', shouldPass=False, string=data)
def testSurrogatePairUnicodeCharInUtf16File(self):
#
# Surrogate Pair code points are used in UTF-16 files to
# encode the Supplementary Plane characters. But, a Surrogate
# Pair code point which is not followed by another Surrogate
# Pair code point might be interpreted as a single code point
# with the Surrogate Pair code point.
#
# This test makes sure that BaseTools rejects these characters
# if seen in a .uni file.
#
data = codecs.BOM_UTF16_LE + b'//\x01\xd8 '
self.CheckFile(encoding=None, shouldPass=False, string=data)
def testValidUtf8File(self):
self.CheckFile(encoding='utf_8', shouldPass=True)
def testValidUtf8FileWithBom(self):
#
# Same test as testValidUtf8File, but add the UTF-8 BOM
#
data = codecs.BOM_UTF8 + codecs.encode(self.SampleData, 'utf_8')
self.CheckFile(encoding=None, shouldPass=True, string=data)
def test32bitUnicodeCharInUtf8File(self):
data = u'''
#langdef en-US "English"
#string STR_A #language en-US "CodePoint (\U00010300) > 0xFFFF"
'''
self.CheckFile('utf_16', shouldPass=False, string=data)
def test32bitUnicodeCharInUtf8File(self):
data = u'''
#langdef en-US "English"
#string STR_A #language en-US "CodePoint (\U00010300) > 0xFFFF"
'''
self.CheckFile('utf_8', shouldPass=False, string=data)
def test32bitUnicodeCharInUtf8Comment(self):
data = u'''
// Even in comments, we reject non-UCS-2 chars: \U00010300
#langdef en-US "English"
#string STR_A #language en-US "A"
'''
self.CheckFile('utf_8', shouldPass=False, string=data)
def testSurrogatePairUnicodeCharInUtf8File(self):
#
# Surrogate Pair code points are used in UTF-16 files to
# encode the Supplementary Plane characters. In UTF-8, it is
# trivial to encode these code points, but they are not valid
# code points for characters, since they are reserved for the
# UTF-16 Surrogate Pairs.
#
# This test makes sure that BaseTools rejects these characters
# if seen in a .uni file.
#
data = b'\xed\xa0\x81'
self.CheckFile(encoding=None, shouldPass=False, string=data)
def testSurrogatePairUnicodeCharInUtf8FileWithBom(self):
#
# Same test as testSurrogatePairUnicodeCharInUtf8File, but add
# the UTF-8 BOM
#
data = codecs.BOM_UTF8 + b'\xed\xa0\x81'
self.CheckFile(encoding=None, shouldPass=False, string=data)
TheTestSuite = TestTools.MakeTheTestSuite(locals())
if __name__ == '__main__':
allTests = TheTestSuite()
unittest.TextTestRunner().run(allTests)