Fix Segmenter tests: Check "input" in segment-*-iterable.js (#2714)

* Check "input" in segment-*-iterable.js

Remove dup test in segment-*.js

* Fix segment-tostring.js
This commit is contained in:
Frank Yung-Fong Tang 2020-07-29 12:00:35 -07:00 committed by GitHub
parent 80b54781b0
commit 0f5a274aad
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 8 additions and 103 deletions

View File

@ -35,6 +35,8 @@ for (const text of [
assert.sameValue(false, v.hasOwnProperty("isWordLike"));
assert.sameValue("string", typeof v.segment);
assert(v.segment.length > 0);
assert.sameValue("string", typeof v.input);
assert.sameValue(text, v.input);
segments.push(v.segment);
}
assert.sameValue(text, segments.join(''));

View File

@ -1,34 +0,0 @@
// Copyright 2018 the V8 project authors. All rights reserved.
// This code is governed by the BSD license found in the LICENSE file.
/*---
esid: sec-Intl.Segmenter.prototype.segment
description: Verifies the behavior for the "segment" function of the Segmenter prototype object.
info: |
Intl.Segmenter.prototype.segment( string )
features: [Intl.Segmenter]
---*/
const seg = new Intl.Segmenter([], {granularity: "grapheme"})
for (const text of [
"Hello world!", // English
" Hello world! ", // English with space before/after
" Hello world? Foo bar!", // English
"Jedovatou mambu objevila žena v zahrádkářské kolonii.", // Czech
"Việt Nam: Nhất thể hóa sẽ khác Trung Quốc?", // Vietnamese
"Σοβαρές ενστάσεις Κομισιόν για τον προϋπολογισμό της Ιταλίας", // Greek
"Решение Индии о покупке российских С-400 расценили как вызов США", // Russian
"הרופא שהציל נשים והנערה ששועבדה ע", // Hebrew,
"ترامب للملك سلمان: أنا جاد للغاية.. عليك دفع المزيد", // Arabic
"भारत की एस 400 मिसाइल के मुकाबले पाक की थाड, जानें कौन कितना ताकतवर", // Hindi
"ரெட் அலர்ட் எச்சரிக்கை; புதுச்சேரியில் நாளை அரசு விடுமுறை!", // Tamil
"'ఉత్తర్వులు అందే వరకు ఓటర్ల తుది జాబితాను వెబ్‌సైట్లో పెట్టవద్దు'", // Telugu
"台北》抹黑柯P失敗朱學恒酸姚文智氣pupu嗆大老闆", // Chinese
"วัดไทรตีระฆังเบาลงช่วงเข้าพรรษา เจ้าอาวาสเผยคนร้องเรียนรับผลกรรมแล้ว", // Thai
"九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
"법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
]) {
const iter = seg.segment(text);
assert.sameValue(undefined, iter.breakType);
assert.sameValue(0, iter.index);
}

View File

@ -35,6 +35,8 @@ for (const text of [
assert.sameValue(false, v.hasOwnProperty("isWordLike"));
assert.sameValue("string", typeof v.segment);
assert(v.segment.length > 0);
assert.sameValue("string", typeof v.input);
assert.sameValue(text, v.input);
segments.push(v.segment);
}
assert.sameValue(text, segments.join(''));

View File

@ -1,34 +0,0 @@
// Copyright 2018 the V8 project authors. All rights reserved.
// This code is governed by the BSD license found in the LICENSE file.
/*---
esid: sec-Intl.Segmenter.prototype.segment
description: Verifies the behavior for the "segment" function of the Segmenter prototype object.
info: |
Intl.Segmenter.prototype.segment( string )
features: [Intl.Segmenter]
---*/
const seg = new Intl.Segmenter([], {granularity: "sentence"})
for (const text of [
"Hello world!", // English
" Hello world! ", // English with space before/after
" Hello world? Foo bar!", // English
"Jedovatou mambu objevila žena v zahrádkářské kolonii.", // Czech
"Việt Nam: Nhất thể hóa sẽ khác Trung Quốc?", // Vietnamese
"Σοβαρές ενστάσεις Κομισιόν για τον προϋπολογισμό της Ιταλίας", // Greek
"Решение Индии о покупке российских С-400 расценили как вызов США", // Russian
"הרופא שהציל נשים והנערה ששועבדה ע", // Hebrew,
"ترامب للملك سلمان: أنا جاد للغاية.. عليك دفع المزيد", // Arabic
"भारत की एस 400 मिसाइल के मुकाबले पाक की थाड, जानें कौन कितना ताकतवर", // Hindi
"ரெட் அலர்ட் எச்சரிக்கை; புதுச்சேரியில் நாளை அரசு விடுமுறை!", // Tamil
"'ఉత్తర్వులు అందే వరకు ఓటర్ల తుది జాబితాను వెబ్‌సైట్లో పెట్టవద్దు'", // Telugu
"台北》抹黑柯P失敗朱學恒酸姚文智氣pupu嗆大老闆", // Chinese
"วัดไทรตีระฆังเบาลงช่วงเข้าพรรษา เจ้าอาวาสเผยคนร้องเรียนรับผลกรรมแล้ว", // Thai
"九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
"법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
]) {
const iter = seg.segment(text);
assert.sameValue(undefined, iter.breakType);
assert.sameValue(0, iter.index);
}

View File

@ -26,7 +26,8 @@ const tests = [
const segmenter = new Intl.Segmenter("en", { "granularity": "word" });
for (const [args, expected] of tests) {
const segments = segmenter.segment(...args);
assert.sameValue(segments.string, expected, `Expected segment "${expected}", found "${segments.segment}" for arguments ${args}`);
const actual = [...segments][0].segment;
assert.sameValue(actual, expected, `Expected segment "${expected}", found "${actual}" for arguments ${args}`);
}
const symbol = Symbol();

View File

@ -35,6 +35,8 @@ for (const text of [
assert.sameValue(true, v.hasOwnProperty("isWordLike"));
assert.sameValue("string", typeof v.segment);
assert(v.segment.length > 0);
assert.sameValue("string", typeof v.input);
assert.sameValue(text, v.input);
segments.push(v.segment);
}
assert.sameValue(text, segments.join(''));

View File

@ -1,34 +0,0 @@
// Copyright 2018 the V8 project authors. All rights reserved.
// This code is governed by the BSD license found in the LICENSE file.
/*---
esid: sec-Intl.Segmenter.prototype.segment
description: Verifies the behavior for the "segment" function of the Segmenter prototype object.
info: |
Intl.Segmenter.prototype.segment( string )
features: [Intl.Segmenter]
---*/
const seg = new Intl.Segmenter([], {granularity: "word"})
for (const text of [
"Hello world!", // English
" Hello world! ", // English with space before/after
" Hello world? Foo bar!", // English
"Jedovatou mambu objevila žena v zahrádkářské kolonii.", // Czech
"Việt Nam: Nhất thể hóa sẽ khác Trung Quốc?", // Vietnamese
"Σοβαρές ενστάσεις Κομισιόν για τον προϋπολογισμό της Ιταλίας", // Greek
"Решение Индии о покупке российских С-400 расценили как вызов США", // Russian
"הרופא שהציל נשים והנערה ששועבדה ע", // Hebrew,
"ترامب للملك سلمان: أنا جاد للغاية.. عليك دفع المزيد", // Arabic
"भारत की एस 400 मिसाइल के मुकाबले पाक की थाड, जानें कौन कितना ताकतवर", // Hindi
"ரெட் அலர்ட் எச்சரிக்கை; புதுச்சேரியில் நாளை அரசு விடுமுறை!", // Tamil
"'ఉత్తర్వులు అందే వరకు ఓటర్ల తుది జాబితాను వెబ్‌సైట్లో పెట్టవద్దు'", // Telugu
"台北》抹黑柯P失敗朱學恒酸姚文智氣pupu嗆大老闆", // Chinese
"วัดไทรตีระฆังเบาลงช่วงเข้าพรรษา เจ้าอาวาสเผยคนร้องเรียนรับผลกรรมแล้ว", // Thai
"九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
"법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
]) {
const iter = seg.segment(text);
assert.sameValue(undefined, iter.breakType);
assert.sameValue(0, iter.index);
}