diff options
author | Torne (Richard Coles) <torne@google.com> | 2014-11-27 15:41:26 +0000 |
---|---|---|
committer | Torne (Richard Coles) <torne@google.com> | 2014-11-27 15:41:26 +0000 |
commit | e318d1364cbed337f3c792dc0727677bf0886c77 (patch) | |
tree | 02011651eea5971e03c08055bf301d5307eb18af | |
parent | cf50e1e1bced13a28c8f78c0c2eed5f6ce5713f9 (diff) | |
parent | 6242e2fbb36f486f2c0addd1c3cef67fc4ed33fb (diff) | |
download | icu-e318d1364cbed337f3c792dc0727677bf0886c77.tar.gz |
Merge third_party/icu from https://chromium.googlesource.com/chromium/deps/icu52.git at 6242e2fbb36f486f2c0addd1c3cef67fc4ed33fb
This commit was generated by merge_from_chromium.py.
Change-Id: I714bb374c019b5d2ba7b7cfbbba844387f285078
-rw-r--r-- | README.chromium | 6 | ||||
-rw-r--r-- | icu.gyp | 38 | ||||
-rw-r--r-- | patches/regex.patch | 86 | ||||
-rw-r--r-- | source/i18n/regexcmp.cpp | 16 | ||||
-rw-r--r-- | source/i18n/regexcmp.h | 4 | ||||
-rw-r--r-- | source/test/testdata/regextst.txt | 18 |
6 files changed, 130 insertions, 38 deletions
diff --git a/README.chromium b/README.chromium index 66dd3d8..d271b5c 100644 --- a/README.chromium +++ b/README.chromium @@ -238,3 +238,9 @@ This directory contains the source code of ICU 52.1 for C/C++ 11. Cherry-pick an upstream patch to fix a bug in bidi. - patches/bidi.patch - upstream bug : http://bugs.icu-project.org/trac/ticket/11054 + +12. Apply the following patch for regex + - patches/regex.patch + - upstream bugs : http://bugs.icu-project.org/trac/ticket/11369 + http://bugs.icu-project.org/trac/ticket/11370 + @@ -247,17 +247,6 @@ }, }, }], - ['OS == "android" and use_system_stlport == 1', { - 'target_conditions': [ - ['_toolset == "target"', { - # ICU requires RTTI, which is not present in the system's - # stlport, so we have to include gabi++. - 'include_dirs': [ - '<(android_src)/abi/cpp/include', - ], - }], - ], - }], ], # conditions }, { @@ -354,17 +343,6 @@ }, }, }], - ['OS == "android" and use_system_stlport == 1', { - 'target_conditions': [ - ['_toolset == "target"', { - # ICU requires RTTI, which is not present in the system's - # stlport, so we have to include gabi++. - 'include_dirs': [ - '<(android_src)/abi/cpp/include', - ], - }], - ], - }], ], # conditions }, ], # targets @@ -375,20 +353,6 @@ 'target_name': 'system_icu', 'type': 'none', 'conditions': [ - ['OS=="android"', { - 'direct_dependent_settings': { - 'include_dirs': [ - '<(android_src)/external/icu/icu4c/source/common', - '<(android_src)/external/icu/icu4c/source/i18n', - ], - }, - 'link_settings': { - 'libraries': [ - '-licui18n', - '-licuuc', - ], - }, - }], ['OS=="qnx"', { 'link_settings': { 'libraries': [ @@ -397,7 +361,7 @@ ], }, }], - ['OS!="android" and OS!="qnx"', { + ['OS!="qnx"', { 'link_settings': { 'ldflags': [ '<!@(icu-config --ldflags)', diff --git a/patches/regex.patch b/patches/regex.patch new file mode 100644 index 0000000..4f74ee1 --- /dev/null +++ b/patches/regex.patch @@ -0,0 +1,86 @@ +Index: source/i18n/regexcmp.h +=================================================================== +--- source/i18n/regexcmp.h (revision 292476) ++++ source/i18n/regexcmp.h (working copy) +@@ -182,7 +182,9 @@ + int32_t fMatchOpenParen; // The position in the compiled pattern + // of the slot reserved for a state save + // at the start of the most recently processed +- // parenthesized block. ++ // parenthesized block. Updated when processing ++ // a close to the location for the corresponding open. ++ + int32_t fMatchCloseParen; // The position in the pattern of the first + // location after the most recently processed + // parenthesized block. +Index: source/i18n/regexcmp.cpp +=================================================================== +--- source/i18n/regexcmp.cpp (revision 292476) ++++ source/i18n/regexcmp.cpp (working copy) +@@ -2133,6 +2133,10 @@ + int32_t patEnd = fRXPat->fCompiledPat->size() - 1; + int32_t minML = minMatchLength(fMatchOpenParen, patEnd); + int32_t maxML = maxMatchLength(fMatchOpenParen, patEnd); ++ if (URX_TYPE(maxML) != 0) { ++ error(U_REGEX_LOOK_BEHIND_LIMIT); ++ break; ++ } + if (maxML == INT32_MAX) { + error(U_REGEX_LOOK_BEHIND_LIMIT); + break; +@@ -2166,6 +2170,10 @@ + int32_t patEnd = fRXPat->fCompiledPat->size() - 1; + int32_t minML = minMatchLength(fMatchOpenParen, patEnd); + int32_t maxML = maxMatchLength(fMatchOpenParen, patEnd); ++ if (URX_TYPE(maxML) != 0) { ++ error(U_REGEX_LOOK_BEHIND_LIMIT); ++ break; ++ } + if (maxML == INT32_MAX) { + error(U_REGEX_LOOK_BEHIND_LIMIT); + break; +@@ -2329,7 +2337,15 @@ + int32_t topOfBlock = blockTopLoc(FALSE); + if (fIntervalUpper == 0) { + // Pathological case. Attempt no matches, as if the block doesn't exist. ++ // Discard the generated code for the block. ++ // If the block included parens, discard the info pertaining to them as well. + fRXPat->fCompiledPat->setSize(topOfBlock); ++ if (fMatchOpenParen >= topOfBlock) { ++ fMatchOpenParen = -1; ++ } ++ if (fMatchCloseParen >= topOfBlock) { ++ fMatchCloseParen = -1; ++ } + return TRUE; + } + +Index: source/test/testdata/regextst.txt +=================================================================== +--- source/test/testdata/regextst.txt (revision 292476) ++++ source/test/testdata/regextst.txt (working copy) +@@ -1173,6 +1173,24 @@ + "(?<=(?:){11})bc" "<0>bc</0>" # Empty (?:) expression. + + ++# Bug 11369 ++# Incorrect optimization of patterns with a zero length quantifier {0} ++ ++"(.|b)(|b){0}\$(?#xxx){3}(?>\D*)" "AAAAABBBBBCCCCCDDDDEEEEE" ++"(|b)ab(c)" "<0><1></1>ab<2>c</2></0>" ++"(|b){0}a{3}(D*)" "<0>aaa<2></2></0>" ++"(|b){0,1}a{3}(D*)" "<0><1></1>aaa<2></2></0>" ++"((|b){0})a{3}(D*)" "<0><1></1>aaa<3></3></0>" ++ ++# Bug 11370 ++# Max match length computation of look-behind expression gives result that is too big to fit in the ++# in the 24 bit operand portion of the compiled code. Expressions should fail to compile ++# (Look-behind match length must be bounded. This case is treated as unbounded, an error.) ++ ++"(?<!(0123456789a){10000000})x" E "no match" ++"(?<!\\ubeaf(\\ubeaf{11000}){11000})" E "no match" ++ ++ + # Random debugging, Temporary + # + #"^(?:a?b?)*$" "a--" diff --git a/source/i18n/regexcmp.cpp b/source/i18n/regexcmp.cpp index 0ec6154..8f1504f 100644 --- a/source/i18n/regexcmp.cpp +++ b/source/i18n/regexcmp.cpp @@ -2133,6 +2133,10 @@ void RegexCompile::handleCloseParen() { int32_t patEnd = fRXPat->fCompiledPat->size() - 1; int32_t minML = minMatchLength(fMatchOpenParen, patEnd); int32_t maxML = maxMatchLength(fMatchOpenParen, patEnd); + if (URX_TYPE(maxML) != 0) { + error(U_REGEX_LOOK_BEHIND_LIMIT); + break; + } if (maxML == INT32_MAX) { error(U_REGEX_LOOK_BEHIND_LIMIT); break; @@ -2166,6 +2170,10 @@ void RegexCompile::handleCloseParen() { int32_t patEnd = fRXPat->fCompiledPat->size() - 1; int32_t minML = minMatchLength(fMatchOpenParen, patEnd); int32_t maxML = maxMatchLength(fMatchOpenParen, patEnd); + if (URX_TYPE(maxML) != 0) { + error(U_REGEX_LOOK_BEHIND_LIMIT); + break; + } if (maxML == INT32_MAX) { error(U_REGEX_LOOK_BEHIND_LIMIT); break; @@ -2329,7 +2337,15 @@ UBool RegexCompile::compileInlineInterval() { int32_t topOfBlock = blockTopLoc(FALSE); if (fIntervalUpper == 0) { // Pathological case. Attempt no matches, as if the block doesn't exist. + // Discard the generated code for the block. + // If the block included parens, discard the info pertaining to them as well. fRXPat->fCompiledPat->setSize(topOfBlock); + if (fMatchOpenParen >= topOfBlock) { + fMatchOpenParen = -1; + } + if (fMatchCloseParen >= topOfBlock) { + fMatchCloseParen = -1; + } return TRUE; } diff --git a/source/i18n/regexcmp.h b/source/i18n/regexcmp.h index 0041beb..5d526be 100644 --- a/source/i18n/regexcmp.h +++ b/source/i18n/regexcmp.h @@ -182,7 +182,9 @@ private: int32_t fMatchOpenParen; // The position in the compiled pattern // of the slot reserved for a state save // at the start of the most recently processed - // parenthesized block. + // parenthesized block. Updated when processing + // a close to the location for the corresponding open. + int32_t fMatchCloseParen; // The position in the pattern of the first // location after the most recently processed // parenthesized block. diff --git a/source/test/testdata/regextst.txt b/source/test/testdata/regextst.txt index 5716ab5..f0b00ab 100644 --- a/source/test/testdata/regextst.txt +++ b/source/test/testdata/regextst.txt @@ -1173,6 +1173,24 @@ "(?<=(?:){11})bc" "<0>bc</0>" # Empty (?:) expression. +# Bug 11369 +# Incorrect optimization of patterns with a zero length quantifier {0} + +"(.|b)(|b){0}\$(?#xxx){3}(?>\D*)" "AAAAABBBBBCCCCCDDDDEEEEE" +"(|b)ab(c)" "<0><1></1>ab<2>c</2></0>" +"(|b){0}a{3}(D*)" "<0>aaa<2></2></0>" +"(|b){0,1}a{3}(D*)" "<0><1></1>aaa<2></2></0>" +"((|b){0})a{3}(D*)" "<0><1></1>aaa<3></3></0>" + +# Bug 11370 +# Max match length computation of look-behind expression gives result that is too big to fit in the +# in the 24 bit operand portion of the compiled code. Expressions should fail to compile +# (Look-behind match length must be bounded. This case is treated as unbounded, an error.) + +"(?<!(0123456789a){10000000})x" E "no match" +"(?<!\\ubeaf(\\ubeaf{11000}){11000})" E "no match" + + # Random debugging, Temporary # #"^(?:a?b?)*$" "a--" |