|
 |
f0633d |
From 2421317990d00e14325298f34db3c60735527697 Mon Sep 17 00:00:00 2001
|
|
 |
f0633d |
From: Rohit Deshmukh <rohit.deshmukh@synerzip.com>
|
|
 |
f0633d |
Date: Fri, 6 Dec 2013 15:42:53 +0530
|
|
 |
f0633d |
Subject: [PATCH 8/9] fdo#72219: Fix for corruption of symbols in docx
|
|
 |
f0633d |
|
|
 |
f0633d |
Issue:
|
|
 |
f0633d |
OUString uses UTF-16, so for a non-BMP Unicode character there are 2
|
|
 |
f0633d |
UTF-16 code units (a surrogate pair) stored, not just 1.
|
|
 |
f0633d |
So we get an assertion failure in the "rtl_uString_iterateCodePoints" function.
|
|
 |
f0633d |
|
|
 |
f0633d |
erAck: Underlying cause was that the dictionary breakiterator misused
|
|
 |
f0633d |
UTF-16 positions as Unicode code point positions.
|
|
 |
f0633d |
|
|
 |
f0633d |
Change-Id: I923485f56c2d879b63687adaea2b489a3479991c
|
|
 |
f0633d |
Reviewed-on: https://gerrit.libreoffice.org/6955
|
|
 |
f0633d |
Reviewed-by: Eike Rathke <erack@redhat.com>
|
|
 |
f0633d |
Tested-by: Eike Rathke <erack@redhat.com>
|
|
 |
f0633d |
(cherry picked from commit d8fd15875901d584a4bbcc07c927fa20332e4841)
|
|
 |
f0633d |
Reviewed-on: https://gerrit.libreoffice.org/7322
|
|
 |
f0633d |
(cherry picked from commit 994d0c9e7aa8d1a7602e61b770991da980c1cde5)
|
|
 |
f0633d |
Reviewed-on: https://gerrit.libreoffice.org/7324
|
|
 |
f0633d |
---
|
|
 |
f0633d |
i18npool/qa/cppunit/test_breakiterator.cxx | 19 ++++++++++++++++++-
|
|
 |
f0633d |
i18npool/source/breakiterator/xdictionary.cxx | 6 ++++--
|
|
 |
f0633d |
2 files changed, 22 insertions(+), 3 deletions(-)
|
|
 |
f0633d |
|
|
 |
f0633d |
diff --git a/i18npool/qa/cppunit/test_breakiterator.cxx b/i18npool/qa/cppunit/test_breakiterator.cxx
|
|
 |
f0633d |
index 41e4077..b4174ba 100644
|
|
 |
f0633d |
--- a/i18npool/qa/cppunit/test_breakiterator.cxx
|
|
 |
f0633d |
+++ b/i18npool/qa/cppunit/test_breakiterator.cxx
|
|
 |
f0633d |
@@ -44,7 +44,7 @@ public:
|
|
 |
f0633d |
#endif
|
|
 |
f0633d |
void testKhmer();
|
|
 |
f0633d |
void testJapanese();
|
|
 |
f0633d |
-
|
|
 |
f0633d |
+ void testChinese();
|
|
 |
f0633d |
CPPUNIT_TEST_SUITE(TestBreakIterator);
|
|
 |
f0633d |
CPPUNIT_TEST(testLineBreaking);
|
|
 |
f0633d |
CPPUNIT_TEST(testGraphemeIteration);
|
|
 |
f0633d |
@@ -60,6 +60,7 @@ public:
|
|
 |
f0633d |
CPPUNIT_TEST(testKhmer);
|
|
 |
f0633d |
#endif
|
|
 |
f0633d |
CPPUNIT_TEST(testJapanese);
|
|
 |
f0633d |
+ CPPUNIT_TEST(testChinese);
|
|
 |
f0633d |
CPPUNIT_TEST_SUITE_END();
|
|
 |
f0633d |
private:
|
|
 |
f0633d |
uno::Reference<i18n::XBreakIterator> m_xBreak;
|
|
 |
f0633d |
@@ -909,6 +910,22 @@ void TestBreakIterator::testJapanese()
|
|
 |
f0633d |
}
|
|
 |
f0633d |
}
|
|
 |
f0633d |
|
|
 |
f0633d |
+void TestBreakIterator::testChinese()
|
|
 |
f0633d |
+{
|
|
 |
f0633d |
+ lang::Locale aLocale;
|
|
 |
f0633d |
+ aLocale.Language = "zh";
|
|
 |
f0633d |
+ aLocale.Country = "CN";
|
|
 |
f0633d |
+ i18n::Boundary aBounds;
|
|
 |
f0633d |
+
|
|
 |
f0633d |
+ {
|
|
 |
f0633d |
+ const sal_Unicode CHINESE[] = { 0x6A35, 0x6A30, 0x69FE, 0x8919, 0xD867, 0xDEDB };
|
|
 |
f0633d |
+
|
|
 |
f0633d |
+ OUString aTest(CHINESE, SAL_N_ELEMENTS(CHINESE));
|
|
 |
f0633d |
+ aBounds = m_xBreak->getWordBoundary(aTest, 4, aLocale,
|
|
 |
f0633d |
+ i18n::WordType::DICTIONARY_WORD, true);
|
|
 |
f0633d |
+ CPPUNIT_ASSERT(aBounds.startPos == 4 && aBounds.endPos == 6);
|
|
 |
f0633d |
+ }
|
|
 |
f0633d |
+}
|
|
 |
f0633d |
void TestBreakIterator::setUp()
|
|
 |
f0633d |
{
|
|
 |
f0633d |
BootstrapFixtureBase::setUp();
|
|
 |
f0633d |
diff --git a/i18npool/source/breakiterator/xdictionary.cxx b/i18npool/source/breakiterator/xdictionary.cxx
|
|
 |
f0633d |
index 72da09f..3b43fa3 100644
|
|
 |
f0633d |
--- a/i18npool/source/breakiterator/xdictionary.cxx
|
|
 |
f0633d |
+++ b/i18npool/source/breakiterator/xdictionary.cxx
|
|
 |
f0633d |
@@ -383,9 +383,11 @@ Boundary xdictionary::getWordBoundary(const OUString& rText, sal_Int32 anyPos, s
|
|
 |
f0633d |
if (u_isWhitespace(ch))
|
|
 |
f0633d |
i--;
|
|
 |
f0633d |
}
|
|
 |
f0633d |
+
|
|
 |
f0633d |
boundary.endPos = boundary.startPos;
|
|
 |
f0633d |
- rText.iterateCodePoints(&boundary.endPos, aCache.wordboundary[i]);
|
|
 |
f0633d |
- rText.iterateCodePoints(&boundary.startPos, aCache.wordboundary[i-1]);
|
|
 |
f0633d |
+ boundary.endPos += aCache.wordboundary[i];
|
|
 |
f0633d |
+ boundary.startPos += aCache.wordboundary[i-1];
|
|
 |
f0633d |
+
|
|
 |
f0633d |
} else {
|
|
 |
f0633d |
boundary.startPos = anyPos;
|
|
 |
f0633d |
if (anyPos < len) rText.iterateCodePoints(&anyPos, 1);
|
|
 |
f0633d |
--
|
|
 |
f0633d |
1.8.4.2
|
|
 |
f0633d |
|