From d4dd796e74a50f2ca9b2321237eee1423019e173 Mon Sep 17 00:00:00 2001
From: Christophe Fergeau <cfergeau@redhat.com>
Date: Tue, 26 Sep 2017 11:02:58 +0200
Subject: [PATCH 1/2] goo: Add GooString::has{Big,Little}EndianBOM

---
 goo/GooString.cc | 10 ++++++++++
 goo/GooString.h  |  5 ++++-
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/goo/GooString.cc b/goo/GooString.cc
index 10976e87..0838d058 100644
--- a/goo/GooString.cc
+++ b/goo/GooString.cc
@@ -917,6 +917,16 @@ GBool GooString::hasUnicodeMarker(void) const
   return length > 1 && (s[0] & 0xff) == 0xfe && (s[1] & 0xff) == 0xff;
 }
 
+GBool GooString::hasBigEndianBOM(void) const
+{
+  return length > 1 && (s[0] & 0xff) == 0xfe && (s[1] & 0xff) == 0xff;
+}
+
+GBool GooString::hasLittleEndianBOM(void) const
+{
+  return length > 1 && (s[0] & 0xff) == 0xff && (s[1] & 0xff) == 0xfe;
+}
+
 void GooString::prependUnicodeMarker()
 {
     insert(0, (char)0xff);
diff --git a/goo/GooString.h b/goo/GooString.h
index a5418c3d..623655fc 100644
--- a/goo/GooString.h
+++ b/goo/GooString.h
@@ -161,8 +161,11 @@ public:
   GBool endsWith(const char *suffix) const;
 
   GBool hasUnicodeMarker(void) const;
+  GBool hasBigEndianBOM(void) const;
+  GBool hasLittleEndianBOM(void) const;
   void prependUnicodeMarker();
-  GBool hasJustUnicodeMarker(void) const { return length == 2 && hasUnicodeMarker(); }
+  /* FIXME: Move this (hasBigEndianBOM() || hasLittleEndianBOM()) check to hasUnicodeMarker? */
+  GBool hasJustUnicodeMarker(void) const { return length == 2 && (hasBigEndianBOM() || hasLittleEndianBOM()); }
 
   // Sanitizes the string so that it does
   // not contain any ( ) < > [ ] { } / %
-- 
2.13.5


From 993ab2aa1f4cda5b6ab74f7434146879866e76cd Mon Sep 17 00:00:00 2001
From: Christophe Fergeau <cfergeau@redhat.com>
Date: Tue, 26 Sep 2017 11:03:49 +0200
Subject: [PATCH 2/2] document: Handle UTF16-LE annotations
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

I can produce such annotations when adding annotations to a PDF
attachment from the standard mail app on my iPhone (iOS 11).
They currently all show as "ÿþÚ" rather than the actual string content.
UTF-16BE vs UTF-16LE is detected by inferring the endianness from the
first two bytes of the string (0xFE 0xFF for big-endian, 0xFF 0xFE for
little-endian, aka the Byte Order Mark).
---
 glib/poppler-document.cc | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/glib/poppler-document.cc b/glib/poppler-document.cc
index 41b6a04b..e8efd3b2 100644
--- a/glib/poppler-document.cc
+++ b/glib/poppler-document.cc
@@ -716,10 +716,14 @@ char *_poppler_goo_string_to_utf8(GooString *s)
 
   char *result;
 
-  if (s->hasUnicodeMarker()) {
+  if (s->hasBigEndianBOM()) {
     result = g_convert (s->getCString () + 2,
 			s->getLength () - 2,
 			"UTF-8", "UTF-16BE", NULL, NULL, NULL);
+  } else if (s->hasLittleEndianBOM()) {
+    result = g_convert (s->getCString () + 2,
+			s->getLength () - 2,
+			"UTF-8", "UTF-16LE", NULL, NULL, NULL);
   } else {
     int len;
     gunichar *ucs4_temp;
-- 
2.13.5

