nodejs · exinfinitum · Oct 7, 2015 · bnoordhuis · Nov 5, 2015 · exinfinitum
diff --git a/src/node_buffer.cc b/src/node_buffer.cc
@@ -460,10 +460,11 @@ void StringSlice<UCS2>(const FunctionCallbackInfo<Value>& args) {
   // need to reorder on BE platforms.  See http://nodejs.org/api/buffer.html
   // regarding Node's "ucs2" encoding specification.
   const bool aligned = (reinterpret_cast<uintptr_t>(data) % sizeof(*buf) == 0);
-  if (IsLittleEndian() && aligned) {
-    buf = reinterpret_cast<const uint16_t*>(data);
-  } else {
+  if (IsLittleEndian() && !aligned) {
     // Make a copy to avoid unaligned accesses in v8::String::NewFromTwoByte().
+    // This applies ONLY to little endian platforms, as misalignment will be
+    // handled by a byte-swapping operation in StringBytes::Encode on
+    // big endian platforms.
     uint16_t* copy = new uint16_t[length];
     for (size_t i = 0, k = 0; i < length; i += 1, k += 2) {
       // Assumes that the input is little endian.
@@ -473,6 +474,8 @@ void StringSlice<UCS2>(const FunctionCallbackInfo<Value>& args) {
     }
     buf = copy;
     release = true;
+  } else {
+    buf = reinterpret_cast<const uint16_t*>(data);
   }
 
   args.GetReturnValue().Set(StringBytes::Encode(env->isolate(), buf, length));

diff --git a/src/string_bytes.cc b/src/string_bytes.cc
@@ -6,6 +6,7 @@
 
 #include <limits.h>
 #include <string.h>  // memcpy
+#include <vector>
 
 // When creating strings >= this length v8's gc spins up and consumes
 // most of the execution time. For these cases it's more performant to
@@ -406,9 +407,7 @@ size_t StringBytes::Write(Isolate* isolate,
           reinterpret_cast<uintptr_t>(buf) % sizeof(uint16_t);
       if (is_aligned) {
         uint16_t* const dst = reinterpret_cast<uint16_t*>(buf);
-        for (size_t i = 0; i < nchars; i++)
-          dst[i] = dst[i] << 8 | dst[i] >> 8;
-        break;
+        SwapBytes(dst, dst, nchars);
       }
 
       ASSERT_EQ(sizeof(uint16_t), 2);
@@ -857,7 +856,16 @@ Local<Value> StringBytes::Encode(Isolate* isolate,
                                  const uint16_t* buf,
                                  size_t buflen) {
   Local<String> val;
-
+  std::vector<uint16_t> dst;
+  if (IsBigEndian()) {
+    // Node's "ucs2" encoding expects LE character data inside a
+    // Buffer, so we need to reorder on BE platforms.  See
+    // http://nodejs.org/api/buffer.html regarding Node's "ucs2"
+    // encoding specification.
+    dst.resize(buflen);
+    SwapBytes(&dst[0], buf, buflen);
+    buf = &dst[0];
+  }
   if (buflen < EXTERN_APEX) {
     val = String::NewFromTwoByte(isolate,
                                  buf,

diff --git a/src/util-inl.h b/src/util-inl.h
@@ -198,6 +198,20 @@ TypeName* Unwrap(v8::Local<v8::Object> object) {
   return static_cast<TypeName*>(pointer);
 }
 
+// Byte-swaps buflen 16-bit units from src into dst; dst may alias src.
+void SwapBytes(uint16_t* dst, const uint16_t* src, size_t buflen) {
+  for (size_t n = 0; n != buflen; ++n) {
+    const uint16_t unit = src[n];
+#ifdef __GNUC__  // Builtin yields better g++ 4.8 code on PowerPC/other BE.
+    dst[n] = __builtin_bswap16(unit);
+#else
+    dst[n] = (unit << 8) | (unit >> 8);
+#endif
+  }
+}
+
+
+
 }  // namespace node
 
 #endif  // SRC_UTIL_INL_H_
diff --git a/src/util.h b/src/util.h
@@ -176,6 +176,8 @@ inline void ClearWrap(v8::Local<v8::Object> object);
 template <typename TypeName>
 inline TypeName* Unwrap(v8::Local<v8::Object> object);
 
+inline void SwapBytes(uint16_t* dst, const uint16_t* src, size_t buflen);
+
 class Utf8Value {
   public:
     explicit Utf8Value(v8::Isolate* isolate, v8::Local<v8::Value> value);