Commit 23eb895b authored by Wei Chen
Browse files

support multi-channel gray slide



add two functions:

```
void openslide_read_region_gray8(openslide_t *osr, uint8_t *dest,
                                 int64_t x, int64_t y, int32_t level,
                                 int64_t w, int64_t h);

void openslide_read_region_gray16(openslide_t *osr, uint8_t *dest,
                                  int64_t x, int64_t y, int32_t level,
                                  int64_t w, int64_t h);
```

They accept a similar set of parameters to openslide_read_region(). The
only difference is the `level` parameter. Since `level` is the index
into the `osr->levels` array, and `osr->levels` now stores a `struct
_openslide_level` for all levels of all channels, the caller needs to
calculate `level` as:

    num_of_channels * num_of_level_in_one_channel + level_in_channel

Signed-off-by: Wei Chen <chenw1@uthscsa.edu>
parent 5e6ceeb9
Loading
Loading
Loading
Loading
+11 −30
Original line number Diff line number Diff line
@@ -78,7 +78,7 @@ static guint get_bits_per_pixel(const PKPixelFormatGUID *pixel_format) {
  return pixel_info.cbitUnit;
}

bool _openslide_jxr_decode_buf(const void *src, int64_t src_len, uint32_t *dst,
bool _openslide_jxr_decode_buf(const void *src, int64_t src_len, uint8_t *dst,
                               int64_t dst_len, GError **err) {
  struct WMPStream *pStream = NULL;
  PKImageDecode *pDecoder = NULL;
@@ -86,7 +86,6 @@ bool _openslide_jxr_decode_buf(const void *src, int64_t src_len, uint32_t *dst,
  ERR jerr;
  PKPixelFormatGUID fmt;
  PKRect rect = {0, 0, 0, 0};
  g_autofree uint8_t *unjxr = NULL;

  CreateWS_Memory(&pStream, (void *) src, src_len);
  // IID_PKImageWmpDecode is the only supported decoder PKIID
@@ -102,49 +101,33 @@ bool _openslide_jxr_decode_buf(const void *src, int64_t src_len, uint32_t *dst,
  }

  pDecoder->GetSize(pDecoder, &rect.Width, &rect.Height);
  int64_t out_len = rect.Width * rect.Height * 4;
  // JXR tile size may be incorrect in czi directory entries
  g_assert(out_len <= dst_len);

  pDecoder->GetPixelFormat(pDecoder, &fmt);
  PKPixelFormatGUID fmt_out;
  void (*convert)(uint8_t *, size_t, uint32_t *);
  if (IsEqualGUID(&fmt, &GUID_PKPixelFormat24bppBGR)) {
    fmt_out = GUID_PKPixelFormat24bppBGR;
    convert = _openslide_bgr24_to_argb32;
  } else if (IsEqualGUID(&fmt, &GUID_PKPixelFormat48bppRGB)) {
    /* Although the format called 48bppRGB in JXR, its color order is BGR for
     * czi. Use 48bppRGB as it is and prefer openslide function for converting
     * to argb32.
     */
    fmt_out = GUID_PKPixelFormat48bppRGB;
    convert = _openslide_bgr48_to_argb32;
  } else if (IsEqualGUID(&fmt, &GUID_PKPixelFormat8bppGray)) {
    g_set_error(err, OPENSLIDE_ERROR, OPENSLIDE_ERROR_FAILED,
                "GUID_PKPixelFormat8bppGray is not supported");
    goto Cleanup;
    fmt_out = GUID_PKPixelFormat8bppGray;
  } else if (IsEqualGUID(&fmt, &GUID_PKPixelFormat16bppGray)) {
    g_set_error(err, OPENSLIDE_ERROR, OPENSLIDE_ERROR_FAILED,
                "GUID_PKPixelFormat16bppGray is not supported");
    goto Cleanup;
    fmt_out = GUID_PKPixelFormat16bppGray;
  } else {
    g_set_error(err, OPENSLIDE_ERROR, OPENSLIDE_ERROR_FAILED,
                "Currently only support GUID_PKPixelFormat24bppBGR and "
                "GUID_PKPixelFormat48bppRGB");
                "Currently only support "
                "GUID_PKPixelFormat24bppBGR, GUID_PKPixelFormat48bppRGB, "
                "GUID_PKPixelFormat8bppGray and GUID_PKPixelFormat16bppGray");
    goto Cleanup;
  }

  uint32_t stride =
      rect.Width *
  uint32_t stride = rect.Width *
      ((MAX(get_bits_per_pixel(&fmt), get_bits_per_pixel(&fmt_out)) + 7) / 8);
  int64_t unjxr_len = stride * rect.Height;
  unjxr = g_try_malloc(unjxr_len);
  if (!unjxr) {
    g_set_error(err, OPENSLIDE_ERROR, OPENSLIDE_ERROR_FAILED,
                "Couldn't allocate %" PRId64 " bytes for decoding JXR",
                unjxr_len);
    return false;
  }
  int64_t unjxr_len = rect.Height * stride;
  // JXR tile size may be incorrect in czi directory entries
  g_assert(unjxr_len <= dst_len);

  // Create color converter
  jerr = PKCodecFactory_CreateFormatConverter(&pConverter);
@@ -157,13 +140,11 @@ bool _openslide_jxr_decode_buf(const void *src, int64_t src_len, uint32_t *dst,
    goto Cleanup;
  }

  jerr = pConverter->Copy(pConverter, &rect, unjxr, stride);
  jerr = pConverter->Copy(pConverter, &rect, dst, stride);
  if (jerr < 0) {
    goto Cleanup;
  }

  convert(unjxr, unjxr_len, dst);

Cleanup:
  print_err(jerr, err);
  CloseWS_Memory(&pStream);
+1 −1
Original line number Diff line number Diff line
@@ -25,7 +25,7 @@
#include <stdint.h>


bool _openslide_jxr_decode_buf(const void *src, int64_t src_len, uint32_t *dst,
bool _openslide_jxr_decode_buf(const void *src, int64_t src_len, uint8_t *dst,
                               int64_t dst_len, GError **err);

bool _openslide_jxr_dim(const void *data, size_t data_len, uint32_t *width,
+47 −0
Original line number Diff line number Diff line
@@ -80,4 +80,51 @@ void _openslide_restore_czi_zstd1_avx2(uint8_t *src, size_t src_len,
  }
}

/*
 * Convert packed 16-bit gray pixels to 8-bit gray using AVX2.
 *
 * src:             input buffer of 16-bit pixels
 * src_len:         length of src in bytes
 * pixel_real_bits: number of significant bits per pixel; every pixel is
 *                  shifted right by (pixel_real_bits - 8)
 * dst:             output buffer, one byte per input pixel
 *
 * Pixels whose value still exceeds 255 after the shift are written as 255.
 */
void _openslide_gray16_to_gray8_avx2(uint8_t *src, size_t src_len,
                                     int pixel_real_bits, uint8_t *dst) {
  /* sixteen 16-bits pixels a time */
  int nshift = pixel_real_bits - 8;
  const int mm_step = 32;
  /* Each iteration stores 32 bytes of which only the low 16 are valid, so
   * stop one vector early to keep the last store inside the dst buffer; the
   * scalar tail below converts the remainder. The decrement is guarded
   * because for src_len < mm_step the unsigned subtraction would wrap
   * around and the loop would run far beyond both buffers.
   */
  size_t mm_len = src_len / mm_step;
  if (mm_len > 0) {
    mm_len--;
  }
  __m256i gray8, tmp1, tmp2;
  /* per 128-bit lane: gather the high (hi8) or low (lo8) byte of each
   * 16-bit pixel into the low 8 bytes, zero the high 8 bytes */
  __m256i hi8 = _mm256_setr_epi8(
      1, 3, 5, 7, 9, 11, 13, 15, -1, -1, -1, -1, -1, -1, -1, -1,
      1, 3, 5, 7, 9, 11, 13, 15, -1, -1, -1, -1, -1, -1, -1, -1);
  __m256i lo8 = _mm256_setr_epi8(
      0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1,
      0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1);
  __m256i allzero = _mm256_set_epi64x(0, 0, 0, 0);
  for (size_t n = 0; n < mm_len; n++) {
    tmp1 = _mm256_lddqu_si256((__m256i const *)src); // gray16
    tmp2 = _mm256_srli_epi16(tmp1, nshift);          // right shift
    gray8 = _mm256_shuffle_epi8(tmp2, lo8);          // bits 0-7 of gray16

    /* check after right shift, whether the high 8 bits are non-zero. Sometimes
     * 14 bits zeiss gray uses more than 14 bits.
     */
    tmp1 = _mm256_shuffle_epi8(tmp2, hi8);           // bits 8-15 of gray16
    /* 0xFF if high 8 bits is non-zero, 0 otherwise. The sign bit of high 8
     * bits is always zero since it has been shifted right (this relies on
     * nshift >= 1), therefore it is safe to compare signed with 0.
     */
    tmp2 = _mm256_cmpgt_epi8(tmp1, allzero);
    /* OR-ing with 0xFF saturates overflowing pixels to 255 */
    tmp1 = _mm256_or_si256(tmp2, gray8);
    /* move the valid 8 bytes of each lane (qwords 0 and 2) into the low
     * 128 bits so the 16 result bytes are contiguous */
    tmp2 = _mm256_permute4x64_epi64(tmp1, 0x08);
    _mm256_storeu_si256((__m256i *)dst, tmp2);

    src += mm_step;
    dst += 16;
  }

  /* scalar tail: everything the vector loop did not consume */
  size_t i = mm_len * mm_step;
  while (i < src_len) {
    *dst++ = gray16togray8(src, nshift);
    i += 2;
    src += 2;
  }
}

#endif
+44 −0
Original line number Diff line number Diff line
@@ -76,4 +76,48 @@ void _openslide_restore_czi_zstd1_sse3(uint8_t *src, size_t src_len,
  }
}

/*
 * Convert packed 16-bit gray pixels to 8-bit gray using 128-bit SIMD.
 *
 * src:             input buffer of 16-bit pixels; no alignment is required
 * src_len:         length of src in bytes
 * pixel_real_bits: number of significant bits per pixel; every pixel is
 *                  shifted right by (pixel_real_bits - 8)
 * dst:             output buffer, one byte per input pixel
 *
 * Pixels whose value still exceeds 255 after the shift are written as 255.
 * NOTE: despite the name, the byte shuffle used here (_mm_shuffle_epi8) is
 * an SSSE3 instruction.
 */
void _openslide_gray16_to_gray8_sse2(uint8_t *src, size_t src_len,
                                     int pixel_real_bits, uint8_t *dst) {
  /* eight 16-bits pixels a time */
  int nshift = pixel_real_bits - 8;
  const int mm_step = 16;
  /* Each iteration stores 16 bytes of which only the low 8 are valid, so
   * stop one vector early to keep the last store inside the dst buffer; the
   * scalar tail below converts the remainder. The decrement is guarded
   * because for src_len < mm_step the unsigned subtraction would wrap
   * around and the loop would run far beyond both buffers.
   */
  size_t mm_len = src_len / mm_step;
  if (mm_len > 0) {
    mm_len--;
  }
  __m128i gray8, gray16, tmp1, tmp2;
  /* gather the high (hi8) or low (lo8) byte of each 16-bit pixel into the
   * low 8 bytes, zero the high 8 bytes */
  __m128i hi8 =
      _mm_setr_epi8(1, 3, 5, 7, 9, 11, 13, 15, -1, -1, -1, -1, -1, -1, -1, -1);
  __m128i lo8 =
      _mm_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1);
  __m128i allzero = _mm_set_epi64x(0, 0);

  for (size_t n = 0; n < mm_len; n++) {
    /* unaligned load: src is a plain byte pointer and is advanced in
     * caller-determined buffers, so 16-byte alignment cannot be assumed */
    gray16 = _mm_loadu_si128((__m128i const *)src);
    tmp2 = _mm_srli_epi16(gray16, nshift);
    gray8 = _mm_shuffle_epi8(tmp2, lo8);
    /* check after right shift, whether the high 8 bits are non-zero. Sometimes
     * 14 bits zeiss gray uses more than 14 bits.
     */
    tmp1 = _mm_shuffle_epi8(tmp2, hi8);
    /* 0xFF if high 8 bits is non-zero, 0 otherwise. The sign bit of high 8
     * bits is always zero since it has been shifted right (this relies on
     * nshift >= 1), therefore it is safe to compare signed with 0.
     */
    tmp2 = _mm_cmpgt_epi8(tmp1, allzero);
    /* OR-ing with 0xFF saturates overflowing pixels to 255 */
    tmp1 = _mm_or_si128(tmp2, gray8);
    _mm_storeu_si128((__m128i *)dst, tmp1);

    src += mm_step;
    dst += 8;
  }

  /* scalar tail: everything the vector loop did not consume */
  size_t i = mm_len * mm_step;
  while (i < src_len) {
    *dst++ = gray16togray8(src, nshift);
    i += 2;
    src += 2;
  }
}

#endif
+90 −0
Original line number Diff line number Diff line
#include <config.h>
#include <glib.h>
#include "openslide-image.h"

static void bgr24_to_argb32_dispatch(uint8_t *src, size_t src_len,
@@ -14,6 +15,11 @@ static void restore_czi_zstd1_dispatch(uint8_t *src, size_t src_len,
static void restore_czi_zstd1_generic(uint8_t *src, size_t src_len,
                                      uint8_t *dst);

static void gray16_to_gray8_dispatch(uint8_t *src, size_t src_len,
                                     int pixel_real_bits, uint8_t *dst);
static void gray16_to_gray8_generic(uint8_t *src, size_t src_len,
                                    int pixel_real_bits, uint8_t *dst);

#ifdef USE_NEON
static void bgr24_to_argb32_neon(uint8_t *src, size_t src_len, uint32_t *dst);
static void restore_czi_zstd1_neon(uint8_t *src, size_t src_len, uint8_t *dst);
@@ -26,6 +32,9 @@ _openslide_bgr_convert_t _openslide_bgr48_to_argb32 = &bgr48_to_argb32_dispatch;
_openslide_restore_czi_zstd1_t _openslide_restore_czi_zstd1 =
    &restore_czi_zstd1_dispatch;

_openslide_gray16_to_gray8_t _openslide_gray16_to_gray8 =
    &gray16_to_gray8_dispatch;

static void bgr24_to_argb32_generic(uint8_t *src, size_t src_len,
                                    uint32_t *dst) {
  // one 24-bit pixel at a time
@@ -116,6 +125,87 @@ static void restore_czi_zstd1_dispatch(uint8_t *src, size_t src_len,
  return restore_czi_zstd1_generic(src, src_len, dst);
}

/*
 * Convert the 16-bit gray pixel stored at p to 8 bits.
 *
 * p:  address of the pixel's two bytes, read in native byte order; it does
 *     not have to be 2-byte aligned
 * ns: number of bits to shift the pixel right
 *
 * Returns the shifted value, saturated to 255.
 */
uint8_t gray16togray8(uint8_t *p, int ns) {
  /* Copy the bytes out instead of dereferencing (uint16_t *)p: p points into
   * a byte buffer, so the cast would be a misaligned, type-punned load. */
  uint16_t raw;
  memcpy(&raw, p, sizeof(raw));
  uint16_t v = raw >> ns;

  /* 14 bits gray image in zeiss Axioscan7 sometimes uses more than 14 bits,
   * these pixels appear black if treated as 14 bits */
  // sadly, conditional makes convert at least 15% slower
  return (v > 255) ? 255 : (uint8_t)v;
}

/*
 * Scalar gray16 -> gray8 conversion, one pixel at a time.
 *
 * Walks src in 2-byte steps for src_len bytes, converts each pixel with
 * gray16togray8() using a right shift of (pixel_real_bits - 8), and writes
 * one byte per pixel to dst.
 */
static void gray16_to_gray8_generic(uint8_t *src, size_t src_len,
                                    int pixel_real_bits, uint8_t *dst) {
  const int shift = pixel_real_bits - 8;

  for (size_t off = 0; off < src_len; off += 2) {
    *dst++ = gray16togray8(src + off, shift);
  }
}

/*
 * Pad image rows so that they align to a 4 byte boundary.
 *
 * Copies h rows of w * pixel_bytes bytes each from the tightly packed src
 * into dst, where consecutive rows are spaced
 * CAIRO_STRIDE_FOR_WIDTH_BPP(w, pixel_bytes * 8) bytes apart. src_len and
 * dst_len must match the packed and padded image sizes exactly; both are
 * asserted. The padding bytes in dst are not written.
 */
void _openslide_add_row_padding(uint8_t *src, size_t src_len, uint8_t *dst,
                                size_t dst_len, int pixel_bytes, int32_t w,
                                int32_t h) {
  int32_t packed_row = w * pixel_bytes;
  int32_t padded_row = CAIRO_STRIDE_FOR_WIDTH_BPP(w, pixel_bytes * 8);

  g_assert((size_t) h * packed_row == src_len);
  g_assert((size_t) h * padded_row == dst_len);

  uint8_t *in = src;
  uint8_t *out = dst;
  for (int32_t remaining = h; remaining > 0; remaining--) {
    memcpy(out, in, packed_row);
    in += packed_row;
    out += padded_row;
  }
}

/*
 * Strip the 4 byte alignment padding from image rows.
 *
 * Copies h rows of w * pixel_bytes bytes each from src, where consecutive
 * rows are spaced CAIRO_STRIDE_FOR_WIDTH_BPP(w, pixel_bytes * 8) bytes
 * apart, into the tightly packed dst. src_len and dst_len must match the
 * padded and packed image sizes exactly; both are asserted.
 */
void _openslide_del_row_padding(uint8_t *src, size_t src_len, uint8_t *dst,
                                size_t dst_len, int pixel_bytes, int32_t w,
                                int32_t h) {
  int32_t packed_row = w * pixel_bytes;
  int32_t padded_row = CAIRO_STRIDE_FOR_WIDTH_BPP(w, pixel_bytes * 8);

  g_assert((size_t) h * padded_row == src_len);
  g_assert((size_t) h * packed_row == dst_len);

  uint8_t *in = src;
  uint8_t *out = dst;
  for (int32_t remaining = h; remaining > 0; remaining--) {
    memcpy(out, in, packed_row);
    in += padded_row;
    out += packed_row;
  }
}

/*
 * First-call dispatcher for _openslide_gray16_to_gray8.
 *
 * Picks the best implementation the build and the running CPU support,
 * stores it in the _openslide_gray16_to_gray8 function pointer so later
 * calls go to it directly, and forwards the current call to it.
 *
 * Benchmark figures from the original author:
 * non-SIMD: 1.91 GB/s
 *     SSE2: 3.70 GB/s, 1.94x
 *     AVX2: 4.01 GB/s, 2.10x
 */
static void gray16_to_gray8_dispatch(uint8_t *src, size_t src_len,
                                     int pixel_real_bits, uint8_t *dst) {
#ifdef USE_AVX2
  if (__builtin_cpu_supports("avx2")) {
    _openslide_gray16_to_gray8 = &_openslide_gray16_to_gray8_avx2;
    _openslide_gray16_to_gray8(src, src_len, pixel_real_bits, dst);
    return;
  }
#endif
#ifdef USE_SSSE3
  /* _openslide_gray16_to_gray8_sse2 uses _mm_shuffle_epi8, an SSSE3
   * instruction, so the runtime check must be for "ssse3"; a CPU with only
   * SSE3 would fault on it. */
  if (__builtin_cpu_supports("ssse3")) {
    _openslide_gray16_to_gray8 = &_openslide_gray16_to_gray8_sse2;
    _openslide_gray16_to_gray8(src, src_len, pixel_real_bits, dst);
    return;
  }
#endif

  /* no usable SIMD implementation: fall back to the scalar version */
  _openslide_gray16_to_gray8 = &gray16_to_gray8_generic;
  _openslide_gray16_to_gray8(src, src_len, pixel_real_bits, dst);
}

#ifdef USE_NEON
#include <arm_neon.h>

Loading