Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add utils to convert ImageProxy to Bitmap #458

Merged
merged 2 commits into from
Sep 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion dataset/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,11 @@ kotlin {
implementation project(":api")
}
}
androidMain {}
androidMain {
dependencies {
api 'androidx.camera:camera-core:1.0.0-rc03'
}
}
}
explicitApiWarning()
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
package org.jetbrains.kotlinx.dl.dataset.preprocessing.camerax

import android.graphics.*
import androidx.camera.core.ImageProxy
import org.jetbrains.kotlinx.dl.dataset.preprocessing.bitmap.Rotate
import java.io.ByteArrayOutputStream


/**
 * Converts an [ImageProxy] to a [Bitmap].
 * Currently only supports [ImageFormat.YUV_420_888] and [PixelFormat.RGBA_8888].
 *
 * @param applyRotation if true the resulting bitmap will be rotated to match target orientation of a use case.
 * @return the converted bitmap; when [applyRotation] is true, the result of applying [Rotate] with the
 * image's `rotationDegrees` to it.
 * @throws [IllegalStateException] if the input format is not one of the supported formats, or if the input
 * format is [ImageFormat.YUV_420_888] and jpeg encoding of [YuvImage] fails.
 * @see <a href="https://developer.android.com/reference/androidx/camera/core/ImageAnalysis.Builder#setOutputImageFormat(int)">ImageAnalysis supported output formats</a>
 */
public fun ImageProxy.toBitmap(applyRotation: Boolean = false): Bitmap {
    val bitmap = when (format) {
        ImageFormat.YUV_420_888 -> yuv4208888ToBitmap(this)
        // It's unclear why PixelFormat is used here instead of ImageFormat, but this is documented behavior
        PixelFormat.RGBA_8888 -> rgba8888ToBitmap(this)
        else -> throw IllegalStateException("Unsupported image format: $format. Please check the documentation of Android ImageAnalysis API for supported formats.")
    }

    if (!applyRotation) return bitmap

    val rotated = Rotate(imageInfo.rotationDegrees.toFloat()).apply(bitmap)
    // Release the intermediate bitmap only when rotation produced a different object.
    // NOTE(review): Rotate is defined outside this file; if it can hand back its input unchanged
    // (e.g. for a 0-degree rotation), recycling unconditionally would return a recycled bitmap.
    if (rotated !== bitmap) {
        bitmap.recycle()
    }
    return rotated
}

/**
 * Builds an ARGB_8888 [Bitmap] from the first plane of an RGBA_8888 [image].
 *
 * The plane's row stride may be larger than `width * pixelStride`. The pixels are first copied into a
 * bitmap as wide as a full (padded) row, and the padding columns are then cropped away so that the
 * returned bitmap is exactly `image.width` x `image.height`.
 *
 * @param image source image whose first plane holds the RGBA pixel data.
 * @return a bitmap containing the image pixels without row padding.
 */
private fun rgba8888ToBitmap(image: ImageProxy): Bitmap {
    val plane = image.planes[0]
    val pixelStride = plane.pixelStride
    val rowPadding = plane.rowStride - pixelStride * image.width
    val paddedWidth = image.width + rowPadding / pixelStride

    val pixels = plane.buffer
    // copyPixelsFromBuffer reads from the buffer's current position, so start from the beginning.
    pixels.rewind()

    val padded = Bitmap.createBitmap(paddedWidth, image.height, Bitmap.Config.ARGB_8888)
    padded.copyPixelsFromBuffer(pixels)

    // No padding: the intermediate bitmap already has the right size.
    if (paddedWidth == image.width) return padded

    // Drop the padding columns on the right-hand side of every row.
    val cropped = Bitmap.createBitmap(padded, 0, 0, image.width, image.height)
    if (cropped !== padded) {
        padded.recycle()
    }
    return cropped
}

/**
 * Converts a YUV_420_888 [image] to a [Bitmap] by repacking it as NV21 and decoding it through [YuvImage].
 *
 * The NV21 data produced by [yuv420888ToNv21] covers only the image's crop rectangle, so the [YuvImage]
 * is created with the crop rectangle's dimensions rather than the full image dimensions.
 *
 * @param image source image in YUV_420_888 format.
 * @return the decoded bitmap.
 */
private fun yuv4208888ToBitmap(image: ImageProxy): Bitmap {
    val nv21 = yuv420888ToNv21(image)
    // The NV21 array is sized and filled from cropRect; the declared dimensions must match it.
    val crop = image.cropRect
    val yuvImage = YuvImage(nv21, ImageFormat.NV21, crop.width(), crop.height(), null)
    return yuvImage.toBitmap()
}

/**
 * Converts this [YuvImage] to a [Bitmap] by compressing the whole image to jpeg (quality 100)
 * in memory and decoding the resulting bytes.
 *
 * @return the decoded bitmap.
 * @throws IllegalStateException if jpeg compression reports failure, or if the compressed bytes
 * cannot be decoded into a bitmap.
 */
private fun YuvImage.toBitmap(): Bitmap {
    val out = ByteArrayOutputStream()
    val ok = compressToJpeg(Rect(0, 0, width, height), 100, out)
    check(ok) { "Something gone wrong during conversion of YUV image to jpeg format" }

    val imageBytes: ByteArray = out.toByteArray()
    // decodeByteArray returns null when decoding fails; surface that as an explicit error
    // instead of letting a null leak through the non-null return type.
    return checkNotNull(BitmapFactory.decodeByteArray(imageBytes, 0, imageBytes.size)) {
        "Failed to decode jpeg-encoded YUV image into a bitmap"
    }
}

/**
 * Repacks a YUV_420_888 [image] into a freshly allocated byte array via [imageToByteBuffer].
 *
 * The array length is derived from the number of pixels in the image's crop rectangle and the
 * bits-per-pixel value reported for [ImageFormat.YUV_420_888].
 *
 * @param image source image.
 * @return the byte array filled by [imageToByteBuffer].
 */
private fun yuv420888ToNv21(image: ImageProxy): ByteArray {
    val crop = image.cropRect
    val croppedPixels = crop.width() * crop.height()
    val bitsPerPixel = ImageFormat.getBitsPerPixel(ImageFormat.YUV_420_888)
    return ByteArray(croppedPixels * bitsPerPixel / 8).also { nv21 ->
        imageToByteBuffer(image, nv21, croppedPixels)
    }
}

/**
 * Decoding of YUV_420_888 image to NV21 byte representation.
 *
 * Only the region described by the image's crop rectangle is copied. The first plane is written
 * contiguously from index 0 of [outputBuffer]; the second and third planes are written interleaved
 * starting at index [pixelCount] (third plane at even offsets, second plane at odd offsets).
 * Planes beyond the third are ignored. The position of each plane's buffer is modified while reading.
 *
 * @param image source image; must be in [ImageFormat.YUV_420_888] format.
 * @param outputBuffer destination array that receives the NV21 data.
 * @param pixelCount offset in [outputBuffer] at which the interleaved chroma section starts; the caller
 * in this file passes the number of pixels in the crop rectangle.
 * @throws IllegalArgumentException if [image] is not in [ImageFormat.YUV_420_888] format.
 */
public fun imageToByteBuffer(image: ImageProxy, outputBuffer: ByteArray, pixelCount: Int) {
    // `assert` is only evaluated when JVM assertions are enabled, so validate the precondition explicitly.
    require(image.format == ImageFormat.YUV_420_888) {
        "Expected an image in YUV_420_888 format, but got format ${image.format}"
    }

    val imageCrop = image.cropRect
    val imagePlanes = image.planes

    imagePlanes.forEachIndexed { planeIndex, plane ->
        // How many values are read in input for each output value written
        // Only the Y plane has a value for every pixel, U and V have half the resolution i.e.
        //
        // Y Plane            U Plane    V Plane
        // ===============    =======    =======
        // Y Y Y Y Y Y Y Y    U U U U    V V V V
        // Y Y Y Y Y Y Y Y    U U U U    V V V V
        // Y Y Y Y Y Y Y Y    U U U U    V V V V
        // Y Y Y Y Y Y Y Y    U U U U    V V V V
        // Y Y Y Y Y Y Y Y
        // Y Y Y Y Y Y Y Y
        // Y Y Y Y Y Y Y Y
        val outputStride: Int

        // The index in the output buffer the next value will be written at
        // For Y it's zero, for U and V we start at the end of Y and interleave them i.e.
        //
        // First chunk        Second chunk
        // ===============    ===============
        // Y Y Y Y Y Y Y Y    V U V U V U V U
        // Y Y Y Y Y Y Y Y    V U V U V U V U
        // Y Y Y Y Y Y Y Y    V U V U V U V U
        // Y Y Y Y Y Y Y Y    V U V U V U V U
        // Y Y Y Y Y Y Y Y
        // Y Y Y Y Y Y Y Y
        // Y Y Y Y Y Y Y Y
        var outputOffset: Int

        when (planeIndex) {
            0 -> {
                outputStride = 1
                outputOffset = 0
            }
            1 -> {
                outputStride = 2
                // For NV21 format, U is in odd-numbered indices
                outputOffset = pixelCount + 1
            }
            2 -> {
                outputStride = 2
                // For NV21 format, V is in even-numbered indices
                outputOffset = pixelCount
            }
            else -> {
                // Image contains more than 3 planes, something strange is going on
                return@forEachIndexed
            }
        }

        val planeBuffer = plane.buffer
        val rowStride = plane.rowStride
        val pixelStride = plane.pixelStride

        // We have to divide the width and height by two if it's not the Y plane
        val planeCrop = if (planeIndex == 0) {
            imageCrop
        } else {
            Rect(
                imageCrop.left / 2,
                imageCrop.top / 2,
                imageCrop.right / 2,
                imageCrop.bottom / 2
            )
        }

        val planeWidth = planeCrop.width()
        val planeHeight = planeCrop.height()

        // Intermediate buffer used to store the bytes of each row
        val rowBuffer = ByteArray(plane.rowStride)

        // Size of each row in bytes
        val rowLength = if (pixelStride == 1 && outputStride == 1) {
            planeWidth
        } else {
            // Take into account that the stride may include data from pixels other than this
            // particular plane and row, and that could be between pixels and not after every
            // pixel:
            //
            // |---- Pixel stride ----|                    Row ends here --> |
            // | Pixel 1 | Other Data | Pixel 2 | Other Data | ... | Pixel N |
            //
            // We need to get (N-1) * (pixel stride bytes) per row + 1 byte for the last pixel
            (planeWidth - 1) * pixelStride + 1
        }

        for (row in 0 until planeHeight) {
            // Move buffer position to the beginning of this row
            planeBuffer.position(
                (row + planeCrop.top) * rowStride + planeCrop.left * pixelStride)

            if (pixelStride == 1 && outputStride == 1) {
                // When there is a single stride value for pixel and output, we can just copy
                // the entire row in a single step
                planeBuffer.get(outputBuffer, outputOffset, rowLength)
                outputOffset += rowLength
            } else {
                // When either pixel or output have a stride > 1 we must copy pixel by pixel
                planeBuffer.get(rowBuffer, 0, rowLength)
                for (col in 0 until planeWidth) {
                    outputBuffer[outputOffset] = rowBuffer[col * pixelStride]
                    outputOffset += outputStride
                }
            }
        }
    }
}