From 035d79a7f6d1cbb34ca1cdb1e78016096d7e2195 Mon Sep 17 00:00:00 2001
From: Adam Johnson <AdamJohnso@gmail.com>
Date: Sun, 9 Dec 2018 20:36:19 -0500
Subject: [PATCH] Refactor image data handling for cube maps

Previously, we allowed OpenGL to generate all of the mip levels for us
in a mipmap. This was pretty doggone fast and worked reasonably well.
However, with cube maps, we will need to use images that are not always
backed in Blender... this is because Blender stores cube maps as one
single image instead of one image per face. So, we need to be able to
generate those mip levels, preferably without touching Blender's
`Image.pixels`, which is slower than Christmas...

Also of note... `Image.gl_load()` will actually scale the image to a POT
when Blender is using OpenGL ES... but not on other platforms. So, now,
we just ask Blender to load the image and deal with the POT-izing later.

The con here is that the pure python implementation of the image scaling
function is SLOOOOOOOW. We're talking ~40 seconds to process a 1024x1024
mipmap. No one should be using the reference implementation, however,
and the C++ implementation shows no noticeable slowdown over the OpenGL
code.

Whew.
---
 korlib/korlib.h             |   1 +
 korlib/texture.cpp          | 346 +++++++++++++++++++++++-------------
 korman/exporter/material.py | 107 +++++------
 korman/korlib/texture.py    | 204 +++++++++++++++------
 4 files changed, 425 insertions(+), 233 deletions(-)
diff --git a/korlib/korlib.h b/korlib/korlib.h
index b59e4a2..99a8ba8 100644
--- a/korlib/korlib.h
+++ b/korlib/korlib.h
@@ -23,6 +23,7 @@
 #include <Python.h>
 
 #define _pycs(x) const_cast<char*>(x)
+#define arrsize(a) (sizeof(a) / sizeof((a)[0]))
 
 class PyObjectRef {
     PyObject* m_object;
diff --git a/korlib/texture.cpp b/korlib/texture.cpp
index 049c94e..ae26746 100644
--- a/korlib/texture.cpp
+++ b/korlib/texture.cpp
@@ -26,12 +26,40 @@
 #include <GL/gl.h>
 #include <PRP/Surface/plMipmap.h>
 
-#ifndef GL_GENERATE_MIPMAP
-#   define GL_GENERATE_MIPMAP 0x8191
-#endif // GL_GENERATE_MIPMAP
-
 #define TEXTARGET_TEXTURE_2D 0
 
+static inline void _ensure_copy_bytes(PyObject* parent, PyObject*& data) {
+    // PyBytes objects are immutable and ought not to be changed once they are returned to Python
+    // code. Therefore, this tests to see if the given bytes object is the same as one we're holding.
+    // If so, a new copy is constructed seamlessly.
+    if (parent == data) {
+        Py_ssize_t size;
+        char* buf;
+        PyBytes_AsStringAndSize(parent, &buf, &size);
+        data = PyBytes_FromStringAndSize(buf, size);
+        Py_DECREF(parent);
+    }
+}
+
+template<typename T>
+static T _ensure_power_of_two(T value) {
+    return static_cast<T>(std::pow(2, std::floor(std::log2(value))));
+}
+
+static void _flip_image(size_t width, size_t dataSize, uint8_t* data) {
+    // OpenGL returns a flipped image, so we must reflip it (rows are width * 4 bytes, swapped in place).
+    size_t row_stride = width * 4;
+    if (row_stride == 0 || dataSize < row_stride * 2)
+        return; // fewer than two rows: nothing to swap, and (dataSize - row_stride) must not underflow
+    uint8_t* temp = new uint8_t[row_stride];
+    for (uint8_t *sptr = data, *eptr = data + (dataSize - row_stride); sptr < eptr; sptr += row_stride, eptr -= row_stride) {
+        memcpy(temp, sptr, row_stride);
+        memcpy(sptr, eptr, row_stride);
+        memcpy(eptr, temp, row_stride);
+    }
+    delete[] temp;
+}
+
 static inline bool _get_float(PyObject* source, const char* attr, float& result) {
     PyObjectRef pyfloat = PyObject_GetAttrString(source, attr);
     if (pyfloat) {
@@ -41,7 +69,93 @@ static inline bool _get_float(PyObject* source, const char* attr, float& result)
     return false;
 }
 
-extern "C" {
+static inline int _get_num_levels(size_t width, size_t height) {
+    int num_levels = (int)std::floor(std::log2(std::max((float)width, (float)height))) + 1;
+
+    // Major Workaround Ahoy
+    // There is a bug in Cyan's level size algorithm that causes it to not allocate enough memory
+    // for the color block in certain mipmaps. I personally have encountered an access violation on
+    // 1x1 DXT5 mip levels -- the code only allocates an alpha block and not a color block. Paradox
+    // reports that if any dimension is smaller than 4px in a mip level, OpenGL doesn't like Cyan generated
+    // data. So, we're going to lop off the last two mip levels, which should be 1px and 2px as the smallest.
+    // This bug is basically unfixable without crazy hacks because of the way Plasma reads in texture data.
+    //     "<Deledrius> I feel like any texture at a 1x1 level is essentially academic.  I mean, JPEG/DXT
+    //                  doesn't even compress that, and what is it?  Just the average color of the whole
+    //                  texture in a single pixel?"
+    // :)
+    return std::max(num_levels - 2, 2);
+}
+
+static void _scale_image(const uint8_t* srcBuf, const size_t srcW, const size_t srcH, 
+                         uint8_t* dstBuf, const size_t dstW, const size_t dstH) {
+    float scaleX = static_cast<float>(srcW) / static_cast<float>(dstW);
+    float scaleY = static_cast<float>(srcH) / static_cast<float>(dstH);
+    float filterW = std::max(scaleX, 1.f);
+    float filterH = std::max(scaleY, 1.f);
+    size_t srcRowspan = srcW * sizeof(uint32_t);
+    size_t dstIdx = 0;
+
+    for (size_t dstY = 0; dstY < dstH; ++dstY) {
+        float srcY = dstY * scaleY;
+        ssize_t srcY_start = std::max(static_cast<ssize_t>(srcY - filterH),
+                                     static_cast<ssize_t>(0));
+        ssize_t srcY_end = std::min(static_cast<ssize_t>(srcY + filterH),
+                                   static_cast<ssize_t>(srcH - 1));
+
+        float weightsY[16];
+        for (ssize_t i = srcY_start; i <= srcY_end && i - srcY_start < arrsize(weightsY); ++i)
+            weightsY[i - srcY_start] = 1.f - std::abs((i - srcY) / filterH);
+
+        for (size_t dstX = 0; dstX < dstW; ++dstX) {
+            float srcX = dstX * scaleX;
+            ssize_t srcX_start = std::max(static_cast<ssize_t>(srcX - filterW),
+                                          static_cast<ssize_t>(0));
+            ssize_t srcX_end = std::min(static_cast<ssize_t>(srcX + filterW),
+                                        static_cast<ssize_t>(srcW - 1));
+
+            float weightsX[16];
+            for (ssize_t i = srcX_start; i <= srcX_end && i - srcX_start < arrsize(weightsX); ++i)
+                weightsX[i - srcX_start] = 1.f - std::abs((i - srcX) / filterW);
+
+            float accum_color[] = { 0.f, 0.f, 0.f, 0.f };
+            float weight_total = 0.f;
+            for (size_t i = srcY_start; i <= srcY_end; ++i) {
+                float weightY;
+                if (i - srcY_start < arrsize(weightsY))
+                    weightY = weightsY[i - srcY_start];
+                else
+                    weightY = 1.f - std::abs((i - srcY) / filterH);
+
+                if (weightY <= 0.f)
+                    continue;
+
+                size_t srcIdx = ((i * srcRowspan) + (srcX_start * sizeof(uint32_t)));
+                for (size_t j = srcX_start; j <= srcX_end; ++j, srcIdx += sizeof(uint32_t)) {
+                    float weightX;
+                    if (j - srcX_start < arrsize(weightsX))
+                        weightX = weightsX[j - srcX_start];
+                    else
+                        weightX = 1.f - std::abs((j - srcX) / filterW);
+                    float weight = weightX * weightY;
+
+                    if (weight > 0.f) {
+                        for (size_t k = 0; k < sizeof(uint32_t); ++k)
+                            accum_color[k] += (static_cast<float>(srcBuf[srcIdx+k]) / 255.f) * weight;
+                        weight_total += weight;
+                    }
+                }
+            }
+
+            for (size_t k = 0; k < sizeof(uint32_t); ++k)
+                accum_color[k] *= 1.f / weight_total;
+
+            // Whew.
+            for (size_t k = 0; k < sizeof(uint32_t); ++k)
+                dstBuf[dstIdx+k] = static_cast<uint8_t>(accum_color[k] * 255.f);
+            dstIdx += sizeof(uint32_t);
+        }
+    }
+}
 
 enum {
     TEX_DETAIL_ALPHA = 0,
@@ -53,10 +167,11 @@ typedef struct {
     PyObject_HEAD
     PyObject* m_blenderImage;
     PyObject* m_textureKey;
-    bool m_ownIt;
-    GLint m_prevImage;
-    bool m_changedState;
-    GLint m_mipmapState;
+    PyObject* m_imageData;
+    GLint m_width;
+    GLint m_height;
+    bool m_bgra;
+    bool m_imageInverted;
 } pyGLTexture;
 
 typedef struct {
@@ -66,8 +181,9 @@ typedef struct {
 } pyMipmap;
 
 static void pyGLTexture_dealloc(pyGLTexture* self) {
-    Py_XDECREF(self->m_textureKey);
-    Py_XDECREF(self->m_blenderImage);
+    Py_CLEAR(self->m_textureKey);
+    Py_CLEAR(self->m_blenderImage);
+    Py_CLEAR(self->m_imageData);
     Py_TYPE(self)->tp_free((PyObject*)self);
 }
 
@@ -75,15 +191,18 @@ static PyObject* pyGLTexture_new(PyTypeObject* type, PyObject* args, PyObject* k
     pyGLTexture* self = (pyGLTexture*)type->tp_alloc(type, 0);
     self->m_blenderImage = NULL;
     self->m_textureKey = NULL;
-    self->m_ownIt = false;
-    self->m_prevImage = 0;
-    self->m_changedState = false;
-    self->m_mipmapState = 0;
+    self->m_imageData = NULL;
+    self->m_width = 0;
+    self->m_height = 0;
+    self->m_bgra = false;
+    self->m_imageInverted = false;
     return (PyObject*)self;
 }
 
 static int pyGLTexture___init__(pyGLTexture* self, PyObject* args, PyObject* kwds) {
-    if (!PyArg_ParseTuple(args, "O", &self->m_textureKey)) {
+    static char* kwlist[] = { _pycs("texkey"), _pycs("bgra"), _pycs("fast"), NULL };
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|bb", kwlist, &self->m_textureKey,
+                                     &self->m_bgra, &self->m_imageInverted)) {
         PyErr_SetString(PyExc_TypeError, "expected a korman.exporter.material._Texture");
         return -1;
     }
@@ -115,12 +234,13 @@ static PyObject* pyGLTexture__enter__(pyGLTexture* self) {
         return NULL;
     }
 
-    glGetIntegerv(GL_TEXTURE_BINDING_2D, &self->m_prevImage);
+    GLint prevImage;
+    glGetIntegerv(GL_TEXTURE_BINDING_2D, &prevImage);
     GLuint image_bindcode = PyLong_AsUnsignedLong(bindcode);
-    self->m_ownIt = image_bindcode == 0;
+    bool ownit = image_bindcode == 0;
 
     // Load image into GL
-    if (self->m_ownIt) {
+    if (ownit) {
         PyObjectRef new_bind = PyObject_CallMethod(self->m_blenderImage, "gl_load", NULL);
         if (!PyLong_Check(new_bind)) {
             PyErr_SetString(PyExc_TypeError, "gl_load() did not return a long");
@@ -144,64 +264,39 @@ static PyObject* pyGLTexture__enter__(pyGLTexture* self) {
     }
 
     // Set image as current in GL
-    if (self->m_prevImage != image_bindcode) {
-        self->m_changedState = true;
+    bool changedState = prevImage != image_bindcode;
+    if (changedState)
         glBindTexture(GL_TEXTURE_2D, image_bindcode);
-    }
 
-    // Misc GL state
-    glGetTexParameteriv(GL_TEXTURE_2D, GL_GENERATE_MIPMAP, &self->m_mipmapState);
+    // Now we can load the image data...
+    glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_WIDTH, &self->m_width);
+    glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_HEIGHT, &self->m_height);
+
+    size_t bufsz = self->m_width * self->m_height * sizeof(uint32_t);
+    self->m_imageData = PyBytes_FromStringAndSize(NULL, bufsz);
+    char* imbuf = PyBytes_AS_STRING(self->m_imageData);
+    GLint fmt = self->m_bgra ? GL_BGRA_EXT : GL_RGBA;
+    glGetTexImage(GL_TEXTURE_2D, 0, fmt, GL_UNSIGNED_BYTE, reinterpret_cast<GLvoid*>(imbuf));
+
+    // OpenGL returns image data flipped upside down. We'll flip it to be correct, if requested.
+    if (!self->m_imageInverted)
+        _flip_image(self->m_width, bufsz, reinterpret_cast<uint8_t*>(imbuf));
+
+    // If we had to play with ourse^H^H^H^H^Hblender's image state, let's reset it
+    if (changedState)
+        glBindTexture(GL_TEXTURE_2D, prevImage);
+    if (ownit)
+        PyObjectRef result = PyObject_CallMethod(self->m_blenderImage, "gl_free", NULL);
 
     Py_INCREF(self);
     return (PyObject*)self;
 }
 
 static PyObject* pyGLTexture__exit__(pyGLTexture* self, PyObject*) {
-    // We don't care about the args here
-    glTexParameteri(GL_TEXTURE_2D, GL_GENERATE_MIPMAP, self->m_mipmapState);
-    if (self->m_changedState)
-        glBindTexture(GL_TEXTURE_2D, self->m_prevImage);
-    Py_RETURN_NONE;
-}
-
-static PyObject* pyGLTexture_generate_mipmap(pyGLTexture* self) {
-    glTexParameteri(GL_TEXTURE_2D, GL_GENERATE_MIPMAP, 1);
+    Py_CLEAR(self->m_imageData);
     Py_RETURN_NONE;
 }
 
-struct _LevelData
-{
-    GLint   m_width;
-    GLint   m_height;
-    uint8_t* m_data;
-    size_t   m_dataSize;
-
-    _LevelData(GLint w, GLint h, uint8_t* ptr, size_t sz)
-        : m_width(w), m_height(h), m_data(ptr), m_dataSize(sz)
-    { }
-};
-
-static inline int _get_num_levels(pyGLTexture* self) {
-    PyObjectRef size = PyObject_GetAttrString(self->m_blenderImage, "size");
-    float width = (float)PyFloat_AsDouble(PySequence_GetItem(size, 0));
-    float height = (float)PyFloat_AsDouble(PySequence_GetItem(size, 1));
-
-    int num_levels = (int)std::floor(std::log2(std::max(width, height))) + 1;
-
-    // Major Workaround Ahoy
-    // There is a bug in Cyan's level size algorithm that causes it to not allocate enough memory
-    // for the color block in certain mipmaps. I personally have encountered an access violation on
-    // 1x1 DXT5 mip levels -- the code only allocates an alpha block and not a color block. Paradox
-    // reports that if any dimension is smaller than 4px in a mip level, OpenGL doesn't like Cyan generated
-    // data. So, we're going to lop off the last two mip levels, which should be 1px and 2px as the smallest.
-    // This bug is basically unfixable without crazy hacks because of the way Plasma reads in texture data.
-    //     "<Deledrius> I feel like any texture at a 1x1 level is essentially academic.  I mean, JPEG/DXT
-    //                  doesn't even compress that, and what is it?  Just the average color of the whole
-    //                  texture in a single pixel?"
-    // :)
-    return std::max(num_levels - 2, 2);
-}
-
 static int _generate_detail_alpha(pyGLTexture* self, GLint level, float* result) {
     float dropoff_start, dropoff_stop, detail_max, detail_min;
     if (!_get_float(self->m_textureKey, "detail_fade_start", dropoff_start))
@@ -214,9 +309,9 @@ static int _generate_detail_alpha(pyGLTexture* self, GLint level, float* result)
         return -1;
 
     dropoff_start /= 100.f;
-    dropoff_start *= _get_num_levels(self);
+    dropoff_start *= _get_num_levels(self->m_width, self->m_height);
     dropoff_stop /= 100.f;
-    dropoff_stop *= _get_num_levels(self);
+    dropoff_stop *= _get_num_levels(self->m_width, self->m_height);
     detail_max /= 100.f;
     detail_min /= 100.f;
 
@@ -265,102 +360,110 @@ static int _generate_detail_map(pyGLTexture* self, uint8_t* buf, size_t bufsz, G
     return 0;
 }
 
-static _LevelData _get_level_data(pyGLTexture* self, GLint level, bool bgra, PyObject* report) {
-    GLint width, height;
-    glGetTexLevelParameteriv(GL_TEXTURE_2D, level, GL_TEXTURE_WIDTH, &width);
-    glGetTexLevelParameteriv(GL_TEXTURE_2D, level, GL_TEXTURE_HEIGHT, &height);
-    GLenum fmt = bgra ? GL_BGRA_EXT : GL_RGBA;
-
-    // Print out the debug message
-    if (report && report != Py_None) {
-        PyObjectRef msg_func = PyObject_GetAttrString(report, "msg");
-        PyObjectRef args = Py_BuildValue("siii", "Level #{}: {}x{}", level, width, height);
-        PyObjectRef kwargs = Py_BuildValue("{s:i}", "indent", 2);
-        PyObjectRef result = PyObject_Call(msg_func, args, kwargs);
-    }
-
-    size_t bufsz;
-    bufsz = (width * height * 4);
-    uint8_t* buf = new uint8_t[bufsz];
-    glGetTexImage(GL_TEXTURE_2D, level, fmt, GL_UNSIGNED_BYTE, reinterpret_cast<GLvoid*>(buf));
-    return _LevelData(width, height, buf, bufsz);
-}
-
 static PyObject* pyGLTexture_get_level_data(pyGLTexture* self, PyObject* args, PyObject* kwargs) {
-    static char* kwlist[] = { _pycs("level"), _pycs("calc_alpha"), _pycs("bgra"),
-                              _pycs("report"), _pycs("fast"), NULL };
+    static char* kwlist[] = { _pycs("level"), _pycs("calc_alpha"), _pycs("report"),
+                              _pycs("indent"), _pycs("fast"), NULL };
     GLint level = 0;
     bool calc_alpha = false;
-    bool bgra = false;
     PyObject* report = nullptr;
+    int indent = 2;
     bool fast = false;
-    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ibbOb", kwlist, &level, &calc_alpha, &bgra, &report, &fast)) {
-        PyErr_SetString(PyExc_TypeError, "get_level_data expects an optional int, bool, bool, obejct, bool");
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ibOib", kwlist, &level, &calc_alpha, &report, &indent, &fast)) {
+        PyErr_SetString(PyExc_TypeError, "get_level_data expects an optional int, bool, obejct, int, bool");
         return NULL;
     }
 
-    _LevelData data = _get_level_data(self, level, bgra, report);
-    if (fast)
-        return PyBytes_FromStringAndSize((char*)data.m_data, data.m_dataSize);
+    // We only ever want to return POT images for use in Plasma
+    auto eWidth = _ensure_power_of_two(self->m_width) >> level;
+    auto eHeight = _ensure_power_of_two(self->m_height) >> level;
+    bool is_og = eWidth == self->m_width && eHeight == self->m_height;
+    size_t bufsz = eWidth * eHeight * sizeof(uint32_t);
 
-    // OpenGL returns a flipped image, so we must reflip it.
-    size_t row_stride = data.m_width * 4;
-    uint8_t* sptr = data.m_data;
-    uint8_t* eptr = data.m_data + (data.m_dataSize - row_stride);
-    uint8_t* temp = new uint8_t[row_stride];
-    do {
-        memcpy(temp, sptr, row_stride);
-        memcpy(sptr, eptr, row_stride);
-        memcpy(eptr, temp, row_stride);
-    } while ((sptr += row_stride) < (eptr -= row_stride));
-    delete[] temp;
+    // Print out the debug message
+    if (report && report != Py_None) {
+        PyObjectRef msg_func = PyObject_GetAttrString(report, "msg");
+        PyObjectRef args = Py_BuildValue("siii", "Level #{}: {}x{}", level, eWidth, eHeight);
+        PyObjectRef kwargs = Py_BuildValue("{s:i}", "indent", indent);
+        PyObjectRef result = PyObject_Call(msg_func, args, kwargs);
+    }
+
+    PyObject* data;
+    if (is_og) {
+        Py_INCREF(self->m_imageData);
+        data = self->m_imageData;
+    } else {
+        data = PyBytes_FromStringAndSize(NULL, bufsz);
+        uint8_t* dstBuf = reinterpret_cast<uint8_t*>(PyBytes_AsString(data)); // AS_STRING :(
+        uint8_t* srcBuf = reinterpret_cast<uint8_t*>(PyBytes_AsString(self->m_imageData));
+        _scale_image(srcBuf, self->m_width, self->m_height, dstBuf, eWidth, eHeight);
+    }
+
+    // Make sure the level data is not flipped upside down...
+    if (self->m_imageInverted && !fast) {
+        _ensure_copy_bytes(self->m_imageData, data); // copy first if data aliases the cached level-0 bytes
+        _flip_image(eWidth, bufsz, reinterpret_cast<uint8_t*>(PyBytes_AS_STRING(data)));
+    }
 
     // Detail blend
     PyObjectRef is_detail_map = PyObject_GetAttrString(self->m_textureKey, "is_detail_map");
     if (PyLong_AsLong(is_detail_map) != 0) {
-        if (_generate_detail_map(self, data.m_data, data.m_dataSize, level) != 0) {
-            delete[] data.m_data;
+        _ensure_copy_bytes(self->m_imageData, data);
+        uint8_t* buf = reinterpret_cast<uint8_t*>(PyBytes_AS_STRING(data));
+        if (_generate_detail_map(self, buf, bufsz, level) != 0) {
             PyErr_SetString(PyExc_RuntimeError, "error while baking detail map");
+            Py_DECREF(data);
             return NULL;
         }
     }
 
     if (calc_alpha) {
-        for (size_t i = 0; i < data.m_dataSize; i += 4)
-            data.m_data[i + 3] = (data.m_data[i + 0] + data.m_data[i + 1] + data.m_data[i + 2]) / 3;
+        _ensure_copy_bytes(self->m_imageData, data);
+        uint8_t* buf = reinterpret_cast<uint8_t*>(PyBytes_AS_STRING(data)); // unsigned so channels >= 128 average correctly
+        for (size_t i = 0; i < bufsz; i += 4)
+            buf[i + 3] = (buf[i + 0] + buf[i + 1] + buf[i + 2]) / 3;
     }
 
-    return PyBytes_FromStringAndSize((char*)data.m_data, data.m_dataSize);
+    return data;
 }
 
 static PyMethodDef pyGLTexture_Methods[] = {
     { _pycs("__enter__"), (PyCFunction)pyGLTexture__enter__, METH_NOARGS, NULL },
     { _pycs("__exit__"), (PyCFunction)pyGLTexture__exit__, METH_VARARGS, NULL },
 
-    { _pycs("generate_mipmap"), (PyCFunction)pyGLTexture_generate_mipmap, METH_NOARGS, NULL },
     { _pycs("get_level_data"), (PyCFunction)pyGLTexture_get_level_data, METH_KEYWORDS | METH_VARARGS, NULL },
     { NULL, NULL, 0, NULL }
 };
 
 static PyObject* pyGLTexture_get_has_alpha(pyGLTexture* self, void*) {
-    _LevelData data = _get_level_data(self, 0, false, nullptr);
-    for (size_t i = 3; i < data.m_dataSize; i += 4) {
-        if (data.m_data[i] != 255) {
-            delete[] data.m_data;
+    const uint8_t* data = reinterpret_cast<const uint8_t*>(PyBytes_AsString(self->m_imageData)); // unsigned: a signed char can never equal 255
+    size_t bufsz = self->m_width * self->m_height * sizeof(uint32_t);
+    for (size_t i = 3; i < bufsz; i += 4) {
+        if (data[i] != 255) {
             return PyBool_FromLong(1);
         }
     }
-    delete[] data.m_data;
     return PyBool_FromLong(0);
 }
 
 static PyObject* pyGLTexture_get_num_levels(pyGLTexture* self, void*) {
-    return PyLong_FromLong(_get_num_levels(self));
+    return PyLong_FromLong(_get_num_levels(self->m_width, self->m_height));
+}
+
+static PyObject* pyGLTexture_get_size_npot(pyGLTexture* self, void*) {
+    return Py_BuildValue("ii", self->m_width, self->m_height);
+}
+
+static PyObject* pyGLTexture_get_size_pot(pyGLTexture* self, void*) {
+    int width = _ensure_power_of_two(self->m_width);   // "i" in Py_BuildValue reads a C int, not a size_t
+    int height = _ensure_power_of_two(self->m_height); // same: keep the vararg type matched to the format
+    return Py_BuildValue("ii", width, height);
+}
 
 static PyGetSetDef pyGLTexture_GetSet[] = {
     { _pycs("has_alpha"), (getter)pyGLTexture_get_has_alpha, NULL, NULL, NULL },
     { _pycs("num_levels"), (getter)pyGLTexture_get_num_levels, NULL, NULL, NULL },
+    { _pycs("size_npot"), (getter)pyGLTexture_get_size_npot, NULL, NULL, NULL },
+    { _pycs("size_pot"), (getter)pyGLTexture_get_size_pot, NULL, NULL, NULL },
     { NULL, NULL, NULL, NULL, NULL }
 };
 
@@ -429,6 +532,3 @@ PyObject* Init_pyGLTexture_Type() {
     Py_INCREF(&pyGLTexture_Type);
     return (PyObject*)&pyGLTexture_Type;
 }
-
-};
-
diff --git a/korman/exporter/material.py b/korman/exporter/material.py
index 1654802..0258eff 100644
--- a/korman/exporter/material.py
+++ b/korman/exporter/material.py
@@ -726,9 +726,6 @@ class MaterialConverter:
                 self._report.msg("\n[Mipmap '{}']", name)
 
                 image = key.image
-                oWidth, oHeight = image.size
-                if oWidth == 0 and oHeight == 0:
-                    raise ExportError("Image '{}' could not be loaded.".format(image.name))
 
                 # Now we try to use the pile of hints we were given to figure out what format to use
                 allowed_formats = key.allowed_formats
@@ -750,48 +747,11 @@ class MaterialConverter:
                 cached_image = texcache.get_from_texture(key, compression)
 
                 if cached_image is None:
-                    eWidth = helpers.ensure_power_of_two(oWidth)
-                    eHeight = helpers.ensure_power_of_two(oHeight)
-                    if (eWidth != oWidth) or (eHeight != oHeight):
-                        self._report.msg("Image is not a POT ({}x{}) resizing to {}x{}",
-                                         oWidth, oHeight, eWidth, eHeight, indent=1)
-                        self._resize_image(image, eWidth, eHeight)
-
-                    # Grab the image data from OpenGL and stuff it into the plBitmap
-                    helper = GLTexture(key)
-                    with helper as glimage:
-                        if compression == plBitmap.kDirectXCompression:
-                            numLevels = glimage.num_levels
-                            self._report.msg("Generating mip levels", indent=1)
-                            glimage.generate_mipmap()
-                        else:
-                            numLevels = 1
-                            self._report.msg("Compressing single level", indent=1)
-
-                        # Non-DXT images are BGRA in Plasma
-                        fmt = compression != plBitmap.kDirectXCompression
-
-                        # Hold the uncompressed level data for now. We may have to make multiple copies of
-                        # this mipmap for per-page textures :(
-                        data = []
-                        for i in range(numLevels):
-                            data.append(glimage.get_level_data(i, key.calc_alpha, fmt, report=self._report))
-
-                    # Be a good citizen and reset the Blender Image to pre-futzing state
-                    image.reload()
-
-                    # If this is a DXT-compressed mipmap, we need to use a temporary mipmap
-                    # to do the compression. We'll then steal the data from it.
-                    if compression == plBitmap.kDirectXCompression:
-                        mipmap = plMipmap(name=name, width=eWidth, height=eHeight, numLevels=numLevels,
-                                          compType=compression, format=plBitmap.kRGB8888, dxtLevel=dxt)
-                        for i in range(numLevels):
-                            mipmap.CompressImage(i, data[i])
-                            data[i] = mipmap.getLevel(i)
-                    texcache.add_texture(key, numLevels, (eWidth, eHeight), compression, [data,])
+                    numLevels, width, height, data = self._finalize_single_image(key, image, name, compression, dxt)
+                    texcache.add_texture(key, numLevels, (width, height), compression, data)
                 else:
-                    eWidth, eHeight = cached_image.export_size
-                    data = cached_image.image_data[0]
+                    width, height = cached_image.export_size
+                    data = cached_image.image_data
                     numLevels = cached_image.mip_levels            
 
                 # Now we poke our new bitmap into the pending layers. Note that we have to do some funny
@@ -804,19 +764,23 @@ class MaterialConverter:
                     self._report.msg("[{} '{}']", owner.ClassName()[2:], owner_key.name, indent=2)
                     page = mgr.get_textures_page(owner_key) # Layer's page or Textures.prp
 
-                    # If we haven't created this plMipmap in the page (either layer's page or Textures.prp),
+                    # If we haven't created this texture in the page (either layer's page or Textures.prp),
                     # then we need to do that and stuff the level data. This is a little tedious, but we
                     # need to be careful to manage our resources correctly
                     if page not in pages:
-                        mipmap = plMipmap(name=name, width=eWidth, height=eHeight, numLevels=numLevels,
-                                          compType=compression, format=plBitmap.kRGB8888, dxtLevel=dxt)
-                        for i, buf in enumerate(data):
-                            mipmap.setLevel(i, buf)
+                        mipmap = plMipmap(name=name, width=width, height=height,
+                                          numLevels=numLevels, compType=compression,
+                                          format=plBitmap.kRGB8888, dxtLevel=dxt)
+                        for i in range(numLevels):
+                            mipmap.setLevel(i, data[0][i])
                         mgr.AddObject(page, mipmap)
                         pages[page] = mipmap
                     else:
                         mipmap = pages[page]
 
+                    # The object that references this image can be either a layer (will appear
+                    # in the 3d world) or an image library (will appear in a journal or in another
+                    # dynamic manner in game)
                     if isinstance(owner, plLayerInterface):
                         owner.texture = mipmap.key
                     elif isinstance(owner, plImageLibMod):
@@ -826,6 +790,40 @@ class MaterialConverter:
 
                 inc_progress()
 
+    def _finalize_single_image(self, key, image, name, compression, dxt):
+        oWidth, oHeight = image.size
+        if oWidth == 0 and oHeight == 0:
+            raise ExportError("Image '{}' could not be loaded.".format(image.name))
+
+        # Non-DXT images are BGRA in Plasma
+        bgra = compression != plBitmap.kDirectXCompression
+
+        # Grab the image data from OpenGL and stuff it into the plBitmap
+        with GLTexture(key, bgra=bgra) as glimage:
+            eWidth, eHeight = glimage.size_pot
+            if compression == plBitmap.kDirectXCompression:
+                numLevels = glimage.num_levels
+                self._report.msg("Generating mip levels", indent=1)
+            else:
+                numLevels = 1
+                self._report.msg("Compressing single level", indent=1)
+
+            # Hold the uncompressed level data for now. We may have to make multiple copies of
+            # this mipmap for per-page textures :(
+            data = []
+            for i in range(numLevels):
+                data.append(glimage.get_level_data(i, key.calc_alpha, report=self._report))
+
+        # If this is a DXT-compressed mipmap, we need to use a temporary mipmap
+        # to do the compression. We'll then steal the data from it.
+        if compression == plBitmap.kDirectXCompression:
+            mipmap = plMipmap(name=name, width=eWidth, height=eHeight, numLevels=numLevels,
+                              compType=compression, format=plBitmap.kRGB8888, dxtLevel=dxt)
+            for i in range(numLevels):
+                mipmap.CompressImage(i, data[i])
+                data[i] = mipmap.getLevel(i)
+        return numLevels, eWidth, eHeight, [data,]
+
     def get_materials(self, bo):
         return self._obj2mat.get(bo, [])
 
@@ -882,15 +880,6 @@ class MaterialConverter:
     def _report(self):
         return self._exporter().report
 
-    def _resize_image(self, image, width, height):
-        image.scale(width, height)
-        image.update()
-
-        # If the image is already loaded into OpenGL, we need to refresh it to get the scaling.
-        if image.bindcode[0] != 0:
-            image.gl_free()
-            image.gl_load()
-
     def _test_image_alpha(self, image):
         """Tests to see if this image has any alpha data"""
 
@@ -906,7 +895,7 @@ class MaterialConverter:
         else:
             # Using bpy.types.Image.pixels is VERY VERY VERY slow...
             key = _Texture(image=image)
-            with GLTexture(key) as glimage:
+            with GLTexture(key, fast=True) as glimage:
                 result = glimage.has_alpha
 
         self._alphatest[image] = result
diff --git a/korman/korlib/texture.py b/korman/korlib/texture.py
index 10d9388..766b68b 100644
--- a/korman/korlib/texture.py
+++ b/korman/korlib/texture.py
@@ -13,12 +13,13 @@
 #    You should have received a copy of the GNU General Public License
 #    along with Korman.  If not, see <http://www.gnu.org/licenses/>.
 
+import array
 import bgl
+from ..helpers import ensure_power_of_two
 import math
 from PyHSPlasma import plBitmap
 
 # BGL doesn't know about this as of Blender 2.74
-bgl.GL_GENERATE_MIPMAP = 0x8191
 bgl.GL_BGRA = 0x80E1
 
 # Some texture generation flags
@@ -26,35 +27,121 @@ TEX_DETAIL_ALPHA = 0
 TEX_DETAIL_ADD = 1
 TEX_DETAIL_MULTIPLY = 2
 
+def _scale_image(buf, srcW, srcH, dstW, dstH):
+    """Scales an RGBA image (4 bytes/pixel) using the algorithm from CWE's plMipmap::ScaleNicely; returns bytes"""
+    dst, dst_idx = bytearray(dstW * dstH * 4), 0
+    scaleX, scaleY = (srcW / dstW), (srcH / dstH)
+    filterW, filterH = max(scaleX, 1.0), max(scaleY, 1.0)
+
+    src_rowspan = srcW * 4
+    weightsY = array.array("f", [0.0] * 16)
+    weightsX = array.array("f", [0.0] * 16)
+
+    # I hope you're in no particular hurry...
+    for dstY in range(dstH):
+        srcY = dstY * scaleY
+        srcY_start = int(max(srcY - filterH, 0))
+        srcY_end = int(min(srcY + filterH, srcH - 1))
+
+        #weightsY = { i - srcY_start: 1.0 - abs(i - srcY) / scaleY \
+        #             for i in range(srcY_start, srcY_end+1, 1) if i - srcY_start < 16 }
+        for i in range(16):
+            idx = i + srcY_start
+            if idx > srcY_end:
+                break
+            weightsY[i] = 1.0 - abs(idx - srcY) / filterH
+
+        for dstX in range(dstW):
+            srcX = dstX * scaleX
+            srcX_start = int(max(srcX - filterW, 0))
+            srcX_end = int(min(srcX + filterW, srcW - 1))
+
+            #weightsX = { i - srcX_start: 1.0 - abs(i - srcX) / scaleX \
+            #             for i in range(srcX_start, srcX_end+1, 1) if i - srcX_start < 16 }
+            for i in range(16):
+                idx = i + srcX_start
+                if idx > srcX_end:
+                    break
+                weightsX[i] = 1.0 - abs(idx - srcX) / filterW
+
+            accum_color = [0.0, 0.0, 0.0, 0.0]
+            weight_total = 0.0
+            for i in range(srcY_start, srcY_end+1, 1):
+                weightY_idx = i - srcY_start
+                # Use the cached row weight (as done for X below); only recompute past the 16-entry cache.
+                weightY = weightsY[weightY_idx] if weightY_idx < 16 else 1.0 - abs(i - srcY) / filterH
+
+                src_idx = (i * src_rowspan) + (srcX_start * 4)
+                for j in range(srcX_start, srcX_end+1, 1):
+                    weightX_idx = j - srcX_start
+                    weightX = weightsX[weightX_idx] if weightX_idx < 16 else 1.0 - abs(j - srcX) / filterW
+                    weight = weightY * weightX
+
+                    if weight > 0.0:
+                        # According to profiling, a list comprehension here doubles the execution time of this
+                        # function. I know this function is supposed to be slow, but dayum... I've unrolled it
+                        # to avoid all the extra allocations.
+                        for k in range(4):
+                            accum_color[k] = accum_color[k] + buf[src_idx+k] * weight
+                        weight_total += weight
+                    src_idx += 4
+
+            weight_total = max(weight_total, 0.0001)
+            for i in range(4):
+                accum_color[i] = int(accum_color[i] * (1.0 / weight_total))
+            dst[dst_idx:dst_idx+4] = accum_color
+            dst_idx += 4
+
+    return bytes(dst)
+
+
 class GLTexture:
-    def __init__(self, texkey=None):
+    def __init__(self, texkey=None, bgra=False, fast=False):
         self._texkey = texkey
-        self._ownit = (self._blimg.bindcode[0] == 0)
+        self._image_inverted = fast
+        self._bgra = bgra
 
     @property
     def _blimg(self):
         return self._texkey.image
 
     def __enter__(self):
-        """Sets the Blender Image as the active OpenGL texture"""
-        if self._ownit:
+        """Loads level 0 of the Blender image through OpenGL and caches its pixels in self._image_data"""
+
+        # Set image active in OpenGL, asking Blender to load it first if it has no bindcode yet
+        ownit = self._blimg.bindcode[0] == 0
+        if ownit:
             if self._blimg.gl_load() != 0:
                 raise RuntimeError("failed to load image")
-
-        self._previous_texture = self._get_integer(bgl.GL_TEXTURE_BINDING_2D)
-        self._changed_state = (self._previous_texture != self._blimg.bindcode[0])
-        if self._changed_state:
+        previous_texture = self._get_integer(bgl.GL_TEXTURE_BINDING_2D)
+        changed_state = (previous_texture != self._blimg.bindcode[0])
+        if changed_state:
             bgl.glBindTexture(bgl.GL_TEXTURE_2D, self._blimg.bindcode[0])
+
+        # Grab the level 0 image data, 4 bytes per pixel (BGRA if requested, otherwise RGBA)
+        self._width = self._get_tex_param(bgl.GL_TEXTURE_WIDTH, 0)
+        self._height = self._get_tex_param(bgl.GL_TEXTURE_HEIGHT, 0)
+        size = self._width * self._height * 4
+        buf = bgl.Buffer(bgl.GL_BYTE, size)
+        fmt = bgl.GL_BGRA if self._bgra else bgl.GL_RGBA
+        bgl.glGetTexImage(bgl.GL_TEXTURE_2D, 0, fmt, bgl.GL_UNSIGNED_BYTE, buf)
+
+        # OpenGL returns the images upside down, so we're going to rotate it in memory.
+        # ... But not when the fast path was requested; the data is then left inverted. :)
+        if self._image_inverted:
+            self._image_data = bytes(buf)
+        else:
+            self._image_data = self._invert_image(self._width, self._height, buf)
+
+        # Restore previous OpenGL state, and free the image again if we were the ones to load it
+        if changed_state:
+            bgl.glBindTexture(bgl.GL_TEXTURE_2D, previous_texture)
+        if ownit:
+            self._blimg.gl_free()
         return self
 
     def __exit__(self, type, value, traceback):
-        mipmap_state = getattr(self, "_mipmap_state", None)
-        if mipmap_state is not None:
-            bgl.glTexParameteri(bgl.GL_TEXTURE_2D, bgl.GL_GENERATE_MIPMAP, mipmap_state)
-        if self._changed_state:
-            bgl.glBindTexture(bgl.GL_TEXTURE_2D, self._previous_texture)
-        if self._ownit:
-            self._blimg.gl_free()
+        del self._image_data
 
     @property
     def _detail_falloff(self):
@@ -64,57 +151,55 @@ class GLTexture:
                  self._texkey.detail_opacity_start / 100.0,
                  self._texkey.detail_opacity_stop / 100.0)
 
-    def generate_mipmap(self):
-        """Generates all mip levels for this texture"""
-        self._mipmap_state = self._get_tex_param(bgl.GL_GENERATE_MIPMAP)
-
-        # Note that this is a very old feature from OpenGL 1.x -- it's new enough that Windows (and
-        # Blender apparently) don't support it natively and yet old enough that it was thrown away
-        # in OpenGL 3.0. The new way is glGenerateMipmap, but Blender likes oldgl, so we don't have that
-        # function available to us in BGL. I don't want to deal with loading the GL dll in ctypes on
-        # many platforms right now (or context headaches). If someone wants to fix this, be my guest!
-        # It will simplify our state tracking a bit.
-        bgl.glTexParameteri(bgl.GL_TEXTURE_2D, bgl.GL_GENERATE_MIPMAP, 1)
-
-    def get_level_data(self, level=0, calc_alpha=False, bgra=False, report=None, fast=False):
+    def get_level_data(self, level=0, calc_alpha=False, report=None, indent=2, fast=False):
         """Gets the uncompressed pixel data for a requested mip level, optionally calculating the alpha
            channel from the image color data
         """
-        width = self._get_tex_param(bgl.GL_TEXTURE_WIDTH, level)
-        height = self._get_tex_param(bgl.GL_TEXTURE_HEIGHT, level)
+
+        # Previously, we would leave the texture bound in OpenGL and use it to do the mipmapping, using
+        # old, deprecated OpenGL features. With the introduction of plCubicEnvironmap support to Korman,
+        # we wind up needing to get an NPOT image from OpenGL. Unfortunately, Blender will sometimes scale
+        # images to be POT _before_ loading them into OpenGL. Therefore, we now use OpenGL to grab the first
+        # level, then scale down to the new level from there.
+        oWidth, oHeight = self.size_npot
+        eWidth = ensure_power_of_two(oWidth) >> level
+        eHeight = ensure_power_of_two(oHeight) >> level
+        size = eWidth * eHeight * 4
         if report is not None:
-            report.msg("Level #{}: {}x{}", level, width, height, indent=2)
+            report.msg("Level #{}: {}x{}", level, eWidth, eHeight, indent=indent)
 
-        # Grab the image data
-        size = width * height * 4
-        buf = bgl.Buffer(bgl.GL_BYTE, size)
-        fmt = bgl.GL_BGRA if bgra else bgl.GL_RGBA
-        bgl.glGetTexImage(bgl.GL_TEXTURE_2D, level, fmt, bgl.GL_UNSIGNED_BYTE, buf);
+        # Scale, if needed...
+        if oWidth != eWidth or oHeight != eHeight:
+            buf = _scale_image(self._image_data, oWidth, oHeight, eWidth, eHeight)
+        else:
+            buf = self._image_data
+
+        # Some operations, like alpha testing, don't care about the fact that OpenGL flips
+        # the images in memory. Give an opportunity to bail here...
         if fast:
-            return bytes(buf)
+            return buf
+        else:
+            buf = bytearray(buf)
 
-        # OpenGL returns the images upside down, so we're going to rotate it in memory.
-        finalBuf = bytearray(size)
-        row_stride = width * 4
-        for i in range(height):
-            src, dst = i * row_stride, (height - (i+1)) * row_stride
-            finalBuf[dst:dst+row_stride] = buf[src:src+row_stride]
+        # Data loaded on the fast path is still upside down; flip it, keeping it mutable for the edits below.
+        if self._image_inverted:
+            buf = bytearray(self._invert_image(eWidth, eHeight, buf))
 
         # If this is a detail map, then we need to bake that per-level here.
         if self._texkey.is_detail_map:
             detail_blend = self._texkey.detail_blend
             if detail_blend == TEX_DETAIL_ALPHA:
-                self._make_detail_map_alpha(finalBuf, level)
+                self._make_detail_map_alpha(buf, level)
             elif detail_blend == TEX_DETAIL_ADD:
-                self._make_detail_map_alpha(finalBuf, level)
+                self._make_detail_map_alpha(buf, level)
             elif detail_blend == TEX_DETAIL_MULTIPLY:
-                self._make_detail_map_mult(finalBuf, level)
+                self._make_detail_map_mult(buf, level)
 
         # Do we need to calculate the alpha component?
         if calc_alpha:
             for i in range(0, size, 4):
-                finalBuf[i+3] = int(sum(finalBuf[i:i+3]) / 3)
-        return bytes(finalBuf)
+                buf[i+3] = int(sum(buf[i:i+3]) / 3)
+        return bytes(buf)
 
     def _get_detail_alpha(self, level, dropoff_start, dropoff_stop, detail_max, detail_min):
         alpha = (level - dropoff_start) * (detail_min - detail_max) / (dropoff_stop - dropoff_start) + detail_max
@@ -138,12 +223,21 @@ class GLTexture:
 
     @property
     def has_alpha(self):
-        data = self.get_level_data(report=None, fast=True)
+        data = self._image_data
         for i in range(3, len(data), 4):
             if data[i] != 255:
                 return True
         return False
 
+    def _invert_image(self, width, height, buf):
+        """Returns a copy of the 4-bytes-per-pixel image in `buf` with its rows in reverse order, as bytes"""
+        stride = width * 4
+        flipped = bytearray(stride * height)
+        for row in range(height):
+            top, bottom = row * stride, (height - row - 1) * stride
+            flipped[bottom:bottom+stride] = buf[top:top+stride]
+        return bytes(flipped)
+
     def _make_detail_map_add(self, data, level):
         dropoff_start, dropoff_stop, detail_max, detail_min = self._detail_falloff
         alpha = self._get_detail_alpha(level, dropoff_start, dropoff_stop, detail_max, detail_min)
@@ -167,7 +261,7 @@ class GLTexture:
 
     @property
     def num_levels(self):
-        numLevels = math.floor(math.log(max(self._blimg.size), 2)) + 1
+        numLevels = math.floor(math.log(max(self.size_npot), 2)) + 1
 
         # Major Workaround Ahoy
         # There is a bug in Cyan's level size algorithm that causes it to not allocate enough memory
@@ -181,3 +275,11 @@ class GLTexture:
         #                  texture in a single pixel?"
         # :)
         return max(numLevels - 2, 2)
+
+    @property
+    def size_npot(self):
+        return self._width, self._height
+
+    @property
+    def size_pot(self):
+        return ensure_power_of_two(self._width), ensure_power_of_two(self._height)