acid-drop

- Hacking the planet from a LilyGo T-Deck using custom firmware
git clone git://git.acid.vegas/acid-drop.git
Log | Files | Refs | Archive | README | LICENSE

tjpgd.c (43134B)

      1 /*----------------------------------------------------------------------------/
      2 / TJpgDec - Tiny JPEG Decompressor R0.03                      (C)ChaN, 2021
      3 /-----------------------------------------------------------------------------/
      4 / The TJpgDec is a generic JPEG decompressor module for tiny embedded systems.
      5 / This is a free software that opened for education, research and commercial
      6 /  developments under license policy of following terms.
      7 /
      8 /  Copyright (C) 2021, ChaN, all right reserved.
      9 /
     10 / * The TJpgDec module is a free software and there is NO WARRANTY.
     11 / * No restriction on use. You can use, modify and redistribute it for
     12 /   personal, non-profit or commercial products UNDER YOUR RESPONSIBILITY.
     13 / * Redistributions of source code must retain the above copyright notice.
     14 /
     15 /-----------------------------------------------------------------------------/
     16 / Oct 04, 2011 R0.01  First release.
     17 / Feb 19, 2012 R0.01a Fixed decompression fails when scan starts with an escape seq.
     18 / Sep 03, 2012 R0.01b Added JD_TBLCLIP option.
     19 / Mar 16, 2019 R0.01c Supported stdint.h.
     20 / Jul 01, 2020 R0.01d Fixed wrong integer type usage.
     21 / May 08, 2021 R0.02  Supported grayscale image. Separated configuration options.
     22 / Jun 11, 2021 R0.02a Some performance improvement.
     23 / Jul 01, 2021 R0.03  Added JD_FASTDECODE option.
     24 /                     Some performance improvement.
     25 /----------------------------------------------------------------------------*/
     26 
     27 #include "tjpgd.h"
     28 #if LV_USE_SJPG
     29 
     #if JD_FASTDECODE == 2
     /* Parameters of the fast Huffman decode lookup tables (only used when JD_FASTDECODE is 2). */
     #define HUFF_BIT    10                  /* Longest code length, in bits, that the lookup tables resolve */
     #define HUFF_LEN    (1 << HUFF_BIT)     /* Number of entries in one lookup table */
     #define HUFF_MASK   (HUFF_LEN - 1)      /* Mask that keeps a value inside the lookup table index range */
     #endif
     35 
     36 
     /*-----------------------------------------------*/
     /* Zigzag-order to raster-order conversion table */
     /*-----------------------------------------------*/
     /* Zig[n] is the raster position (row * 8 + column) inside an 8x8 block  */
     /* of the n-th element in zigzag order.                                  */

     static const uint8_t Zig[64] = {
          0,  1,  8, 16,  9,  2,  3, 10,
         17, 24, 32, 25, 18, 11,  4,  5,
         12, 19, 26, 33, 40, 48, 41, 34,
         27, 20, 13,  6,  7, 14, 21, 28,
         35, 42, 49, 56, 57, 50, 43, 36,
         29, 22, 15, 23, 30, 37, 44, 51,
         58, 59, 52, 45, 38, 31, 39, 46,
         53, 60, 61, 54, 47, 55, 62, 63
     };
     47 
     48 
     49 
     /*-------------------------------------------------*/
     /* Input scale factor of Arai algorithm            */
     /* (fixed point: each factor is multiplied by 8192)*/
     /*-------------------------------------------------*/
     /* The table is indexed in raster order (row * 8 + column); see also     */
     /* aa_idct.png. Entries [5][7] and [7][5] are spelled 0.21677 and        */
     /* 0.21678 respectively; both truncate to the same integer (1775).       */

     #define IPSF_FIX(f) ((uint16_t)((f) * 8192))    /* Real factor -> fixed point, truncated */

     static const uint16_t Ipsf[64] = {
         IPSF_FIX(1.00000), IPSF_FIX(1.38704), IPSF_FIX(1.30656), IPSF_FIX(1.17588),
         IPSF_FIX(1.00000), IPSF_FIX(0.78570), IPSF_FIX(0.54120), IPSF_FIX(0.27590),

         IPSF_FIX(1.38704), IPSF_FIX(1.92388), IPSF_FIX(1.81226), IPSF_FIX(1.63099),
         IPSF_FIX(1.38704), IPSF_FIX(1.08979), IPSF_FIX(0.75066), IPSF_FIX(0.38268),

         IPSF_FIX(1.30656), IPSF_FIX(1.81226), IPSF_FIX(1.70711), IPSF_FIX(1.53636),
         IPSF_FIX(1.30656), IPSF_FIX(1.02656), IPSF_FIX(0.70711), IPSF_FIX(0.36048),

         IPSF_FIX(1.17588), IPSF_FIX(1.63099), IPSF_FIX(1.53636), IPSF_FIX(1.38268),
         IPSF_FIX(1.17588), IPSF_FIX(0.92388), IPSF_FIX(0.63638), IPSF_FIX(0.32442),

         IPSF_FIX(1.00000), IPSF_FIX(1.38704), IPSF_FIX(1.30656), IPSF_FIX(1.17588),
         IPSF_FIX(1.00000), IPSF_FIX(0.78570), IPSF_FIX(0.54120), IPSF_FIX(0.27590),

         IPSF_FIX(0.78570), IPSF_FIX(1.08979), IPSF_FIX(1.02656), IPSF_FIX(0.92388),
         IPSF_FIX(0.78570), IPSF_FIX(0.61732), IPSF_FIX(0.42522), IPSF_FIX(0.21677),

         IPSF_FIX(0.54120), IPSF_FIX(0.75066), IPSF_FIX(0.70711), IPSF_FIX(0.63638),
         IPSF_FIX(0.54120), IPSF_FIX(0.42522), IPSF_FIX(0.29290), IPSF_FIX(0.14932),

         IPSF_FIX(0.27590), IPSF_FIX(0.38268), IPSF_FIX(0.36048), IPSF_FIX(0.32442),
         IPSF_FIX(0.27590), IPSF_FIX(0.21678), IPSF_FIX(0.14932), IPSF_FIX(0.07612)
     };

     #undef IPSF_FIX
     65 
     66 
     67 
     /*---------------------------------------------*/
     /* Saturation of a value to the range 0..255   */
     /*---------------------------------------------*/
     /* With JD_TBLCLIP enabled, BYTECLIP(v) is a lookup in a 1024-entry      */
     /* table indexed by the low 10 bits of v; otherwise it is a function     */
     /* that clamps with two comparisons.                                     */

     #if JD_TBLCLIP

     #define BYTECLIP(v) Clip8[(unsigned int)(v) & 0x3FF]

     /* Helper macros used only to generate the table below */
     #define CLIP8_RAMP8(b)   (b), (b) + 1, (b) + 2, (b) + 3, (b) + 4, (b) + 5, (b) + 6, (b) + 7
     #define CLIP8_RAMP64(b)  CLIP8_RAMP8(b),        CLIP8_RAMP8((b) + 8),  \
                              CLIP8_RAMP8((b) + 16), CLIP8_RAMP8((b) + 24), \
                              CLIP8_RAMP8((b) + 32), CLIP8_RAMP8((b) + 40), \
                              CLIP8_RAMP8((b) + 48), CLIP8_RAMP8((b) + 56)
     #define CLIP8_FILL8(v)   (v), (v), (v), (v), (v), (v), (v), (v)
     #define CLIP8_FILL64(v)  CLIP8_FILL8(v), CLIP8_FILL8(v), CLIP8_FILL8(v), CLIP8_FILL8(v), \
                              CLIP8_FILL8(v), CLIP8_FILL8(v), CLIP8_FILL8(v), CLIP8_FILL8(v)
     #define CLIP8_FILL256(v) CLIP8_FILL64(v), CLIP8_FILL64(v), CLIP8_FILL64(v), CLIP8_FILL64(v)

     static const uint8_t Clip8[1024] = {
         /* 0..255: identity */
         CLIP8_RAMP64(0), CLIP8_RAMP64(64), CLIP8_RAMP64(128), CLIP8_RAMP64(192),
         /* 256..511: saturated to 255 */
         CLIP8_FILL256(255),
         /* -512..-257: saturated to 0 */
         CLIP8_FILL256(0),
         /* -256..-1: saturated to 0 */
         CLIP8_FILL256(0)
     };

     #undef CLIP8_FILL256
     #undef CLIP8_FILL64
     #undef CLIP8_FILL8
     #undef CLIP8_RAMP64
     #undef CLIP8_RAMP8

     #else   /* JD_TBLCLIP */

     /* Clamp val to 0..255: negative values give 0, values above 255 give 255. */
     static uint8_t BYTECLIP (int val)
     {
         return (uint8_t)(val < 0 ? 0 : (val > 255 ? 255 : val));
     }

     #endif
    125 
    126 
    127 
    128 /*-----------------------------------------------------------------------*/
    129 /* Allocate a memory block from memory pool                              */
    130 /*-----------------------------------------------------------------------*/
    131 
    132 static void* alloc_pool (	/* Pointer to allocated memory block (NULL:no memory available) */
    133 	JDEC* jd,				/* Pointer to the decompressor object */
    134 	size_t ndata			/* Number of bytes to allocate */
    135 )
    136 {
    137 	char *rp = 0;
    138 
    139 
    140 	ndata = (ndata + 3) & ~3;			/* Align block size to the word boundary */
    141 
    142 	if (jd->sz_pool >= ndata) {
    143 		jd->sz_pool -= ndata;
    144 		rp = (char*)jd->pool;			/* Get start of available memory pool */
    145 		jd->pool = (void*)(rp + ndata);	/* Allocate requierd bytes */
    146 	}
    147 
    148 	return (void*)rp;	/* Return allocated memory block (NULL:no memory to allocate) */
    149 }
    150 
    151 
    152 
    153 
    154 /*-----------------------------------------------------------------------*/
    155 /* Create de-quantization and prescaling tables with a DQT segment       */
    156 /*-----------------------------------------------------------------------*/
    157 
    158 static JRESULT create_qt_tbl (	/* 0:OK, !0:Failed */
    159 	JDEC* jd,				/* Pointer to the decompressor object */
    160 	const uint8_t* data,	/* Pointer to the quantizer tables */
    161 	size_t ndata			/* Size of input data */
    162 )
    163 {
    164 	unsigned int i, zi;
    165 	uint8_t d;
    166 	int32_t *pb;
    167 
    168 
    169 	while (ndata) {	/* Process all tables in the segment */
    170 		if (ndata < 65) return JDR_FMT1;	/* Err: table size is unaligned */
    171 		ndata -= 65;
    172 		d = *data++;							/* Get table property */
    173 		if (d & 0xF0) return JDR_FMT1;			/* Err: not 8-bit resolution */
    174 		i = d & 3;								/* Get table ID */
    175 		pb = alloc_pool(jd, 64 * sizeof (int32_t));/* Allocate a memory block for the table */
    176 		if (!pb) return JDR_MEM1;				/* Err: not enough memory */
    177 		jd->qttbl[i] = pb;						/* Register the table */
    178 		for (i = 0; i < 64; i++) {				/* Load the table */
    179 			zi = Zig[i];						/* Zigzag-order to raster-order conversion */
    180 			pb[zi] = (int32_t)((uint32_t)*data++ * Ipsf[zi]);	/* Apply scale factor of Arai algorithm to the de-quantizers */
    181 		}
    182 	}
    183 
    184 	return JDR_OK;
    185 }
    186 
    187 
    188 
    189 
    190 /*-----------------------------------------------------------------------*/
    191 /* Create huffman code tables with a DHT segment                         */
    192 /*-----------------------------------------------------------------------*/
    193 
    194 static JRESULT create_huffman_tbl (	/* 0:OK, !0:Failed */
    195 	JDEC* jd,					/* Pointer to the decompressor object */
    196 	const uint8_t* data,		/* Pointer to the packed huffman tables */
    197 	size_t ndata				/* Size of input data */
    198 )
    199 {
    200 	unsigned int i, j, b, cls, num;
    201 	size_t np;
    202 	uint8_t d, *pb, *pd;
    203 	uint16_t hc, *ph;
    204 
    205 
    206 	while (ndata) {	/* Process all tables in the segment */
    207 		if (ndata < 17) return JDR_FMT1;	/* Err: wrong data size */
    208 		ndata -= 17;
    209 		d = *data++;						/* Get table number and class */
    210 		if (d & 0xEE) return JDR_FMT1;		/* Err: invalid class/number */
    211 		cls = d >> 4; num = d & 0x0F;		/* class = dc(0)/ac(1), table number = 0/1 */
    212 		pb = alloc_pool(jd, 16);			/* Allocate a memory block for the bit distribution table */
    213 		if (!pb) return JDR_MEM1;			/* Err: not enough memory */
    214 		jd->huffbits[num][cls] = pb;
    215 		for (np = i = 0; i < 16; i++) {		/* Load number of patterns for 1 to 16-bit code */
    216 			np += (pb[i] = *data++);		/* Get sum of code words for each code */
    217 		}
    218 		ph = alloc_pool(jd, np * sizeof (uint16_t));/* Allocate a memory block for the code word table */
    219 		if (!ph) return JDR_MEM1;			/* Err: not enough memory */
    220 		jd->huffcode[num][cls] = ph;
    221 		hc = 0;
    222 		for (j = i = 0; i < 16; i++) {		/* Re-build huffman code word table */
    223 			b = pb[i];
    224 			while (b--) ph[j++] = hc++;
    225 			hc <<= 1;
    226 		}
    227 
    228 		if (ndata < np) return JDR_FMT1;	/* Err: wrong data size */
    229 		ndata -= np;
    230 		pd = alloc_pool(jd, np);			/* Allocate a memory block for the decoded data */
    231 		if (!pd) return JDR_MEM1;			/* Err: not enough memory */
    232 		jd->huffdata[num][cls] = pd;
    233 		for (i = 0; i < np; i++) {			/* Load decoded data corresponds to each code word */
    234 			d = *data++;
    235 			if (!cls && d > 11) return JDR_FMT1;
    236 			pd[i] = d;
    237 		}
    238 #if JD_FASTDECODE == 2
    239 		{	/* Create fast huffman decode table */
    240 			unsigned int span, td, ti;
    241 			uint16_t *tbl_ac = 0;
    242 			uint8_t *tbl_dc = 0;
    243 
    244 			if (cls) {
    245 				tbl_ac = alloc_pool(jd, HUFF_LEN * sizeof (uint16_t));	/* LUT for AC elements */
    246 				if (!tbl_ac) return JDR_MEM1;		/* Err: not enough memory */
    247 				jd->hufflut_ac[num] = tbl_ac;
    248 				memset(tbl_ac, 0xFF, HUFF_LEN * sizeof (uint16_t));		/* Default value (0xFFFF: may be long code) */
    249 			} else {
    250 				tbl_dc = alloc_pool(jd, HUFF_LEN * sizeof (uint8_t));	/* LUT for AC elements */
    251 				if (!tbl_dc) return JDR_MEM1;		/* Err: not enough memory */
    252 				jd->hufflut_dc[num] = tbl_dc;
    253 				memset(tbl_dc, 0xFF, HUFF_LEN * sizeof (uint8_t));		/* Default value (0xFF: may be long code) */
    254 			}
    255 			for (i = b = 0; b < HUFF_BIT; b++) {	/* Create LUT */
    256 				for (j = pb[b]; j; j--) {
    257 					ti = ph[i] << (HUFF_BIT - 1 - b) & HUFF_MASK;	/* Index of input pattern for the code */
    258 					if (cls) {
    259 						td = pd[i++] | ((b + 1) << 8);	/* b15..b8: code length, b7..b0: zero run and data length */
    260 						for (span = 1 << (HUFF_BIT - 1 - b); span; span--, tbl_ac[ti++] = (uint16_t)td) ;
    261 					} else {
    262 						td = pd[i++] | ((b + 1) << 4);	/* b7..b4: code length, b3..b0: data length */
    263 						for (span = 1 << (HUFF_BIT - 1 - b); span; span--, tbl_dc[ti++] = (uint8_t)td) ;
    264 					}
    265 				}
    266 			}
    267 			jd->longofs[num][cls] = i;	/* Code table offset for long code */
    268 		}
    269 #endif
    270 	}
    271 
    272 	return JDR_OK;
    273 }
    274 
    275 
    276 
    277 
    278 /*-----------------------------------------------------------------------*/
    279 /* Extract a huffman decoded data from input stream                      */
    280 /*-----------------------------------------------------------------------*/
    281 
    282 static int huffext (	/* >=0: decoded data, <0: error code */
    283 	JDEC* jd,			/* Pointer to the decompressor object */
    284 	unsigned int id,	/* Table ID (0:Y, 1:C) */
    285 	unsigned int cls	/* Table class (0:DC, 1:AC) */
    286 )
    287 {
    288 	size_t dc = jd->dctr;
    289 	uint8_t *dp = jd->dptr;
    290 	unsigned int d, flg = 0;
    291 
    292 #if JD_FASTDECODE == 0
    293 	uint8_t bm, nd, bl;
    294 	const uint8_t *hb = jd->huffbits[id][cls];	/* Bit distribution table */
    295 	const uint16_t *hc = jd->huffcode[id][cls];	/* Code word table */
    296 	const uint8_t *hd = jd->huffdata[id][cls];	/* Data table */
    297 
    298 
    299 	bm = jd->dbit;	/* Bit mask to extract */
    300 	d = 0; bl = 16;	/* Max code length */
    301 	do {
    302 		if (!bm) {		/* Next byte? */
    303 			if (!dc) {	/* No input data is available, re-fill input buffer */
    304 				dp = jd->inbuf;	/* Top of input buffer */
    305 				dc = jd->infunc(jd, dp, JD_SZBUF);
    306 				if (!dc) return 0 - (int)JDR_INP;	/* Err: read error or wrong stream termination */
    307 			} else {
    308 				dp++;	/* Next data ptr */
    309 			}
    310 			dc--;		/* Decrement number of available bytes */
    311 			if (flg) {		/* In flag sequence? */
    312 				flg = 0;	/* Exit flag sequence */
    313 				if (*dp != 0) return 0 - (int)JDR_FMT1;	/* Err: unexpected flag is detected (may be collapted data) */
    314 				*dp = 0xFF;				/* The flag is a data 0xFF */
    315 			} else {
    316 				if (*dp == 0xFF) {		/* Is start of flag sequence? */
    317 					flg = 1; continue;	/* Enter flag sequence, get trailing byte */
    318 				}
    319 			}
    320 			bm = 0x80;		/* Read from MSB */
    321 		}
    322 		d <<= 1;			/* Get a bit */
    323 		if (*dp & bm) d++;
    324 		bm >>= 1;
    325 
    326 		for (nd = *hb++; nd; nd--) {	/* Search the code word in this bit length */
    327 			if (d == *hc++) {	/* Matched? */
    328 				jd->dbit = bm; jd->dctr = dc; jd->dptr = dp;
    329 				return *hd;		/* Return the decoded data */
    330 			}
    331 			hd++;
    332 		}
    333 		bl--;
    334 	} while (bl);
    335 
    336 #else
    337 	const uint8_t *hb, *hd;
    338 	const uint16_t *hc;
    339 	unsigned int nc, bl, wbit = jd->dbit % 32;
    340 	uint32_t w = jd->wreg & ((1UL << wbit) - 1);
    341 
    342 
    343 	while (wbit < 16) {	/* Prepare 16 bits into the working register */
    344 		if (jd->marker) {
    345 			d = 0xFF;	/* Input stream has stalled for a marker. Generate stuff bits */
    346 		} else {
    347 			if (!dc) {	/* Buffer empty, re-fill input buffer */
    348 				dp = jd->inbuf;						/* Top of input buffer */
    349 				dc = jd->infunc(jd, dp, JD_SZBUF);
    350 				if (!dc) return 0 - (int)JDR_INP;	/* Err: read error or wrong stream termination */
    351 			}
    352 			d = *dp++; dc--;
    353 			if (flg) {		/* In flag sequence? */
    354 				flg = 0;	/* Exit flag sequence */
    355 				if (d != 0) jd->marker = d;	/* Not an escape of 0xFF but a marker */
    356 				d = 0xFF;
    357 			} else {
    358 				if (d == 0xFF) {		/* Is start of flag sequence? */
    359 					flg = 1; continue;	/* Enter flag sequence, get trailing byte */
    360 				}
    361 			}
    362 		}
    363 		w = w << 8 | d;	/* Shift 8 bits in the working register */
    364 		wbit += 8;
    365 	}
    366 	jd->dctr = dc; jd->dptr = dp;
    367 	jd->wreg = w;
    368 
    369 #if JD_FASTDECODE == 2
    370 	/* Table serch for the short codes */
    371 	d = (unsigned int)(w >> (wbit - HUFF_BIT));	/* Short code as table index */
    372 	if (cls) {	/* AC element */
    373 		d = jd->hufflut_ac[id][d];	/* Table decode */
    374 		if (d != 0xFFFF) {	/* It is done if hit in short code */
    375 			jd->dbit = wbit - (d >> 8);	/* Snip the code length */
    376 			return d & 0xFF;	/* b7..0: zero run and following data bits */
    377 		}
    378 	} else {	/* DC element */
    379 		d = jd->hufflut_dc[id][d];	/* Table decode */
    380 		if (d != 0xFF) {	/* It is done if hit in short code */
    381 			jd->dbit = wbit - (d >> 4);	/* Snip the code length  */
    382 			return d & 0xF;	/* b3..0: following data bits */
    383 		}
    384 	}
    385 
    386 	/* Incremental serch for the codes longer than HUFF_BIT */
    387 	hb = jd->huffbits[id][cls] + HUFF_BIT;				/* Bit distribution table */
    388 	hc = jd->huffcode[id][cls] + jd->longofs[id][cls];	/* Code word table */
    389 	hd = jd->huffdata[id][cls] + jd->longofs[id][cls];	/* Data table */
    390 	bl = HUFF_BIT + 1;
    391 #else
    392 	/* Incremental serch for all codes */
    393 	hb = jd->huffbits[id][cls];	/* Bit distribution table */
    394 	hc = jd->huffcode[id][cls];	/* Code word table */
    395 	hd = jd->huffdata[id][cls];	/* Data table */
    396 	bl = 1;
    397 #endif
    398 	for ( ; bl <= 16; bl++) {	/* Incremental search */
    399 		nc = *hb++;
    400 		if (nc) {
    401 			d = w >> (wbit - bl);
    402 			do {	/* Search the code word in this bit length */
    403 				if (d == *hc++) {		/* Matched? */
    404 					jd->dbit = wbit - bl;	/* Snip the huffman code */
    405 					return *hd;			/* Return the decoded data */
    406 				}
    407 				hd++;
    408 			} while (--nc);
    409 		}
    410 	}
    411 #endif
    412 
    413 	return 0 - (int)JDR_FMT1;	/* Err: code not found (may be collapted data) */
    414 }
    415 
    416 
    417 
    418 
    419 /*-----------------------------------------------------------------------*/
    420 /* Extract N bits from input stream                                      */
    421 /*-----------------------------------------------------------------------*/
    422 
    423 static int bitext (	/* >=0: extracted data, <0: error code */
    424 	JDEC* jd,			/* Pointer to the decompressor object */
    425 	unsigned int nbit	/* Number of bits to extract (1 to 16) */
    426 )
    427 {
    428 	size_t dc = jd->dctr;
    429 	uint8_t *dp = jd->dptr;
    430 	unsigned int d, flg = 0;
    431 
    432 #if JD_FASTDECODE == 0
    433 	uint8_t mbit = jd->dbit;
    434 
    435 	d = 0;
    436 	do {
    437 		if (!mbit) {			/* Next byte? */
    438 			if (!dc) {			/* No input data is available, re-fill input buffer */
    439 				dp = jd->inbuf;	/* Top of input buffer */
    440 				dc = jd->infunc(jd, dp, JD_SZBUF);
    441 				if (!dc) return 0 - (int)JDR_INP;	/* Err: read error or wrong stream termination */
    442 			} else {
    443 				dp++;			/* Next data ptr */
    444 			}
    445 			dc--;				/* Decrement number of available bytes */
    446 			if (flg) {			/* In flag sequence? */
    447 				flg = 0;		/* Exit flag sequence */
    448 				if (*dp != 0) return 0 - (int)JDR_FMT1;	/* Err: unexpected flag is detected (may be collapted data) */
    449 				*dp = 0xFF;		/* The flag is a data 0xFF */
    450 			} else {
    451 				if (*dp == 0xFF) {		/* Is start of flag sequence? */
    452 					flg = 1; continue;	/* Enter flag sequence */
    453 				}
    454 			}
    455 			mbit = 0x80;		/* Read from MSB */
    456 		}
    457 		d <<= 1;	/* Get a bit */
    458 		if (*dp & mbit) d |= 1;
    459 		mbit >>= 1;
    460 		nbit--;
    461 	} while (nbit);
    462 
    463 	jd->dbit = mbit; jd->dctr = dc; jd->dptr = dp;
    464 	return (int)d;
    465 
    466 #else
    467 	unsigned int wbit = jd->dbit % 32;
    468 	uint32_t w = jd->wreg & ((1UL << wbit) - 1);
    469 
    470 
    471 	while (wbit < nbit) {	/* Prepare nbit bits into the working register */
    472 		if (jd->marker) {
    473 			d = 0xFF;	/* Input stream stalled, generate stuff bits */
    474 		} else {
    475 			if (!dc) {	/* Buffer empty, re-fill input buffer */
    476 				dp = jd->inbuf;	/* Top of input buffer */
    477 				dc = jd->infunc(jd, dp, JD_SZBUF);
    478 				if (!dc) return 0 - (int)JDR_INP;	/* Err: read error or wrong stream termination */
    479 			}
    480 			d = *dp++; dc--;
    481 			if (flg) {		/* In flag sequence? */
    482 				flg = 0;	/* Exit flag sequence */
    483 				if (d != 0) jd->marker = d;	/* Not an escape of 0xFF but a marker */
    484 				d = 0xFF;
    485 			} else {
    486 				if (d == 0xFF) {		/* Is start of flag sequence? */
    487 					flg = 1; continue;	/* Enter flag sequence, get trailing byte */
    488 				}
    489 			}
    490 		}
    491 		w = w << 8 | d;	/* Get 8 bits into the working register */
    492 		wbit += 8;
    493 	}
    494 	jd->wreg = w; jd->dbit = wbit - nbit;
    495 	jd->dctr = dc; jd->dptr = dp;
    496 
    497 	return (int)(w >> ((wbit - nbit) % 32));
    498 #endif
    499 }
    500 
    501 
    502 
    503 
    504 /*-----------------------------------------------------------------------*/
    505 /* Process restart interval                                              */
    506 /*-----------------------------------------------------------------------*/
    507 
    508 static JRESULT restart (
    509 	JDEC* jd,		/* Pointer to the decompressor object */
    510 	uint16_t rstn	/* Expected restert sequense number */
    511 )
    512 {
    513 	unsigned int i;
    514 	uint8_t *dp = jd->dptr;
    515 	size_t dc = jd->dctr;
    516 
    517 #if JD_FASTDECODE == 0
    518 	uint16_t d = 0;
    519 
    520 	/* Get two bytes from the input stream */
    521 	for (i = 0; i < 2; i++) {
    522 		if (!dc) {	/* No input data is available, re-fill input buffer */
    523 			dp = jd->inbuf;
    524 			dc = jd->infunc(jd, dp, JD_SZBUF);
    525 			if (!dc) return JDR_INP;
    526 		} else {
    527 			dp++;
    528 		}
    529 		dc--;
    530 		d = d << 8 | *dp;	/* Get a byte */
    531 	}
    532 	jd->dptr = dp; jd->dctr = dc; jd->dbit = 0;
    533 
    534 	/* Check the marker */
    535 	if ((d & 0xFFD8) != 0xFFD0 || (d & 7) != (rstn & 7)) {
    536 		return JDR_FMT1;	/* Err: expected RSTn marker is not detected (may be collapted data) */
    537 	}
    538 
    539 #else
    540 	uint16_t marker;
    541 
    542 
    543 	if (jd->marker) {	/* Generate a maker if it has been detected */
    544 		marker = 0xFF00 | jd->marker;
    545 		jd->marker = 0;
    546 	} else {
    547 		marker = 0;
    548 		for (i = 0; i < 2; i++) {	/* Get a restart marker */
    549 			if (!dc) {		/* No input data is available, re-fill input buffer */
    550 				dp = jd->inbuf;
    551 				dc = jd->infunc(jd, dp, JD_SZBUF);
    552 				if (!dc) return JDR_INP;
    553 			}
    554 			marker = (marker << 8) | *dp++;	/* Get a byte */
    555 			dc--;
    556 		}
    557 		jd->dptr = dp; jd->dctr = dc;
    558 	}
    559 
    560 	/* Check the marker */
    561 	if ((marker & 0xFFD8) != 0xFFD0 || (marker & 7) != (rstn & 7)) {
    562 		return JDR_FMT1;	/* Err: expected RSTn marker was not detected (may be collapted data) */
    563 	}
    564 
    565 	jd->dbit = 0;			/* Discard stuff bits */
    566 #endif
    567 
    568 	jd->dcv[2] = jd->dcv[1] = jd->dcv[0] = 0;	/* Reset DC offset */
    569 	return JDR_OK;
    570 }
    571 
    572 
    573 
    574 
    575 /*-----------------------------------------------------------------------*/
    576 /* Apply Inverse-DCT in Arai Algorithm (see also aa_idct.png)            */
    577 /*-----------------------------------------------------------------------*/
    578 
    579 static void block_idct (
    580 	int32_t* src,	/* Input block data (de-quantized and pre-scaled for Arai Algorithm) */
    581 	jd_yuv_t* dst	/* Pointer to the destination to store the block as byte array */
    582 )
    583 {
    584 	const int32_t M13 = (int32_t)(1.41421*4096), M2 = (int32_t)(1.08239*4096), M4 = (int32_t)(2.61313*4096), M5 = (int32_t)(1.84776*4096);
    585 	int32_t v0, v1, v2, v3, v4, v5, v6, v7;
    586 	int32_t t10, t11, t12, t13;
    587 	int i;
    588 
    589 	/* Process columns */
    590 	for (i = 0; i < 8; i++) {
    591 		v0 = src[8 * 0];	/* Get even elements */
    592 		v1 = src[8 * 2];
    593 		v2 = src[8 * 4];
    594 		v3 = src[8 * 6];
    595 
    596 		t10 = v0 + v2;		/* Process the even elements */
    597 		t12 = v0 - v2;
    598 		t11 = (v1 - v3) * M13 >> 12;
    599 		v3 += v1;
    600 		t11 -= v3;
    601 		v0 = t10 + v3;
    602 		v3 = t10 - v3;
    603 		v1 = t11 + t12;
    604 		v2 = t12 - t11;
    605 
    606 		v4 = src[8 * 7];	/* Get odd elements */
    607 		v5 = src[8 * 1];
    608 		v6 = src[8 * 5];
    609 		v7 = src[8 * 3];
    610 
    611 		t10 = v5 - v4;		/* Process the odd elements */
    612 		t11 = v5 + v4;
    613 		t12 = v6 - v7;
    614 		v7 += v6;
    615 		v5 = (t11 - v7) * M13 >> 12;
    616 		v7 += t11;
    617 		t13 = (t10 + t12) * M5 >> 12;
    618 		v4 = t13 - (t10 * M2 >> 12);
    619 		v6 = t13 - (t12 * M4 >> 12) - v7;
    620 		v5 -= v6;
    621 		v4 -= v5;
    622 
    623 		src[8 * 0] = v0 + v7;	/* Write-back transformed values */
    624 		src[8 * 7] = v0 - v7;
    625 		src[8 * 1] = v1 + v6;
    626 		src[8 * 6] = v1 - v6;
    627 		src[8 * 2] = v2 + v5;
    628 		src[8 * 5] = v2 - v5;
    629 		src[8 * 3] = v3 + v4;
    630 		src[8 * 4] = v3 - v4;
    631 
    632 		src++;	/* Next column */
    633 	}
    634 
    635 	/* Process rows */
    636 	src -= 8;
    637 	for (i = 0; i < 8; i++) {
    638 		v0 = src[0] + (128L << 8);	/* Get even elements (remove DC offset (-128) here) */
    639 		v1 = src[2];
    640 		v2 = src[4];
    641 		v3 = src[6];
    642 
    643 		t10 = v0 + v2;				/* Process the even elements */
    644 		t12 = v0 - v2;
    645 		t11 = (v1 - v3) * M13 >> 12;
    646 		v3 += v1;
    647 		t11 -= v3;
    648 		v0 = t10 + v3;
    649 		v3 = t10 - v3;
    650 		v1 = t11 + t12;
    651 		v2 = t12 - t11;
    652 
    653 		v4 = src[7];				/* Get odd elements */
    654 		v5 = src[1];
    655 		v6 = src[5];
    656 		v7 = src[3];
    657 
    658 		t10 = v5 - v4;				/* Process the odd elements */
    659 		t11 = v5 + v4;
    660 		t12 = v6 - v7;
    661 		v7 += v6;
    662 		v5 = (t11 - v7) * M13 >> 12;
    663 		v7 += t11;
    664 		t13 = (t10 + t12) * M5 >> 12;
    665 		v4 = t13 - (t10 * M2 >> 12);
    666 		v6 = t13 - (t12 * M4 >> 12) - v7;
    667 		v5 -= v6;
    668 		v4 -= v5;
    669 
    670 		/* Descale the transformed values 8 bits and output a row */
    671 #if JD_FASTDECODE >= 1
    672 		dst[0] = (int16_t)((v0 + v7) >> 8);
    673 		dst[7] = (int16_t)((v0 - v7) >> 8);
    674 		dst[1] = (int16_t)((v1 + v6) >> 8);
    675 		dst[6] = (int16_t)((v1 - v6) >> 8);
    676 		dst[2] = (int16_t)((v2 + v5) >> 8);
    677 		dst[5] = (int16_t)((v2 - v5) >> 8);
    678 		dst[3] = (int16_t)((v3 + v4) >> 8);
    679 		dst[4] = (int16_t)((v3 - v4) >> 8);
    680 #else
    681 		dst[0] = BYTECLIP((v0 + v7) >> 8);
    682 		dst[7] = BYTECLIP((v0 - v7) >> 8);
    683 		dst[1] = BYTECLIP((v1 + v6) >> 8);
    684 		dst[6] = BYTECLIP((v1 - v6) >> 8);
    685 		dst[2] = BYTECLIP((v2 + v5) >> 8);
    686 		dst[5] = BYTECLIP((v2 - v5) >> 8);
    687 		dst[3] = BYTECLIP((v3 + v4) >> 8);
    688 		dst[4] = BYTECLIP((v3 - v4) >> 8);
    689 #endif
    690 
    691 		dst += 8; src += 8;	/* Next row */
    692 	}
    693 }
    694 
    695 
    696 
    697 
    698 /*-----------------------------------------------------------------------*/
    699 /* Load all blocks in an MCU into working buffer                         */
    700 /*-----------------------------------------------------------------------*/
    701 
    702 static JRESULT mcu_load (
    703 	JDEC* jd		/* Pointer to the decompressor object */
    704 )
    705 {
    706 	int32_t *tmp = (int32_t*)jd->workbuf;	/* Block working buffer for de-quantize and IDCT */
    707 	int d, e;
    708 	unsigned int blk, nby, i, bc, z, id, cmp;
    709 	jd_yuv_t *bp;
    710 	const int32_t *dqf;
    711 
    712 
    713 	nby = jd->msx * jd->msy;	/* Number of Y blocks (1, 2 or 4) */
    714 	bp = jd->mcubuf;			/* Pointer to the first block of MCU */
    715 
    716 	for (blk = 0; blk < nby + 2; blk++) {	/* Get nby Y blocks and two C blocks */
    717 		cmp = (blk < nby) ? 0 : blk - nby + 1;	/* Component number 0:Y, 1:Cb, 2:Cr */
    718 
    719 		if (cmp && jd->ncomp != 3) {		/* Clear C blocks if not exist (monochrome image) */
    720 			for (i = 0; i < 64; bp[i++] = 128) ;
    721 
    722 		} else {							/* Load Y/C blocks from input stream */
    723 			id = cmp ? 1 : 0;						/* Huffman table ID of this component */
    724 
    725 			/* Extract a DC element from input stream */
    726 			d = huffext(jd, id, 0);					/* Extract a huffman coded data (bit length) */
    727 			if (d < 0) return (JRESULT)(0 - d);		/* Err: invalid code or input */
    728 			bc = (unsigned int)d;
    729 			d = jd->dcv[cmp];						/* DC value of previous block */
    730 			if (bc) {								/* If there is any difference from previous block */
    731 				e = bitext(jd, bc);					/* Extract data bits */
    732 				if (e < 0) return (JRESULT)(0 - e);	/* Err: input */
    733 				bc = 1 << (bc - 1);					/* MSB position */
    734 				if (!(e & bc)) e -= (bc << 1) - 1;	/* Restore negative value if needed */
    735 				d += e;								/* Get current value */
    736 				jd->dcv[cmp] = (int16_t)d;			/* Save current DC value for next block */
    737 			}
    738 			dqf = jd->qttbl[jd->qtid[cmp]];			/* De-quantizer table ID for this component */
    739 			tmp[0] = d * dqf[0] >> 8;				/* De-quantize, apply scale factor of Arai algorithm and descale 8 bits */
    740 
    741 			/* Extract following 63 AC elements from input stream */
    742 			memset(&tmp[1], 0, 63 * sizeof (int32_t));	/* Initialize all AC elements */
    743 			z = 1;		/* Top of the AC elements (in zigzag-order) */
    744 			do {
    745 				d = huffext(jd, id, 1);				/* Extract a huffman coded value (zero runs and bit length) */
    746 				if (d == 0) break;					/* EOB? */
    747 				if (d < 0) return (JRESULT)(0 - d);	/* Err: invalid code or input error */
    748 				bc = (unsigned int)d;
    749 				z += bc >> 4;						/* Skip leading zero run */
    750 				if (z >= 64) return JDR_FMT1;		/* Too long zero run */
    751 				if (bc &= 0x0F) {					/* Bit length? */
    752 					d = bitext(jd, bc);				/* Extract data bits */
    753 					if (d < 0) return (JRESULT)(0 - d);	/* Err: input device */
    754 					bc = 1 << (bc - 1);				/* MSB position */
    755 					if (!(d & bc)) d -= (bc << 1) - 1;	/* Restore negative value if needed */
    756 					i = Zig[z];						/* Get raster-order index */
    757 					tmp[i] = d * dqf[i] >> 8;		/* De-quantize, apply scale factor of Arai algorithm and descale 8 bits */
    758 				}
    759 			} while (++z < 64);		/* Next AC element */
    760 
    761 			if (JD_FORMAT != 2 || !cmp) {	/* C components may not be processed if in grayscale output */
    762 				if (z == 1 || (JD_USE_SCALE && jd->scale == 3)) {	/* If no AC element or scale ratio is 1/8, IDCT can be ommited and the block is filled with DC value */
    763 					d = (jd_yuv_t)((*tmp / 256) + 128);
    764 					if (JD_FASTDECODE >= 1) {
    765 						for (i = 0; i < 64; bp[i++] = d) ;
    766 					} else {
    767 						memset(bp, d, 64);
    768 					}
    769 				} else {
    770 					block_idct(tmp, bp);	/* Apply IDCT and store the block to the MCU buffer */
    771 				}
    772 			}
    773 		}
    774 
    775 		bp += 64;				/* Next block */
    776 	}
    777 
    778 	return JDR_OK;	/* All blocks have been loaded successfully */
    779 }
    780 
    781 
    782 
    783 
    784 /*-----------------------------------------------------------------------*/
    785 /* Output an MCU: Convert YCrCb to RGB and output it in RGB form         */
    786 /*-----------------------------------------------------------------------*/
    787 
    788 static JRESULT mcu_output (
    789 	JDEC* jd,			/* Pointer to the decompressor object */
    790 	int (*outfunc)(JDEC*, void*, JRECT*),	/* RGB output function */
    791 	unsigned int img_x,		/* MCU location in the image */
    792 	unsigned int img_y		/* MCU location in the image */
    793 )
    794 {
    795 	const int CVACC = (sizeof (int) > 2) ? 1024 : 128;	/* Adaptive accuracy for both 16-/32-bit systems */
    796 	unsigned int ix, iy, mx, my, rx, ry;
    797 	int yy, cb, cr;
    798 	jd_yuv_t *py, *pc;
    799 	uint8_t *pix;
    800 	JRECT rect;
    801 
    802 
    803 	mx = jd->msx * 8; my = jd->msy * 8;					/* MCU size (pixel) */
    804 	rx = (img_x + mx <= jd->width) ? mx : jd->width - img_x;	/* Output rectangular size (it may be clipped at right/bottom end of image) */
    805 	ry = (img_y + my <= jd->height) ? my : jd->height - img_y;
    806 	if (JD_USE_SCALE) {
    807 		rx >>= jd->scale; ry >>= jd->scale;
    808 		if (!rx || !ry) return JDR_OK;					/* Skip this MCU if all pixel is to be rounded off */
    809 		img_x >>= jd->scale; img_y >>= jd->scale;
    810 	}
    811 	rect.left = img_x; rect.right = img_x + rx - 1;				/* Rectangular area in the frame buffer */
    812 	rect.top = img_y; rect.bottom = img_y + ry - 1;
    813 
    814 
    815 	if (!JD_USE_SCALE || jd->scale != 3) {	/* Not for 1/8 scaling */
    816 		pix = (uint8_t*)jd->workbuf;
    817 
    818 		if (JD_FORMAT != 2) {	/* RGB output (build an RGB MCU from Y/C component) */
    819 			for (iy = 0; iy < my; iy++) {
    820 				pc = py = jd->mcubuf;
    821 				if (my == 16) {		/* Double block height? */
    822 					pc += 64 * 4 + (iy >> 1) * 8;
    823 					if (iy >= 8) py += 64;
    824 				} else {			/* Single block height */
    825 					pc += mx * 8 + iy * 8;
    826 				}
    827 				py += iy * 8;
    828 				for (ix = 0; ix < mx; ix++) {
    829 					cb = pc[0] - 128; 	/* Get Cb/Cr component and remove offset */
    830 					cr = pc[64] - 128;
    831 					if (mx == 16) {					/* Double block width? */
    832 						if (ix == 8) py += 64 - 8;	/* Jump to next block if double block heigt */
    833 						pc += ix & 1;				/* Step forward chroma pointer every two pixels */
    834 					} else {						/* Single block width */
    835 						pc++;						/* Step forward chroma pointer every pixel */
    836 					}
    837 					yy = *py++;			/* Get Y component */
    838 					*pix++ = /*R*/ BYTECLIP(yy + ((int)(1.402 * CVACC) * cr) / CVACC);
    839 					*pix++ = /*G*/ BYTECLIP(yy - ((int)(0.344 * CVACC) * cb + (int)(0.714 * CVACC) * cr) / CVACC);
    840 					*pix++ = /*B*/ BYTECLIP(yy + ((int)(1.772 * CVACC) * cb) / CVACC);
    841 				}
    842 			}
    843 		} else {	/* Monochrome output (build a grayscale MCU from Y comopnent) */
    844 			for (iy = 0; iy < my; iy++) {
    845 				py = jd->mcubuf + iy * 8;
    846 				if (my == 16) {		/* Double block height? */
    847 					if (iy >= 8) py += 64;
    848 				}
    849 				for (ix = 0; ix < mx; ix++) {
    850 					if (mx == 16) {					/* Double block width? */
    851 						if (ix == 8) py += 64 - 8;	/* Jump to next block if double block height */
    852 					}
    853 					*pix++ = (uint8_t)*py++;			/* Get and store a Y value as grayscale */
    854 				}
    855 			}
    856 		}
    857 
    858 		/* Descale the MCU rectangular if needed */
    859 		if (JD_USE_SCALE && jd->scale) {
    860 			unsigned int x, y, r, g, b, s, w, a;
    861 			uint8_t *op;
    862 
    863 			/* Get averaged RGB value of each square correcponds to a pixel */
    864 			s = jd->scale * 2;	/* Number of shifts for averaging */
    865 			w = 1 << jd->scale;	/* Width of square */
    866 			a = (mx - w) * (JD_FORMAT != 2 ? 3 : 1);	/* Bytes to skip for next line in the square */
    867 			op = (uint8_t*)jd->workbuf;
    868 			for (iy = 0; iy < my; iy += w) {
    869 				for (ix = 0; ix < mx; ix += w) {
    870 					pix = (uint8_t*)jd->workbuf + (iy * mx + ix) * (JD_FORMAT != 2 ? 3 : 1);
    871 					r = g = b = 0;
    872 					for (y = 0; y < w; y++) {	/* Accumulate RGB value in the square */
    873 						for (x = 0; x < w; x++) {
    874 							r += *pix++;	/* Accumulate R or Y (monochrome output) */
    875 							if (JD_FORMAT != 2) {	/* RGB output? */
    876 								g += *pix++;	/* Accumulate G */
    877 								b += *pix++;	/* Accumulate B */
    878 							}
    879 						}
    880 						pix += a;
    881 					}							/* Put the averaged pixel value */
    882 					*op++ = (uint8_t)(r >> s);	/* Put R or Y (monochrome output) */
    883 					if (JD_FORMAT != 2) {	/* RGB output? */
    884 						*op++ = (uint8_t)(g >> s);	/* Put G */
    885 						*op++ = (uint8_t)(b >> s);	/* Put B */
    886 					}
    887 				}
    888 			}
    889 		}
    890 
    891 	} else {	/* For only 1/8 scaling (left-top pixel in each block are the DC value of the block) */
    892 
    893 		/* Build a 1/8 descaled RGB MCU from discrete comopnents */
    894 		pix = (uint8_t*)jd->workbuf;
    895 		pc = jd->mcubuf + mx * my;
    896 		cb = pc[0] - 128;		/* Get Cb/Cr component and restore right level */
    897 		cr = pc[64] - 128;
    898 		for (iy = 0; iy < my; iy += 8) {
    899 			py = jd->mcubuf;
    900 			if (iy == 8) py += 64 * 2;
    901 			for (ix = 0; ix < mx; ix += 8) {
    902 				yy = *py;	/* Get Y component */
    903 				py += 64;
    904 				if (JD_FORMAT != 2) {
    905 					*pix++ = /*R*/ BYTECLIP(yy + ((int)(1.402 * CVACC) * cr / CVACC));
    906 					*pix++ = /*G*/ BYTECLIP(yy - ((int)(0.344 * CVACC) * cb + (int)(0.714 * CVACC) * cr) / CVACC);
    907 					*pix++ = /*B*/ BYTECLIP(yy + ((int)(1.772 * CVACC) * cb / CVACC));
    908 				} else {
    909 					*pix++ = yy;
    910 				}
    911 			}
    912 		}
    913 	}
    914 
    915 	/* Squeeze up pixel table if a part of MCU is to be truncated */
    916 	mx >>= jd->scale;
    917 	if (rx < mx) {	/* Is the MCU spans rigit edge? */
    918 		uint8_t *s, *d;
    919 		unsigned int x, y;
    920 
    921 		s = d = (uint8_t*)jd->workbuf;
    922 		for (y = 0; y < ry; y++) {
    923 			for (x = 0; x < rx; x++) {	/* Copy effective pixels */
    924 				*d++ = *s++;
    925 				if (JD_FORMAT != 2) {
    926 					*d++ = *s++;
    927 					*d++ = *s++;
    928 				}
    929 			}
    930 			s += (mx - rx) * (JD_FORMAT != 2 ? 3 : 1);	/* Skip truncated pixels */
    931 		}
    932 	}
    933 
    934 	/* Convert RGB888 to RGB565 if needed */
    935 	if (JD_FORMAT == 1) {
    936 		uint8_t *s = (uint8_t*)jd->workbuf;
    937 		uint16_t w, *d = (uint16_t*)s;
    938 		unsigned int n = rx * ry;
    939 
    940 		do {
    941 			w = (*s++ & 0xF8) << 8;		/* RRRRR----------- */
    942 			w |= (*s++ & 0xFC) << 3;	/* -----GGGGGG----- */
    943 			w |= *s++ >> 3;				/* -----------BBBBB */
    944 			*d++ = w;
    945 		} while (--n);
    946 	}
    947 
    948 	/* Output the rectangular */
    949 	return outfunc(jd, jd->workbuf, &rect) ? JDR_OK : JDR_INTR;
    950 }
    951 
    952 
    953 
    954 
    955 /*-----------------------------------------------------------------------*/
    956 /* Analyze the JPEG image and Initialize decompressor object             */
    957 /*-----------------------------------------------------------------------*/
    958 
    959 #define	LDB_WORD(ptr)		(uint16_t)(((uint16_t)*((uint8_t*)(ptr))<<8)|(uint16_t)*(uint8_t*)((ptr)+1))
    960 
    961 
    962 JRESULT jd_prepare (
    963 	JDEC* jd,				/* Blank decompressor object */
    964 	size_t (*infunc)(JDEC*, uint8_t*, size_t),	/* JPEG strem input function */
    965 	void* pool,				/* Working buffer for the decompression session */
    966 	size_t sz_pool,			/* Size of working buffer */
    967 	void* dev				/* I/O device identifier for the session */
    968 )
    969 {
    970 	uint8_t *seg, b;
    971 	uint16_t marker;
    972 	unsigned int n, i, ofs;
    973 	size_t len;
    974 	JRESULT rc;
    975 
    976 
    977 	memset(jd, 0, sizeof (JDEC));	/* Clear decompression object (this might be a problem if machine's null pointer is not all bits zero) */
    978 	jd->pool = pool;		/* Work memroy */
    979 	jd->sz_pool = sz_pool;	/* Size of given work memory */
    980 	jd->infunc = infunc;	/* Stream input function */
    981 	jd->device = dev;		/* I/O device identifier */
    982 
    983 	jd->inbuf = seg = alloc_pool(jd, JD_SZBUF);		/* Allocate stream input buffer */
    984 	if (!seg) return JDR_MEM1;
    985 
    986 	ofs = marker = 0;		/* Find SOI marker */
    987 	do {
    988 		if (jd->infunc(jd, seg, 1) != 1) return JDR_INP;	/* Err: SOI was not detected */
    989 		ofs++;
    990 		marker = marker << 8 | seg[0];
    991 	} while (marker != 0xFFD8);
    992 
    993 	for (;;) {				/* Parse JPEG segments */
    994 		/* Get a JPEG marker */
    995 		if (jd->infunc(jd, seg, 4) != 4) return JDR_INP;
    996 		marker = LDB_WORD(seg);		/* Marker */
    997 		len = LDB_WORD(seg + 2);	/* Length field */
    998 		if (len <= 2 || (marker >> 8) != 0xFF) return JDR_FMT1;
    999 		len -= 2;			/* Segent content size */
   1000 		ofs += 4 + len;		/* Number of bytes loaded */
   1001 
   1002 		switch (marker & 0xFF) {
   1003 		case 0xC0:	/* SOF0 (baseline JPEG) */
   1004 			if (len > JD_SZBUF) return JDR_MEM2;
   1005 			if (jd->infunc(jd, seg, len) != len) return JDR_INP;	/* Load segment data */
   1006 
   1007 			jd->width = LDB_WORD(&seg[3]);		/* Image width in unit of pixel */
   1008 			jd->height = LDB_WORD(&seg[1]);		/* Image height in unit of pixel */
   1009 			jd->ncomp = seg[5];					/* Number of color components */
   1010 			if (jd->ncomp != 3 && jd->ncomp != 1) return JDR_FMT3;	/* Err: Supports only Grayscale and Y/Cb/Cr */
   1011 
   1012 			/* Check each image component */
   1013 			for (i = 0; i < jd->ncomp; i++) {
   1014 				b = seg[7 + 3 * i];							/* Get sampling factor */
   1015 				if (i == 0) {	/* Y component */
   1016 					if (b != 0x11 && b != 0x22 && b != 0x21) {	/* Check sampling factor */
   1017 						return JDR_FMT3;					/* Err: Supports only 4:4:4, 4:2:0 or 4:2:2 */
   1018 					}
   1019 					jd->msx = b >> 4; jd->msy = b & 15;		/* Size of MCU [blocks] */
   1020 				} else {		/* Cb/Cr component */
   1021 					if (b != 0x11) return JDR_FMT3;			/* Err: Sampling factor of Cb/Cr must be 1 */
   1022 				}
   1023 				jd->qtid[i] = seg[8 + 3 * i];				/* Get dequantizer table ID for this component */
   1024 				if (jd->qtid[i] > 3) return JDR_FMT3;		/* Err: Invalid ID */
   1025 			}
   1026 			break;
   1027 
   1028 		case 0xDD:	/* DRI - Define Restart Interval */
   1029 			if (len > JD_SZBUF) return JDR_MEM2;
   1030 			if (jd->infunc(jd, seg, len) != len) return JDR_INP;	/* Load segment data */
   1031 
   1032 			jd->nrst = LDB_WORD(seg);	/* Get restart interval (MCUs) */
   1033 			break;
   1034 
   1035 		case 0xC4:	/* DHT - Define Huffman Tables */
   1036 			if (len > JD_SZBUF) return JDR_MEM2;
   1037 			if (jd->infunc(jd, seg, len) != len) return JDR_INP;	/* Load segment data */
   1038 
   1039 			rc = create_huffman_tbl(jd, seg, len);	/* Create huffman tables */
   1040 			if (rc) return rc;
   1041 			break;
   1042 
   1043 		case 0xDB:	/* DQT - Define Quaitizer Tables */
   1044 			if (len > JD_SZBUF) return JDR_MEM2;
   1045 			if (jd->infunc(jd, seg, len) != len) return JDR_INP;	/* Load segment data */
   1046 
   1047 			rc = create_qt_tbl(jd, seg, len);	/* Create de-quantizer tables */
   1048 			if (rc) return rc;
   1049 			break;
   1050 
   1051 		case 0xDA:	/* SOS - Start of Scan */
   1052 			if (len > JD_SZBUF) return JDR_MEM2;
   1053 			if (jd->infunc(jd, seg, len) != len) return JDR_INP;	/* Load segment data */
   1054 
   1055 			if (!jd->width || !jd->height) return JDR_FMT1;	/* Err: Invalid image size */
   1056 			if (seg[0] != jd->ncomp) return JDR_FMT3;		/* Err: Wrong color components */
   1057 
   1058 			/* Check if all tables corresponding to each components have been loaded */
   1059 			for (i = 0; i < jd->ncomp; i++) {
   1060 				b = seg[2 + 2 * i];	/* Get huffman table ID */
   1061 				if (b != 0x00 && b != 0x11)	return JDR_FMT3;	/* Err: Different table number for DC/AC element */
   1062 				n = i ? 1 : 0;							/* Component class */
   1063 				if (!jd->huffbits[n][0] || !jd->huffbits[n][1]) {	/* Check huffman table for this component */
   1064 					return JDR_FMT1;					/* Err: Nnot loaded */
   1065 				}
   1066 				if (!jd->qttbl[jd->qtid[i]]) {			/* Check dequantizer table for this component */
   1067 					return JDR_FMT1;					/* Err: Not loaded */
   1068 				}
   1069 			}
   1070 
   1071 			/* Allocate working buffer for MCU and pixel output */
   1072 			n = jd->msy * jd->msx;						/* Number of Y blocks in the MCU */
   1073 			if (!n) return JDR_FMT1;					/* Err: SOF0 has not been loaded */
   1074 			len = n * 64 * 2 + 64;						/* Allocate buffer for IDCT and RGB output */
   1075 			if (len < 256) len = 256;					/* but at least 256 byte is required for IDCT */
   1076 			jd->workbuf = alloc_pool(jd, len);			/* and it may occupy a part of following MCU working buffer for RGB output */
   1077 			if (!jd->workbuf) return JDR_MEM1;			/* Err: not enough memory */
   1078 			jd->mcubuf = alloc_pool(jd, (n + 2) * 64 * sizeof (jd_yuv_t));	/* Allocate MCU working buffer */
   1079 			if (!jd->mcubuf) return JDR_MEM1;			/* Err: not enough memory */
   1080 
   1081 			/* Align stream read offset to JD_SZBUF */
   1082 			if (ofs %= JD_SZBUF) {
   1083 				jd->dctr = jd->infunc(jd, seg + ofs, (size_t)(JD_SZBUF - ofs));
   1084 			}
   1085 			jd->dptr = seg + ofs - (JD_FASTDECODE ? 0 : 1);
   1086 
   1087 			return JDR_OK;		/* Initialization succeeded. Ready to decompress the JPEG image. */
   1088 
   1089 		case 0xC1:	/* SOF1 */
   1090 		case 0xC2:	/* SOF2 */
   1091 		case 0xC3:	/* SOF3 */
   1092 		case 0xC5:	/* SOF5 */
   1093 		case 0xC6:	/* SOF6 */
   1094 		case 0xC7:	/* SOF7 */
   1095 		case 0xC9:	/* SOF9 */
   1096 		case 0xCA:	/* SOF10 */
   1097 		case 0xCB:	/* SOF11 */
   1098 		case 0xCD:	/* SOF13 */
   1099 		case 0xCE:	/* SOF14 */
   1100 		case 0xCF:	/* SOF15 */
   1101 		case 0xD9:	/* EOI */
   1102 			return JDR_FMT3;	/* Unsuppoted JPEG standard (may be progressive JPEG) */
   1103 
   1104 		default:	/* Unknown segment (comment, exif or etc..) */
   1105 			/* Skip segment data (null pointer specifies to remove data from the stream) */
   1106 			if (jd->infunc(jd, 0, len) != len) return JDR_INP;
   1107 		}
   1108 	}
   1109 }
   1110 
   1111 
   1112 
   1113 
   1114 /*-----------------------------------------------------------------------*/
   1115 /* Start to decompress the JPEG picture                                  */
   1116 /*-----------------------------------------------------------------------*/
   1117 
   1118 JRESULT jd_decomp (
   1119 	JDEC* jd,								/* Initialized decompression object */
   1120 	int (*outfunc)(JDEC*, void*, JRECT*),	/* RGB output function */
   1121 	uint8_t scale							/* Output de-scaling factor (0 to 3) */
   1122 )
   1123 {
   1124 	unsigned int x, y, mx, my;
   1125 	uint16_t rst, rsc;
   1126 	JRESULT rc;
   1127 
   1128 
   1129 	if (scale > (JD_USE_SCALE ? 3 : 0)) return JDR_PAR;
   1130 	jd->scale = scale;
   1131 
   1132 	mx = jd->msx * 8; my = jd->msy * 8;			/* Size of the MCU (pixel) */
   1133 
   1134 	jd->dcv[2] = jd->dcv[1] = jd->dcv[0] = 0;	/* Initialize DC values */
   1135 	rst = rsc = 0;
   1136 
   1137 	rc = JDR_OK;
   1138 	for (y = 0; y < jd->height; y += my) {		/* Vertical loop of MCUs */
   1139 		for (x = 0; x < jd->width; x += mx) {	/* Horizontal loop of MCUs */
   1140 			if (jd->nrst && rst++ == jd->nrst) {	/* Process restart interval if enabled */
   1141 				rc = restart(jd, rsc++);
   1142 				if (rc != JDR_OK) return rc;
   1143 				rst = 1;
   1144 			}
   1145 			rc = mcu_load(jd);					/* Load an MCU (decompress huffman coded stream, dequantize and apply IDCT) */
   1146 			if (rc != JDR_OK) return rc;
   1147 			rc = mcu_output(jd, outfunc, x, y);	/* Output the MCU (YCbCr to RGB, scaling and output) */
   1148 			if (rc != JDR_OK) return rc;
   1149 		}
   1150 	}
   1151 
   1152 	return rc;
   1153 }
   1154 
   1155 #endif /*LV_USE_SJPG*/