acid-drop

- Hacking the planet from a LilyGo T-Deck using custom firmware
git clone git://git.acid.vegas/acid-drop.git
mp3_decoder.cpp (178676B)
      1 /*
      2  * mp3_decoder.cpp
      3  * libhelix_HMP3DECODER
      4  *
      5  *  Created on: 26.10.2018
      6  *  Updated on: 27.05.2022
      7  */
      8 #include "mp3_decoder.h"
      9 /* clip to range [-2^n, 2^n - 1] */
     /*
      * CLIP_2N(y, n): saturate the int lvalue `y` in place to [-2^n, 2^n - 1].
      *
      * `y` is read and written several times by the expansion, so it must be a
      * side-effect-free lvalue. Every use of a macro parameter is parenthesized,
      * so an expression passed as `n` (e.g. a conditional expression) keeps its
      * own precedence inside the shift. The scratch variables carry
      * macro-specific names so that a caller variable called `x` or `sign` is
      * not shadowed by the expansion.
      *
      * `1 << (n)` is evaluated as int, so `n` must be a valid shift count for
      * int with a representable result. The disabled variant additionally
      * shifts `y` right by 31, i.e. it is written for a 32-bit int.
      *
      * The expansion deliberately stays a plain brace block rather than
      * do { } while (0), so call sites written without a trailing semicolon
      * keep compiling.
      */
     #if 0 // Fast on ARM:
     #define CLIP_2N(y, n) { \
         int clip2n_sign_ = (y) >> 31; \
         if (clip2n_sign_ != (y) >> (n)) { \
             (y) = clip2n_sign_ ^ ((1 << (n)) - 1); \
         } \
     }
     #else // on xtensa this is faster, due to asm min/max instructions:
     #define CLIP_2N(y, n) { \
         int clip2n_lim_ = 1 << (n);                 /* 2^n */ \
         if ((y) < -clip2n_lim_) (y) = -clip2n_lim_; /* lower bound: -2^n */ \
         clip2n_lim_--;                              /* 2^n - 1 */ \
         if ((y) > clip2n_lim_) (y) = clip2n_lim_;   /* upper bound: 2^n - 1 */ \
     }
     #endif
     25 
     /* ---- Scalar constants for the decoder ---- */

     /* NOTE(review): by name, the high/low sync-word byte patterns; confirm at the use site. */
     const uint8_t m_SYNCWORDH = 0xff;
     const uint8_t m_SYNCWORDL = 0xf0;

     /* Fixed-point formats. */
     const uint8_t m_DQ_FRACBITS_OUT = 25; /* number of fraction bits in output of dequant */
     const uint8_t m_CSHIFT = 12;          /* coefficients have 12 leading sign bits for early-terminating multiplies */

     /* NOTE(review): by name, side-info sizes per MPEG version and channel mode (presumably bytes); confirm. */
     const uint8_t m_SIBYTES_MPEG1_MONO = 17;
     const uint8_t m_SIBYTES_MPEG1_STEREO = 32;
     const uint8_t m_SIBYTES_MPEG2_MONO = 9;
     const uint8_t m_SIBYTES_MPEG2_STEREO = 17;

     const uint8_t m_IMDCT_SCALE = 2; /* additional scaling (by sqrt(2)) for fast IMDCT36 */

     /* NOTE(review): by name, granule counts for MPEG1 and MPEG2; confirm. */
     const uint8_t m_NGRANS_MPEG1 = 2;
     const uint8_t m_NGRANS_MPEG2 = 1;

     const uint32_t m_SQRTHALF = 0x5a82799a; /* sqrt(0.5) in Q31 format */
     38 
     39 
     40 MP3FrameInfo_t *m_MP3FrameInfo;
     41 SFBandTable_t m_SFBandTable;
     42 StereoMode_t m_sMode;  /* mono/stereo mode */
     43 MPEGVersion_t m_MPEGVersion;  /* version ID */
     44 FrameHeader_t *m_FrameHeader;
     45 SideInfoSub_t m_SideInfoSub[m_MAX_NGRAN][m_MAX_NCHAN];
     46 SideInfo_t *m_SideInfo;
     47 CriticalBandInfo_t m_CriticalBandInfo[m_MAX_NCHAN];  /* filled in dequantizer, used in joint stereo reconstruction */
     48 DequantInfo_t *m_DequantInfo;
     49 HuffmanInfo_t *m_HuffmanInfo;
     50 IMDCTInfo_t *m_IMDCTInfo;
     51 ScaleFactorInfoSub_t m_ScaleFactorInfoSub[m_MAX_NGRAN][m_MAX_NCHAN];
     52 ScaleFactorJS_t *m_ScaleFactorJS;
     53 SubbandInfo_t *m_SubbandInfo;
     54 MP3DecInfo_t *m_MP3DecInfo;
     55 
     56 const unsigned short huffTable[4242] PROGMEM = {
     57     /* huffTable01[9] */
     58     0xf003, 0x3112, 0x3101, 0x2011, 0x2011, 0x1000, 0x1000, 0x1000, 0x1000,
     59     /* huffTable02[65] */
     60     0xf006, 0x6222, 0x6201, 0x5212, 0x5212, 0x5122, 0x5122, 0x5021, 0x5021, 0x3112, 0x3112, 0x3112,
     61     0x3112, 0x3112, 0x3112, 0x3112, 0x3112, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101,
     62     0x3101, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x1000, 0x1000, 0x1000,
     63     0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
     64     0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
     65     0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
     66     /* huffTable03[65] */
     67     0xf006, 0x6222, 0x6201, 0x5212, 0x5212, 0x5122, 0x5122, 0x5021, 0x5021, 0x3011, 0x3011, 0x3011,
     68     0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x2112, 0x2112, 0x2112, 0x2112, 0x2112, 0x2112, 0x2112,
     69     0x2112, 0x2112, 0x2112, 0x2112, 0x2112, 0x2112, 0x2112, 0x2112, 0x2112, 0x2101, 0x2101, 0x2101,
     70     0x2101, 0x2101, 0x2101, 0x2101, 0x2101, 0x2101, 0x2101, 0x2101, 0x2101, 0x2101, 0x2101, 0x2101,
     71     0x2101, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000,
     72     0x2000, 0x2000, 0x2000, 0x2000, 0x2000,
     73     /* huffTable05[257] */
     74     0xf008, 0x8332, 0x8322, 0x7232, 0x7232, 0x6132, 0x6132, 0x6132, 0x6132, 0x7312, 0x7312, 0x7301,
     75     0x7301, 0x7031, 0x7031, 0x7222, 0x7222, 0x6212, 0x6212, 0x6212, 0x6212, 0x6122, 0x6122, 0x6122,
     76     0x6122, 0x6201, 0x6201, 0x6201, 0x6201, 0x6021, 0x6021, 0x6021, 0x6021, 0x3112, 0x3112, 0x3112,
     77     0x3112, 0x3112, 0x3112, 0x3112, 0x3112, 0x3112, 0x3112, 0x3112, 0x3112, 0x3112, 0x3112, 0x3112,
     78     0x3112, 0x3112, 0x3112, 0x3112, 0x3112, 0x3112, 0x3112, 0x3112, 0x3112, 0x3112, 0x3112, 0x3112,
     79     0x3112, 0x3112, 0x3112, 0x3112, 0x3112, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101,
     80     0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101,
     81     0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101,
     82     0x3101, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011,
     83     0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011,
     84     0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x1000, 0x1000, 0x1000,
     85     0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
     86     0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
     87     0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
     88     0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
     89     0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
     90     0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
     91     0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
     92     0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
     93     0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
     94     0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
     95     0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
     96     /* huffTable06[129] */
     97     0xf007, 0x7332, 0x7301, 0x6322, 0x6322, 0x6232, 0x6232, 0x6031, 0x6031, 0x5312, 0x5312, 0x5312,
     98     0x5312, 0x5132, 0x5132, 0x5132, 0x5132, 0x5222, 0x5222, 0x5222, 0x5222, 0x5201, 0x5201, 0x5201,
     99     0x5201, 0x4212, 0x4212, 0x4212, 0x4212, 0x4212, 0x4212, 0x4212, 0x4212, 0x4122, 0x4122, 0x4122,
    100     0x4122, 0x4122, 0x4122, 0x4122, 0x4122, 0x4021, 0x4021, 0x4021, 0x4021, 0x4021, 0x4021, 0x4021,
    101     0x4021, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101,
    102     0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x2112, 0x2112, 0x2112, 0x2112, 0x2112, 0x2112, 0x2112,
    103     0x2112, 0x2112, 0x2112, 0x2112, 0x2112, 0x2112, 0x2112, 0x2112, 0x2112, 0x2112, 0x2112, 0x2112,
    104     0x2112, 0x2112, 0x2112, 0x2112, 0x2112, 0x2112, 0x2112, 0x2112, 0x2112, 0x2112, 0x2112, 0x2112,
    105     0x2112, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011,
    106     0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3000, 0x3000, 0x3000, 0x3000, 0x3000, 0x3000, 0x3000,
    107     0x3000, 0x3000, 0x3000, 0x3000, 0x3000, 0x3000, 0x3000, 0x3000, 0x3000,
    108     /* huffTable07[110] */
    109     0xf006, 0x0041, 0x0052, 0x005b, 0x0060, 0x0063, 0x0068, 0x006b, 0x6212, 0x5122, 0x5122, 0x6201,
    110     0x6021, 0x4112, 0x4112, 0x4112, 0x4112, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101,
    111     0x3101, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x1000, 0x1000, 0x1000,
    112     0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
    113     0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
    114     0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0xf004, 0x4552, 0x4542, 0x4452, 0x4352, 0x3532, 0x3532,
    115     0x3442, 0x3442, 0x3522, 0x3522, 0x3252, 0x3252, 0x2512, 0x2512, 0x2512, 0x2512, 0xf003, 0x2152,
    116     0x2152, 0x3501, 0x3432, 0x2051, 0x2051, 0x3342, 0x3332, 0xf002, 0x2422, 0x2242, 0x1412, 0x1412,
    117     0xf001, 0x1142, 0x1041, 0xf002, 0x2401, 0x2322, 0x2232, 0x2301, 0xf001, 0x1312, 0x1132, 0xf001,
    118     0x1031, 0x1222,
    119     /* huffTable08[280] */
    120     0xf008, 0x0101, 0x010a, 0x010f, 0x8512, 0x8152, 0x0112, 0x0115, 0x8422, 0x8242, 0x8412, 0x7142,
    121     0x7142, 0x8401, 0x8041, 0x8322, 0x8232, 0x8312, 0x8132, 0x8301, 0x8031, 0x6222, 0x6222, 0x6222,
    122     0x6222, 0x6201, 0x6201, 0x6201, 0x6201, 0x6021, 0x6021, 0x6021, 0x6021, 0x4212, 0x4212, 0x4212,
    123     0x4212, 0x4212, 0x4212, 0x4212, 0x4212, 0x4212, 0x4212, 0x4212, 0x4212, 0x4212, 0x4212, 0x4212,
    124     0x4212, 0x4122, 0x4122, 0x4122, 0x4122, 0x4122, 0x4122, 0x4122, 0x4122, 0x4122, 0x4122, 0x4122,
    125     0x4122, 0x4122, 0x4122, 0x4122, 0x4122, 0x2112, 0x2112, 0x2112, 0x2112, 0x2112, 0x2112, 0x2112,
    126     0x2112, 0x2112, 0x2112, 0x2112, 0x2112, 0x2112, 0x2112, 0x2112, 0x2112, 0x2112, 0x2112, 0x2112,
    127     0x2112, 0x2112, 0x2112, 0x2112, 0x2112, 0x2112, 0x2112, 0x2112, 0x2112, 0x2112, 0x2112, 0x2112,
    128     0x2112, 0x2112, 0x2112, 0x2112, 0x2112, 0x2112, 0x2112, 0x2112, 0x2112, 0x2112, 0x2112, 0x2112,
    129     0x2112, 0x2112, 0x2112, 0x2112, 0x2112, 0x2112, 0x2112, 0x2112, 0x2112, 0x2112, 0x2112, 0x2112,
    130     0x2112, 0x2112, 0x2112, 0x2112, 0x2112, 0x2112, 0x2112, 0x2112, 0x2112, 0x3101, 0x3101, 0x3101,
    131     0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101,
    132     0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101,
    133     0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011,
    134     0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011,
    135     0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011,
    136     0x3011, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000,
    137     0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000,
    138     0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000,
    139     0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000,
    140     0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000,
    141     0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0xf003, 0x3552, 0x3452, 0x2542, 0x2542, 0x1352, 0x1352,
    142     0x1352, 0x1352, 0xf002, 0x2532, 0x2442, 0x1522, 0x1522, 0xf001, 0x1252, 0x1501, 0xf001, 0x1432,
    143     0x1342, 0xf001, 0x1051, 0x1332,
    144     /* huffTable09[93] */
    145     0xf006, 0x0041, 0x004a, 0x004f, 0x0052, 0x0057, 0x005a, 0x6412, 0x6142, 0x6322, 0x6232, 0x5312,
    146     0x5312, 0x5132, 0x5132, 0x6301, 0x6031, 0x5222, 0x5222, 0x5201, 0x5201, 0x4212, 0x4212, 0x4212,
    147     0x4212, 0x4122, 0x4122, 0x4122, 0x4122, 0x4021, 0x4021, 0x4021, 0x4021, 0x3112, 0x3112, 0x3112,
    148     0x3112, 0x3112, 0x3112, 0x3112, 0x3112, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101,
    149     0x3101, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3000, 0x3000, 0x3000,
    150     0x3000, 0x3000, 0x3000, 0x3000, 0x3000, 0xf003, 0x3552, 0x3542, 0x2532, 0x2532, 0x2352, 0x2352,
    151     0x3452, 0x3501, 0xf002, 0x2442, 0x2522, 0x2252, 0x2512, 0xf001, 0x1152, 0x1432, 0xf002, 0x1342,
    152     0x1342, 0x2051, 0x2401, 0xf001, 0x1422, 0x1242, 0xf001, 0x1332, 0x1041,
    153     /* huffTable10[320] */
    154     0xf008, 0x0101, 0x010a, 0x010f, 0x0118, 0x011b, 0x0120, 0x0125, 0x8712, 0x8172, 0x012a, 0x012d,
    155     0x0132, 0x8612, 0x8162, 0x8061, 0x0137, 0x013a, 0x013d, 0x8412, 0x8142, 0x8041, 0x8322, 0x8232,
    156     0x8301, 0x7312, 0x7312, 0x7132, 0x7132, 0x7031, 0x7031, 0x7222, 0x7222, 0x6212, 0x6212, 0x6212,
    157     0x6212, 0x6122, 0x6122, 0x6122, 0x6122, 0x6201, 0x6201, 0x6201, 0x6201, 0x6021, 0x6021, 0x6021,
    158     0x6021, 0x4112, 0x4112, 0x4112, 0x4112, 0x4112, 0x4112, 0x4112, 0x4112, 0x4112, 0x4112, 0x4112,
    159     0x4112, 0x4112, 0x4112, 0x4112, 0x4112, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101,
    160     0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101,
    161     0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101,
    162     0x3101, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011,
    163     0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011,
    164     0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x1000, 0x1000, 0x1000,
    165     0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
    166     0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
    167     0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
    168     0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
    169     0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
    170     0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
    171     0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
    172     0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
    173     0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
    174     0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
    175     0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0xf003, 0x3772, 0x3762, 0x3672, 0x3752, 0x3572, 0x3662,
    176     0x2742, 0x2742, 0xf002, 0x2472, 0x2652, 0x2562, 0x2732, 0xf003, 0x2372, 0x2372, 0x2642, 0x2642,
    177     0x3552, 0x3452, 0x2362, 0x2362, 0xf001, 0x1722, 0x1272, 0xf002, 0x2462, 0x2701, 0x1071, 0x1071,
    178     0xf002, 0x1262, 0x1262, 0x2542, 0x2532, 0xf002, 0x1601, 0x1601, 0x2352, 0x2442, 0xf001, 0x1632,
    179     0x1622, 0xf002, 0x2522, 0x2252, 0x1512, 0x1512, 0xf002, 0x1152, 0x1152, 0x2432, 0x2342, 0xf001,
    180     0x1501, 0x1051, 0xf001, 0x1422, 0x1242, 0xf001, 0x1332, 0x1401,
    181     /* huffTable11[296] */
    182     0xf008, 0x0101, 0x0106, 0x010f, 0x0114, 0x0117, 0x8722, 0x8272, 0x011c, 0x7172, 0x7172, 0x8712,
    183     0x8071, 0x8632, 0x8362, 0x8061, 0x011f, 0x0122, 0x8512, 0x7262, 0x7262, 0x8622, 0x8601, 0x7612,
    184     0x7612, 0x7162, 0x7162, 0x8152, 0x8432, 0x8051, 0x0125, 0x8422, 0x8242, 0x8412, 0x8142, 0x8401,
    185     0x8041, 0x7322, 0x7322, 0x7232, 0x7232, 0x6312, 0x6312, 0x6312, 0x6312, 0x6132, 0x6132, 0x6132,
    186     0x6132, 0x7301, 0x7301, 0x7031, 0x7031, 0x6222, 0x6222, 0x6222, 0x6222, 0x5122, 0x5122, 0x5122,
    187     0x5122, 0x5122, 0x5122, 0x5122, 0x5122, 0x4212, 0x4212, 0x4212, 0x4212, 0x4212, 0x4212, 0x4212,
    188     0x4212, 0x4212, 0x4212, 0x4212, 0x4212, 0x4212, 0x4212, 0x4212, 0x4212, 0x5201, 0x5201, 0x5201,
    189     0x5201, 0x5201, 0x5201, 0x5201, 0x5201, 0x5021, 0x5021, 0x5021, 0x5021, 0x5021, 0x5021, 0x5021,
    190     0x5021, 0x3112, 0x3112, 0x3112, 0x3112, 0x3112, 0x3112, 0x3112, 0x3112, 0x3112, 0x3112, 0x3112,
    191     0x3112, 0x3112, 0x3112, 0x3112, 0x3112, 0x3112, 0x3112, 0x3112, 0x3112, 0x3112, 0x3112, 0x3112,
    192     0x3112, 0x3112, 0x3112, 0x3112, 0x3112, 0x3112, 0x3112, 0x3112, 0x3112, 0x3101, 0x3101, 0x3101,
    193     0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101,
    194     0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101,
    195     0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011,
    196     0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011,
    197     0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011,
    198     0x3011, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000,
    199     0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000,
    200     0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000,
    201     0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000,
    202     0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000,
    203     0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0xf002, 0x2772, 0x2762, 0x2672, 0x2572, 0xf003, 0x2662,
    204     0x2662, 0x2742, 0x2742, 0x2472, 0x2472, 0x3752, 0x3552, 0xf002, 0x2652, 0x2562, 0x1732, 0x1732,
    205     0xf001, 0x1372, 0x1642, 0xf002, 0x2542, 0x2452, 0x2532, 0x2352, 0xf001, 0x1462, 0x1701, 0xf001,
    206     0x1442, 0x1522, 0xf001, 0x1252, 0x1501, 0xf001, 0x1342, 0x1332,
    207     /* huffTable12[185] */
    208     0xf007, 0x0081, 0x008a, 0x008f, 0x0092, 0x0097, 0x009a, 0x009d, 0x00a2, 0x00a5, 0x00a8, 0x7622,
    209     0x7262, 0x7162, 0x00ad, 0x00b0, 0x00b3, 0x7512, 0x7152, 0x7432, 0x7342, 0x00b6, 0x7422, 0x7242,
    210     0x7412, 0x6332, 0x6332, 0x6142, 0x6142, 0x6322, 0x6322, 0x6232, 0x6232, 0x7041, 0x7301, 0x6031,
    211     0x6031, 0x5312, 0x5312, 0x5312, 0x5312, 0x5132, 0x5132, 0x5132, 0x5132, 0x5222, 0x5222, 0x5222,
    212     0x5222, 0x4212, 0x4212, 0x4212, 0x4212, 0x4212, 0x4212, 0x4212, 0x4212, 0x4122, 0x4122, 0x4122,
    213     0x4122, 0x4122, 0x4122, 0x4122, 0x4122, 0x5201, 0x5201, 0x5201, 0x5201, 0x5021, 0x5021, 0x5021,
    214     0x5021, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x3112, 0x3112, 0x3112,
    215     0x3112, 0x3112, 0x3112, 0x3112, 0x3112, 0x3112, 0x3112, 0x3112, 0x3112, 0x3112, 0x3112, 0x3112,
    216     0x3112, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3101,
    217     0x3101, 0x3101, 0x3101, 0x3101, 0x3101, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011,
    218     0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0xf003, 0x3772, 0x3762,
    219     0x2672, 0x2672, 0x2752, 0x2752, 0x2572, 0x2572, 0xf002, 0x2662, 0x2742, 0x2472, 0x2562, 0xf001,
    220     0x1652, 0x1732, 0xf002, 0x2372, 0x2552, 0x1722, 0x1722, 0xf001, 0x1272, 0x1642, 0xf001, 0x1462,
    221     0x1712, 0xf002, 0x1172, 0x1172, 0x2701, 0x2071, 0xf001, 0x1632, 0x1362, 0xf001, 0x1542, 0x1452,
    222     0xf002, 0x1442, 0x1442, 0x2601, 0x2501, 0xf001, 0x1612, 0x1061, 0xf001, 0x1532, 0x1352, 0xf001,
    223     0x1522, 0x1252, 0xf001, 0x1051, 0x1401,
    224     /* huffTable13[497] */
    225     0xf006, 0x0041, 0x0082, 0x00c3, 0x00e4, 0x0105, 0x0116, 0x011f, 0x0130, 0x0139, 0x013e, 0x0143,
    226     0x0146, 0x6212, 0x6122, 0x6201, 0x6021, 0x4112, 0x4112, 0x4112, 0x4112, 0x4101, 0x4101, 0x4101,
    227     0x4101, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x1000, 0x1000, 0x1000,
    228     0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
    229     0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
    230     0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0xf006, 0x0108, 0x0111, 0x011a, 0x0123, 0x012c, 0x0131,
    231     0x0136, 0x013f, 0x0144, 0x0147, 0x014c, 0x0151, 0x0156, 0x015b, 0x6f12, 0x61f2, 0x60f1, 0x0160,
    232     0x0163, 0x0166, 0x62e2, 0x0169, 0x6e12, 0x61e2, 0x016c, 0x016f, 0x0172, 0x0175, 0x0178, 0x017b,
    233     0x66c2, 0x6d32, 0x017e, 0x6d22, 0x62d2, 0x6d12, 0x67b2, 0x0181, 0x0184, 0x63c2, 0x0187, 0x6b42,
    234     0x51d2, 0x51d2, 0x6d01, 0x60d1, 0x6a82, 0x68a2, 0x6c42, 0x64c2, 0x6b62, 0x66b2, 0x5c32, 0x5c32,
    235     0x5c22, 0x5c22, 0x52c2, 0x52c2, 0x5b52, 0x5b52, 0x65b2, 0x6982, 0x5c12, 0x5c12, 0xf006, 0x51c2,
    236     0x51c2, 0x6892, 0x6c01, 0x50c1, 0x50c1, 0x64b2, 0x6a62, 0x66a2, 0x6972, 0x5b32, 0x5b32, 0x53b2,
    237     0x53b2, 0x6882, 0x6a52, 0x5b22, 0x5b22, 0x65a2, 0x6962, 0x54a2, 0x54a2, 0x6872, 0x6782, 0x5492,
    238     0x5492, 0x6772, 0x6672, 0x42b2, 0x42b2, 0x42b2, 0x42b2, 0x4b12, 0x4b12, 0x4b12, 0x4b12, 0x41b2,
    239     0x41b2, 0x41b2, 0x41b2, 0x5b01, 0x5b01, 0x50b1, 0x50b1, 0x5692, 0x5692, 0x5a42, 0x5a42, 0x5a32,
    240     0x5a32, 0x53a2, 0x53a2, 0x5952, 0x5952, 0x5592, 0x5592, 0x4a22, 0x4a22, 0x4a22, 0x4a22, 0x42a2,
    241     0x42a2, 0x42a2, 0x42a2, 0xf005, 0x4a12, 0x4a12, 0x41a2, 0x41a2, 0x5a01, 0x5862, 0x40a1, 0x40a1,
    242     0x5682, 0x5942, 0x4392, 0x4392, 0x5932, 0x5852, 0x5582, 0x5762, 0x4922, 0x4922, 0x4292, 0x4292,
    243     0x5752, 0x5572, 0x4832, 0x4832, 0x4382, 0x4382, 0x5662, 0x5742, 0x5472, 0x5652, 0x5562, 0x5372,
    244     0xf005, 0x3912, 0x3912, 0x3912, 0x3912, 0x3192, 0x3192, 0x3192, 0x3192, 0x4901, 0x4901, 0x4091,
    245     0x4091, 0x4842, 0x4842, 0x4482, 0x4482, 0x4272, 0x4272, 0x5642, 0x5462, 0x3822, 0x3822, 0x3822,
    246     0x3822, 0x3282, 0x3282, 0x3282, 0x3282, 0x3812, 0x3812, 0x3812, 0x3812, 0xf004, 0x4732, 0x4722,
    247     0x3712, 0x3712, 0x3172, 0x3172, 0x4552, 0x4701, 0x4071, 0x4632, 0x4362, 0x4542, 0x4452, 0x4622,
    248     0x4262, 0x4532, 0xf003, 0x2182, 0x2182, 0x3801, 0x3081, 0x3612, 0x3162, 0x3601, 0x3061, 0xf004,
    249     0x4352, 0x4442, 0x3522, 0x3522, 0x3252, 0x3252, 0x3501, 0x3501, 0x2512, 0x2512, 0x2512, 0x2512,
    250     0x2152, 0x2152, 0x2152, 0x2152, 0xf003, 0x3432, 0x3342, 0x3051, 0x3422, 0x3242, 0x3332, 0x2412,
    251     0x2412, 0xf002, 0x1142, 0x1142, 0x2401, 0x2041, 0xf002, 0x2322, 0x2232, 0x1312, 0x1312, 0xf001,
    252     0x1132, 0x1301, 0xf001, 0x1031, 0x1222, 0xf003, 0x0082, 0x008b, 0x008e, 0x0091, 0x0094, 0x0097,
    253     0x3ce2, 0x3dd2, 0xf003, 0x0093, 0x3eb2, 0x3be2, 0x3f92, 0x39f2, 0x3ae2, 0x3db2, 0x3bd2, 0xf003,
    254     0x3f82, 0x38f2, 0x3cc2, 0x008d, 0x3e82, 0x0090, 0x27f2, 0x27f2, 0xf003, 0x2ad2, 0x2ad2, 0x3da2,
    255     0x3cb2, 0x3bc2, 0x36f2, 0x2f62, 0x2f62, 0xf002, 0x28e2, 0x2f52, 0x2d92, 0x29d2, 0xf002, 0x25f2,
    256     0x27e2, 0x2ca2, 0x2bb2, 0xf003, 0x2f42, 0x2f42, 0x24f2, 0x24f2, 0x3ac2, 0x36e2, 0x23f2, 0x23f2,
    257     0xf002, 0x1f32, 0x1f32, 0x2d82, 0x28d2, 0xf001, 0x1f22, 0x12f2, 0xf002, 0x2e62, 0x2c92, 0x1f01,
    258     0x1f01, 0xf002, 0x29c2, 0x2e52, 0x1ba2, 0x1ba2, 0xf002, 0x2d72, 0x27d2, 0x1e42, 0x1e42, 0xf002,
    259     0x28c2, 0x26d2, 0x1e32, 0x1e32, 0xf002, 0x19b2, 0x19b2, 0x2b92, 0x2aa2, 0xf001, 0x1ab2, 0x15e2,
    260     0xf001, 0x14e2, 0x1c82, 0xf001, 0x1d62, 0x13e2, 0xf001, 0x1e22, 0x1e01, 0xf001, 0x10e1, 0x1d52,
    261     0xf001, 0x15d2, 0x1c72, 0xf001, 0x17c2, 0x1d42, 0xf001, 0x1b82, 0x18b2, 0xf001, 0x14d2, 0x1a92,
    262     0xf001, 0x19a2, 0x1c62, 0xf001, 0x13d2, 0x1b72, 0xf001, 0x1c52, 0x15c2, 0xf001, 0x1992, 0x1a72,
    263     0xf001, 0x17a2, 0x1792, 0xf003, 0x0023, 0x3df2, 0x2de2, 0x2de2, 0x1ff2, 0x1ff2, 0x1ff2, 0x1ff2,
    264     0xf001, 0x1fe2, 0x1fd2, 0xf001, 0x1ee2, 0x1fc2, 0xf001, 0x1ed2, 0x1fb2, 0xf001, 0x1bf2, 0x1ec2,
    265     0xf002, 0x1cd2, 0x1cd2, 0x2fa2, 0x29e2, 0xf001, 0x1af2, 0x1dc2, 0xf001, 0x1ea2, 0x1e92, 0xf001,
    266     0x1f72, 0x1e72, 0xf001, 0x1ef2, 0x1cf2,
    267     /* huffTable15[580] */
    268     0xf008, 0x0101, 0x0122, 0x0143, 0x0154, 0x0165, 0x0176, 0x017f, 0x0188, 0x0199, 0x01a2, 0x01ab,
    269     0x01b4, 0x01bd, 0x01c2, 0x01cb, 0x01d4, 0x01d9, 0x01de, 0x01e3, 0x01e8, 0x01ed, 0x01f2, 0x01f7,
    270     0x01fc, 0x0201, 0x0204, 0x0207, 0x020a, 0x020f, 0x0212, 0x0215, 0x021a, 0x021d, 0x0220, 0x8192,
    271     0x0223, 0x0226, 0x0229, 0x022c, 0x022f, 0x8822, 0x8282, 0x8812, 0x8182, 0x0232, 0x0235, 0x0238,
    272     0x023b, 0x8722, 0x8272, 0x8462, 0x8712, 0x8552, 0x8172, 0x023e, 0x8632, 0x8362, 0x8542, 0x8452,
    273     0x8622, 0x8262, 0x8612, 0x0241, 0x8532, 0x7162, 0x7162, 0x8352, 0x8442, 0x7522, 0x7522, 0x7252,
    274     0x7252, 0x7512, 0x7512, 0x7152, 0x7152, 0x8501, 0x8051, 0x7432, 0x7432, 0x7342, 0x7342, 0x7422,
    275     0x7422, 0x7242, 0x7242, 0x7332, 0x7332, 0x6142, 0x6142, 0x6142, 0x6142, 0x7412, 0x7412, 0x7401,
    276     0x7401, 0x6322, 0x6322, 0x6322, 0x6322, 0x6232, 0x6232, 0x6232, 0x6232, 0x7041, 0x7041, 0x7301,
    277     0x7301, 0x6312, 0x6312, 0x6312, 0x6312, 0x6132, 0x6132, 0x6132, 0x6132, 0x6031, 0x6031, 0x6031,
    278     0x6031, 0x5222, 0x5222, 0x5222, 0x5222, 0x5222, 0x5222, 0x5222, 0x5222, 0x5212, 0x5212, 0x5212,
    279     0x5212, 0x5212, 0x5212, 0x5212, 0x5212, 0x5122, 0x5122, 0x5122, 0x5122, 0x5122, 0x5122, 0x5122,
    280     0x5122, 0x5201, 0x5201, 0x5201, 0x5201, 0x5201, 0x5201, 0x5201, 0x5201, 0x5021, 0x5021, 0x5021,
    281     0x5021, 0x5021, 0x5021, 0x5021, 0x5021, 0x3112, 0x3112, 0x3112, 0x3112, 0x3112, 0x3112, 0x3112,
    282     0x3112, 0x3112, 0x3112, 0x3112, 0x3112, 0x3112, 0x3112, 0x3112, 0x3112, 0x3112, 0x3112, 0x3112,
    283     0x3112, 0x3112, 0x3112, 0x3112, 0x3112, 0x3112, 0x3112, 0x3112, 0x3112, 0x3112, 0x3112, 0x3112,
    284     0x3112, 0x4101, 0x4101, 0x4101, 0x4101, 0x4101, 0x4101, 0x4101, 0x4101, 0x4101, 0x4101, 0x4101,
    285     0x4101, 0x4101, 0x4101, 0x4101, 0x4101, 0x4011, 0x4011, 0x4011, 0x4011, 0x4011, 0x4011, 0x4011,
    286     0x4011, 0x4011, 0x4011, 0x4011, 0x4011, 0x4011, 0x4011, 0x4011, 0x4011, 0x3000, 0x3000, 0x3000,
    287     0x3000, 0x3000, 0x3000, 0x3000, 0x3000, 0x3000, 0x3000, 0x3000, 0x3000, 0x3000, 0x3000, 0x3000,
    288     0x3000, 0x3000, 0x3000, 0x3000, 0x3000, 0x3000, 0x3000, 0x3000, 0x3000, 0x3000, 0x3000, 0x3000,
    289     0x3000, 0x3000, 0x3000, 0x3000, 0x3000, 0xf005, 0x5ff2, 0x5fe2, 0x5ef2, 0x5fd2, 0x4ee2, 0x4ee2,
    290     0x5df2, 0x5fc2, 0x5cf2, 0x5ed2, 0x5de2, 0x5fb2, 0x4bf2, 0x4bf2, 0x5ec2, 0x5ce2, 0x4dd2, 0x4dd2,
    291     0x4fa2, 0x4fa2, 0x4af2, 0x4af2, 0x4eb2, 0x4eb2, 0x4be2, 0x4be2, 0x4dc2, 0x4dc2, 0x4cd2, 0x4cd2,
    292     0x4f92, 0x4f92, 0xf005, 0x49f2, 0x49f2, 0x4ae2, 0x4ae2, 0x4db2, 0x4db2, 0x4bd2, 0x4bd2, 0x4f82,
    293     0x4f82, 0x48f2, 0x48f2, 0x4cc2, 0x4cc2, 0x4e92, 0x4e92, 0x49e2, 0x49e2, 0x4f72, 0x4f72, 0x47f2,
    294     0x47f2, 0x4da2, 0x4da2, 0x4ad2, 0x4ad2, 0x4cb2, 0x4cb2, 0x4f62, 0x4f62, 0x5ea2, 0x5f01, 0xf004,
    295     0x3bc2, 0x3bc2, 0x36f2, 0x36f2, 0x4e82, 0x48e2, 0x4f52, 0x4d92, 0x35f2, 0x35f2, 0x3e72, 0x3e72,
    296     0x37e2, 0x37e2, 0x3ca2, 0x3ca2, 0xf004, 0x3ac2, 0x3ac2, 0x3bb2, 0x3bb2, 0x49d2, 0x4d82, 0x3f42,
    297     0x3f42, 0x34f2, 0x34f2, 0x3f32, 0x3f32, 0x33f2, 0x33f2, 0x38d2, 0x38d2, 0xf004, 0x36e2, 0x36e2,
    298     0x3f22, 0x3f22, 0x32f2, 0x32f2, 0x4e62, 0x40f1, 0x3f12, 0x3f12, 0x31f2, 0x31f2, 0x3c92, 0x3c92,
    299     0x39c2, 0x39c2, 0xf003, 0x3e52, 0x3ba2, 0x3ab2, 0x35e2, 0x3d72, 0x37d2, 0x3e42, 0x34e2, 0xf003,
    300     0x3c82, 0x38c2, 0x3e32, 0x3d62, 0x36d2, 0x33e2, 0x3b92, 0x39b2, 0xf004, 0x3e22, 0x3e22, 0x3aa2,
    301     0x3aa2, 0x32e2, 0x32e2, 0x3e12, 0x3e12, 0x31e2, 0x31e2, 0x4e01, 0x40e1, 0x3d52, 0x3d52, 0x35d2,
    302     0x35d2, 0xf003, 0x3c72, 0x37c2, 0x3d42, 0x3b82, 0x24d2, 0x24d2, 0x38b2, 0x3a92, 0xf003, 0x39a2,
    303     0x3c62, 0x36c2, 0x3d32, 0x23d2, 0x23d2, 0x22d2, 0x22d2, 0xf003, 0x3d22, 0x3d01, 0x2d12, 0x2d12,
    304     0x2b72, 0x2b72, 0x27b2, 0x27b2, 0xf003, 0x21d2, 0x21d2, 0x3c52, 0x30d1, 0x25c2, 0x25c2, 0x2a82,
    305     0x2a82, 0xf002, 0x28a2, 0x2c42, 0x24c2, 0x2b62, 0xf003, 0x26b2, 0x26b2, 0x3992, 0x3c01, 0x2c32,
    306     0x2c32, 0x23c2, 0x23c2, 0xf003, 0x2a72, 0x2a72, 0x27a2, 0x27a2, 0x26a2, 0x26a2, 0x30c1, 0x3b01,
    307     0xf002, 0x12c2, 0x12c2, 0x2c22, 0x2b52, 0xf002, 0x25b2, 0x2c12, 0x2982, 0x2892, 0xf002, 0x21c2,
    308     0x2b42, 0x24b2, 0x2a62, 0xf002, 0x2b32, 0x2972, 0x13b2, 0x13b2, 0xf002, 0x2792, 0x2882, 0x2b22,
    309     0x2a52, 0xf002, 0x12b2, 0x12b2, 0x25a2, 0x2b12, 0xf002, 0x11b2, 0x11b2, 0x20b1, 0x2962, 0xf002,
    310     0x2692, 0x2a42, 0x24a2, 0x2872, 0xf002, 0x2782, 0x2a32, 0x13a2, 0x13a2, 0xf001, 0x1952, 0x1592,
    311     0xf001, 0x1a22, 0x12a2, 0xf001, 0x1a12, 0x11a2, 0xf002, 0x2a01, 0x20a1, 0x1862, 0x1862, 0xf001,
    312     0x1682, 0x1942, 0xf001, 0x1492, 0x1932, 0xf002, 0x1392, 0x1392, 0x2772, 0x2901, 0xf001, 0x1852,
    313     0x1582, 0xf001, 0x1922, 0x1762, 0xf001, 0x1672, 0x1292, 0xf001, 0x1912, 0x1091, 0xf001, 0x1842,
    314     0x1482, 0xf001, 0x1752, 0x1572, 0xf001, 0x1832, 0x1382, 0xf001, 0x1662, 0x1742, 0xf001, 0x1472,
    315     0x1801, 0xf001, 0x1081, 0x1652, 0xf001, 0x1562, 0x1732, 0xf001, 0x1372, 0x1642, 0xf001, 0x1701,
    316     0x1071, 0xf001, 0x1601, 0x1061,
    317     /* huffTable16[651] */
    318     0xf008, 0x0101, 0x010a, 0x0113, 0x8ff2, 0x0118, 0x011d, 0x0120, 0x82f2, 0x0131, 0x8f12, 0x81f2,
    319     0x0134, 0x0145, 0x0156, 0x0167, 0x0178, 0x0189, 0x019a, 0x01a3, 0x01ac, 0x01b5, 0x01be, 0x01c7,
    320     0x01d0, 0x01d9, 0x01de, 0x01e3, 0x01e6, 0x01eb, 0x01f0, 0x8152, 0x01f3, 0x01f6, 0x01f9, 0x01fc,
    321     0x8412, 0x8142, 0x01ff, 0x8322, 0x8232, 0x7312, 0x7312, 0x7132, 0x7132, 0x8301, 0x8031, 0x7222,
    322     0x7222, 0x6212, 0x6212, 0x6212, 0x6212, 0x6122, 0x6122, 0x6122, 0x6122, 0x6201, 0x6201, 0x6201,
    323     0x6201, 0x6021, 0x6021, 0x6021, 0x6021, 0x4112, 0x4112, 0x4112, 0x4112, 0x4112, 0x4112, 0x4112,
    324     0x4112, 0x4112, 0x4112, 0x4112, 0x4112, 0x4112, 0x4112, 0x4112, 0x4112, 0x4101, 0x4101, 0x4101,
    325     0x4101, 0x4101, 0x4101, 0x4101, 0x4101, 0x4101, 0x4101, 0x4101, 0x4101, 0x4101, 0x4101, 0x4101,
    326     0x4101, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011,
    327     0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011,
    328     0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x3011, 0x1000, 0x1000, 0x1000,
    329     0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
    330     0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
    331     0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
    332     0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
    333     0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
    334     0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
    335     0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
    336     0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
    337     0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
    338     0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
    339     0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0xf003, 0x3fe2, 0x3ef2, 0x3fd2, 0x3df2, 0x3fc2, 0x3cf2,
    340     0x3fb2, 0x3bf2, 0xf003, 0x2fa2, 0x2fa2, 0x3af2, 0x3f92, 0x39f2, 0x38f2, 0x2f82, 0x2f82, 0xf002,
    341     0x2f72, 0x27f2, 0x2f62, 0x26f2, 0xf002, 0x2f52, 0x25f2, 0x1f42, 0x1f42, 0xf001, 0x14f2, 0x13f2,
    342     0xf004, 0x10f1, 0x10f1, 0x10f1, 0x10f1, 0x10f1, 0x10f1, 0x10f1, 0x10f1, 0x2f32, 0x2f32, 0x2f32,
    343     0x2f32, 0x00e2, 0x00f3, 0x00fc, 0x0105, 0xf001, 0x1f22, 0x1f01, 0xf004, 0x00fa, 0x00ff, 0x0104,
    344     0x0109, 0x010c, 0x0111, 0x0116, 0x0119, 0x011e, 0x0123, 0x0128, 0x43e2, 0x012d, 0x0130, 0x0133,
    345     0x0136, 0xf004, 0x0128, 0x012b, 0x012e, 0x4d01, 0x0131, 0x0134, 0x0137, 0x4c32, 0x013a, 0x4c12,
    346     0x40c1, 0x013d, 0x32e2, 0x32e2, 0x4e22, 0x4e12, 0xf004, 0x43d2, 0x4d22, 0x42d2, 0x41d2, 0x4b32,
    347     0x012f, 0x3d12, 0x3d12, 0x44c2, 0x4b62, 0x43c2, 0x47a2, 0x3c22, 0x3c22, 0x42c2, 0x45b2, 0xf004,
    348     0x41c2, 0x4c01, 0x4b42, 0x44b2, 0x4a62, 0x46a2, 0x33b2, 0x33b2, 0x4a52, 0x45a2, 0x3b22, 0x3b22,
    349     0x32b2, 0x32b2, 0x3b12, 0x3b12, 0xf004, 0x31b2, 0x31b2, 0x4b01, 0x40b1, 0x4962, 0x4692, 0x4a42,
    350     0x44a2, 0x4872, 0x4782, 0x33a2, 0x33a2, 0x4a32, 0x4952, 0x3a22, 0x3a22, 0xf004, 0x4592, 0x4862,
    351     0x31a2, 0x31a2, 0x4682, 0x4772, 0x3492, 0x3492, 0x4942, 0x4752, 0x3762, 0x3762, 0x22a2, 0x22a2,
    352     0x22a2, 0x22a2, 0xf003, 0x2a12, 0x2a12, 0x3a01, 0x30a1, 0x3932, 0x3392, 0x3852, 0x3582, 0xf003,
    353     0x2922, 0x2922, 0x2292, 0x2292, 0x3672, 0x3901, 0x2912, 0x2912, 0xf003, 0x2192, 0x2192, 0x3091,
    354     0x3842, 0x3482, 0x3572, 0x3832, 0x3382, 0xf003, 0x3662, 0x3822, 0x2282, 0x2282, 0x3742, 0x3472,
    355     0x2812, 0x2812, 0xf003, 0x2182, 0x2182, 0x2081, 0x2081, 0x3801, 0x3652, 0x2732, 0x2732, 0xf003,
    356     0x2372, 0x2372, 0x3562, 0x3642, 0x2722, 0x2722, 0x2272, 0x2272, 0xf003, 0x3462, 0x3552, 0x2701,
    357     0x2701, 0x1712, 0x1712, 0x1712, 0x1712, 0xf002, 0x1172, 0x1172, 0x2071, 0x2632, 0xf002, 0x2362,
    358     0x2542, 0x2452, 0x2622, 0xf001, 0x1262, 0x1612, 0xf002, 0x1162, 0x1162, 0x2601, 0x2061, 0xf002,
    359     0x1352, 0x1352, 0x2532, 0x2442, 0xf001, 0x1522, 0x1252, 0xf001, 0x1512, 0x1501, 0xf001, 0x1432,
    360     0x1342, 0xf001, 0x1051, 0x1422, 0xf001, 0x1242, 0x1332, 0xf001, 0x1401, 0x1041, 0xf004, 0x4ec2,
    361     0x0086, 0x3ed2, 0x3ed2, 0x39e2, 0x39e2, 0x4ae2, 0x49d2, 0x2ee2, 0x2ee2, 0x2ee2, 0x2ee2, 0x3de2,
    362     0x3de2, 0x3be2, 0x3be2, 0xf003, 0x2eb2, 0x2eb2, 0x2dc2, 0x2dc2, 0x3cd2, 0x3bd2, 0x2ea2, 0x2ea2,
    363     0xf003, 0x2cc2, 0x2cc2, 0x3da2, 0x3ad2, 0x3e72, 0x3ca2, 0x2ac2, 0x2ac2, 0xf003, 0x39c2, 0x3d72,
    364     0x2e52, 0x2e52, 0x1db2, 0x1db2, 0x1db2, 0x1db2, 0xf002, 0x1e92, 0x1e92, 0x2cb2, 0x2bc2, 0xf002,
    365     0x2e82, 0x28e2, 0x2d92, 0x27e2, 0xf002, 0x2bb2, 0x2d82, 0x28d2, 0x2e62, 0xf001, 0x16e2, 0x1c92,
    366     0xf002, 0x2ba2, 0x2ab2, 0x25e2, 0x27d2, 0xf002, 0x1e42, 0x1e42, 0x24e2, 0x2c82, 0xf001, 0x18c2,
    367     0x1e32, 0xf002, 0x1d62, 0x1d62, 0x26d2, 0x2b92, 0xf002, 0x29b2, 0x2aa2, 0x11e2, 0x11e2, 0xf002,
    368     0x14d2, 0x14d2, 0x28b2, 0x29a2, 0xf002, 0x1b72, 0x1b72, 0x27b2, 0x20d1, 0xf001, 0x1e01, 0x10e1,
    369     0xf001, 0x1d52, 0x15d2, 0xf001, 0x1c72, 0x17c2, 0xf001, 0x1d42, 0x1b82, 0xf001, 0x1a92, 0x1c62,
    370     0xf001, 0x16c2, 0x1d32, 0xf001, 0x1c52, 0x15c2, 0xf001, 0x1a82, 0x18a2, 0xf001, 0x1992, 0x1c42,
    371     0xf001, 0x16b2, 0x1a72, 0xf001, 0x1b52, 0x1982, 0xf001, 0x1892, 0x1972, 0xf001, 0x1792, 0x1882,
    372     0xf001, 0x1ce2, 0x1dd2,
    373     /* huffTable24[705] */
    374     0xf009, 0x8fe2, 0x8fe2, 0x8ef2, 0x8ef2, 0x8fd2, 0x8fd2, 0x8df2, 0x8df2, 0x8fc2, 0x8fc2, 0x8cf2,
    375     0x8cf2, 0x8fb2, 0x8fb2, 0x8bf2, 0x8bf2, 0x7af2, 0x7af2, 0x7af2, 0x7af2, 0x8fa2, 0x8fa2, 0x8f92,
    376     0x8f92, 0x79f2, 0x79f2, 0x79f2, 0x79f2, 0x78f2, 0x78f2, 0x78f2, 0x78f2, 0x8f82, 0x8f82, 0x8f72,
    377     0x8f72, 0x77f2, 0x77f2, 0x77f2, 0x77f2, 0x7f62, 0x7f62, 0x7f62, 0x7f62, 0x76f2, 0x76f2, 0x76f2,
    378     0x76f2, 0x7f52, 0x7f52, 0x7f52, 0x7f52, 0x75f2, 0x75f2, 0x75f2, 0x75f2, 0x7f42, 0x7f42, 0x7f42,
    379     0x7f42, 0x74f2, 0x74f2, 0x74f2, 0x74f2, 0x7f32, 0x7f32, 0x7f32, 0x7f32, 0x73f2, 0x73f2, 0x73f2,
    380     0x73f2, 0x7f22, 0x7f22, 0x7f22, 0x7f22, 0x72f2, 0x72f2, 0x72f2, 0x72f2, 0x71f2, 0x71f2, 0x71f2,
    381     0x71f2, 0x8f12, 0x8f12, 0x80f1, 0x80f1, 0x9f01, 0x0201, 0x0206, 0x020b, 0x0210, 0x0215, 0x021a,
    382     0x021f, 0x4ff2, 0x4ff2, 0x4ff2, 0x4ff2, 0x4ff2, 0x4ff2, 0x4ff2, 0x4ff2, 0x4ff2, 0x4ff2, 0x4ff2,
    383     0x4ff2, 0x4ff2, 0x4ff2, 0x4ff2, 0x4ff2, 0x4ff2, 0x4ff2, 0x4ff2, 0x4ff2, 0x4ff2, 0x4ff2, 0x4ff2,
    384     0x4ff2, 0x4ff2, 0x4ff2, 0x4ff2, 0x4ff2, 0x4ff2, 0x4ff2, 0x4ff2, 0x4ff2, 0x0224, 0x0229, 0x0232,
    385     0x0237, 0x023a, 0x023f, 0x0242, 0x0245, 0x024a, 0x024d, 0x0250, 0x0253, 0x0256, 0x0259, 0x025c,
    386     0x025f, 0x0262, 0x0265, 0x0268, 0x026b, 0x026e, 0x0271, 0x0274, 0x0277, 0x027a, 0x027d, 0x0280,
    387     0x0283, 0x0288, 0x028b, 0x028e, 0x0291, 0x0294, 0x0297, 0x029a, 0x029f, 0x94b2, 0x02a4, 0x02a7,
    388     0x02aa, 0x93b2, 0x9882, 0x02af, 0x92b2, 0x02b2, 0x02b5, 0x9692, 0x94a2, 0x02b8, 0x9782, 0x9a32,
    389     0x93a2, 0x9952, 0x9592, 0x9a22, 0x92a2, 0x91a2, 0x9862, 0x9682, 0x9772, 0x9942, 0x9492, 0x9932,
    390     0x9392, 0x9852, 0x9582, 0x9922, 0x9762, 0x9672, 0x9292, 0x9912, 0x9192, 0x9842, 0x9482, 0x9752,
    391     0x9572, 0x9832, 0x9382, 0x9662, 0x9822, 0x9282, 0x9812, 0x9742, 0x9472, 0x9182, 0x02bb, 0x9652,
    392     0x9562, 0x9712, 0x02be, 0x8372, 0x8372, 0x9732, 0x9722, 0x8272, 0x8272, 0x8642, 0x8642, 0x8462,
    393     0x8462, 0x8552, 0x8552, 0x8172, 0x8172, 0x8632, 0x8632, 0x8362, 0x8362, 0x8542, 0x8542, 0x8452,
    394     0x8452, 0x8622, 0x8622, 0x8262, 0x8262, 0x8612, 0x8612, 0x8162, 0x8162, 0x9601, 0x9061, 0x8532,
    395     0x8532, 0x8352, 0x8352, 0x8442, 0x8442, 0x8522, 0x8522, 0x8252, 0x8252, 0x8512, 0x8512, 0x9501,
    396     0x9051, 0x7152, 0x7152, 0x7152, 0x7152, 0x8432, 0x8432, 0x8342, 0x8342, 0x7422, 0x7422, 0x7422,
    397     0x7422, 0x7242, 0x7242, 0x7242, 0x7242, 0x7332, 0x7332, 0x7332, 0x7332, 0x7412, 0x7412, 0x7412,
    398     0x7412, 0x7142, 0x7142, 0x7142, 0x7142, 0x8401, 0x8401, 0x8041, 0x8041, 0x7322, 0x7322, 0x7322,
    399     0x7322, 0x7232, 0x7232, 0x7232, 0x7232, 0x6312, 0x6312, 0x6312, 0x6312, 0x6312, 0x6312, 0x6312,
    400     0x6312, 0x6132, 0x6132, 0x6132, 0x6132, 0x6132, 0x6132, 0x6132, 0x6132, 0x7301, 0x7301, 0x7301,
    401     0x7301, 0x7031, 0x7031, 0x7031, 0x7031, 0x6222, 0x6222, 0x6222, 0x6222, 0x6222, 0x6222, 0x6222,
    402     0x6222, 0x5212, 0x5212, 0x5212, 0x5212, 0x5212, 0x5212, 0x5212, 0x5212, 0x5212, 0x5212, 0x5212,
    403     0x5212, 0x5212, 0x5212, 0x5212, 0x5212, 0x5122, 0x5122, 0x5122, 0x5122, 0x5122, 0x5122, 0x5122,
    404     0x5122, 0x5122, 0x5122, 0x5122, 0x5122, 0x5122, 0x5122, 0x5122, 0x5122, 0x6201, 0x6201, 0x6201,
    405     0x6201, 0x6201, 0x6201, 0x6201, 0x6201, 0x6021, 0x6021, 0x6021, 0x6021, 0x6021, 0x6021, 0x6021,
    406     0x6021, 0x4112, 0x4112, 0x4112, 0x4112, 0x4112, 0x4112, 0x4112, 0x4112, 0x4112, 0x4112, 0x4112,
    407     0x4112, 0x4112, 0x4112, 0x4112, 0x4112, 0x4112, 0x4112, 0x4112, 0x4112, 0x4112, 0x4112, 0x4112,
    408     0x4112, 0x4112, 0x4112, 0x4112, 0x4112, 0x4112, 0x4112, 0x4112, 0x4112, 0x4101, 0x4101, 0x4101,
    409     0x4101, 0x4101, 0x4101, 0x4101, 0x4101, 0x4101, 0x4101, 0x4101, 0x4101, 0x4101, 0x4101, 0x4101,
    410     0x4101, 0x4101, 0x4101, 0x4101, 0x4101, 0x4101, 0x4101, 0x4101, 0x4101, 0x4101, 0x4101, 0x4101,
    411     0x4101, 0x4101, 0x4101, 0x4101, 0x4101, 0x4011, 0x4011, 0x4011, 0x4011, 0x4011, 0x4011, 0x4011,
    412     0x4011, 0x4011, 0x4011, 0x4011, 0x4011, 0x4011, 0x4011, 0x4011, 0x4011, 0x4011, 0x4011, 0x4011,
    413     0x4011, 0x4011, 0x4011, 0x4011, 0x4011, 0x4011, 0x4011, 0x4011, 0x4011, 0x4011, 0x4011, 0x4011,
    414     0x4011, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000,
    415     0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000,
    416     0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0xf002, 0x2ee2, 0x2ed2,
    417     0x2de2, 0x2ec2, 0xf002, 0x2ce2, 0x2dd2, 0x2eb2, 0x2be2, 0xf002, 0x2dc2, 0x2cd2, 0x2ea2, 0x2ae2,
    418     0xf002, 0x2db2, 0x2bd2, 0x2cc2, 0x2e92, 0xf002, 0x29e2, 0x2da2, 0x2ad2, 0x2cb2, 0xf002, 0x2bc2,
    419     0x2e82, 0x28e2, 0x2d92, 0xf002, 0x29d2, 0x2e72, 0x27e2, 0x2ca2, 0xf002, 0x2ac2, 0x2bb2, 0x2d82,
    420     0x28d2, 0xf003, 0x3e01, 0x30e1, 0x2d01, 0x2d01, 0x16e2, 0x16e2, 0x16e2, 0x16e2, 0xf002, 0x2e62,
    421     0x2c92, 0x19c2, 0x19c2, 0xf001, 0x1e52, 0x1ab2, 0xf002, 0x15e2, 0x15e2, 0x2ba2, 0x2d72, 0xf001,
    422     0x17d2, 0x14e2, 0xf001, 0x1c82, 0x18c2, 0xf002, 0x2e42, 0x2e22, 0x1e32, 0x1e32, 0xf001, 0x1d62,
    423     0x16d2, 0xf001, 0x13e2, 0x1b92, 0xf001, 0x19b2, 0x1aa2, 0xf001, 0x12e2, 0x1e12, 0xf001, 0x11e2,
    424     0x1d52, 0xf001, 0x15d2, 0x1c72, 0xf001, 0x17c2, 0x1d42, 0xf001, 0x1b82, 0x18b2, 0xf001, 0x14d2,
    425     0x1a92, 0xf001, 0x19a2, 0x1c62, 0xf001, 0x16c2, 0x1d32, 0xf001, 0x13d2, 0x1d22, 0xf001, 0x12d2,
    426     0x1d12, 0xf001, 0x1b72, 0x17b2, 0xf001, 0x11d2, 0x1c52, 0xf001, 0x15c2, 0x1a82, 0xf001, 0x18a2,
    427     0x1992, 0xf001, 0x1c42, 0x14c2, 0xf001, 0x1b62, 0x16b2, 0xf002, 0x20d1, 0x2c01, 0x1c32, 0x1c32,
    428     0xf001, 0x13c2, 0x1a72, 0xf001, 0x17a2, 0x1c22, 0xf001, 0x12c2, 0x1b52, 0xf001, 0x15b2, 0x1c12,
    429     0xf001, 0x1982, 0x1892, 0xf001, 0x11c2, 0x1b42, 0xf002, 0x20c1, 0x2b01, 0x1b32, 0x1b32, 0xf002,
    430     0x20b1, 0x2a01, 0x1a12, 0x1a12, 0xf001, 0x1a62, 0x16a2, 0xf001, 0x1972, 0x1792, 0xf002, 0x20a1,
    431     0x2901, 0x1091, 0x1091, 0xf001, 0x1b22, 0x1a52, 0xf001, 0x15a2, 0x1b12, 0xf001, 0x11b2, 0x1962,
    432     0xf001, 0x1a42, 0x1872, 0xf001, 0x1801, 0x1081, 0xf001, 0x1701, 0x1071,
    433 };
    434 /* pow(2,-i/4) * pow(j,4/3) for i=0..3 (row) and j=0..15 (column).
         * Mixed fixed-point format: the first four entries of every row (j=0..3)
         * are Q28 (e.g. [0][1] = 0x10000000 = 1.0), the remaining twelve (j=4..15)
         * are Q25 (e.g. [0][8] = 0x20000000 = 16.0 = pow(8,4/3)). */
    435 const int pow43_14[4][16] PROGMEM = { /* j=0..3: Q28, j=4..15: Q25 */
    436 {   0x00000000, 0x10000000, 0x285145f3, 0x453a5cdb, 0x0cb2ff53, 0x111989d6,
    437     0x15ce31c8, 0x1ac7f203, 0x20000000, 0x257106b9, 0x2b16b4a3, 0x30ed74b4,
    438     0x36f23fa5, 0x3d227bd3, 0x437be656, 0x49fc823c, },
    439
    440 {   0x00000000, 0x0d744fcd, 0x21e71f26, 0x3a36abd9, 0x0aadc084, 0x0e610e6e,
    441     0x12560c1d, 0x168523cf, 0x1ae89f99, 0x1f7c03a4, 0x243bae49, 0x29249c67,
    442     0x2e34420f, 0x33686f85, 0x38bf3dff, 0x3e370182, },
    443
    444 {   0x00000000, 0x0b504f33, 0x1c823e07, 0x30f39a55, 0x08facd62, 0x0c176319,
    445     0x0f6b3522, 0x12efe2ad, 0x16a09e66, 0x1a79a317, 0x1e77e301, 0x2298d5b4,
    446     0x26da56fc, 0x2b3a902a, 0x2fb7e7e7, 0x3450f650, },
    447
    448 {   0x00000000, 0x09837f05, 0x17f910d7, 0x2929c7a9, 0x078d0dfa, 0x0a2ae661,
    449     0x0cf73154, 0x0fec91cb, 0x1306fe0a, 0x16434a6c, 0x199ee595, 0x1d17ae3d,
    450     0x20abd76a, 0x2459d551, 0x28204fbb, 0x2bfe1808, },
    451 };
    452 
    453 /* pow(j,4/3) for j=16..63, Q23 format.
         * Entry k holds the value for j = k + 16
         * (e.g. pow43[11] = 0x28800000 = 81.0 = pow(27,4/3)). */
    454 const int pow43[48] PROGMEM = {
    455     0x1428a2fa, 0x15db1bd6, 0x1796302c, 0x19598d85, 0x1b24e8bb, 0x1cf7fcfa,
    456     0x1ed28af2, 0x20b4582a, 0x229d2e6e, 0x248cdb55, 0x26832fda, 0x28800000,
    457     0x2a832287, 0x2c8c70a8, 0x2e9bc5d8, 0x30b0ff99, 0x32cbfd4a, 0x34eca001,
    458     0x3712ca62, 0x393e6088, 0x3b6f47e0, 0x3da56717, 0x3fe0a5fc, 0x4220ed72,
    459     0x44662758, 0x46b03e7c, 0x48ff1e87, 0x4b52b3f3, 0x4daaebfd, 0x5007b497,
    460     0x5268fc62, 0x54ceb29c, 0x5738c721, 0x59a72a59, 0x5c19cd35, 0x5e90a129,
    461     0x610b9821, 0x638aa47f, 0x660db90f, 0x6894c90b, 0x6b1fc80c, 0x6daeaa0d,
    462     0x70416360, 0x72d7e8b0, 0x75722ef9, 0x78102b85, 0x7ab1d3ec, 0x7d571e09,
    463 };
    464 
    465 const uint32_t polyCoef[264] PROGMEM = { /* 264 coefficients, laid out here as 33 rows of 8 */
    466     /* shuffled vs. original from 0, 1, ... 15 to 0, 15, 2, 13, ... 14, 1 */
        /* Entries with the high bits set (e.g. 0xfffdb63c) are presumably negative
         * coefficients stored as two's-complement bit patterns in uint32_t, and the
         * table is presumably the polyphase synthesis window - confirm both at the
         * use site, which is outside this part of the file. */
    467     0x00000000, 0x00000074, 0x00000354, 0x0000072c, 0x00001fd4, 0x00005084, 0x000066b8, 0x000249c4,
    468     0x00049478, 0xfffdb63c, 0x000066b8, 0xffffaf7c, 0x00001fd4, 0xfffff8d4, 0x00000354, 0xffffff8c,
    469     0xfffffffc, 0x00000068, 0x00000368, 0x00000644, 0x00001f40, 0x00004ad0, 0x00005d1c, 0x00022ce0,
    470     0x000493c0, 0xfffd9960, 0x00006f78, 0xffffa9cc, 0x0000203c, 0xfffff7e4, 0x00000340, 0xffffff84,
    471     0xfffffffc, 0x00000060, 0x00000378, 0x0000056c, 0x00001e80, 0x00004524, 0x000052a0, 0x00020ffc,
    472     0x000491a0, 0xfffd7ca0, 0x00007760, 0xffffa424, 0x00002080, 0xfffff6ec, 0x00000328, 0xffffff74,
    473     0xfffffffc, 0x00000054, 0x00000384, 0x00000498, 0x00001d94, 0x00003f7c, 0x00004744, 0x0001f32c,
    474     0x00048e18, 0xfffd6008, 0x00007e70, 0xffff9e8c, 0x0000209c, 0xfffff5ec, 0x00000310, 0xffffff68,
    475     0xfffffffc, 0x0000004c, 0x0000038c, 0x000003d0, 0x00001c78, 0x000039e4, 0x00003b00, 0x0001d680,
    476     0x00048924, 0xfffd43ac, 0x000084b0, 0xffff990c, 0x00002094, 0xfffff4e4, 0x000002f8, 0xffffff5c,
    477     0xfffffffc, 0x00000044, 0x00000390, 0x00000314, 0x00001b2c, 0x0000345c, 0x00002ddc, 0x0001ba04,
    478     0x000482d0, 0xfffd279c, 0x00008a20, 0xffff93a4, 0x0000206c, 0xfffff3d4, 0x000002dc, 0xffffff4c,
    479     0xfffffffc, 0x00000040, 0x00000390, 0x00000264, 0x000019b0, 0x00002ef0, 0x00001fd4, 0x00019dc8,
    480     0x00047b1c, 0xfffd0be8, 0x00008ecc, 0xffff8e64, 0x00002024, 0xfffff2c0, 0x000002c0, 0xffffff3c,
    481     0xfffffff8, 0x00000038, 0x0000038c, 0x000001bc, 0x000017fc, 0x0000299c, 0x000010e8, 0x000181d8,
    482     0x0004720c, 0xfffcf09c, 0x000092b4, 0xffff894c, 0x00001fc0, 0xfffff1a4, 0x000002a4, 0xffffff2c,
    483     0xfffffff8, 0x00000034, 0x00000380, 0x00000120, 0x00001618, 0x00002468, 0x00000118, 0x00016644,
    484     0x000467a4, 0xfffcd5cc, 0x000095e0, 0xffff8468, 0x00001f44, 0xfffff084, 0x00000284, 0xffffff18,
    485     0xfffffff8, 0x0000002c, 0x00000374, 0x00000090, 0x00001400, 0x00001f58, 0xfffff068, 0x00014b14,
    486     0x00045bf0, 0xfffcbb88, 0x00009858, 0xffff7fbc, 0x00001ea8, 0xffffef60, 0x00000268, 0xffffff04,
    487     0xfffffff8, 0x00000028, 0x0000035c, 0x00000008, 0x000011ac, 0x00001a70, 0xffffded8, 0x00013058,
    488     0x00044ef8, 0xfffca1d8, 0x00009a1c, 0xffff7b54, 0x00001dfc, 0xffffee3c, 0x0000024c, 0xfffffef0,
    489     0xfffffff4, 0x00000024, 0x00000340, 0xffffff8c, 0x00000f28, 0x000015b0, 0xffffcc70, 0x0001161c,
    490     0x000440bc, 0xfffc88d8, 0x00009b3c, 0xffff7734, 0x00001d38, 0xffffed18, 0x0000022c, 0xfffffedc,
    491     0xfffffff4, 0x00000020, 0x00000320, 0xffffff1c, 0x00000c68, 0x0000111c, 0xffffb92c, 0x0000fc6c,
    492     0x00043150, 0xfffc708c, 0x00009bb8, 0xffff7368, 0x00001c64, 0xffffebf4, 0x00000210, 0xfffffec4,
    493     0xfffffff0, 0x0000001c, 0x000002f4, 0xfffffeb4, 0x00000974, 0x00000cb8, 0xffffa518, 0x0000e350,
    494     0x000420b4, 0xfffc5908, 0x00009b9c, 0xffff6ff4, 0x00001b7c, 0xffffead0, 0x000001f4, 0xfffffeac,
    495     0xfffffff0, 0x0000001c, 0x000002c4, 0xfffffe58, 0x00000648, 0x00000884, 0xffff9038, 0x0000cad0,
    496     0x00040ef8, 0xfffc425c, 0x00009af0, 0xffff6ce0, 0x00001a88, 0xffffe9b0, 0x000001d4, 0xfffffe94,
    497     0xffffffec, 0x00000018, 0x0000028c, 0xfffffe04, 0x000002e4, 0x00000480, 0xffff7a90, 0x0000b2fc,
    498     0x0003fc28, 0xfffc2c90, 0x000099b8, 0xffff6a3c, 0x00001988, 0xffffe898, 0x000001bc, 0xfffffe7c,
    499     0x000001a0, 0x0000187c, 0x000097fc, 0x0003e84c, 0xffff6424, 0xffffff4c, 0x00000248, 0xffffffec,
    500 };
    501 
    502 /* format = Q30, range = [0.0981, 1.9976]
    503  *
    504  * n = 16;
    505  * k = 0;
    506  * for(i=0; i<5; i++, n=n/2) {
    507  *   for(p=0; p<n; p++, k++) {
    508  *     t = (PI / (4*n)) * (2*p + 1);
    509  *     coef32[k] = 2.0 * cos(t);
    510  *   }
    511  * }
    512  * coef32[30] *= 0.5;   (for the initial back butterfly, i.e. two-point DCT)
         *
         * The loop above emits 16 + 8 + 4 + 2 + 1 = 31 values, matching coef32[31].
    513  */
    514 const int coef32[31] PROGMEM = {
    515     0x7fd8878d, 0x7e9d55fc, 0x7c29fbee, 0x78848413, 0x73b5ebd0, 0x6dca0d14, 0x66cf811f, 0x5ed77c89,
    516     0x55f5a4d2, 0x4c3fdff3, 0x41ce1e64, 0x36ba2013, 0x2b1f34eb, 0x1f19f97b, 0x12c8106e, 0x0647d97c,
    517     0x7f62368f, 0x7a7d055b, 0x70e2cbc6, 0x62f201ac, 0x5133cc94, 0x3c56ba70, 0x25280c5d, 0x0c8bd35e,
    518     0x7d8a5f3f, 0x6a6d98a4, 0x471cece6, 0x18f8b83c, 0x7641af3c, 0x30fbc54d, 0x2d413ccc,
    519 };
    520 
    521 /* let c(j) = cos(M_PI/36 * ((j)+0.5)), s(j) = sin(M_PI/36 * ((j)+0.5))
    522  * then fastWin36[2*j+0] = c(j)*(s(j) + c(j)), j = [0, 8]
    523  *      fastWin36[2*j+1] = c(j)*(s(j) - c(j)), j = [0, 8]
    524  * format = Q30
         * The odd-index entries are negative; they are stored as two's-complement
         * bit patterns in uint32_t (e.g. fastWin36[1] = 0xc2e92724).
    525  */
    526 const uint32_t fastWin36[18] PROGMEM = {
    527         0x42aace8b, 0xc2e92724, 0x47311c28, 0xc95f619a, 0x4a868feb, 0xd0859d8c,
    528         0x4c913b51, 0xd8243ea0, 0x4d413ccc, 0xe0000000, 0x4c913b51, 0xe7dbc161,
    529         0x4a868feb, 0xef7a6275, 0x47311c28, 0xf6a09e67, 0x42aace8b, 0xfd16d8dd
    530 };
    531 
    532 /* tables for quadruples: 64 entries for table A followed by 16 for table B
    533  * format 0xAB (one byte per entry)
    534  *  A = length of codeword (high nibble)
    535  *  B = codeword (low nibble)
         * NOTE(review): B may actually be the decoded 4-bit quad value rather than
         * the codeword itself - confirm against the quad decoder, which is outside
         * this part of the file.
    536  */
    537 const unsigned char quadTable[64+16] PROGMEM = {
    538     /* table A */
    539     0x6b, 0x6f, 0x6d, 0x6e, 0x67, 0x65, 0x59, 0x59, 0x56, 0x56, 0x53, 0x53, 0x5a, 0x5a, 0x5c, 0x5c,
    540     0x42, 0x42, 0x42, 0x42, 0x41, 0x41, 0x41, 0x41, 0x44, 0x44, 0x44, 0x44, 0x48, 0x48, 0x48, 0x48,
    541     0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
    542     0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
    543     /* table B */
    544     0x4f, 0x4e, 0x4d, 0x4c, 0x4b, 0x4a, 0x49, 0x48, 0x47, 0x46, 0x45, 0x44, 0x43, 0x42, 0x41, 0x40,
    545 };
    546 
    547 /* indexing = [version][layer][bitrate index]
    548  * bitrate (kbps) of frame
    549  *   - bitrate index == 0 is "free" mode (bitrate determined on the fly by
    550  *       counting bits between successive sync words)
         *   - the layer index is (layer number - 1); UnpackFrameHeader reads
         *       bitrateTab[m_MPEGVersion][layer - 1][brIdx] and multiplies by 1000
         *   - there are only 15 columns; UnpackFrameHeader rejects brIdx == 15
    551  */
    552 const short bitrateTab[3][3][15] PROGMEM = { {
    553 /* MPEG-1 */
    554 { 0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448 }, /* Layer 1 */
    555 { 0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384 }, /* Layer 2 */
    556 { 0, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320 }, /* Layer 3 */
    557 }, {
    558 /* MPEG-2 */
    559 { 0, 32, 48, 56, 64, 80, 96, 112, 128, 144, 160, 176, 192, 224, 256 }, /* Layer 1 */
    560 { 0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160 }, /* Layer 2 */
    561 { 0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160 }, /* Layer 3 */
    562 }, {
    563 /* MPEG-2.5 */
    564 { 0, 32, 48, 56, 64, 80, 96, 112, 128, 144, 160, 176, 192, 224, 256 }, /* Layer 1 */
    565 { 0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160 }, /* Layer 2 */
    566 { 0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160 }, /* Layer 3 */
    567 }, };
    568 
    569 /* indexing = [version][sampleRate][bitRate]
    570  * for layer3, nSlots = floor(samps/frame * bitRate / sampleRate / 8)
    571  *   - add one pad slot if necessary
         *   - column 0 (free-format bitrate) is 0 in every row
         *   - UnpackFrameHeader reads slotTab[m_MPEGVersion][srIdx][brIdx] and then
         *       subtracts the side-info, header and CRC bytes to get nSlots
    572  */
    573 const short slotTab[3][3][15] PROGMEM = {
    574     { /* MPEG-1 */
    575         { 0, 104, 130, 156, 182, 208, 261, 313, 365, 417, 522, 626, 731, 835, 1044 }, /* 44 kHz */
    576         { 0, 96, 120, 144, 168, 192, 240, 288, 336, 384, 480, 576, 672, 768, 960 }, /* 48 kHz */
    577         { 0, 144, 180, 216, 252, 288, 360, 432, 504, 576, 720, 864, 1008, 1152, 1440 }, /* 32 kHz */
    578     },
    579     { /* MPEG-2 */
    580         { 0, 26, 52, 78, 104, 130, 156, 182, 208, 261, 313, 365, 417, 470, 522 }, /* 22 kHz */
    581         { 0, 24, 48, 72, 96, 120, 144, 168, 192, 240, 288, 336, 384, 432, 480 }, /* 24 kHz */
    582         { 0, 36, 72, 108, 144, 180, 216, 252, 288, 360, 432, 504, 576, 648, 720 }, /* 16 kHz */
    583     },
    584     { /* MPEG-2.5 */
    585         { 0, 52, 104, 156, 208, 261, 313, 365, 417, 522, 626, 731, 835, 940, 1044 }, /* 11 kHz */
    586         { 0, 48, 96, 144, 192, 240, 288, 336, 384, 480, 576, 672, 768, 864, 960 }, /* 12 kHz */
    587         { 0, 72, 144, 216, 288, 360, 432, 504, 576, 720, 864, 1008, 1152, 1296, 1440 }, /*  8 kHz */
    588     },
    589 };
    590 
    591 const uint32_t imdctWin[4][36] PROGMEM = { /* four 36-entry window tables */
        /* Values are raw 32-bit patterns; entries such as 0xfd16d8dd are presumably
         * negative fixed-point numbers in two's complement - the Q format is not
         * stated here, TODO confirm at the use site.
         * Row 0 has all 36 entries non-zero, row 1 ends with six zeros, row 2 has
         * only its first 12 entries non-zero, and row 3 starts with six zeros.
         * NOTE(review): the first index is presumably the block type - confirm. */
    592     {
    593     0x02aace8b, 0x07311c28, 0x0a868fec, 0x0c913b52, 0x0d413ccd, 0x0c913b52, 0x0a868fec, 0x07311c28,
    594     0x02aace8b, 0xfd16d8dd, 0xf6a09e66, 0xef7a6275, 0xe7dbc161, 0xe0000000, 0xd8243e9f, 0xd0859d8b,
    595     0xc95f619a, 0xc2e92723, 0xbd553175, 0xb8cee3d8, 0xb5797014, 0xb36ec4ae, 0xb2bec333, 0xb36ec4ae,
    596     0xb5797014, 0xb8cee3d8, 0xbd553175, 0xc2e92723, 0xc95f619a, 0xd0859d8b, 0xd8243e9f, 0xe0000000,
    597     0xe7dbc161, 0xef7a6275, 0xf6a09e66, 0xfd16d8dd  },
    598     {
    599     0x02aace8b, 0x07311c28, 0x0a868fec, 0x0c913b52, 0x0d413ccd, 0x0c913b52, 0x0a868fec, 0x07311c28,
    600     0x02aace8b, 0xfd16d8dd, 0xf6a09e66, 0xef7a6275, 0xe7dbc161, 0xe0000000, 0xd8243e9f, 0xd0859d8b,
    601     0xc95f619a, 0xc2e92723, 0xbd44ef14, 0xb831a052, 0xb3aa3837, 0xafb789a4, 0xac6145bb, 0xa9adecdc,
    602     0xa864491f, 0xad1868f0, 0xb8431f49, 0xc8f42236, 0xdda8e6b1, 0xf47755dc, 0x00000000, 0x00000000,
    603     0x00000000, 0x00000000, 0x00000000, 0x00000000  },
    604     {
    605     0x07311c28, 0x0d413ccd, 0x07311c28, 0xf6a09e66, 0xe0000000, 0xc95f619a, 0xb8cee3d8, 0xb2bec333,
    606     0xb8cee3d8, 0xc95f619a, 0xe0000000, 0xf6a09e66, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
    607     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
    608     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
    609     0x00000000, 0x00000000, 0x00000000, 0x00000000  },
    610     {
    611     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x028e9709, 0x04855ec0,
    612     0x026743a1, 0xfcde2c10, 0xf515dc82, 0xec93e53b, 0xe4c880f8, 0xdd5d0b08, 0xd63510b7, 0xcf5e834a,
    613     0xc8e6b562, 0xc2da4105, 0xbd553175, 0xb8cee3d8, 0xb5797014, 0xb36ec4ae, 0xb2bec333, 0xb36ec4ae,
    614     0xb5797014, 0xb8cee3d8, 0xbd553175, 0xc2e92723, 0xc95f619a, 0xd0859d8b, 0xd8243e9f, 0xe0000000,
    615     0xe7dbc161, 0xef7a6275, 0xf6a09e66, 0xfd16d8dd  },
    616 };
    617 
    618 const int ISFMpeg1[2][7] PROGMEM = { /* two rows of seven scale factors */
        /* The fixed-point format is not stated here. If read as Q30, row 0 runs from
         * 0.0 to 1.0 (0x40000000) and row 1 from 0.0 to sqrt(2) (0x5a827999), i.e.
         * row 1 is row 0 scaled by sqrt(2) - TODO confirm format and meaning of the
         * row index at the use site (presumably MPEG-1 intensity-stereo factors). */
    619     {0x00000000, 0x0d8658ba, 0x176cf5d0, 0x20000000, 0x28930a2f, 0x3279a745, 0x40000000},
    620     {0x00000000, 0x13207f5c, 0x2120fb83, 0x2d413ccc, 0x39617e16, 0x4761fa3d, 0x5a827999}
    621 };
    622 
    623 const int ISFMpeg2[2][2][16] PROGMEM = { /* indexing = [intensityScale][mid-side][16 entries] */
        /* The fixed-point format is not stated here. If read as Q30, the "mid-side
         * off" rows start at 1.0 (0x40000000) and the "mid-side on" rows at sqrt(2)
         * (0x5a827999); each row decreases monotonically - TODO confirm the format
         * at the use site. */
    624 {   {   /* intensityScale off, mid-side off */
    625         0x40000000, 0x35d13f32, 0x2d413ccc, 0x260dfc14, 0x1fffffff, 0x1ae89f99, 0x16a09e66, 0x1306fe0a,
    626         0x0fffffff, 0x0d744fcc, 0x0b504f33, 0x09837f05, 0x07ffffff, 0x06ba27e6, 0x05a82799, 0x04c1bf82 },
    627     {   /* intensityScale off, mid-side on */
    628         0x5a827999, 0x4c1bf827, 0x3fffffff, 0x35d13f32, 0x2d413ccc, 0x260dfc13, 0x1fffffff, 0x1ae89f99,
    629         0x16a09e66, 0x1306fe09, 0x0fffffff, 0x0d744fcc, 0x0b504f33, 0x09837f04, 0x07ffffff, 0x06ba27e6 },  },
    630 {   {   /* intensityScale on, mid-side off */
    631         0x40000000, 0x2d413ccc, 0x20000000, 0x16a09e66, 0x10000000, 0x0b504f33, 0x08000000, 0x05a82799,
    632         0x04000000, 0x02d413cc, 0x02000000, 0x016a09e6, 0x01000000, 0x00b504f3, 0x00800000, 0x005a8279 },
    633     {   /* intensityScale on, mid-side on */
    634         0x5a827999, 0x3fffffff, 0x2d413ccc, 0x1fffffff, 0x16a09e66, 0x0fffffff, 0x0b504f33, 0x07ffffff,
    635         0x05a82799, 0x03ffffff, 0x02d413cc, 0x01ffffff, 0x016a09e6, 0x00ffffff, 0x00b504f3, 0x007fffff }   }
    636 };
    637 
    638 const uint32_t m_COS0_0 = 0x4013c251;  /* Q31 */
        /* The m_COS* constants are the entries of m_dcttab. Each trailing comment
         * gives that constant's own fixed-point format; the format varies from Q27
         * to Q31 within the set.
         * NOTE(review): numerically m_COS0_0 is about 0.5006 in Q31, consistent with
         * 0.5/cos(pi/64), and m_COS4_0 equals sqrt(0.5) in Q31 - confirm the
         * derivation of the remaining values against the reference decoder. */
    639 const uint32_t m_COS0_1 = 0x40b345bd;  /* Q31 */
    640 const uint32_t m_COS0_2 = 0x41fa2d6d;  /* Q31 */
    641 const uint32_t m_COS0_3 = 0x43f93421;  /* Q31 */
    642 const uint32_t m_COS0_4 = 0x46cc1bc4;  /* Q31 */
    643 const uint32_t m_COS0_5 = 0x4a9d9cf0;  /* Q31 */
    644 const uint32_t m_COS0_6 = 0x4fae3711;  /* Q31 */
    645 const uint32_t m_COS0_7 = 0x56601ea7;  /* Q31 */
    646 const uint32_t m_COS0_8 = 0x5f4cf6eb;  /* Q31 */
    647 const uint32_t m_COS0_9 = 0x6b6fcf26;  /* Q31 */
    648 const uint32_t m_COS0_10= 0x7c7d1db3;  /* Q31 */
    649 const uint32_t m_COS0_11= 0x4ad81a97;  /* Q30 */
    650 const uint32_t m_COS0_12= 0x5efc8d96;  /* Q30 */
    651 const uint32_t m_COS0_13= 0x41d95790;  /* Q29 */
    652 const uint32_t m_COS0_14= 0x6d0b20cf;  /* Q29 */
    653 const uint32_t m_COS0_15= 0x518522fb;  /* Q27 */
    654 const uint32_t m_COS1_0 = 0x404f4672;  /* Q31 */
    655 const uint32_t m_COS1_1 = 0x42e13c10;  /* Q31 */
    656 const uint32_t m_COS1_2 = 0x48919f44;  /* Q31 */
    657 const uint32_t m_COS1_3 = 0x52cb0e63;  /* Q31 */
    658 const uint32_t m_COS1_4 = 0x64e2402e;  /* Q31 */
    659 const uint32_t m_COS1_5 = 0x43e224a9;  /* Q30 */
    660 const uint32_t m_COS1_6 = 0x6e3c92c1;  /* Q30 */
    661 const uint32_t m_COS1_7 = 0x519e4e04;  /* Q28 */
    662 const uint32_t m_COS2_0 = 0x4140fb46;  /* Q31 */
    663 const uint32_t m_COS2_1 = 0x4cf8de88;  /* Q31 */
    664 const uint32_t m_COS2_2 = 0x73326bbf;  /* Q31 */
    665 const uint32_t m_COS2_3 = 0x52036742;  /* Q29 */
    666 const uint32_t m_COS3_0 = 0x4545e9ef;  /* Q31 */
    667 const uint32_t m_COS3_1 = 0x539eba45;  /* Q30 */
    668 const uint32_t m_COS4_0 = 0x5a82799a;  /* Q31 */
    669 
    670 const uint32_t m_dcttab[48] PROGMEM = { // faster in ROM
        /* 16 rows of three constants: 8 rows for the first pass, 8 for the second.
         * The trailing comment on each row lists the Q format (number of fraction
         * bits) of its three entries, in order.
         * The negated entries apply unary minus to a uint32_t, which wraps modulo
         * 2^32 and so stores the two's-complement bit pattern of the negative value. */
    671     /* first pass */
    672      m_COS0_0,  m_COS0_15, m_COS1_0,    /* 31, 27, 31 */
    673      m_COS0_1,  m_COS0_14, m_COS1_1,    /* 31, 29, 31 */
    674      m_COS0_2,  m_COS0_13, m_COS1_2,    /* 31, 29, 31 */
    675      m_COS0_3,  m_COS0_12, m_COS1_3,    /* 31, 30, 31 */
    676      m_COS0_4,  m_COS0_11, m_COS1_4,    /* 31, 30, 31 */
    677      m_COS0_5,  m_COS0_10, m_COS1_5,    /* 31, 31, 30 */
    678      m_COS0_6,  m_COS0_9,  m_COS1_6,    /* 31, 31, 30 */
    679      m_COS0_7,  m_COS0_8,  m_COS1_7,    /* 31, 31, 28 */
    680     /* second pass */
    681      m_COS2_0,  m_COS2_3,  m_COS3_0,   /* 31, 29, 31 */
    682      m_COS2_1,  m_COS2_2,  m_COS3_1,   /* 31, 31, 30 */
    683     -m_COS2_0, -m_COS2_3,  m_COS3_0,   /* 31, 29, 31 */
    684     -m_COS2_1, -m_COS2_2,  m_COS3_1,   /* 31, 31, 30 */
    685      m_COS2_0,  m_COS2_3,  m_COS3_0,   /* 31, 29, 31 */
    686      m_COS2_1,  m_COS2_2,  m_COS3_1,   /* 31, 31, 30 */
    687     -m_COS2_0, -m_COS2_3,  m_COS3_0,   /* 31, 29, 31 */
    688     -m_COS2_1, -m_COS2_2,  m_COS3_1,   /* 31, 31, 30 */
    689 };
    690 
    691 /***********************************************************************************************************************
    692  * B I T S T R E A M
    693  **********************************************************************************************************************/
    694 
    695 void SetBitstreamPointer(BitStreamInfo_t *bsi, int nBytes, unsigned char *buf) {
    696     /* init bitstream */
    697     bsi->bytePtr = buf;
    698     bsi->iCache = 0; /* 4-byte unsigned int */
    699     bsi->cachedBits = 0; /* i.e. zero bits in cache */
    700     bsi->nBytes = nBytes;
    701 }
    702 //----------------------------------------------------------------------------------------------------------------------
    703 void RefillBitstreamCache(BitStreamInfo_t *bsi) {
    704     int nBytes = bsi->nBytes;
    705     /* optimize for common case, independent of machine endian-ness */
    706     if (nBytes >= 4) {
    707         bsi->iCache = (*bsi->bytePtr++) << 24;
    708         bsi->iCache |= (*bsi->bytePtr++) << 16;
    709         bsi->iCache |= (*bsi->bytePtr++) << 8;
    710         bsi->iCache |= (*bsi->bytePtr++);
    711         bsi->cachedBits = 32;
    712         bsi->nBytes -= 4;
    713     } else {
    714         bsi->iCache = 0;
    715         while (nBytes--) {
    716             bsi->iCache |= (*bsi->bytePtr++);
    717             bsi->iCache <<= 8;
    718         }
    719         bsi->iCache <<= ((3 - bsi->nBytes) * 8);
    720         bsi->cachedBits = 8 * bsi->nBytes;
    721         bsi->nBytes = 0;
    722     }
    723 }
    724 //----------------------------------------------------------------------------------------------------------------------
    725 unsigned int GetBits(BitStreamInfo_t *bsi, int nBits) {
    726     unsigned int data, lowBits;
    727 
    728     nBits &= 0x1f; /* nBits mod 32 to avoid unpredictable results like >> by negative amount */
    729     data = bsi->iCache >> (31 - nBits); /* unsigned >> so zero-extend */
    730     data >>= 1; /* do as >> 31, >> 1 so that nBits = 0 works okay (returns 0) */
    731     bsi->iCache <<= nBits; /* left-justify cache */
    732     bsi->cachedBits -= nBits; /* how many bits have we drawn from the cache so far */
    733     if (bsi->cachedBits < 0) {/* if we cross an int boundary, refill the cache */
    734         lowBits = -bsi->cachedBits;
    735         RefillBitstreamCache(bsi);
    736         data |= bsi->iCache >> (32 - lowBits); /* get the low-order bits */
    737         bsi->cachedBits -= lowBits; /* how many bits have we drawn from the cache so far */
    738         bsi->iCache <<= lowBits; /* left-justify cache */
    739     }
    740     return data;
    741 }
    742 //----------------------------------------------------------------------------------------------------------------------
    743 int CalcBitsUsed(BitStreamInfo_t *bsi, unsigned char *startBuf, int startOffset){
    744     int bitsUsed;
    745     bitsUsed = (bsi->bytePtr - startBuf) * 8;
    746     bitsUsed -= bsi->cachedBits;
    747     bitsUsed -= startOffset;
    748     return bitsUsed;
    749 }
    750 //----------------------------------------------------------------------------------------------------------------------
    751 int CheckPadBit(){
    752     return (m_FrameHeader->paddingBit ? 1 : 0);
    753 }
    754 //----------------------------------------------------------------------------------------------------------------------
    755 int UnpackFrameHeader(unsigned char *buf){
    756     int verIdx;
    757     /* validate pointers and sync word */
    758     if ((buf[0] & m_SYNCWORDH) != m_SYNCWORDH || (buf[1] & m_SYNCWORDL) != m_SYNCWORDL)  return -1;
    759     /* read header fields - use bitmasks instead of GetBits() for speed, since format never varies */
    760     verIdx = (buf[1] >> 3) & 0x03;
    761     m_MPEGVersion = (MPEGVersion_t) (verIdx == 0 ? MPEG25 : ((verIdx & 0x01) ? MPEG1 : MPEG2));
    762     m_FrameHeader->layer = 4 - ((buf[1] >> 1) & 0x03); /* easy mapping of index to layer number, 4 = error */
    763     m_FrameHeader->crc = 1 - ((buf[1] >> 0) & 0x01);
    764     m_FrameHeader->brIdx = (buf[2] >> 4) & 0x0f;
    765     m_FrameHeader->srIdx = (buf[2] >> 2) & 0x03;
    766     m_FrameHeader->paddingBit = (buf[2] >> 1) & 0x01;
    767     m_FrameHeader->privateBit = (buf[2] >> 0) & 0x01;
    768     m_sMode = (StereoMode_t) ((buf[3] >> 6) & 0x03); /* maps to correct enum (see definition) */
    769     m_FrameHeader->modeExt = (buf[3] >> 4) & 0x03;
    770     m_FrameHeader->copyFlag = (buf[3] >> 3) & 0x01;
    771     m_FrameHeader->origFlag = (buf[3] >> 2) & 0x01;
    772     m_FrameHeader->emphasis = (buf[3] >> 0) & 0x03;
    773     /* check parameters to avoid indexing tables with bad values */
    774     if (m_FrameHeader->srIdx == 3 || m_FrameHeader->layer == 4 || m_FrameHeader->brIdx == 15) return -1;
    775     /* for readability (we reference sfBandTable many times in decoder) */
    776     m_SFBandTable = sfBandTable[m_MPEGVersion][m_FrameHeader->srIdx];
    777     if (m_sMode != Joint) /* just to be safe (dequant, stproc check fh->modeExt) */
    778         m_FrameHeader->modeExt = 0;
    779     /* init user-accessible data */
    780     m_MP3DecInfo->nChans = (m_sMode == Mono ? 1 : 2);
    781     m_MP3DecInfo->samprate = samplerateTab[m_MPEGVersion][m_FrameHeader->srIdx];
    782     m_MP3DecInfo->nGrans = (m_MPEGVersion == MPEG1 ? m_NGRANS_MPEG1 : m_NGRANS_MPEG2);
    783     m_MP3DecInfo->nGranSamps = ((int) samplesPerFrameTab[m_MPEGVersion][m_FrameHeader->layer - 1])/m_MP3DecInfo->nGrans;
    784     m_MP3DecInfo->layer = m_FrameHeader->layer;
    785 
    786     /* get bitrate and nSlots from table, unless brIdx == 0 (free mode) in which case caller must figure it out himself
    787      * question - do we want to overwrite mp3DecInfo->bitrate with 0 each time if it's free mode, and
    788      *  copy the pre-calculated actual free bitrate into it in mp3dec.c (according to the spec,
    789      *  this shouldn't be necessary, since it should be either all frames free or none free)
    790      */
    791     if (m_FrameHeader->brIdx) {
    792         m_MP3DecInfo->bitrate=((int) bitrateTab[m_MPEGVersion][m_FrameHeader->layer - 1][m_FrameHeader->brIdx]) * 1000;
    793         /* nSlots = total frame bytes (from table) - sideInfo bytes - header - CRC (if present) + pad (if present) */
    794         m_MP3DecInfo->nSlots= (int) slotTab[m_MPEGVersion][m_FrameHeader->srIdx][m_FrameHeader->brIdx]
    795                 - (int) sideBytesTab[m_MPEGVersion][(m_sMode == Mono ? 0 : 1)] - 4
    796                 - (m_FrameHeader->crc ? 2 : 0) + (m_FrameHeader->paddingBit ? 1 : 0);
    797     }
    798     /* load crc word, if enabled, and return length of frame header (in bytes) */
    799     if (m_FrameHeader->crc) {
    800         m_FrameHeader->CRCWord = ((int) buf[4] << 8 | (int) buf[5] << 0);
    801         return 6;
    802     } else {
    803         m_FrameHeader->CRCWord = 0;
    804         return 4;
    805     }
    806 }
    807 //----------------------------------------------------------------------------------------------------------------------
    808 int UnpackSideInfo( unsigned char *buf) {
    809     int gr, ch, bd, nBytes;
    810     BitStreamInfo_t bitStreamInfo, *bsi;
    811 
    812     SideInfoSub_t *sis;
    813     /* validate pointers and sync word */
    814     bsi = &bitStreamInfo;
    815     if (m_MPEGVersion == MPEG1) {
    816         /* MPEG 1 */
    817         nBytes=(m_sMode == Mono ? m_SIBYTES_MPEG1_MONO : m_SIBYTES_MPEG1_STEREO);
    818         SetBitstreamPointer(bsi, nBytes, buf);
    819         m_SideInfo->mainDataBegin = GetBits(bsi, 9);
    820         m_SideInfo->privateBits= GetBits(bsi, (m_sMode == Mono ? 5 : 3));
    821         for (ch = 0; ch < m_MP3DecInfo->nChans; ch++)
    822             for (bd = 0; bd < m_MAX_SCFBD; bd++) m_SideInfo->scfsi[ch][bd] = GetBits(bsi, 1);
    823     } else {
    824         /* MPEG 2, MPEG 2.5 */
    825         nBytes=(m_sMode == Mono ? m_SIBYTES_MPEG2_MONO : m_SIBYTES_MPEG2_STEREO);
    826         SetBitstreamPointer(bsi, nBytes, buf);
    827         m_SideInfo->mainDataBegin = GetBits(bsi, 8);
    828         m_SideInfo->privateBits = GetBits(bsi, (m_sMode == Mono ? 1 : 2));
    829     }
    830     for (gr = 0; gr < m_MP3DecInfo->nGrans; gr++) {
    831         for (ch = 0; ch < m_MP3DecInfo->nChans; ch++) {
    832             sis = &m_SideInfoSub[gr][ch]; /* side info subblock for this granule, channel */
    833             sis->part23Length = GetBits(bsi, 12);
    834             sis->nBigvals = GetBits(bsi, 9);
    835             sis->globalGain = GetBits(bsi, 8);
    836             sis->sfCompress = GetBits(bsi, (m_MPEGVersion == MPEG1 ? 4 : 9));
    837             sis->winSwitchFlag = GetBits(bsi, 1);
    838             if (sis->winSwitchFlag) {
    839                 /* this is a start, stop, short, or mixed block */
    840                 sis->blockType = GetBits(bsi, 2); /* 0 = normal, 1 = start, 2 = short, 3 = stop */
    841                 sis->mixedBlock = GetBits(bsi, 1); /* 0 = not mixed, 1 = mixed */
    842                 sis->tableSelect[0] = GetBits(bsi, 5);
    843                 sis->tableSelect[1] = GetBits(bsi, 5);
    844                 sis->tableSelect[2] = 0; /* unused */
    845                 sis->subBlockGain[0] = GetBits(bsi, 3);
    846                 sis->subBlockGain[1] = GetBits(bsi, 3);
    847                 sis->subBlockGain[2] = GetBits(bsi, 3);
    848                 if (sis->blockType == 0) {
    849                     /* this should not be allowed, according to spec */
    850                     sis->nBigvals = 0;
    851                     sis->part23Length = 0;
    852                     sis->sfCompress = 0;
    853                 } else if (sis->blockType == 2 && sis->mixedBlock == 0) {
    854                     /* short block, not mixed */
    855                     sis->region0Count = 8;
    856                 } else {
    857                     /* start, stop, or short-mixed */
    858                     sis->region0Count = 7;
    859                 }
    860                 sis->region1Count = 20 - sis->region0Count;
    861             } else {
    862                 /* this is a normal block */
    863                 sis->blockType = 0;
    864                 sis->mixedBlock = 0;
    865                 sis->tableSelect[0] = GetBits(bsi, 5);
    866                 sis->tableSelect[1] = GetBits(bsi, 5);
    867                 sis->tableSelect[2] = GetBits(bsi, 5);
    868                 sis->region0Count = GetBits(bsi, 4);
    869                 sis->region1Count = GetBits(bsi, 3);
    870             }
    871             sis->preFlag = (m_MPEGVersion == MPEG1 ? GetBits(bsi, 1) : 0);
    872             sis->sfactScale = GetBits(bsi, 1);
    873             sis->count1TableSelect = GetBits(bsi, 1);
    874         }
    875     }
    876     m_MP3DecInfo->mainDataBegin = m_SideInfo->mainDataBegin; /* needed by main decode loop */
    877     assert(nBytes == CalcBitsUsed(bsi, buf, 0) >> 3);
    878     return nBytes;
    879 }
    880 /***********************************************************************************************************************
    881  * Function:    UnpackSFMPEG1
    882  *
    883  * Description: unpack MPEG 1 scalefactors from bitstream
    884  *
    885  * Inputs:      BitStreamInfo, SideInfoSub, ScaleFactorInfoSub structs for this
    886  *                granule/channel
    887  *              vector of scfsi flags from side info, length = 4 (MAX_SCFBD)
    888  *              index of current granule
    889  *              ScaleFactorInfoSub from granule 0 (for granule 1, if scfsi[i] is set,
    890  *                then we just replicate the scale factors from granule 0 in the
    891  *                i'th set of scalefactor bands)
    892  *
    893  * Outputs:     updated BitStreamInfo struct
    894  *              scalefactors in sfis (short and/or long arrays, as appropriate)
    895  *
    896  * Return:      none
    897  *
    898  * Notes:       set order of short blocks to s[band][window] instead of s[window][band]
    899  *                so that we index through consectutive memory locations when unpacking
    900  *                (make sure dequantizer follows same convention)
    901  *              Illegal Intensity Position = 7 (always) for MPEG1 scale factors
    902  **********************************************************************************************************************/
    903 void UnpackSFMPEG1(BitStreamInfo_t *bsi, SideInfoSub_t *sis,
    904                    ScaleFactorInfoSub_t *sfis, int *scfsi, int gr, ScaleFactorInfoSub_t *sfisGr0){
    905     int sfb;
    906     int slen0, slen1;
    907     /* these can be 0, so make sure GetBits(bsi, 0) returns 0 (no >> 32 or anything) */
    908     slen0 = (int)m_SFLenTab[sis->sfCompress][0];
    909     slen1 = (int)m_SFLenTab[sis->sfCompress][1];
    910     if (sis->blockType == 2){
    911         /* short block, type 2 (implies winSwitchFlag == 1) */
    912         if (sis->mixedBlock){
    913             /* do long block portion */
    914             for(sfb = 0; sfb < 8; sfb++)
    915                 sfis->l[sfb]=(char)GetBits(bsi, slen0);
    916             sfb=3;
    917         }
    918         else {
    919             /* all short blocks */
    920             sfb=0;
    921         }
    922         for (      ; sfb < 6; sfb++){
    923             sfis->s[sfb][0] = (char)GetBits(bsi, slen0);
    924             sfis->s[sfb][1] = (char)GetBits(bsi, slen0);
    925             sfis->s[sfb][2] = (char)GetBits(bsi, slen0);
    926         }
    927         for (      ; sfb < 12; sfb++) {
    928             sfis->s[sfb][0] = (char)GetBits(bsi, slen1);
    929             sfis->s[sfb][1] = (char)GetBits(bsi, slen1);
    930             sfis->s[sfb][2] = (char)GetBits(bsi, slen1);
    931         }
    932         /* last sf band not transmitted */
    933         sfis->s[12][0] = sfis->s[12][1] = sfis->s[12][2] = 0;
    934     }
    935     else{
    936         /* long blocks, type 0, 1, or 3 */
    937         if(gr == 0) {
    938             /* first granule */
    939             for (sfb = 0;  sfb < 11; sfb++)
    940                 sfis->l[sfb] = (char)GetBits(bsi, slen0);
    941             for (sfb = 11; sfb < 21; sfb++)
    942                 sfis->l[sfb] = (char)GetBits(bsi, slen1);
    943             return;
    944         }
    945         else{
    946             /* second granule
    947              * scfsi: 0 = different scalefactors for each granule,
    948              *        1 = copy sf's from granule 0 into granule 1
    949              * for block type == 2, scfsi is always 0
    950              */
    951             sfb = 0;
    952             if(scfsi[0])  for(  ; sfb < 6 ; sfb++) sfis->l[sfb] = sfisGr0->l[sfb];
    953             else          for(  ; sfb < 6 ; sfb++) sfis->l[sfb] = (char)GetBits(bsi, slen0);
    954             if(scfsi[1])  for(  ; sfb <11 ; sfb++) sfis->l[sfb] = sfisGr0->l[sfb];
    955             else          for(  ; sfb <11 ; sfb++) sfis->l[sfb] = (char)GetBits(bsi, slen0);
    956             if(scfsi[2])  for(  ; sfb <16 ; sfb++) sfis->l[sfb] = sfisGr0->l[sfb];
    957             else          for(  ; sfb <16 ; sfb++) sfis->l[sfb] = (char)GetBits(bsi, slen1);
    958             if(scfsi[3])  for(  ; sfb <21 ; sfb++) sfis->l[sfb] = sfisGr0->l[sfb];
    959             else          for(  ; sfb <21 ; sfb++) sfis->l[sfb] = (char)GetBits(bsi, slen1);
    960         }
    961         /* last sf band not transmitted */
    962         sfis->l[21] = 0;
    963         sfis->l[22] = 0;
    964     }
    965 }
    966 /***********************************************************************************************************************
    967  * Function:    UnpackSFMPEG2
    968  *
    969  * Description: unpack MPEG 2 scalefactors from bitstream
    970  *
    971  * Inputs:      BitStreamInfo, SideInfoSub, ScaleFactorInfoSub structs for this
    972  *                granule/channel
    973  *              index of current granule and channel
    974  *              ScaleFactorInfoSub from this granule
    975  *              modeExt field from frame header, to tell whether intensity stereo is on
    976  *              ScaleFactorJS struct for storing IIP info used in Dequant()
    977  *
    978  * Outputs:     updated BitStreamInfo struct
    979  *              scalefactors in sfis (short and/or long arrays, as appropriate)
    980  *              updated intensityScale and preFlag flags
    981  *
    982  * Return:      none
    983  *
    984  * Notes:       Illegal Intensity Position = (2^slen) - 1 for MPEG2 scale factors
    985  **********************************************************************************************************************/
    986 void UnpackSFMPEG2(BitStreamInfo_t *bsi, SideInfoSub_t *sis,
    987                    ScaleFactorInfoSub_t *sfis, int gr, int ch, int modeExt, ScaleFactorJS_t *sfjs){
    988 
    989     int i, sfb, sfcIdx, btIdx, nrIdx;// iipTest;
    990     int slen[4], nr[4];
    991     int sfCompress, preFlag, intensityScale;
    992     (void)gr;
    993     sfCompress = sis->sfCompress;
    994     preFlag = 0;
    995     intensityScale = 0;
    996 
    997     /* stereo mode bits (1 = on): bit 1 = mid-side on/off, bit 0 = intensity on/off */
    998     if (! ((modeExt & 0x01) && (ch == 1)) ) {
    999         /* in other words: if ((modeExt & 0x01) == 0 || ch == 0) */
   1000         if (sfCompress < 400) {
   1001             /* max slen = floor[(399/16) / 5] = 4 */
   1002             slen[0] = (sfCompress >> 4) / 5;
   1003             slen[1]= (sfCompress >> 4) % 5;
   1004             slen[2]= (sfCompress & 0x0f) >> 2;
   1005             slen[3]= (sfCompress & 0x03);
   1006             sfcIdx = 0;
   1007         }
   1008         else if(sfCompress < 500){
   1009             /* max slen = floor[(99/4) / 5] = 4 */
   1010             sfCompress -= 400;
   1011             slen[0] = (sfCompress >> 2) / 5;
   1012             slen[1]= (sfCompress >> 2) % 5;
   1013             slen[2]= (sfCompress & 0x03);
   1014             slen[3]= 0;
   1015             sfcIdx = 1;
   1016         }
   1017         else{
   1018             /* max slen = floor[11/3] = 3 (sfCompress = 9 bits in MPEG2) */
   1019             sfCompress -= 500;
   1020             slen[0] = sfCompress / 3;
   1021             slen[1] = sfCompress % 3;
   1022             slen[2] = slen[3] = 0;
   1023             if (sis->mixedBlock) {
   1024                 /* adjust for long/short mix logic (see comment above in NRTab[] definition) */
   1025                 slen[2] = slen[1];
   1026                 slen[1] = slen[0];
   1027             }
   1028             preFlag = 1;
   1029             sfcIdx = 2;
   1030         }
   1031     }
   1032     else{
   1033         /* intensity stereo ch = 1 (right) */
   1034         intensityScale = sfCompress & 0x01;
   1035         sfCompress >>= 1;
   1036         if (sfCompress < 180) {
   1037             /* max slen = floor[35/6] = 5 (from mod 36) */
   1038             slen[0] = (sfCompress / 36);
   1039             slen[1] = (sfCompress % 36) / 6;
   1040             slen[2] = (sfCompress % 36) % 6;
   1041             slen[3] = 0;
   1042             sfcIdx = 3;
   1043         }
   1044         else if (sfCompress < 244){
   1045             /* max slen = floor[63/16] = 3 */
   1046             sfCompress -= 180;
   1047             slen[0] = (sfCompress & 0x3f) >> 4;
   1048             slen[1] = (sfCompress & 0x0f) >> 2;
   1049             slen[2] = (sfCompress & 0x03);
   1050             slen[3] = 0;
   1051             sfcIdx = 4;
   1052         }
   1053         else{
   1054             /* max slen = floor[11/3] = 3 (max sfCompress >> 1 = 511/2 = 255) */
   1055             sfCompress -= 244;
   1056             slen[0] = (sfCompress / 3);
   1057             slen[1] = (sfCompress % 3);
   1058             slen[2] = slen[3] = 0;
   1059             sfcIdx = 5;
   1060         }
   1061     }
   1062     /* set index based on block type: (0,1,3) --> 0, (2 non-mixed) --> 1, (2 mixed) ---> 2 */
   1063     btIdx = 0;
   1064     if (sis->blockType == 2)
   1065         btIdx = (sis->mixedBlock ? 2 : 1);
   1066     for (i = 0; i < 4; i++)
   1067         nr[i] = (int)NRTab[sfcIdx][btIdx][i];
   1068 
   1069     /* save intensity stereo scale factor info */
   1070     if( (modeExt & 0x01) && (ch == 1) ) {
   1071         for (i = 0; i < 4; i++) {
   1072             sfjs->slen[i] = slen[i];
   1073             sfjs->nr[i] = nr[i];
   1074         }
   1075         sfjs->intensityScale = intensityScale;
   1076     }
   1077     sis->preFlag = preFlag;
   1078 
   1079     /* short blocks */
   1080     if(sis->blockType == 2) {
   1081         if(sis->mixedBlock) {
   1082             /* do long block portion */
   1083             //iipTest = (1 << slen[0]) - 1;
   1084             for (sfb=0; sfb < 6; sfb++) {
   1085                 sfis->l[sfb] = (char)GetBits(bsi, slen[0]);
   1086             }
   1087             sfb = 3;  /* start sfb for short */
   1088             nrIdx = 1;
   1089         }
   1090         else{
   1091             /* all short blocks, so start nr, sfb at 0 */
   1092             sfb = 0;
   1093             nrIdx = 0;
   1094         }
   1095 
   1096         /* remaining short blocks, sfb just keeps incrementing */
   1097         for(    ; nrIdx <= 3; nrIdx++) {
   1098             //iipTest = (1 << slen[nrIdx]) - 1;
   1099             for(i=0; i < nr[nrIdx]; i++, sfb++) {
   1100                 sfis->s[sfb][0] = (char)GetBits(bsi, slen[nrIdx]);
   1101                 sfis->s[sfb][1] = (char)GetBits(bsi, slen[nrIdx]);
   1102                 sfis->s[sfb][2] = (char)GetBits(bsi, slen[nrIdx]);
   1103             }
   1104         }
   1105         /* last sf band not transmitted */
   1106         sfis->s[12][0] = sfis->s[12][1] = sfis->s[12][2] = 0;
   1107     }
   1108     else{
   1109         /* long blocks */
   1110         sfb = 0;
   1111         for (nrIdx = 0; nrIdx <= 3; nrIdx++) {
   1112             //iipTest = (1 << slen[nrIdx]) - 1;
   1113             for(i=0; i < nr[nrIdx]; i++, sfb++) {
   1114                 sfis->l[sfb] = (char)GetBits(bsi, slen[nrIdx]);
   1115             }
   1116         }
   1117         /* last sf band not transmitted */
   1118         sfis->l[21] = sfis->l[22] = 0;
   1119 
   1120     }
   1121 }
   1122 /***********************************************************************************************************************
   1123  * Function:    UnpackScaleFactors
   1124  *
   1125  * Description: parse the fields of the MP3 scale factor data section
   1126  *
   1127  * Inputs:      MP3DecInfo structure filled by UnpackFrameHeader() and UnpackSideInfo()
   1128  *              buffer pointing to the MP3 scale factor data
   1129  *              pointer to bit offset (0-7) indicating starting bit in buf[0]
   1130  *              number of bits available in data buffer
   1131  *              index of current granule and channel
   1132  *
   1133  * Outputs:     updated platform-specific ScaleFactorInfo struct
   1134  *              updated bitOffset
   1135  *
   1136  * Return:      length (in bytes) of scale factor data, -1 if null input pointers
   1137  **********************************************************************************************************************/
   1138 int UnpackScaleFactors( unsigned char *buf, int *bitOffset, int bitsAvail, int gr, int ch){
   1139     int bitsUsed;
   1140     unsigned char *startBuf;
   1141     BitStreamInfo_t bitStreamInfo, *bsi;
   1142 
   1143     /* init GetBits reader */
   1144     startBuf = buf;
   1145     bsi = &bitStreamInfo;
   1146     SetBitstreamPointer(bsi, (bitsAvail + *bitOffset + 7) / 8, buf);
   1147     if (*bitOffset)
   1148         GetBits(bsi, *bitOffset);
   1149 
   1150     if (m_MPEGVersion == MPEG1)
   1151         UnpackSFMPEG1(bsi, &m_SideInfoSub[gr][ch], &m_ScaleFactorInfoSub[gr][ch],
   1152                       m_SideInfo->scfsi[ch], gr, &m_ScaleFactorInfoSub[0][ch]);
   1153     else
   1154         UnpackSFMPEG2(bsi, &m_SideInfoSub[gr][ch], &m_ScaleFactorInfoSub[gr][ch],
   1155                       gr, ch, m_FrameHeader->modeExt, m_ScaleFactorJS);
   1156 
   1157     m_MP3DecInfo->part23Length[gr][ch] = m_SideInfoSub[gr][ch].part23Length;
   1158 
   1159     bitsUsed = CalcBitsUsed(bsi, buf, *bitOffset);
   1160     buf += (bitsUsed + *bitOffset) >> 3;
   1161     *bitOffset = (bitsUsed + *bitOffset) & 0x07;
   1162 
   1163     return (buf - startBuf);
   1164 }
   1165 /***********************************************************************************************************************
   1166  * M P 3 D E C
   1167  **********************************************************************************************************************/
   1168 
   1169 /***********************************************************************************************************************
   1170  * Function:    MP3FindSyncWord
   1171  *
   1172  * Description: locate the next byte-alinged sync word in the raw mp3 stream
   1173  *
   1174  * Inputs:      buffer to search for sync word
   1175  *              max number of bytes to search in buffer
   1176  *
   1177  * Outputs:     none
   1178  *
   1179  * Return:      offset to first sync word (bytes from start of buf)
   1180  *              -1 if sync not found after searching nBytes
   1181  **********************************************************************************************************************/
   1182 int MP3FindSyncWord(unsigned char *buf, int nBytes) {
   1183     int i;
   1184 
   1185     /* find byte-aligned syncword - need 12 (MPEG 1,2) or 11 (MPEG 2.5) matching bits */
   1186     for (i = 0; i < nBytes - 1; i++) {
   1187         if ((buf[i + 0] & m_SYNCWORDH) == m_SYNCWORDH
   1188                 && (buf[i + 1] & m_SYNCWORDL) == m_SYNCWORDL)
   1189             return i;
   1190     }
   1191 
   1192     return -1;
   1193 }
   1194 /***********************************************************************************************************************
   1195  * Function:    MP3FindFreeSync
   1196  *
   1197  * Description: figure out number of bytes between adjacent sync words in "free" mode
   1198  *
   1199  * Inputs:      buffer to search for next sync word
   1200  *              the 4-byte frame header starting at the current sync word
   1201  *              max number of bytes to search in buffer
   1202  *
   1203  * Outputs:     none
   1204  *
   1205  * Return:      offset to next sync word, minus any pad byte (i.e. nSlots)
   1206  *              -1 if sync not found after searching nBytes
   1207  *
   1208  * Notes:       this checks that the first 22 bits of the next frame header are the
   1209  *                same as the current frame header, but it's still not foolproof
   1210  *                (could accidentally find a sequence in the bitstream which
   1211  *                 appears to match but is not actually the next frame header)
   1212  *              this could be made more error-resilient by checking several frames
   1213  *                in a row and verifying that nSlots is the same in each case
   1214  *              since free mode requires CBR (see spec) we generally only call
   1215  *                this function once (first frame) then store the result (nSlots)
   1216  *                and just use it from then on
   1217  **********************************************************************************************************************/
   1218 int MP3FindFreeSync(unsigned char *buf, unsigned char firstFH[4], int nBytes){
   1219     int offset = 0;
   1220     unsigned char *bufPtr = buf;
   1221 
   1222     /* loop until we either:
   1223      *  - run out of nBytes (FindMP3SyncWord() returns -1)
   1224      *  - find the next valid frame header (sync word, version, layer, CRC flag, bitrate, and sample rate
   1225      *      in next header must match current header)
   1226      */
   1227     while (1) {
   1228         offset = MP3FindSyncWord(bufPtr, nBytes);
   1229         bufPtr += offset;
   1230         if (offset < 0) {
   1231             return -1;
   1232         } else if ((bufPtr[0] == firstFH[0]) && (bufPtr[1] == firstFH[1])
   1233                 && ((bufPtr[2] & 0xfc) == (firstFH[2] & 0xfc))) {
   1234             /* want to return number of bytes per frame,
   1235              * NOT counting the padding byte, so subtract one if padFlag == 1 */
   1236             if ((firstFH[2] >> 1) & 0x01)
   1237                 bufPtr--;
   1238             return bufPtr - buf;
   1239         }
   1240         bufPtr += 3;
   1241         nBytes -= (offset + 3);
   1242     };
   1243 
   1244     return -1;
   1245 }
   1246 /***********************************************************************************************************************
   1247  * Function:    MP3GetLastFrameInfo
   1248  *
   1249  * Description: get info about last MP3 frame decoded (number of sampled decoded,
   1250  *                sample rate, bitrate, etc.)
   1251  *
   1252  * Inputs:
   1253  *
   1254  * Outputs:     filled-in MP3FrameInfo struct
   1255  *
   1256  * Return:      none
   1257  *
   1258  * Notes:       call this right after calling MP3Decode
   1259  **********************************************************************************************************************/
   1260 void MP3GetLastFrameInfo() {
   1261     if (m_MP3DecInfo->layer != 3){
   1262         m_MP3FrameInfo->bitrate=0;
   1263         m_MP3FrameInfo->nChans=0;
   1264         m_MP3FrameInfo->samprate=0;
   1265         m_MP3FrameInfo->bitsPerSample=0;
   1266         m_MP3FrameInfo->outputSamps=0;
   1267         m_MP3FrameInfo->layer=0;
   1268         m_MP3FrameInfo->version=0;
   1269     }
   1270     else{
   1271         m_MP3FrameInfo->bitrate=m_MP3DecInfo->bitrate;
   1272         m_MP3FrameInfo->nChans=m_MP3DecInfo->nChans;
   1273         m_MP3FrameInfo->samprate=m_MP3DecInfo->samprate;
   1274         m_MP3FrameInfo->bitsPerSample=16;
   1275         m_MP3FrameInfo->outputSamps=m_MP3DecInfo->nChans
   1276                 * (int) samplesPerFrameTab[m_MPEGVersion][m_MP3DecInfo->layer-1];
   1277         m_MP3FrameInfo->layer=m_MP3DecInfo->layer;
   1278         m_MP3FrameInfo->version=m_MPEGVersion;
   1279     }
   1280 }
   1281 int MP3GetSampRate(){return m_MP3FrameInfo->samprate;}
   1282 int MP3GetChannels(){return m_MP3FrameInfo->nChans;}
   1283 int MP3GetBitsPerSample(){return m_MP3FrameInfo->bitsPerSample;}
   1284 int MP3GetBitrate(){return m_MP3FrameInfo->bitrate;}
   1285 int MP3GetOutputSamps(){return m_MP3FrameInfo->outputSamps;}
   1286 /***********************************************************************************************************************
   1287  * Function:    MP3GetNextFrameInfo
   1288  *
   1289  * Description: parse MP3 frame header
   1290  *
   1291  * Inputs:        pointer to buffer containing valid MP3 frame header (located using
   1292  *                MP3FindSyncWord(), above)
   1293  *
   1294  * Outputs:     filled-in MP3FrameInfo struct
   1295  *
   1296  * Return:      error code, defined in mp3dec.h (0 means no error, < 0 means error)
   1297  **********************************************************************************************************************/
   1298 int MP3GetNextFrameInfo(unsigned char *buf) {
   1299 
   1300     if (UnpackFrameHeader( buf) == -1 || m_MP3DecInfo->layer != 3)
   1301         return ERR_MP3_INVALID_FRAMEHEADER;
   1302 
   1303     MP3GetLastFrameInfo();
   1304 
   1305     return ERR_MP3_NONE;
   1306 }
   1307 /***********************************************************************************************************************
   1308  * Function:    MP3ClearBadFrame
   1309  *
   1310  * Description: zero out pcm buffer if error decoding MP3 frame
   1311  *
   1312  * Inputs:      mp3DecInfo struct with correct frame size parameters filled in
   1313  *              pointer pcm output buffer
   1314  *
   1315  * Outputs:     zeroed out pcm buffer
   1316  *
   1317  * Return:      none
   1318  **********************************************************************************************************************/
   1319 void MP3ClearBadFrame( short *outbuf) {
   1320     int i;
   1321     for (i = 0; i < m_MP3DecInfo->nGrans * m_MP3DecInfo->nGranSamps * m_MP3DecInfo->nChans; i++)
   1322         outbuf[i] = 0;
   1323 }
   1324 /***********************************************************************************************************************
   1325  * Function:    MP3Decode
   1326  *
   1327  * Description: decode one frame of MP3 data
   1328  *
   1329  * Inputs:      number of valid bytes remaining in inbuf
   1330  *              pointer to outbuf, big enough to hold one frame of decoded PCM samples
   1331  *              flag indicating whether MP3 data is normal MPEG format (useSize = 0)
   1332  *              or reformatted as "self-contained" frames (useSize = 1)
   1333  *
   1334  * Outputs:     PCM data in outbuf, interleaved LRLRLR... if stereo
   1335  *              number of output samples = nGrans * nGranSamps * nChans
   1336  *              updated inbuf pointer, updated bytesLeft
   1337  *
   1338  * Return:      error code, defined in mp3dec.h (0 means no error, < 0 means error)
   1339  *
   1340  * Notes:       switching useSize on and off between frames in the same stream
   1341  *                is not supported (bit reservoir is not maintained if useSize on)
   1342  **********************************************************************************************************************/
   1343 int MP3Decode( unsigned char *inbuf, int *bytesLeft, short *outbuf, int useSize){
   1344     int offset, bitOffset, mainBits, gr, ch, fhBytes, siBytes, freeFrameBytes;
   1345     int prevBitOffset, sfBlockBits, huffBlockBits;
   1346     unsigned char *mainPtr;
   1347 
   1348     /* unpack frame header */
   1349     fhBytes = UnpackFrameHeader(inbuf);
   1350     if (fhBytes < 0)
   1351         return ERR_MP3_INVALID_FRAMEHEADER; /* don't clear outbuf since we don't know size (failed to parse header) */
   1352     inbuf += fhBytes;
   1353     /* unpack side info */
   1354     siBytes = UnpackSideInfo( inbuf);
   1355     if (siBytes < 0) {
   1356         MP3ClearBadFrame(outbuf);
   1357         return ERR_MP3_INVALID_SIDEINFO;
   1358     }
   1359     inbuf += siBytes;
   1360     *bytesLeft -= (fhBytes + siBytes);
   1361 
   1362     /* if free mode, need to calculate bitrate and nSlots manually, based on frame size */
   1363     if (m_MP3DecInfo->bitrate == 0 || m_MP3DecInfo->freeBitrateFlag) {
   1364         if(!m_MP3DecInfo->freeBitrateFlag){
   1365             /* first time through, need to scan for next sync word and figure out frame size */
   1366             m_MP3DecInfo->freeBitrateFlag=1;
   1367             m_MP3DecInfo->freeBitrateSlots=MP3FindFreeSync(inbuf, inbuf - fhBytes - siBytes, *bytesLeft);
   1368             if(m_MP3DecInfo->freeBitrateSlots < 0){
   1369                 MP3ClearBadFrame(outbuf);
   1370                 m_MP3DecInfo->freeBitrateFlag = 0;
   1371                 return ERR_MP3_FREE_BITRATE_SYNC;
   1372             }
   1373             freeFrameBytes=m_MP3DecInfo->freeBitrateSlots + fhBytes + siBytes;
   1374             m_MP3DecInfo->bitrate=(freeFrameBytes * m_MP3DecInfo->samprate * 8)
   1375                     / (m_MP3DecInfo->nGrans * m_MP3DecInfo->nGranSamps);
   1376         }
   1377         m_MP3DecInfo->nSlots = m_MP3DecInfo->freeBitrateSlots + CheckPadBit(); /* add pad byte, if required */
   1378     }
   1379 
   1380     /* useSize != 0 means we're getting reformatted (RTP) packets (see RFC 3119)
   1381      *  - calling function assembles "self-contained" MP3 frames by shifting any main_data
   1382      *      from the bit reservoir (in previous frames) to AFTER the sync word and side info
   1383      *  - calling function should set mainDataBegin to 0, and tell us exactly how large this
   1384      *      frame is (in bytesLeft)
   1385      */
   1386     if (useSize) {
   1387         m_MP3DecInfo->nSlots = *bytesLeft;
   1388         if (m_MP3DecInfo->mainDataBegin != 0 || m_MP3DecInfo->nSlots <= 0) {
   1389             /* error - non self-contained frame, or missing frame (size <= 0), could do loss concealment here */
   1390             MP3ClearBadFrame(outbuf);
   1391             return ERR_MP3_INVALID_FRAMEHEADER;
   1392         }
   1393 
   1394         /* can operate in-place on reformatted frames */
   1395         m_MP3DecInfo->mainDataBytes = m_MP3DecInfo->nSlots;
   1396         mainPtr = inbuf;
   1397         inbuf += m_MP3DecInfo->nSlots;
   1398         *bytesLeft -= (m_MP3DecInfo->nSlots);
   1399     } else {
   1400         /* out of data - assume last or truncated frame */
   1401         if (m_MP3DecInfo->nSlots > *bytesLeft) {
   1402             MP3ClearBadFrame(outbuf);
   1403             return ERR_MP3_INDATA_UNDERFLOW;
   1404         }
   1405         /* fill main data buffer with enough new data for this frame */
   1406         if (m_MP3DecInfo->mainDataBytes >= m_MP3DecInfo->mainDataBegin) {
   1407             /* adequate "old" main data available (i.e. bit reservoir) */
   1408             memmove(m_MP3DecInfo->mainBuf,
   1409                     m_MP3DecInfo->mainBuf + m_MP3DecInfo->mainDataBytes - m_MP3DecInfo->mainDataBegin,
   1410                     m_MP3DecInfo->mainDataBegin);
   1411             memcpy (m_MP3DecInfo->mainBuf + m_MP3DecInfo->mainDataBegin, inbuf,
   1412                     m_MP3DecInfo->nSlots);
   1413 
   1414             m_MP3DecInfo->mainDataBytes = m_MP3DecInfo->mainDataBegin + m_MP3DecInfo->nSlots;
   1415             inbuf += m_MP3DecInfo->nSlots;
   1416             *bytesLeft -= (m_MP3DecInfo->nSlots);
   1417             mainPtr = m_MP3DecInfo->mainBuf;
   1418         } else {
   1419             /* not enough data in bit reservoir from previous frames (perhaps starting in middle of file) */
   1420             memcpy(m_MP3DecInfo->mainBuf + m_MP3DecInfo->mainDataBytes, inbuf, m_MP3DecInfo->nSlots);
   1421             m_MP3DecInfo->mainDataBytes += m_MP3DecInfo->nSlots;
   1422             inbuf += m_MP3DecInfo->nSlots;
   1423             *bytesLeft -= (m_MP3DecInfo->nSlots);
   1424             MP3ClearBadFrame( outbuf);
   1425             return ERR_MP3_MAINDATA_UNDERFLOW;
   1426         }
   1427     }
   1428     bitOffset = 0;
   1429     mainBits = m_MP3DecInfo->mainDataBytes * 8;
   1430 
   1431     /* decode one complete frame */
   1432     for (gr = 0; gr < m_MP3DecInfo->nGrans; gr++) {
   1433         for (ch = 0; ch < m_MP3DecInfo->nChans; ch++) {
   1434             /* unpack scale factors and compute size of scale factor block */
   1435             prevBitOffset = bitOffset;
   1436             offset = UnpackScaleFactors( mainPtr, &bitOffset,
   1437                     mainBits, gr, ch);
   1438             sfBlockBits = 8 * offset - prevBitOffset + bitOffset;
   1439             huffBlockBits = m_MP3DecInfo->part23Length[gr][ch] - sfBlockBits;
   1440             mainPtr += offset;
   1441             mainBits -= sfBlockBits;
   1442 
   1443             if (offset < 0 || mainBits < huffBlockBits) {
   1444                 MP3ClearBadFrame(outbuf);
   1445                 return ERR_MP3_INVALID_SCALEFACT;
   1446             }
   1447             /* decode Huffman code words */
   1448             prevBitOffset = bitOffset;
   1449             offset = DecodeHuffman( mainPtr, &bitOffset, huffBlockBits, gr, ch);
   1450             if (offset < 0) {
   1451                 MP3ClearBadFrame( outbuf);
   1452                 return ERR_MP3_INVALID_HUFFCODES;
   1453             }
   1454             mainPtr += offset;
   1455             mainBits -= (8 * offset - prevBitOffset + bitOffset);
   1456         }
   1457         /* dequantize coefficients, decode stereo, reorder short blocks */
   1458         if (MP3Dequantize( gr) < 0) {
   1459             MP3ClearBadFrame(outbuf);
   1460             return ERR_MP3_INVALID_DEQUANTIZE;
   1461         }
   1462 
   1463         /* alias reduction, inverse MDCT, overlap-add, frequency inversion */
   1464         for (ch = 0; ch < m_MP3DecInfo->nChans; ch++) {
   1465             if (IMDCT( gr, ch) < 0) {
   1466                 MP3ClearBadFrame(outbuf);
   1467                 return ERR_MP3_INVALID_IMDCT;
   1468             }
   1469         }
   1470         /* subband transform - if stereo, interleaves pcm LRLRLR */
   1471         if (Subband(
   1472                 outbuf + gr * m_MP3DecInfo->nGranSamps * m_MP3DecInfo->nChans)
   1473                 < 0) {
   1474             MP3ClearBadFrame(outbuf);
   1475             return ERR_MP3_INVALID_SUBBAND;
   1476         }
   1477     }
   1478     MP3GetLastFrameInfo();
   1479     return ERR_MP3_NONE;
   1480 }
   1481 
   1482 /***********************************************************************************************************************
   1483  * Function:    MP3Decoder_ClearBuffer
   1484  *
   1485  * Description: clear all the memory needed for the MP3 decoder
   1486  *
   1487  * Inputs:      none
   1488  *
   1489  * Outputs:     none
   1490  *
   1491  * Return:      none
   1492  *
   1493  **********************************************************************************************************************/
   1494 void MP3Decoder_ClearBuffer(void) {
   1495 
   1496     /* important to do this - DSP primitives assume a bunch of state variables are 0 on first use */
   1497     memset( m_MP3DecInfo,         0, sizeof(MP3DecInfo_t));                                    //Clear MP3DecInfo
   1498     memset(&m_ScaleFactorInfoSub, 0, sizeof(ScaleFactorInfoSub_t)*(m_MAX_NGRAN *m_MAX_NCHAN)); //Clear ScaleFactorInfo
   1499     memset( m_SideInfo,           0, sizeof(SideInfo_t));                                      //Clear SideInfo
   1500     memset( m_FrameHeader,        0, sizeof(FrameHeader_t));                                   //Clear FrameHeader
   1501     memset( m_HuffmanInfo,        0, sizeof(HuffmanInfo_t));                                   //Clear HuffmanInfo
   1502     memset( m_DequantInfo,        0, sizeof(DequantInfo_t));                                   //Clear DequantInfo
   1503     memset( m_IMDCTInfo,          0, sizeof(IMDCTInfo_t));                                     //Clear IMDCTInfo
   1504     memset( m_SubbandInfo,        0, sizeof(SubbandInfo_t));                                   //Clear SubbandInfo
   1505     memset(&m_CriticalBandInfo,   0, sizeof(CriticalBandInfo_t)*m_MAX_NCHAN);                  //Clear CriticalBandInfo
   1506     memset( m_ScaleFactorJS,      0, sizeof(ScaleFactorJS_t));                                 //Clear ScaleFactorJS
   1507     memset(&m_SideInfoSub,        0, sizeof(SideInfoSub_t)*(m_MAX_NGRAN *m_MAX_NCHAN));        //Clear SideInfoSub
   1508     memset(&m_SFBandTable,        0, sizeof(SFBandTable_t));                                   //Clear SFBandTable
   1509     memset( m_MP3FrameInfo,       0, sizeof(MP3FrameInfo_t));                                  //Clear MP3FrameInfo
   1510 
   1511     return;
   1512 
   1513 }
   1514 /***********************************************************************************************************************
   1515  * Function:    MP3Decoder_AllocateBuffers
   1516  *
   1517  * Description: allocate all the memory needed for the MP3 decoder
   1518  *
   1519  * Inputs:      none
   1520  *
   1521  * Outputs:     none
   1522  *
   1523  * Return:      true if every decoder buffer is allocated and has been cleared,
   1524  *                false if one or more allocations failed
   1525  *
   1526  * Notes:       if one or more mallocs fail, function frees any buffers already
   1527  *                allocated before returning
   1528  *
   1529  **********************************************************************************************************************/
   1530 
   1531 #ifdef CONFIG_IDF_TARGET_ESP32S3
   1532     // ESP32-S3: If there is PSRAM, prefer it
   1533     #define __malloc_heap_psram(size) \
   1534         heap_caps_malloc_prefer(size, 2, MALLOC_CAP_DEFAULT|MALLOC_CAP_SPIRAM, MALLOC_CAP_DEFAULT|MALLOC_CAP_INTERNAL)
   1535 #else
   1536     // ESP32, PSRAM is too slow, prefer SRAM
   1537     #define __malloc_heap_psram(size) \
   1538         heap_caps_malloc_prefer(size, 2, MALLOC_CAP_DEFAULT|MALLOC_CAP_INTERNAL, MALLOC_CAP_DEFAULT|MALLOC_CAP_SPIRAM)
   1539 #endif
   1540 
   1541 bool MP3Decoder_AllocateBuffers(void) {
   1542     if(!m_MP3DecInfo)       {m_MP3DecInfo    = (MP3DecInfo_t*)    __malloc_heap_psram(sizeof(MP3DecInfo_t)   );}
   1543     if(!m_FrameHeader)      {m_FrameHeader   = (FrameHeader_t*)   __malloc_heap_psram(sizeof(FrameHeader_t)  );}
   1544     if(!m_SideInfo)         {m_SideInfo      = (SideInfo_t*)      __malloc_heap_psram(sizeof(SideInfo_t)     );}
   1545     if(!m_ScaleFactorJS)    {m_ScaleFactorJS = (ScaleFactorJS_t*) __malloc_heap_psram(sizeof(ScaleFactorJS_t));}
   1546     if(!m_HuffmanInfo)      {m_HuffmanInfo   = (HuffmanInfo_t*)   __malloc_heap_psram(sizeof(HuffmanInfo_t)  );}
   1547     if(!m_DequantInfo)      {m_DequantInfo   = (DequantInfo_t*)   __malloc_heap_psram(sizeof(DequantInfo_t)  );}
   1548     if(!m_IMDCTInfo)        {m_IMDCTInfo     = (IMDCTInfo_t*)     __malloc_heap_psram(sizeof(IMDCTInfo_t)    );}
   1549     if(!m_SubbandInfo)      {m_SubbandInfo   = (SubbandInfo_t*)   __malloc_heap_psram(sizeof(SubbandInfo_t)  );}
   1550     if(!m_MP3FrameInfo)     {m_MP3FrameInfo  = (MP3FrameInfo_t*)  __malloc_heap_psram(sizeof(MP3FrameInfo_t) );}
   1551 
   1552     if(!m_MP3DecInfo || !m_FrameHeader || !m_SideInfo || !m_ScaleFactorJS || !m_HuffmanInfo ||
   1553        !m_DequantInfo || !m_IMDCTInfo || !m_SubbandInfo || !m_MP3FrameInfo) {
   1554         MP3Decoder_FreeBuffers();
   1555         log_e("not enough memory to allocate mp3decoder buffers");
   1556         return false;
   1557     }
   1558     MP3Decoder_ClearBuffer();
   1559     return true;
   1560 }
   1561 /***********************************************************************************************************************
   1562  * Function:    MP3Decoder_FreeBuffers
   1563  *
   1564  * Description: frees all the memory used by the MP3 decoder
   1565  *
   1566  * Inputs:      none (operates on the decoder's file-scope buffer pointers)
   1567  *
   1568  * Outputs:     none
   1569  *
   1570  * Return:      none
   1571  *
   1572  * Notes:       safe to call even if some buffers were not allocated
   1573  **********************************************************************************************************************/
   1574 void MP3Decoder_FreeBuffers()
   1575 {
   1576 //    uint32_t i = ESP.getFreeHeap();
   1577 
   1578     if(m_MP3DecInfo)        {free(m_MP3DecInfo);      m_MP3DecInfo=NULL;}
   1579     if(m_FrameHeader)       {free(m_FrameHeader);     m_FrameHeader=NULL;}
   1580     if(m_SideInfo)          {free(m_SideInfo);        m_SideInfo=NULL;}
   1581     if(m_ScaleFactorJS )    {free(m_ScaleFactorJS);   m_ScaleFactorJS=NULL;}
   1582     if(m_HuffmanInfo)       {free(m_HuffmanInfo);     m_HuffmanInfo=NULL;}
   1583     if(m_DequantInfo)       {free(m_DequantInfo);     m_DequantInfo=0;}
   1584     if(m_IMDCTInfo)         {free(m_IMDCTInfo);       m_IMDCTInfo=0;}
   1585     if(m_SubbandInfo)       {free(m_SubbandInfo);     m_SubbandInfo=0;}
   1586     if(m_MP3FrameInfo)      {free(m_MP3FrameInfo);    m_MP3FrameInfo=0;}
   1587 
   1588 //    log_i("MP3Decoder: %lu bytes memory was freed", ESP.getFreeHeap() - i);
   1589 }
   1590 
   1591 /***********************************************************************************************************************
   1592  * H U F F M A N N
   1593  **********************************************************************************************************************/
   1594 
   1595 /***********************************************************************************************************************
   1596  * Function:    DecodeHuffmanPairs
   1597  *
   1598  * Description: decode 2-way vector Huffman codes in the "bigValues" region of spectrum
   1599  *
   1600  * Inputs:      pointer to xy buffer to receive decoded values
   1601  *              number of values to decode (two per codeword)
   1602  *              index of Huffman table to use
   1603  *              number of bits remaining in bitstream
   1604  *              bitstream buffer pointing to start of pair-wise codes, and bit offset into buf[0]
   1605  *
   1606  * Outputs:     pairs of decoded coefficients in vwxy
   1607  *              updated BitStreamInfo struct
   1608  *
   1609  * Return:      number of bits used, or -1 if out of bits
   1610  *
   1611  * Notes:       assumes that nVals is an even number
   1612  *              si_huff.bit tests every Huffman codeword in every table (though not
   1613  *                necessarily all linBits outputs for x,y > 15)
   1614  **********************************************************************************************************************/
   1615 // no improvement with section=data
   1616 int DecodeHuffmanPairs(int *xy, int nVals, int tabIdx, int bitsLeft, unsigned char *buf, int bitOffset){
   1617     int i, x, y;
   1618     int cachedBits, padBits, len, startBits, linBits, maxBits, minBits;
   1619     HuffTabType_t tabType;
   1620     unsigned short cw, *tBase, *tCurr;
   1621     unsigned int cache;
   1622 
   1623     if (nVals <= 0)
   1624         return 0;
   1625 
   1626     if (bitsLeft < 0)
   1627         return -1;
   1628     startBits = bitsLeft;
   1629 
   1630     tBase = (unsigned short *) (huffTable + huffTabOffset[tabIdx]);
   1631     linBits = huffTabLookup[tabIdx].linBits;
   1632     tabType = (HuffTabType_t)huffTabLookup[tabIdx].tabType;
   1633 
   1634 //    assert(!(nVals & 0x01));
   1635 //    assert(tabIdx < m_HUFF_PAIRTABS);
   1636 //    assert(tabIdx >= 0);
   1637 //    assert(tabType != invalidTab);
   1638 
   1639     if((nVals & 0x01)){log_i("assert(!(nVals & 0x01))"); return -1;}
   1640     if(!(tabIdx < m_HUFF_PAIRTABS)){log_i("assert(tabIdx < m_HUFF_PAIRTABS)"); return -1;}
   1641     if(!(tabIdx >= 0)){log_i("(tabIdx >= 0)"); return -1;}
   1642     if(!(tabType != invalidTab)){log_i("(tabType != invalidTab)"); return -1;}
   1643 
   1644 
   1645     /* initially fill cache with any partial byte */
   1646     cache = 0;
   1647     cachedBits = (8 - bitOffset) & 0x07;
   1648     if (cachedBits)
   1649         cache = (unsigned int) (*buf++) << (32 - cachedBits);
   1650     bitsLeft -= cachedBits;
   1651 
   1652     if (tabType == noBits) {
   1653         /* table 0, no data, x = y = 0 */
   1654         for (i = 0; i < nVals; i += 2) {
   1655             xy[i + 0] = 0;
   1656             xy[i + 1] = 0;
   1657         }
   1658         return 0;
   1659     } else if (tabType == oneShot) {
   1660         /* single lookup, no escapes */
   1661 
   1662         maxBits = (int)( (((unsigned short)(pgm_read_word(&tBase[0])) >>  0) & 0x000f));
   1663         tBase++;
   1664         padBits = 0;
   1665         while (nVals > 0) {
   1666             /* refill cache - assumes cachedBits <= 16 */
   1667             if (bitsLeft >= 16) {
   1668                 /* load 2 new bytes into left-justified cache */
   1669                 cache |= (unsigned int) (*buf++) << (24 - cachedBits);
   1670                 cache |= (unsigned int) (*buf++) << (16 - cachedBits);
   1671                 cachedBits += 16;
   1672                 bitsLeft -= 16;
   1673             } else {
   1674                 /* last time through, pad cache with zeros and drain cache */
   1675                 if (cachedBits + bitsLeft <= 0)
   1676                     return -1;
   1677                 if (bitsLeft > 0)
   1678                     cache |= (unsigned int) (*buf++) << (24 - cachedBits);
   1679                 if (bitsLeft > 8)
   1680                     cache |= (unsigned int) (*buf++) << (16 - cachedBits);
   1681                 cachedBits += bitsLeft;
   1682                 bitsLeft = 0;
   1683 
   1684                 cache &= (signed int) 0x80000000 >> (cachedBits - 1);
   1685                 padBits = 11;
   1686                 cachedBits += padBits; /* okay if this is > 32 (0's automatically shifted in from right) */
   1687             }
   1688 
   1689             /* largest maxBits = 9, plus 2 for sign bits, so make sure cache has at least 11 bits */
   1690             while (nVals > 0 && cachedBits >= 11) {
   1691                 cw = pgm_read_word(&tBase[cache >> (32 - maxBits)]);
   1692 
   1693                 len=(int)( (((unsigned short)(cw)) >> 12) & 0x000f);
   1694                 cachedBits -= len;
   1695                 cache <<= len;
   1696 
   1697                 x=(int)( (((unsigned short)(cw)) >>  4) & 0x000f);
   1698                 if (x) {
   1699                     (x) |= ((cache) & 0x80000000);
   1700                     cache <<= 1;
   1701                     cachedBits--;
   1702                 }
   1703 
   1704 
   1705 
   1706                 y=(int)( (((unsigned short)(cw)) >>  8) & 0x000f);
   1707                 if (y) {
   1708                     (y) |= ((cache) & 0x80000000);
   1709                     cache <<= 1;
   1710                     cachedBits--;
   1711                 }
   1712 
   1713                 /* ran out of bits - should never have consumed padBits */
   1714                 if (cachedBits < padBits)
   1715                     return -1;
   1716 
   1717                 *xy++ = x;
   1718                 *xy++ = y;
   1719                 nVals -= 2;
   1720             }
   1721         }
   1722         bitsLeft += (cachedBits - padBits);
   1723         return (startBits - bitsLeft);
   1724     } else if (tabType == loopLinbits || tabType == loopNoLinbits) {
   1725         tCurr = tBase;
   1726         padBits = 0;
   1727         while (nVals > 0) {
   1728             /* refill cache - assumes cachedBits <= 16 */
   1729             if (bitsLeft >= 16) {
   1730                 /* load 2 new bytes into left-justified cache */
   1731                 cache |= (unsigned int) (*buf++) << (24 - cachedBits);
   1732                 cache |= (unsigned int) (*buf++) << (16 - cachedBits);
   1733                 cachedBits += 16;
   1734                 bitsLeft -= 16;
   1735             } else {
   1736                 /* last time through, pad cache with zeros and drain cache */
   1737                 if (cachedBits + bitsLeft <= 0)
   1738                     return -1;
   1739                 if (bitsLeft > 0)
   1740                     cache |= (unsigned int) (*buf++) << (24 - cachedBits);
   1741                 if (bitsLeft > 8)
   1742                     cache |= (unsigned int) (*buf++) << (16 - cachedBits);
   1743                 cachedBits += bitsLeft;
   1744                 bitsLeft = 0;
   1745 
   1746                 cache &= (signed int) 0x80000000 >> (cachedBits - 1);
   1747                 padBits = 11;
   1748                 cachedBits += padBits; /* okay if this is > 32 (0's automatically shifted in from right) */
   1749             }
   1750 
   1751             /* largest maxBits = 9, plus 2 for sign bits, so make sure cache has at least 11 bits */
   1752             while (nVals > 0 && cachedBits >= 11) {
   1753                 maxBits = (int)( (((unsigned short)(pgm_read_word(&tCurr[0]))) >>  0) & 0x000f);
   1754                 cw = pgm_read_word(&tCurr[(cache >> (32 - maxBits)) + 1]);
   1755                 len=(int)( (((unsigned short)(cw)) >> 12) & 0x000f);
   1756                 if (!len) {
   1757                     cachedBits -= maxBits;
   1758                     cache <<= maxBits;
   1759                     tCurr += cw;
   1760                     continue;
   1761                 }
   1762                 cachedBits -= len;
   1763                 cache <<= len;
   1764 
   1765                 x=(int)( (((unsigned short)(cw)) >>  4) & 0x000f);
   1766                 y=(int)( (((unsigned short)(cw)) >>  8) & 0x000f);
   1767 
   1768                 if (x == 15 && tabType == loopLinbits) {
   1769                     minBits = linBits + 1 + (y ? 1 : 0);
   1770                     if (cachedBits + bitsLeft < minBits)
   1771                         return -1;
   1772                     while (cachedBits < minBits) {
   1773                         cache |= (unsigned int) (*buf++) << (24 - cachedBits);
   1774                         cachedBits += 8;
   1775                         bitsLeft -= 8;
   1776                     }
   1777                     if (bitsLeft < 0) {
   1778                         cachedBits += bitsLeft;
   1779                         bitsLeft = 0;
   1780                         cache &= (signed int) 0x80000000 >> (cachedBits - 1);
   1781                     }
   1782                     x += (int) (cache >> (32 - linBits));
   1783                     cachedBits -= linBits;
   1784                     cache <<= linBits;
   1785                 }
   1786                 if (x) {
   1787                     (x) |= ((cache) & 0x80000000);
   1788                     cache <<= 1;
   1789                     cachedBits--;
   1790                 }
   1791 
   1792                 if (y == 15 && tabType == loopLinbits) {
   1793                     minBits = linBits + 1;
   1794                     if (cachedBits + bitsLeft < minBits)
   1795                         return -1;
   1796                     while (cachedBits < minBits) {
   1797                         cache |= (unsigned int) (*buf++) << (24 - cachedBits);
   1798                         cachedBits += 8;
   1799                         bitsLeft -= 8;
   1800                     }
   1801                     if (bitsLeft < 0) {
   1802                         cachedBits += bitsLeft;
   1803                         bitsLeft = 0;
   1804                         cache &= (signed int) 0x80000000 >> (cachedBits - 1);
   1805                     }
   1806                     y += (int) (cache >> (32 - linBits));
   1807                     cachedBits -= linBits;
   1808                     cache <<= linBits;
   1809                 }
   1810                 if (y) {
   1811                     (y) |= ((cache) & 0x80000000);
   1812                     cache <<= 1;
   1813                     cachedBits--;
   1814                 }
   1815 
   1816                 /* ran out of bits - should never have consumed padBits */
   1817                 if (cachedBits < padBits)
   1818                     return -1;
   1819 
   1820                 *xy++ = x;
   1821                 *xy++ = y;
   1822                 nVals -= 2;
   1823                 tCurr = tBase;
   1824             }
   1825         }
   1826         bitsLeft += (cachedBits - padBits);
   1827         return (startBits - bitsLeft);
   1828     }
   1829 
   1830     /* error in bitstream - trying to access unused Huffman table */
   1831     return -1;
   1832 }
   1833 
   1834 /***********************************************************************************************************************
   1835  * Function:    DecodeHuffmanQuads
   1836  *
   1837  * Description: decode 4-way vector Huffman codes in the "count1" region of spectrum
   1838  *
   1839  * Inputs:      pointer to vwxy buffer to receive decoded values
   1840  *              maximum number of values to decode (four per codeword)
   1841  *              index of quadword table (0 = table A, 1 = table B)
   1842  *              number of bits remaining in bitstream
   1843  *              bitstream buffer pointing to start of quadword codes, and bit offset into buf[0]
   1844  *
   1845  * Outputs:     quadruples of decoded coefficients in vwxy
   1846  *              updated BitStreamInfo struct
   1847  *
   1848  * Return:      index of the first "zero_part" value (index of the first sample
   1849  *                of the quad word after which all samples are 0)
   1850  *
   1851  * Notes:        si_huff.bit tests every vwxy output in both quad tables
   1852  **********************************************************************************************************************/
   1853 // no improvement with section=data
   1854 int DecodeHuffmanQuads(int *vwxy, int nVals, int tabIdx, int bitsLeft, unsigned char *buf, int bitOffset){
   1855     int i, v, w, x, y;
   1856     int len, maxBits, cachedBits, padBits;
   1857     unsigned int cache;
   1858     unsigned char cw, *tBase;
   1859 
   1860     if(bitsLeft<=0) return 0;
   1861 
   1862     tBase = (unsigned char *) quadTable + quadTabOffset[tabIdx];
   1863     maxBits = quadTabMaxBits[tabIdx];
   1864 
   1865     /* initially fill cache with any partial byte */
   1866     cache = 0;
   1867     cachedBits=(8-bitOffset) & 0x07;
   1868     if(cachedBits)cache=(unsigned int)(*buf++) << (32 - cachedBits);
   1869     bitsLeft -= cachedBits;
   1870 
   1871     i = padBits = 0;
   1872     while (i < (nVals - 3)) {
   1873         /* refill cache - assumes cachedBits <= 16 */
   1874         if (bitsLeft >= 16) {
   1875             /* load 2 new bytes into left-justified cache */
   1876             cache |= (unsigned int) (*buf++) << (24 - cachedBits);
   1877             cache |= (unsigned int) (*buf++) << (16 - cachedBits);
   1878             cachedBits += 16;
   1879             bitsLeft -= 16;
   1880         } else {
   1881             /* last time through, pad cache with zeros and drain cache */
   1882             if(cachedBits+bitsLeft <= 0) return i;
   1883             if(bitsLeft>0) cache |= (unsigned int)(*buf++)<<(24-cachedBits);
   1884             if (bitsLeft > 8) cache |= (unsigned int)(*buf++)<<(16 - cachedBits);
   1885             cachedBits += bitsLeft;
   1886             bitsLeft = 0;
   1887 
   1888             cache &= (signed int) 0x80000000 >> (cachedBits - 1);
   1889             padBits = 10;
   1890             cachedBits += padBits; /* okay if this is > 32 (0's automatically shifted in from right) */
   1891         }
   1892 
   1893         /* largest maxBits = 6, plus 4 for sign bits, so make sure cache has at least 10 bits */
   1894         while(i < (nVals - 3) && cachedBits >= 10){
   1895             cw = pgm_read_byte(&tBase[cache >> (32 - maxBits)]);
   1896             len=(int)( (((unsigned char)(cw)) >> 4) & 0x0f);
   1897             cachedBits -= len;
   1898             cache <<= len;
   1899 
   1900             v=(int)( (((unsigned char)(cw)) >> 3) & 0x01);
   1901             if (v) {
   1902                 (v) |= ((cache) & 0x80000000);
   1903                 cache <<= 1;
   1904                 cachedBits--;
   1905             }
   1906             w=(int)( (((unsigned char)(cw)) >> 2) & 0x01);
   1907             if (w) {
   1908                 (w) |= ((cache) & 0x80000000);
   1909                 cache <<= 1;
   1910                 cachedBits--;
   1911             }
   1912 
   1913             x=(int)( (((unsigned char)(cw)) >> 1) & 0x01);
   1914             if (x) {
   1915                 (x) |= ((cache) & 0x80000000);
   1916                 cache <<= 1;
   1917                 cachedBits--;
   1918             }
   1919 
   1920             y=(int)( (((unsigned char)(cw)) >> 0) & 0x01);
   1921             if (y) {
   1922                 (y) |= ((cache) & 0x80000000);
   1923                 cache <<= 1;
   1924                 cachedBits--;
   1925             }
   1926 
   1927             /* ran out of bits - okay (means we're done) */
   1928             if (cachedBits < padBits)
   1929                 return i;
   1930 
   1931             *vwxy++ = v;
   1932             *vwxy++ = w;
   1933             *vwxy++ = x;
   1934             *vwxy++ = y;
   1935             i += 4;
   1936         }
   1937     }
   1938 
   1939     /* decoded max number of quad values */
   1940     return i;
   1941 }
   1942 
   1943 /***********************************************************************************************************************
   1944  * Function:    DecodeHuffman
   1945  *
   1946  * Description: decode one granule, one channel worth of Huffman codes
   1947  *
   1948  * Inputs:      MP3DecInfo structure filled by UnpackFrameHeader(), UnpackSideInfo(),
   1949  *                and UnpackScaleFactors() (for this granule)
   1950  *              buffer pointing to start of Huffman data in MP3 frame
   1951  *              pointer to bit offset (0-7) indicating starting bit in buf[0]
   1952  *              number of bits in the Huffman data section of the frame
   1953  *                (could include padding bits)
   1954  *              index of current granule and channel
   1955  *
   1956  * Outputs:     decoded coefficients in hi->huffDecBuf[ch] (hi pointer in mp3DecInfo)
   1957  *              updated bitOffset
   1958  *
   1959  * Return:      length (in bytes) of Huffman codes
   1960  *              bitOffset also returned in parameter (0 = MSB, 7 = LSB of
   1961  *                byte located at buf + offset)
   1962  *              -1 if huffBlockBits < 0, or if pair decoding fails or runs
   1963  *                out of bits prematurely (invalid bitstream)
   1964  **********************************************************************************************************************/
   1965 // .data about 1ms faster per frame
   1966 int DecodeHuffman(unsigned char *buf, int *bitOffset, int huffBlockBits, int gr, int ch){
   1967 
   1968     int r1Start, r2Start, rEnd[4]; /* region boundaries */
   1969     int i, w, bitsUsed, bitsLeft;
   1970     unsigned char *startBuf = buf;
   1971 
   1972     SideInfoSub_t *sis;
   1973     sis = &m_SideInfoSub[gr][ch];
   1974     //hi = (HuffmanInfo_t*) (m_MP3DecInfo->HuffmanInfoPS);
   1975 
   1976     if (huffBlockBits < 0)
   1977         return -1;
   1978 
   1979     /* figure out region boundaries (the first 2*bigVals coefficients divided into 3 regions) */
   1980     if (sis->winSwitchFlag && sis->blockType == 2) {
   1981         if (sis->mixedBlock == 0) {
   1982             r1Start = m_SFBandTable.s[(sis->region0Count + 1) / 3] * 3;
   1983         } else {
   1984             if (m_MPEGVersion == MPEG1) {
   1985                 r1Start = m_SFBandTable.l[sis->region0Count + 1];
   1986             } else {
   1987                 /* see MPEG2 spec for explanation */
   1988                 w = m_SFBandTable.s[4] - m_SFBandTable.s[3];
   1989                 r1Start = m_SFBandTable.l[6] + 2 * w;
   1990             }
   1991         }
   1992         r2Start = m_MAX_NSAMP; /* short blocks don't have region 2 */
   1993     } else {
   1994         r1Start = m_SFBandTable.l[sis->region0Count + 1];
   1995         r2Start = m_SFBandTable.l[sis->region0Count + 1 + sis->region1Count + 1];
   1996     }
   1997 
   1998     /* offset rEnd index by 1 so first region = rEnd[1] - rEnd[0], etc. */
   1999     rEnd[3] = (m_MAX_NSAMP < (2 * sis->nBigvals) ? m_MAX_NSAMP : (2 * sis->nBigvals));
   2000     rEnd[2] = (r2Start < rEnd[3] ? r2Start : rEnd[3]);
   2001     rEnd[1] = (r1Start < rEnd[3] ? r1Start : rEnd[3]);
   2002     rEnd[0] = 0;
   2003 
   2004     /* rounds up to first all-zero pair (we don't check last pair for (x,y) == (non-zero, zero)) */
   2005     m_HuffmanInfo->nonZeroBound[ch] = rEnd[3];
   2006 
   2007     /* decode Huffman pairs (rEnd[i] are always even numbers) */
   2008     bitsLeft = huffBlockBits;
   2009     for (i = 0; i < 3; i++) {
   2010         bitsUsed = DecodeHuffmanPairs(m_HuffmanInfo->huffDecBuf[ch] + rEnd[i],
   2011                 rEnd[i + 1] - rEnd[i], sis->tableSelect[i], bitsLeft, buf,
   2012                 *bitOffset);
   2013         if (bitsUsed < 0 || bitsUsed > bitsLeft) /* error - overran end of bitstream */
   2014             return -1;
   2015 
   2016         /* update bitstream position */
   2017         buf += (bitsUsed + *bitOffset) >> 3;
   2018         *bitOffset = (bitsUsed + *bitOffset) & 0x07;
   2019         bitsLeft -= bitsUsed;
   2020     }
   2021 
   2022     /* decode Huffman quads (if any) */
   2023     m_HuffmanInfo->nonZeroBound[ch] += DecodeHuffmanQuads(m_HuffmanInfo->huffDecBuf[ch] + rEnd[3],
   2024             m_MAX_NSAMP - rEnd[3], sis->count1TableSelect, bitsLeft, buf,
   2025             *bitOffset);
   2026 
   2027     assert(m_HuffmanInfo->nonZeroBound[ch] <= m_MAX_NSAMP);
   2028     for (i = m_HuffmanInfo->nonZeroBound[ch]; i < m_MAX_NSAMP; i++)
   2029         m_HuffmanInfo->huffDecBuf[ch][i] = 0;
   2030 
   2031     /* If bits used for 576 samples < huffBlockBits, then the extras are considered
   2032      *  to be stuffing bits (throw away, but need to return correct bitstream position)
   2033      */
   2034     buf += (bitsLeft + *bitOffset) >> 3;
   2035     *bitOffset = (bitsLeft + *bitOffset) & 0x07;
   2036 
   2037     return (buf - startBuf);
   2038 }
   2039 
   2040 /***********************************************************************************************************************
   2041  * D E Q U A N T
   2042  **********************************************************************************************************************/
   2043 
   2044 /***********************************************************************************************************************
   2045  * Function:    MP3Dequantize
   2046  *
   2047  * Description: dequantize coefficients, decode stereo, reorder short blocks
   2048  *                (one granule-worth)
   2049  *
   2050  * Inputs:      index of current granule
   2051  *
   2052  * Outputs:     dequantized and reordered coefficients in hi->huffDecBuf
   2053  *                (one granule-worth, all channels), format = Q26
   2054  *              operates in-place on huffDecBuf but also needs di->workBuf
   2055  *              updated hi->nonZeroBound index for both channels
   2056  *
   2057  * Return:      0 on success, -1 if null input pointers
   2058  *
   2059  * Notes:       In calling output Q(DQ_FRACBITS_OUT), we assume an implicit bias
   2060  *                of 2^15. Some (floating-point) reference implementations factor this
   2061  *                into the 2^(0.25 * gain) scaling explicitly. But to avoid precision
   2062  *                loss, we don't do that. Instead take it into account in the final
   2063  *                round to PCM (>> by 15 less than we otherwise would have).
   2064  *              Equivalently, we can think of the dequantized coefficients as
   2065  *                Q(DQ_FRACBITS_OUT - 15) with no implicit bias.
   2066  **********************************************************************************************************************/
   2067 int MP3Dequantize(int gr){
   2068     int i, ch, nSamps, mOut[2];
   2069     CriticalBandInfo_t *cbi;
   2070     cbi = &m_CriticalBandInfo[0];
   2071     mOut[0] = mOut[1] = 0;
   2072 
   2073     /* dequantize all the samples in each channel */
   2074     for (ch = 0; ch < m_MP3DecInfo->nChans; ch++) {
   2075         m_HuffmanInfo->gb[ch] = DequantChannel(m_HuffmanInfo->huffDecBuf[ch], m_DequantInfo->workBuf,
   2076                 &m_HuffmanInfo->nonZeroBound[ch], &m_SideInfoSub[gr][ch], &m_ScaleFactorInfoSub[gr][ch], &cbi[ch]);
   2077     }
   2078 
   2079     /* joint stereo processing assumes one guard bit in input samples
   2080      * it's extremely rare not to have at least one gb, so if this is the case
   2081      *   just make a pass over the data and clip to [-2^30+1, 2^30-1]
   2082      * in practice this may never happen
   2083      */
   2084     if (m_FrameHeader->modeExt && (m_HuffmanInfo->gb[0] < 1 || m_HuffmanInfo->gb[1] < 1)) {
   2085         for (i = 0; i < m_HuffmanInfo->nonZeroBound[0]; i++) {
   2086             if (m_HuffmanInfo->huffDecBuf[0][i] < -0x3fffffff)  m_HuffmanInfo->huffDecBuf[0][i] = -0x3fffffff;
   2087             if (m_HuffmanInfo->huffDecBuf[0][i] >  0x3fffffff)  m_HuffmanInfo->huffDecBuf[0][i] =  0x3fffffff;
   2088         }
   2089         for (i = 0; i < m_HuffmanInfo->nonZeroBound[1]; i++) {
   2090             if (m_HuffmanInfo->huffDecBuf[1][i] < -0x3fffffff)  m_HuffmanInfo->huffDecBuf[1][i] = -0x3fffffff;
   2091             if (m_HuffmanInfo->huffDecBuf[1][i] >  0x3fffffff)  m_HuffmanInfo->huffDecBuf[1][i] =  0x3fffffff;
   2092         }
   2093     }
   2094 
   2095     /* do mid-side stereo processing, if enabled */
   2096     if (m_FrameHeader->modeExt >> 1) {
   2097         if (m_FrameHeader->modeExt & 0x01) {
   2098             /* intensity stereo enabled - run mid-side up to start of right zero region */
   2099             if (cbi[1].cbType == 0)
   2100                 nSamps = m_SFBandTable.l[cbi[1].cbEndL + 1];
   2101             else
   2102                 nSamps = 3 * m_SFBandTable.s[cbi[1].cbEndSMax + 1];
   2103         } else {
   2104             /* intensity stereo disabled - run mid-side on whole spectrum */
   2105             nSamps = (m_HuffmanInfo->nonZeroBound[0] > m_HuffmanInfo->nonZeroBound[1] ?
   2106                                                        m_HuffmanInfo->nonZeroBound[0] : m_HuffmanInfo->nonZeroBound[1]);
   2107         }
   2108         MidSideProc(m_HuffmanInfo->huffDecBuf, nSamps, mOut);
   2109     }
   2110 
   2111     /* do intensity stereo processing, if enabled */
   2112     if (m_FrameHeader->modeExt & 0x01) {
   2113         nSamps = m_HuffmanInfo->nonZeroBound[0];
   2114         if (m_MPEGVersion == MPEG1) {
   2115             IntensityProcMPEG1(m_HuffmanInfo->huffDecBuf, nSamps, &m_ScaleFactorInfoSub[gr][1], &m_CriticalBandInfo[0],
   2116                     m_FrameHeader->modeExt >> 1, m_SideInfoSub[gr][1].mixedBlock, mOut);
   2117         } else {
   2118             IntensityProcMPEG2(m_HuffmanInfo->huffDecBuf, nSamps, &m_ScaleFactorInfoSub[gr][1], &m_CriticalBandInfo[0],
   2119                     m_ScaleFactorJS, m_FrameHeader->modeExt >> 1, m_SideInfoSub[gr][1].mixedBlock, mOut);
   2120         }
   2121     }
   2122 
   2123     /* adjust guard bit count and nonZeroBound if we did any stereo processing */
   2124     if (m_FrameHeader->modeExt) {
   2125         m_HuffmanInfo->gb[0] = CLZ(mOut[0]) - 1;
   2126         m_HuffmanInfo->gb[1] = CLZ(mOut[1]) - 1;
   2127         nSamps = (m_HuffmanInfo->nonZeroBound[0] > m_HuffmanInfo->nonZeroBound[1] ?
   2128                                                        m_HuffmanInfo->nonZeroBound[0] : m_HuffmanInfo->nonZeroBound[1]);
   2129         m_HuffmanInfo->nonZeroBound[0] = nSamps;
   2130         m_HuffmanInfo->nonZeroBound[1] = nSamps;
   2131     }
   2132 
   2133     /* output format Q(DQ_FRACBITS_OUT) */
   2134     return 0;
   2135 }
   2136 
   2137 /***********************************************************************************************************************
   2138  * D Q C H A N
   2139  **********************************************************************************************************************/
   2140 
   2141 /***********************************************************************************************************************
   2142  * Function:    DequantBlock
   2143  *
   2144  * Description: Ken's highly-optimized, low memory dequantizer performing the operation
   2145  *              y = pow(x, 4.0/3.0) * pow(2, 25 - scale/4.0)
   2146  *
   2147  * Inputs:      input buffer of decode Huffman codewords (signed-magnitude)
   2148  *              output buffer of same length (in-place (outbuf = inbuf) is allowed)
   2149  *              number of samples
   2150  *
   2151  * Outputs:     dequantized samples in Q25 format
   2152  *
   2153  * Return:      bitwise-OR of the unsigned outputs (for guard bit calculations)
   2154  **********************************************************************************************************************/
   2155 int DequantBlock(int *inbuf, int *outbuf, int num, int scale){
   2156     int tab4[4];
   2157     int scalef, scalei, shift;
   2158     int sx, x, y;
   2159     int mask = 0;
   2160     const int *tab16;
   2161     const unsigned int *coef;
   2162 
   2163     tab16 = pow43_14[scale & 0x3];
   2164     scalef = pow14[scale & 0x3];
   2165     scalei =((scale >> 2) < 31 ? (scale >> 2) : 31 );
   2166     //scalei = MIN(scale >> 2, 31);   /* smallest input scale = -47, so smallest scalei = -12 */
   2167 
   2168     /* cache first 4 values */
   2169     shift = (scalei + 3 < 31 ? scalei + 3 : 31);
   2170     shift = (shift > 0 ? shift : 0);
   2171 
   2172     tab4[0] = 0;
   2173     tab4[1] = tab16[1] >> shift;
   2174     tab4[2] = tab16[2] >> shift;
   2175     tab4[3] = tab16[3] >> shift;
   2176 
   2177     do {
   2178         sx = *inbuf++;
   2179         x = sx & 0x7fffffff;    /* sx = sign|mag */
   2180         if (x < 4) {
   2181             y = tab4[x];
   2182         } else if (x < 16) {
   2183             y = tab16[x];
   2184             y = (scalei < 0) ? y << -scalei : y >> scalei;
   2185         } else {
   2186             if (x < 64) {
   2187                 y = pow43[x-16];
   2188                 /* fractional scale */
   2189                 y = MULSHIFT32(y, scalef);
   2190                 shift = scalei - 3;
   2191             } else {
   2192                 /* normalize to [0x40000000, 0x7fffffff] */
   2193                 x <<= 17;
   2194                 shift = 0;
   2195                 if (x < 0x08000000)
   2196                     x <<= 4, shift += 4;
   2197                 if (x < 0x20000000)
   2198                     x <<= 2, shift += 2;
   2199                 if (x < 0x40000000)
   2200                     x <<= 1, shift += 1;
   2201 
   2202                 coef = (x < m_SQRTHALF) ? poly43lo : poly43hi;
   2203 
   2204                 /* polynomial */
   2205                 y = coef[0];
   2206                 y = MULSHIFT32(y, x) + coef[1];
   2207                 y = MULSHIFT32(y, x) + coef[2];
   2208                 y = MULSHIFT32(y, x) + coef[3];
   2209                 y = MULSHIFT32(y, x) + coef[4];
   2210                 y = MULSHIFT32(y, pow2frac[shift]) << 3;
   2211 
   2212                 /* fractional scale */
   2213                 y = MULSHIFT32(y, scalef);
   2214                 shift = scalei - pow2exp[shift];
   2215             }
   2216 
   2217             /* integer scale */
   2218             if (shift < 0) {
   2219                 shift = -shift;
   2220                 if (y > (0x7fffffff >> shift))
   2221                     y = 0x7fffffff;     /* clip */
   2222                 else
   2223                     y <<= shift;
   2224             } else {
   2225                 y >>= shift;
   2226             }
   2227         }
   2228 
   2229         /* sign and store */
   2230         mask |= y;
   2231         *outbuf++ = (sx < 0) ? -y : y;
   2232 
   2233     } while (--num);
   2234 
   2235     return mask;
   2236 }
   2237 
   2238 /***********************************************************************************************************************
   2239  * Function:    DequantChannel
   2240  *
   2241  * Description: dequantize one granule, one channel worth of decoded Huffman codewords
   2242  *
   2243  * Inputs:      sample buffer (decoded Huffman codewords), length = m_MAX_NSAMP samples
   2244  *              work buffer for reordering short-block, length = m_MAX_REORDER_SAMPS
   2245  *                samples (3 * width of largest short-block critical band)
   2246  *              non-zero bound for this channel/granule
   2247  *              valid FrameHeader, SideInfoSub, ScaleFactorInfoSub, and CriticalBandInfo
   2248  *                structures for this channel/granule
   2249  *
   2250  * Outputs:     MAX_NSAMP dequantized samples in sampleBuf
   2251  *              updated non-zero bound (indicating which samples are != 0 after DQ)
   2252  *              filled-in cbi structure indicating start and end critical bands
   2253  *
   2254  * Return:      minimum number of guard bits in dequantized sampleBuf
   2255  *
   2256  * Notes:       dequantized samples in Q(DQ_FRACBITS_OUT) format
   2257  **********************************************************************************************************************/
   2258 int DequantChannel(int *sampleBuf, int *workBuf, int *nonZeroBound,  SideInfoSub_t *sis, ScaleFactorInfoSub_t *sfis,
   2259                                                                                               CriticalBandInfo_t *cbi)
   2260 {
   2261     int i, j, w, cb;
   2262     int /* cbStartL, */ cbEndL, cbStartS, cbEndS;
   2263     int nSamps, nonZero, sfactMultiplier, gbMask;
   2264     int globalGain, gainI;
   2265     int cbMax[3];
   2266     typedef int ARRAY3[3];  /* for short-block reordering */
   2267     ARRAY3 *buf;    /* short block reorder */
   2268 
   2269     /* set default start/end points for short/long blocks - will update with non-zero cb info */
   2270     if (sis->blockType == 2) {
   2271         // cbStartL = 0;
   2272         if (sis->mixedBlock) {
   2273             cbEndL = (m_MPEGVersion == MPEG1 ? 8 : 6);
   2274             cbStartS = 3;
   2275         } else {
   2276             cbEndL = 0;
   2277             cbStartS = 0;
   2278         }
   2279         cbEndS = 13;
   2280     } else {
   2281         /* long block */
   2282         //cbStartL = 0;
   2283         cbEndL =   22;
   2284         cbStartS = 13;
   2285         cbEndS =   13;
   2286     }
   2287     cbMax[2] = cbMax[1] = cbMax[0] = 0;
   2288     gbMask = 0;
   2289     i = 0;
   2290 
   2291     /* sfactScale = 0 --> quantizer step size = 2
   2292      * sfactScale = 1 --> quantizer step size = sqrt(2)
   2293      *   so sfactMultiplier = 2 or 4 (jump through globalGain by powers of 2 or sqrt(2))
   2294      */
   2295     sfactMultiplier = 2 * (sis->sfactScale + 1);
   2296 
   2297     /* offset globalGain by -2 if midSide enabled, for 1/sqrt(2) used in MidSideProc()
   2298      *  (DequantBlock() does 0.25 * gainI so knocking it down by two is the same as
   2299      *   dividing every sample by sqrt(2) = multiplying by 2^-.5)
   2300      */
   2301     globalGain = sis->globalGain;
   2302     if (m_FrameHeader->modeExt >> 1)
   2303          globalGain -= 2;
   2304     globalGain += m_IMDCT_SCALE;      /* scale everything by sqrt(2), for fast IMDCT36 */
   2305 
   2306     /* long blocks */
   2307     for (cb = 0; cb < cbEndL; cb++) {
   2308 
   2309         nonZero = 0;
   2310         nSamps = m_SFBandTable.l[cb + 1] - m_SFBandTable.l[cb];
   2311         gainI = 210 - globalGain + sfactMultiplier * (sfis->l[cb] + (sis->preFlag ? (int)preTab[cb] : 0));
   2312 
   2313         nonZero |= DequantBlock(sampleBuf + i, sampleBuf + i, nSamps, gainI);
   2314         i += nSamps;
   2315 
   2316         /* update highest non-zero critical band */
   2317         if (nonZero)
   2318             cbMax[0] = cb;
   2319         gbMask |= nonZero;
   2320 
   2321         if (i >= *nonZeroBound)
   2322             break;
   2323     }
   2324 
   2325     /* set cbi (Type, EndS[], EndSMax will be overwritten if we proceed to do short blocks) */
   2326     cbi->cbType = 0;            /* long only */
   2327     cbi->cbEndL  = cbMax[0];
   2328     cbi->cbEndS[0] = cbi->cbEndS[1] = cbi->cbEndS[2] = 0;
   2329     cbi->cbEndSMax = 0;
   2330 
   2331     /* early exit if no short blocks */
   2332     if (cbStartS >= 12)
   2333         return CLZ(gbMask) - 1;
   2334 
   2335     /* short blocks */
   2336     cbMax[2] = cbMax[1] = cbMax[0] = cbStartS;
   2337     for (cb = cbStartS; cb < cbEndS; cb++) {
   2338 
   2339         nSamps = m_SFBandTable.s[cb + 1] - m_SFBandTable.s[cb];
   2340         for (w = 0; w < 3; w++) {
   2341             nonZero =  0;
   2342             gainI = 210 - globalGain + 8*sis->subBlockGain[w] + sfactMultiplier*(sfis->s[cb][w]);
   2343 
   2344             nonZero |= DequantBlock(sampleBuf + i + nSamps*w, workBuf + nSamps*w, nSamps, gainI);
   2345 
   2346             /* update highest non-zero critical band */
   2347             if (nonZero)
   2348                 cbMax[w] = cb;
   2349             gbMask |= nonZero;
   2350         }
   2351 
   2352         /* reorder blocks */
   2353         buf = (ARRAY3 *)(sampleBuf + i);
   2354         i += 3*nSamps;
   2355         for (j = 0; j < nSamps; j++) {
   2356             buf[j][0] = workBuf[0*nSamps + j];
   2357             buf[j][1] = workBuf[1*nSamps + j];
   2358             buf[j][2] = workBuf[2*nSamps + j];
   2359         }
   2360 
   2361         assert(3*nSamps <= m_MAX_REORDER_SAMPS);
   2362 
   2363         if (i >= *nonZeroBound)
   2364             break;
   2365     }
   2366 
   2367     /* i = last non-zero INPUT sample processed, which corresponds to highest possible non-zero
   2368      *     OUTPUT sample (after reorder)
   2369      * however, the original nzb is no longer necessarily true
   2370      *   for each cb, buf[][] is updated with 3*nSamps samples (i increases 3*nSamps each time)
   2371      *   (buf[j + 1][0] = 3 (input) samples ahead of buf[j][0])
   2372      * so update nonZeroBound to i
   2373      */
   2374     *nonZeroBound = i;
   2375 
   2376     assert(*nonZeroBound <= m_MAX_NSAMP);
   2377 
   2378     cbi->cbType = (sis->mixedBlock ? 2 : 1);    /* 2 = mixed short/long, 1 = short only */
   2379 
   2380     cbi->cbEndS[0] = cbMax[0];
   2381     cbi->cbEndS[1] = cbMax[1];
   2382     cbi->cbEndS[2] = cbMax[2];
   2383 
   2384     cbi->cbEndSMax = cbMax[0];
   2385     cbi->cbEndSMax = (cbi->cbEndSMax > cbMax[1] ? cbi->cbEndSMax : cbMax[1]);
   2386     cbi->cbEndSMax = (cbi->cbEndSMax > cbMax[2] ? cbi->cbEndSMax : cbMax[2]);
   2387 
   2388     return CLZ(gbMask) - 1;
   2389 }
   2390 
   2391 /***********************************************************************************************************************
   2392  * S T P R O C
   2393  **********************************************************************************************************************/
   2394 
   2395 /***********************************************************************************************************************
   2396  * Function:    MidSideProc
   2397  *
   2398  * Description: sum-difference stereo reconstruction
   2399  *
   2400  * Inputs:      vector x with dequantized samples from left and right channels
   2401  *              number of non-zero samples (MAX of left and right)
   2402  *              assume 1 guard bit in input
   2403  *              guard bit mask (left and right channels)
   2404  *
   2405  * Outputs:     updated sample vector x
   2406  *              updated guard bit mask
   2407  *
   2408  * Return:      none
   2409  *
   2410  * Notes:       assume at least 1 GB in input
   2411  **********************************************************************************************************************/
   2412 void MidSideProc(int x[m_MAX_NCHAN][m_MAX_NSAMP], int nSamps, int mOut[2]){
   2413     int i, xr, xl, mOutL, mOutR;
   2414 
   2415     /* L = (M+S)/sqrt(2), R = (M-S)/sqrt(2)
   2416      * NOTE: 1/sqrt(2) done in DequantChannel() - see comments there
   2417      */
   2418     mOutL = mOutR = 0;
   2419     for (i = 0; i < nSamps; i++) {
   2420         xl = x[0][i];
   2421         xr = x[1][i];
   2422         x[0][i] = xl + xr;
   2423         x[1][i] = xl - xr;
   2424         mOutL |= FASTABS(x[0][i]);
   2425         mOutR |= FASTABS(x[1][i]);
   2426     }
   2427     mOut[0] |= mOutL;
   2428     mOut[1] |= mOutR;
   2429 }
   2430 
   2431 /***********************************************************************************************************************
   2432  * Function:    IntensityProcMPEG1
   2433  *
   2434  * Description: intensity stereo processing for MPEG1
   2435  *
   2436  * Inputs:      vector x with dequantized samples from left and right channels
   2437  *              number of non-zero samples in left channel
   2438  *              valid FrameHeader struct
   2439  *              two each of ScaleFactorInfoSub, CriticalBandInfo structs (both channels)
   2440  *              flags indicating midSide on/off, mixedBlock on/off
   2441  *              guard bit mask (left and right channels)
   2442  *
   2443  * Outputs:     updated sample vector x
   2444  *              updated guard bit mask
   2445  *
   2446  * Return:      none
   2447  *
   2448  * Notes:       assume at least 1 GB in input
   2449  *
   2450  **********************************************************************************************************************/
   2451 void IntensityProcMPEG1(int x[m_MAX_NCHAN][m_MAX_NSAMP], int nSamps,  ScaleFactorInfoSub_t *sfis,
   2452                                                     CriticalBandInfo_t *cbi, int midSideFlag, int mixFlag, int mOut[2])
   2453 {
   2454     int i = 0, j = 0, n = 0, cb = 0, w = 0;
   2455     int sampsLeft, isf, mOutL, mOutR, xl, xr;
   2456     int fl, fr, fls[3], frs[3];
   2457     int cbStartL = 0, cbStartS = 0, cbEndL = 0, cbEndS = 0;
   2458     int *isfTab;
   2459     (void) mixFlag;
   2460 
   2461     /* NOTE - this works fine for mixed blocks, as long as the switch point starts in the
   2462      *  short block section (i.e. on or after sample 36 = sfBand->l[8] = 3*sfBand->s[3]
   2463      * is this a safe assumption?
   2464      */
   2465     if (cbi[1].cbType == 0) {
   2466         /* long block */
   2467         cbStartL = cbi[1].cbEndL + 1;
   2468         cbEndL = cbi[0].cbEndL + 1;
   2469         cbStartS = cbEndS = 0;
   2470         i = m_SFBandTable.l[cbStartL];
   2471     } else if (cbi[1].cbType == 1 || cbi[1].cbType == 2) {
   2472         /* short or mixed block */
   2473         cbStartS = cbi[1].cbEndSMax + 1;
   2474         cbEndS = cbi[0].cbEndSMax + 1;
   2475         cbStartL = cbEndL = 0;
   2476         i = 3 * m_SFBandTable.s[cbStartS];
   2477     }
   2478     sampsLeft = nSamps - i; /* process to length of left */
   2479     isfTab = (int *) ISFMpeg1[midSideFlag];
   2480     mOutL = mOutR = 0;
   2481 
   2482     /* long blocks */
   2483     for (cb = cbStartL; cb < cbEndL && sampsLeft > 0; cb++) {
   2484         isf = sfis->l[cb];
   2485         if (isf == 7) {
   2486             fl = ISFIIP[midSideFlag][0];
   2487             fr = ISFIIP[midSideFlag][1];
   2488         } else {
   2489             fl = isfTab[isf];
   2490             fr = isfTab[6] - isfTab[isf];
   2491         }
   2492 
   2493         n = m_SFBandTable.l[cb + 1] - m_SFBandTable.l[cb];
   2494         for (j = 0; j < n && sampsLeft > 0; j++, i++) {
   2495             xr = MULSHIFT32(fr, x[0][i]) << 2;
   2496             x[1][i] = xr;
   2497             mOutR |= FASTABS(xr);
   2498             xl = MULSHIFT32(fl, x[0][i]) << 2;
   2499             x[0][i] = xl;
   2500             mOutL |= FASTABS(xl);
   2501             sampsLeft--;
   2502         }
   2503     }
   2504     /* short blocks */
   2505     for (cb = cbStartS; cb < cbEndS && sampsLeft >= 3; cb++) {
   2506         for (w = 0; w < 3; w++) {
   2507             isf = sfis->s[cb][w];
   2508             if (isf == 7) {
   2509                 fls[w] = ISFIIP[midSideFlag][0];
   2510                 frs[w] = ISFIIP[midSideFlag][1];
   2511             } else {
   2512                 fls[w] = isfTab[isf];
   2513                 frs[w] = isfTab[6] - isfTab[isf];
   2514             }
   2515         }
   2516         n = m_SFBandTable.s[cb + 1] - m_SFBandTable.s[cb];
   2517         for (j = 0; j < n && sampsLeft >= 3; j++, i += 3) {
   2518             xr = MULSHIFT32(frs[0], x[0][i + 0]) << 2;
   2519             x[1][i + 0] = xr;
   2520             mOutR |= FASTABS(xr);
   2521             xl = MULSHIFT32(fls[0], x[0][i + 0]) << 2;
   2522             x[0][i + 0] = xl;
   2523             mOutL |= FASTABS(xl);
   2524             xr = MULSHIFT32(frs[1], x[0][i + 1]) << 2;
   2525             x[1][i + 1] = xr;
   2526             mOutR |= FASTABS(xr);
   2527             xl = MULSHIFT32(fls[1], x[0][i + 1]) << 2;
   2528             x[0][i + 1] = xl;
   2529             mOutL |= FASTABS(xl);
   2530             xr = MULSHIFT32(frs[2], x[0][i + 2]) << 2;
   2531             x[1][i + 2] = xr;
   2532             mOutR |= FASTABS(xr);
   2533             xl = MULSHIFT32(fls[2], x[0][i + 2]) << 2;
   2534             x[0][i + 2] = xl;
   2535             mOutL |= FASTABS(xl);
   2536             sampsLeft -= 3;
   2537         }
   2538     }
   2539     mOut[0] = mOutL;
   2540     mOut[1] = mOutR;
   2541     return;
   2542 }
   2543 
   2544 /***********************************************************************************************************************
   2545  * Function:    IntensityProcMPEG2
   2546  *
   2547  * Description: intensity stereo processing for MPEG2
   2548  *
   2549  * Inputs:      vector x with dequantized samples from left and right channels
   2550  *              number of non-zero samples in left channel
   2551  *              valid FrameHeader struct
   2552  *              two each of ScaleFactorInfoSub, CriticalBandInfo structs (both channels)
   2553  *              ScaleFactorJS struct with joint stereo info from UnpackSFMPEG2()
   2554  *              flags indicating midSide on/off, mixedBlock on/off
   2555  *              guard bit mask (left and right channels)
   2556  *
   2557  * Outputs:     updated sample vector x
   2558  *              updated guard bit mask
   2559  *
   2560  * Return:      none
   2561  *
   2562  * Notes:       assume at least 1 GB in input
   2563  *
   2564  **********************************************************************************************************************/
   2565 void IntensityProcMPEG2(int x[m_MAX_NCHAN][m_MAX_NSAMP], int nSamps,
   2566          ScaleFactorInfoSub_t *sfis, CriticalBandInfo_t *cbi,
   2567         ScaleFactorJS_t *sfjs, int midSideFlag, int mixFlag, int mOut[2]) {
   2568     int i, j, k, n, r, cb, w;
   2569     int fl, fr, mOutL, mOutR, xl, xr;
   2570     int sampsLeft;
   2571     int isf, sfIdx, tmp, il[23];
   2572     int *isfTab;
   2573     int cbStartL, cbStartS, cbEndL, cbEndS;
   2574 
   2575     (void) mixFlag;
   2576 
   2577     isfTab = (int *) ISFMpeg2[sfjs->intensityScale][midSideFlag];
   2578     mOutL = mOutR = 0;
   2579 
   2580     /* fill buffer with illegal intensity positions (depending on slen) */
   2581     for (k = r = 0; r < 4; r++) {
   2582         tmp = (1 << sfjs->slen[r]) - 1;
   2583         for (j = 0; j < sfjs->nr[r]; j++, k++)
   2584             il[k] = tmp;
   2585     }
   2586 
   2587     if (cbi[1].cbType == 0) {
   2588         /* long blocks */
   2589         il[21] = il[22] = 1;
   2590         cbStartL = cbi[1].cbEndL + 1; /* start at end of right */
   2591         cbEndL = cbi[0].cbEndL + 1; /* process to end of left */
   2592         i = m_SFBandTable.l[cbStartL];
   2593         sampsLeft = nSamps - i;
   2594 
   2595         for (cb = cbStartL; cb < cbEndL; cb++) {
   2596             sfIdx = sfis->l[cb];
   2597             if (sfIdx == il[cb]) {
   2598                 fl = ISFIIP[midSideFlag][0];
   2599                 fr = ISFIIP[midSideFlag][1];
   2600             } else {
   2601                 isf = (sfis->l[cb] + 1) >> 1;
   2602                 fl = isfTab[(sfIdx & 0x01 ? isf : 0)];
   2603                 fr = isfTab[(sfIdx & 0x01 ? 0 : isf)];
   2604             }
   2605             int r=m_SFBandTable.l[cb + 1] - m_SFBandTable.l[cb];
   2606             n=(r < sampsLeft ? r : sampsLeft);
   2607             //n = MIN(fh->sfBand->l[cb + 1] - fh->sfBand->l[cb], sampsLeft);
   2608             for (j = 0; j < n; j++, i++) {
   2609                 xr = MULSHIFT32(fr, x[0][i]) << 2;
   2610                 x[1][i] = xr;
   2611                 mOutR |= FASTABS(xr);
   2612                 xl = MULSHIFT32(fl, x[0][i]) << 2;
   2613                 x[0][i] = xl;
   2614                 mOutL |= FASTABS(xl);
   2615             }
   2616             /* early exit once we've used all the non-zero samples */
   2617             sampsLeft -= n;
   2618             if (sampsLeft == 0)
   2619                 break;
   2620         }
   2621     } else {
   2622         /* short or mixed blocks */
   2623         il[12] = 1;
   2624 
   2625         for (w = 0; w < 3; w++) {
   2626             cbStartS = cbi[1].cbEndS[w] + 1; /* start at end of right */
   2627             cbEndS = cbi[0].cbEndS[w] + 1; /* process to end of left */
   2628             i = 3 * m_SFBandTable.s[cbStartS] + w;
   2629 
   2630             /* skip through sample array by 3, so early-exit logic would be more tricky */
   2631             for (cb = cbStartS; cb < cbEndS; cb++) {
   2632                 sfIdx = sfis->s[cb][w];
   2633                 if (sfIdx == il[cb]) {
   2634                     fl = ISFIIP[midSideFlag][0];
   2635                     fr = ISFIIP[midSideFlag][1];
   2636                 } else {
   2637                     isf = (sfis->s[cb][w] + 1) >> 1;
   2638                     fl = isfTab[(sfIdx & 0x01 ? isf : 0)];
   2639                     fr = isfTab[(sfIdx & 0x01 ? 0 : isf)];
   2640                 }
   2641                 n = m_SFBandTable.s[cb + 1] - m_SFBandTable.s[cb];
   2642 
   2643                 for (j = 0; j < n; j++, i += 3) {
   2644                     xr = MULSHIFT32(fr, x[0][i]) << 2;
   2645                     x[1][i] = xr;
   2646                     mOutR |= FASTABS(xr);
   2647                     xl = MULSHIFT32(fl, x[0][i]) << 2;
   2648                     x[0][i] = xl;
   2649                     mOutL |= FASTABS(xl);
   2650                 }
   2651             }
   2652         }
   2653     }
   2654     mOut[0] = mOutL;
   2655     mOut[1] = mOutR;
   2656     return;
   2657 }
   2658 
   2659 /***********************************************************************************************************************
   2660  * I M D C T
   2661  **********************************************************************************************************************/
   2662 
   2663 /***********************************************************************************************************************
   2664  * Function:    AntiAlias
   2665  *
   2666  * Description: smooth transition across DCT block boundaries (every 18 coefficients)
   2667  *
   2668  * Inputs:      vector of dequantized coefficients, length = (nBfly+1) * 18
   2669  *              number of "butterflies" to perform (one butterfly means one
   2670  *                inter-block smoothing operation)
   2671  *
   2672  * Outputs:     updated coefficient vector x
   2673  *
   2674  * Return:      none
   2675  *
   2676  * Notes:       weighted average of opposite bands (pairwise) from the 8 samples
   2677  *                before and after each block boundary
   2678  *              nBlocks = (nonZeroBound + 7) / 18, since nZB is the first ZERO sample
   2679  *                above which all other samples are also zero
   2680  *              max gain per sample = 1.372
   2681  *                MAX(i) (abs(csa[i][0]) + abs(csa[i][1]))
   2682  *              bits gained = 0
   2683  *              assume at least 1 guard bit in x[] to avoid overflow
   2684  *                (should be guaranteed from dequant, and max gain from stproc * max
   2685  *                 gain from AntiAlias < 2.0)
   2686  **********************************************************************************************************************/
   2687 // a little bit faster in RAM (< 1 ms per block)
   2688 /* __attribute__ ((section (".data"))) */
   2689 void AntiAlias(int *x, int nBfly){
   2690     int k, a0, b0, c0, c1;
   2691     const uint32_t *c;
   2692 
   2693     /* csa = Q31 */
   2694     for (k = nBfly; k > 0; k--) {
   2695         c = csa[0];
   2696         x += 18;
   2697         a0 = x[-1];
   2698         c0 = *c;
   2699         c++;
   2700         b0 = x[0];
   2701         c1 = *c;
   2702         c++;
   2703         x[-1] = (MULSHIFT32(c0, a0) - MULSHIFT32(c1, b0)) << 1;
   2704         x[0] = (MULSHIFT32(c0, b0) + MULSHIFT32(c1, a0)) << 1;
   2705 
   2706         a0 = x[-2];
   2707         c0 = *c;
   2708         c++;
   2709         b0 = x[1];
   2710         c1 = *c;
   2711         c++;
   2712         x[-2] = (MULSHIFT32(c0, a0) - MULSHIFT32(c1, b0)) << 1;
   2713         x[1] = (MULSHIFT32(c0, b0) + MULSHIFT32(c1, a0)) << 1;
   2714 
   2715         a0 = x[-3];
   2716         c0 = *c;
   2717         c++;
   2718         b0 = x[2];
   2719         c1 = *c;
   2720         c++;
   2721         x[-3] = (MULSHIFT32(c0, a0) - MULSHIFT32(c1, b0)) << 1;
   2722         x[2] = (MULSHIFT32(c0, b0) + MULSHIFT32(c1, a0)) << 1;
   2723 
   2724         a0 = x[-4];
   2725         c0 = *c;
   2726         c++;
   2727         b0 = x[3];
   2728         c1 = *c;
   2729         c++;
   2730         x[-4] = (MULSHIFT32(c0, a0) - MULSHIFT32(c1, b0)) << 1;
   2731         x[3] = (MULSHIFT32(c0, b0) + MULSHIFT32(c1, a0)) << 1;
   2732 
   2733         a0 = x[-5];
   2734         c0 = *c;
   2735         c++;
   2736         b0 = x[4];
   2737         c1 = *c;
   2738         c++;
   2739         x[-5] = (MULSHIFT32(c0, a0) - MULSHIFT32(c1, b0)) << 1;
   2740         x[4] = (MULSHIFT32(c0, b0) + MULSHIFT32(c1, a0)) << 1;
   2741 
   2742         a0 = x[-6];
   2743         c0 = *c;
   2744         c++;
   2745         b0 = x[5];
   2746         c1 = *c;
   2747         c++;
   2748         x[-6] = (MULSHIFT32(c0, a0) - MULSHIFT32(c1, b0)) << 1;
   2749         x[5] = (MULSHIFT32(c0, b0) + MULSHIFT32(c1, a0)) << 1;
   2750 
   2751         a0 = x[-7];
   2752         c0 = *c;
   2753         c++;
   2754         b0 = x[6];
   2755         c1 = *c;
   2756         c++;
   2757         x[-7] = (MULSHIFT32(c0, a0) - MULSHIFT32(c1, b0)) << 1;
   2758         x[6] = (MULSHIFT32(c0, b0) + MULSHIFT32(c1, a0)) << 1;
   2759 
   2760         a0 = x[-8];
   2761         c0 = *c;
   2762         c++;
   2763         b0 = x[7];
   2764         c1 = *c;
   2765         c++;
   2766         x[-8] = (MULSHIFT32(c0, a0) - MULSHIFT32(c1, b0)) << 1;
   2767         x[7] = (MULSHIFT32(c0, b0) + MULSHIFT32(c1, a0)) << 1;
   2768     }
   2769 }
   2770 
   2771 /***********************************************************************************************************************
   2772  * Function:    WinPrevious
   2773  *
   2774  * Description: apply specified window to second half of previous IMDCT (overlap part)
   2775  *
   2776  * Inputs:      vector of 9 coefficients (xPrev)
   2777  *
   2778  * Outputs:     18 windowed output coefficients (gain 1 integer bit)
   2779  *              window type (0, 1, 2, 3)
   2780  *
   2781  * Return:      none
   2782  *
   2783  * Notes:       produces 9 output samples from 18 input samples via symmetry
   2784  *              all blocks gain at least 1 guard bit via window (long blocks get extra
   2785  *                sign bit, short blocks can have one addition but max gain < 1.0)
   2786  **********************************************************************************************************************/
   2787 
   2788 void WinPrevious(int *xPrev, int *xPrevWin, int btPrev){
   2789     int i, x, *xp, *xpwLo, *xpwHi, wLo, wHi;
   2790     const uint32_t *wpLo, *wpHi;
   2791 
   2792     xp = xPrev;
   2793     /* mapping (see IMDCT12x3): xPrev[0-2] = sum[6-8], xPrev[3-8] = sum[12-17] */
   2794     if (btPrev == 2) {
   2795         /* this could be reordered for minimum loads/stores */
   2796         wpLo = imdctWin[btPrev];
   2797         xPrevWin[0] = MULSHIFT32(wpLo[6], xPrev[2])
   2798                 + MULSHIFT32(wpLo[0], xPrev[6]);
   2799         xPrevWin[1] = MULSHIFT32(wpLo[7], xPrev[1])
   2800                 + MULSHIFT32(wpLo[1], xPrev[7]);
   2801         xPrevWin[2] = MULSHIFT32(wpLo[8], xPrev[0])
   2802                 + MULSHIFT32(wpLo[2], xPrev[8]);
   2803         xPrevWin[3] = MULSHIFT32(wpLo[9], xPrev[0])
   2804                 + MULSHIFT32(wpLo[3], xPrev[8]);
   2805         xPrevWin[4] = MULSHIFT32(wpLo[10], xPrev[1])
   2806                 + MULSHIFT32(wpLo[4], xPrev[7]);
   2807         xPrevWin[5] = MULSHIFT32(wpLo[11], xPrev[2])
   2808                 + MULSHIFT32(wpLo[5], xPrev[6]);
   2809         xPrevWin[6] = MULSHIFT32(wpLo[6], xPrev[5]);
   2810         xPrevWin[7] = MULSHIFT32(wpLo[7], xPrev[4]);
   2811         xPrevWin[8] = MULSHIFT32(wpLo[8], xPrev[3]);
   2812         xPrevWin[9] = MULSHIFT32(wpLo[9], xPrev[3]);
   2813         xPrevWin[10] = MULSHIFT32(wpLo[10], xPrev[4]);
   2814         xPrevWin[11] = MULSHIFT32(wpLo[11], xPrev[5]);
   2815         xPrevWin[12] = xPrevWin[13] = xPrevWin[14] = xPrevWin[15] =
   2816                 xPrevWin[16] = xPrevWin[17] = 0;
   2817     } else {
   2818         /* use ARM-style pointers (*ptr++) so that ADS compiles well */
   2819         wpLo = imdctWin[btPrev] + 18;
   2820         wpHi = wpLo + 17;
   2821         xpwLo = xPrevWin;
   2822         xpwHi = xPrevWin + 17;
   2823         for (i = 9; i > 0; i--) {
   2824             x = *xp++;
   2825             wLo = *wpLo++;
   2826             wHi = *wpHi--;
   2827             *xpwLo++ = MULSHIFT32(wLo, x);
   2828             *xpwHi-- = MULSHIFT32(wHi, x);
   2829         }
   2830     }
   2831 }
   2832 
   2833 /***********************************************************************************************************************
   2834  * Function:    FreqInvertRescale
   2835  *
   2836  * Description: do frequency inversion (odd samples of odd blocks) and rescale
   2837  *                if necessary (extra guard bits added before IMDCT)
   2838  *
   2839  * Inputs:      output vector y (18 new samples, spaced NBANDS apart)
   2840  *              previous sample vector xPrev (9 samples)
   2841  *              index of current block
   2842  *              number of extra shifts added before IMDCT (usually 0)
   2843  *
   2844  * Outputs:     inverted and rescaled (as necessary) outputs
   2845  *              rescaled (as necessary) previous samples
   2846  *
   2847  * Return:      updated mOut (from new outputs y)
   2848  **********************************************************************************************************************/
   2849 
   2850 int FreqInvertRescale(int *y, int *xPrev, int blockIdx, int es) {
   2851 
   2852 	if (es == 0) {
   2853 		/* fast case - frequency invert only (no rescaling) */
   2854 		if (blockIdx & 0x01) {
   2855 			y += m_NBANDS;
   2856             for (int i = 0; i < 9; i++) {
   2857     			*y = - *y;	y += 2 * m_NBANDS;
   2858             }
   2859 		}
   2860 		return 0;
   2861 	}
   2862 
   2863     int d, mOut;
   2864     /* undo pre-IMDCT scaling, clipping if necessary */
   2865     mOut = 0;
   2866     if (blockIdx & 0x01) {
   2867         /* frequency invert */
   2868         for (int i = 0; i < 9; i++) {
   2869             d = *y;		CLIP_2N(d, (31 - es));	*y = d << es;	mOut |= FASTABS(*y);	y += m_NBANDS;
   2870             d = -*y;	CLIP_2N(d, (31 - es));	*y = d << es;	mOut |= FASTABS(*y);	y += m_NBANDS;
   2871             d = *xPrev;	CLIP_2N(d, (31 - es));	*xPrev++ = d << es;
   2872         }
   2873     } else {
   2874         for (int i = 0; i < 9; i++) {
   2875             d = *y;		CLIP_2N(d, (31 - es));	*y = d << es;	mOut |= FASTABS(*y);	y += m_NBANDS;
   2876             d = *y;		CLIP_2N(d, (31 - es));	*y = d << es;	mOut |= FASTABS(*y);	y += m_NBANDS;
   2877             d = *xPrev;	CLIP_2N(d, (31 - es));	*xPrev++ = d << es;
   2878         }
   2879     }
   2880     return mOut;
   2881 
   2882 }
   2883 
   2884 
   2885 /* require at least 3 guard bits in x[] to ensure no overflow */
   2886 void idct9(int *x) {
   2887     int a1, a2, a3, a4, a5, a6, a7, a8, a9;
   2888     int a10, a11, a12, a13, a14, a15, a16, a17, a18;
   2889     int a19, a20, a21, a22, a23, a24, a25, a26, a27;
   2890     int m1, m3, m5, m6, m7, m8, m9, m10, m11, m12;
   2891     int x0, x1, x2, x3, x4, x5, x6, x7, x8;
   2892 
   2893     x0 = x[0];
   2894     x1 = x[1];
   2895     x2 = x[2];
   2896     x3 = x[3];
   2897     x4 = x[4];
   2898     x5 = x[5];
   2899     x6 = x[6];
   2900     x7 = x[7];
   2901     x8 = x[8];
   2902 
   2903     a1 = x0 - x6;
   2904     a2 = x1 - x5;
   2905     a3 = x1 + x5;
   2906     a4 = x2 - x4;
   2907     a5 = x2 + x4;
   2908     a6 = x2 + x8;
   2909     a7 = x1 + x7;
   2910 
   2911     a8 = a6 - a5; /* ie x[8] - x[4] */
   2912     a9 = a3 - a7; /* ie x[5] - x[7] */
   2913     a10 = a2 - x7; /* ie x[1] - x[5] - x[7] */
   2914     a11 = a4 - x8; /* ie x[2] - x[4] - x[8] */
   2915 
   2916     /* do the << 1 as constant shifts where mX is actually used (free, no stall or extra inst.) */
   2917     m1 = MULSHIFT32(c9_0, x3);
   2918     m3 = MULSHIFT32(c9_0, a10);
   2919     m5 = MULSHIFT32(c9_1, a5);
   2920     m6 = MULSHIFT32(c9_2, a6);
   2921     m7 = MULSHIFT32(c9_1, a8);
   2922     m8 = MULSHIFT32(c9_2, a5);
   2923     m9 = MULSHIFT32(c9_3, a9);
   2924     m10 = MULSHIFT32(c9_4, a7);
   2925     m11 = MULSHIFT32(c9_3, a3);
   2926     m12 = MULSHIFT32(c9_4, a9);
   2927 
   2928     a12 = x[0] + (x[6] >> 1);
   2929     a13 = a12 + (m1 << 1);
   2930     a14 = a12 - (m1 << 1);
   2931     a15 = a1 + (a11 >> 1);
   2932     a16 = (m5 << 1) + (m6 << 1);
   2933     a17 = (m7 << 1) - (m8 << 1);
   2934     a18 = a16 + a17;
   2935     a19 = (m9 << 1) + (m10 << 1);
   2936     a20 = (m11 << 1) - (m12 << 1);
   2937 
   2938     a21 = a20 - a19;
   2939     a22 = a13 + a16;
   2940     a23 = a14 + a16;
   2941     a24 = a14 + a17;
   2942     a25 = a13 + a17;
   2943     a26 = a14 - a18;
   2944     a27 = a13 - a18;
   2945 
   2946     x0 = a22 + a19;
   2947     x[0] = x0;
   2948     x1 = a15 + (m3 << 1);
   2949     x[1] = x1;
   2950     x2 = a24 + a20;
   2951     x[2] = x2;
   2952     x3 = a26 - a21;
   2953     x[3] = x3;
   2954     x4 = a1 - a11;
   2955     x[4] = x4;
   2956     x5 = a27 + a21;
   2957     x[5] = x5;
   2958     x6 = a25 - a20;
   2959     x[6] = x6;
   2960     x7 = a15 - (m3 << 1);
   2961     x[7] = x7;
   2962     x8 = a23 - a19;
   2963     x[8] = x8;
   2964 }
   2965 
   2966 
   2967 /***********************************************************************************************************************
   2968  * Function:    IMDCT36
   2969  *
   2970  * Description: 36-point modified DCT, with windowing and overlap-add (50% overlap)
   2971  *
   2972  * Inputs:      vector of 18 coefficients (N/2 inputs produces N outputs, by symmetry)
   2973  *              overlap part of last IMDCT (9 samples - see output comments)
   2974  *              window type (0,1,2,3) of current and previous block
   2975  *              current block index (for deciding whether to do frequency inversion)
   2976  *              number of guard bits in input vector
   2977  *
   2978  * Outputs:     18 output samples, after windowing and overlap-add with last frame
   2979  *              second half of (unwindowed) 36-point IMDCT - save for next time
   2980  *                only save 9 xPrev samples, using symmetry (see WinPrevious())
   2981  *
   2982  * Notes:       this is Ken's hyper-fast algorithm, including symmetric sin window
   2983  *                optimization, if applicable
   2984  *              total number of multiplies, general case:
   2985  *                2*10 (idct9) + 9 (last stage imdct) + 36 (for windowing) = 65
   2986  *              total number of multiplies, btCurr == 0 && btPrev == 0:
   2987  *                2*10 (idct9) + 9 (last stage imdct) + 18 (for windowing) = 47
   2988  *
   2989  *              blockType == 0 is by far the most common case, so it should be
   2990  *                possible to use the fast path most of the time
   2991  *              this is the fastest known algorithm for performing
   2992  *                long IMDCT + windowing + overlap-add in MP3
   2993  *
   2994  * Return:      mOut (OR of abs(y) for all y calculated here)
   2995  **********************************************************************************************************************/
   2996 // barely faster in RAM
   2997 
   2998 int IMDCT36(int *xCurr, int *xPrev, int *y, int btCurr, int btPrev, int blockIdx, int gb){
   2999     int i, es, xBuf[18], xPrevWin[18];
   3000     int acc1, acc2, s, d, t, mOut;
   3001     int xo, xe, c, *xp, yLo, yHi;
   3002     const uint32_t *cp, *wp;
   3003     acc1 = acc2 = 0;
   3004     xCurr += 17;
   3005     /* 7 gb is always adequate for antialias + accumulator loop + idct9 */
   3006     if (gb < 7) {
   3007         /* rarely triggered - 5% to 10% of the time on normal clips (with Q25 input) */
   3008         es = 7 - gb;
   3009         for (i = 8; i >= 0; i--) {
   3010             acc1 = ((*xCurr--) >> es) - acc1;
   3011             acc2 = acc1 - acc2;
   3012             acc1 = ((*xCurr--) >> es) - acc1;
   3013             xBuf[i + 9] = acc2; /* odd */
   3014             xBuf[i + 0] = acc1; /* even */
   3015             xPrev[i] >>= es;
   3016         }
   3017     } else {
   3018         es = 0;
   3019         /* max gain = 18, assume adequate guard bits */
   3020         for (i = 8; i >= 0; i--) {
   3021             acc1 = (*xCurr--) - acc1;
   3022             acc2 = acc1 - acc2;
   3023             acc1 = (*xCurr--) - acc1;
   3024             xBuf[i + 9] = acc2; /* odd */
   3025             xBuf[i + 0] = acc1; /* even */
   3026         }
   3027     }
   3028     /* xEven[0] and xOdd[0] scaled by 0.5 */
   3029     xBuf[9] >>= 1;
   3030     xBuf[0] >>= 1;
   3031 
   3032     /* do 9-point IDCT on even and odd */
   3033     idct9(xBuf + 0); /* even */
   3034     idct9(xBuf + 9); /* odd */
   3035 
   3036     xp = xBuf + 8;
   3037     cp = c18 + 8;
   3038     mOut = 0;
   3039     if (btPrev == 0 && btCurr == 0) {
   3040         /* fast path - use symmetry of sin window to reduce windowing multiplies to 18 (N/2) */
   3041         wp = fastWin36;
   3042         for (i = 0; i < 9; i++) {
   3043             /* do ARM-style pointer arithmetic (i still needed for y[] indexing - compiler spills if 2 y pointers) */
   3044             c = *cp--;
   3045             xo = *(xp + 9);
   3046             xe = *xp--;
   3047             /* gain 2 int bits here */
   3048             xo = MULSHIFT32(c, xo); /* 2*c18*xOdd (mul by 2 implicit in scaling)  */
   3049             xe >>= 2;
   3050 
   3051             s = -(*xPrev); /* sum from last block (always at least 2 guard bits) */
   3052             d = -(xe - xo); /* gain 2 int bits, don't shift xo (effective << 1 to eat sign bit, << 1 for mul by 2) */
   3053             (*xPrev++) = xe + xo; /* symmetry - xPrev[i] = xPrev[17-i] for long blocks */
   3054             t = s - d;
   3055 
   3056             yLo = (d + (MULSHIFT32(t, *wp++) << 2));
   3057             yHi = (s + (MULSHIFT32(t, *wp++) << 2));
   3058             y[(i) * m_NBANDS] = yLo;
   3059             y[(17 - i) * m_NBANDS] = yHi;
   3060             mOut |= FASTABS(yLo);
   3061             mOut |= FASTABS(yHi);
   3062         }
   3063     } else {
   3064         /* slower method - either prev or curr is using window type != 0 so do full 36-point window
   3065          * output xPrevWin has at least 3 guard bits (xPrev has 2, gain 1 in WinPrevious)
   3066          */
   3067         WinPrevious(xPrev, xPrevWin, btPrev);
   3068 
   3069         wp = imdctWin[btCurr];
   3070         for (i = 0; i < 9; i++) {
   3071             c = *cp--;
   3072             xo = *(xp + 9);
   3073             xe = *xp--;
   3074             /* gain 2 int bits here */
   3075             xo = MULSHIFT32(c, xo); /* 2*c18*xOdd (mul by 2 implicit in scaling)  */
   3076             xe >>= 2;
   3077 
   3078             d = xe - xo;
   3079             (*xPrev++) = xe + xo; /* symmetry - xPrev[i] = xPrev[17-i] for long blocks */
   3080 
   3081             yLo = (xPrevWin[i] + MULSHIFT32(d, wp[i])) << 2;
   3082             yHi = (xPrevWin[17 - i] + MULSHIFT32(d, wp[17 - i])) << 2;
   3083             y[(i) * m_NBANDS] = yLo;
   3084             y[(17 - i) * m_NBANDS] = yHi;
   3085             mOut |= FASTABS(yLo);
   3086             mOut |= FASTABS(yHi);
   3087         }
   3088     }
   3089 
   3090     xPrev -= 9;
   3091     mOut |= FreqInvertRescale(y, xPrev, blockIdx, es);
   3092 
   3093     return mOut;
   3094 }
   3095 
   3096 
   3097 
   3098 /* 12-point inverse DCT, used in IMDCT12x3()
   3099  * 4 input guard bits will ensure no overflow
   3100  */
   3101 void imdct12(int *x, int *out) {
   3102     int a0, a1, a2;
   3103     int x0, x1, x2, x3, x4, x5;
   3104 
   3105     x0 = *x;
   3106     x += 3;
   3107     x1 = *x;
   3108     x += 3;
   3109     x2 = *x;
   3110     x += 3;
   3111     x3 = *x;
   3112     x += 3;
   3113     x4 = *x;
   3114     x += 3;
   3115     x5 = *x;
   3116     x += 3;
   3117 
   3118     x4 -= x5;
   3119     x3 -= x4;
   3120     x2 -= x3;
   3121     x3 -= x5;
   3122     x1 -= x2;
   3123     x0 -= x1;
   3124     x1 -= x3;
   3125 
   3126     x0 >>= 1;
   3127     x1 >>= 1;
   3128 
   3129     a0 = MULSHIFT32(c3_0, x2) << 1;
   3130     a1 = x0 + (x4 >> 1);
   3131     a2 = x0 - x4;
   3132     x0 = a1 + a0;
   3133     x2 = a2;
   3134     x4 = a1 - a0;
   3135 
   3136     a0 = MULSHIFT32(c3_0, x3) << 1;
   3137     a1 = x1 + (x5 >> 1);
   3138     a2 = x1 - x5;
   3139 
   3140     /* cos window odd samples, mul by 2, eat sign bit */
   3141     x1 = MULSHIFT32(c6[0], a1 + a0) << 2;
   3142     x3 = MULSHIFT32(c6[1], a2) << 2;
   3143     x5 = MULSHIFT32(c6[2], a1 - a0) << 2;
   3144 
   3145     *out = x0 + x1;
   3146     out++;
   3147     *out = x2 + x3;
   3148     out++;
   3149     *out = x4 + x5;
   3150     out++;
   3151     *out = x4 - x5;
   3152     out++;
   3153     *out = x2 - x3;
   3154     out++;
   3155     *out = x0 - x1;
   3156 }
   3157 
   3158 /***********************************************************************************************************************
   3159  * Function:    IMDCT12x3
   3160  *
   3161  * Description: three 12-point modified DCT's for short blocks, with windowing,
   3162  *                short block concatenation, and overlap-add
   3163  *
   3164  * Inputs:      3 interleaved vectors of 6 samples each
   3165  *                (block0[0], block1[0], block2[0], block0[1], block1[1]....)
   3166  *              overlap part of last IMDCT (9 samples - see output comments)
   3167  *              window type (0,1,2,3) of previous block
   3168  *              current block index (for deciding whether to do frequency inversion)
   3169  *              number of guard bits in input vector
   3170  *
   3171  * Outputs:     updated sample vector x, net gain of 1 integer bit
   3172  *              second half of (unwindowed) IMDCT's - save for next time
   3173  *                only save 9 xPrev samples, using symmetry (see WinPrevious())
   3174  *
   3175  * Return:      mOut (OR of abs(y) for all y calculated here)
   3176  **********************************************************************************************************************/
   3177 // barely faster in RAM
   3178 int IMDCT12x3(int *xCurr, int *xPrev, int *y, int btPrev, int blockIdx, int gb){
   3179     int i, es, mOut, yLo, xBuf[18], xPrevWin[18]; /* need temp buffer for reordering short blocks */
   3180     const uint32_t *wp;
   3181     es = 0;
   3182     /* 7 gb is always adequate for accumulator loop + idct12 + window + overlap */
   3183     if (gb < 7) {
   3184         es = 7 - gb;
   3185         for (i = 0; i < 18; i += 2) {
   3186             xCurr[i + 0] >>= es;
   3187             xCurr[i + 1] >>= es;
   3188             *xPrev++ >>= es;
   3189         }
   3190         xPrev -= 9;
   3191     }
   3192 
   3193     /* requires 4 input guard bits for each imdct12 */
   3194     imdct12(xCurr + 0, xBuf + 0);
   3195     imdct12(xCurr + 1, xBuf + 6);
   3196     imdct12(xCurr + 2, xBuf + 12);
   3197 
   3198     /* window previous from last time */
   3199     WinPrevious(xPrev, xPrevWin, btPrev);
   3200 
   3201     /* could unroll this for speed, minimum loads (short blocks usually rare, so doesn't make much overall difference)
   3202      * xPrevWin[i] << 2 still has 1 gb always, max gain of windowed xBuf stuff also < 1.0 and gain the sign bit
   3203      * so y calculations won't overflow
   3204      */
   3205     wp = imdctWin[2];
   3206     mOut = 0;
   3207     for (i = 0; i < 3; i++) {
   3208         yLo = (xPrevWin[0 + i] << 2);
   3209         mOut |= FASTABS(yLo);
   3210         y[(0 + i) * m_NBANDS] = yLo;
   3211         yLo = (xPrevWin[3 + i] << 2);
   3212         mOut |= FASTABS(yLo);
   3213         y[(3 + i) * m_NBANDS] = yLo;
   3214         yLo = (xPrevWin[6 + i] << 2) + (MULSHIFT32(wp[0 + i], xBuf[3 + i]));
   3215         mOut |= FASTABS(yLo);
   3216         y[(6 + i) * m_NBANDS] = yLo;
   3217         yLo = (xPrevWin[9 + i] << 2) + (MULSHIFT32(wp[3 + i], xBuf[5 - i]));
   3218         mOut |= FASTABS(yLo);
   3219         y[(9 + i) * m_NBANDS] = yLo;
   3220         yLo = (xPrevWin[12 + i] << 2)
   3221                 + (MULSHIFT32(wp[6 + i], xBuf[2 - i])
   3222                         + MULSHIFT32(wp[0 + i], xBuf[(6 + 3) + i]));
   3223         mOut |= FASTABS(yLo);
   3224         y[(12 + i) * m_NBANDS] = yLo;
   3225         yLo = (xPrevWin[15 + i] << 2)
   3226                 + (MULSHIFT32(wp[9 + i], xBuf[0 + i])
   3227                         + MULSHIFT32(wp[3 + i], xBuf[(6 + 5) - i]));
   3228         mOut |= FASTABS(yLo);
   3229         y[(15 + i) * m_NBANDS] = yLo;
   3230     }
   3231 
   3232     /* save previous (unwindowed) for overlap - only need samples 6-8, 12-17 */
   3233     for (i = 6; i < 9; i++)
   3234         *xPrev++ = xBuf[i] >> 2;
   3235     for (i = 12; i < 18; i++)
   3236         *xPrev++ = xBuf[i] >> 2;
   3237 
   3238     xPrev -= 9;
   3239     mOut |= FreqInvertRescale(y, xPrev, blockIdx, es);
   3240 
   3241     return mOut;
   3242 }
   3243 
   3244 /***********************************************************************************************************************
   3245  * Function:    HybridTransform
   3246  *
   3247  * Description: IMDCT's, windowing, and overlap-add on long/short/mixed blocks
   3248  *
   3249  * Inputs:      vector of input coefficients, length = nBlocksTotal * 18)
   3250  *              vector of overlap samples from last time, length = nBlocksPrev * 9)
   3251  *              buffer for output samples, length = MAXNSAMP
   3252  *              SideInfoSub struct for this granule/channel
   3253  *              BlockCount struct with necessary info
   3254  *                number of non-zero input and overlap blocks
   3255  *                number of long blocks in input vector (rest assumed to be short blocks)
   3256  *                number of blocks which use long window (type) 0 in case of mixed block
   3257  *                  (bc->currWinSwitch, 0 for non-mixed blocks)
   3258  *
   3259  * Outputs:     transformed, windowed, and overlapped sample buffer
   3260  *              does frequency inversion on odd blocks
   3261  *              updated buffer of samples for overlap
   3262  *
   3263  * Return:      number of non-zero IMDCT blocks calculated in this call
   3264  *                (including overlap-add)
   3265  **********************************************************************************************************************/
   3266 int HybridTransform(int *xCurr, int *xPrev, int y[m_BLOCK_SIZE][m_NBANDS], SideInfoSub_t *sis, BlockCount_t *bc){
   3267     int xPrevWin[18], currWinIdx, prevWinIdx;
   3268     int i, j, nBlocksOut, nonZero, mOut;
   3269     int fiBit, xp;
   3270 
   3271     assert(bc->nBlocksLong  <= m_NBANDS);
   3272     assert(bc->nBlocksTotal <= m_NBANDS);
   3273     assert(bc->nBlocksPrev  <= m_NBANDS);
   3274 
   3275     mOut = 0;
   3276 
   3277     /* do long blocks, if any */
   3278     for (i = 0; i < bc->nBlocksLong; i++) {
   3279         /* currWinIdx picks the right window for long blocks (if mixed, long blocks use window type 0) */
   3280         currWinIdx = sis->blockType;
   3281         if (sis->mixedBlock && i < bc->currWinSwitch)
   3282             currWinIdx = 0;
   3283 
   3284         prevWinIdx = bc->prevType;
   3285         if (i < bc->prevWinSwitch)
   3286             prevWinIdx = 0;
   3287 
   3288         /* do 36-point IMDCT, including windowing and overlap-add */
   3289         mOut |= IMDCT36(xCurr, xPrev, &(y[0][i]), currWinIdx, prevWinIdx, i,
   3290                 bc->gbIn);
   3291         xCurr += 18;
   3292         xPrev += 9;
   3293     }
   3294 
   3295     /* do short blocks (if any) */
   3296     for (; i < bc->nBlocksTotal; i++) {
   3297         assert(sis->blockType == 2);
   3298 
   3299         prevWinIdx = bc->prevType;
   3300         if (i < bc->prevWinSwitch)
   3301             prevWinIdx = 0;
   3302 
   3303         mOut |= IMDCT12x3(xCurr, xPrev, &(y[0][i]), prevWinIdx, i, bc->gbIn);
   3304         xCurr += 18;
   3305         xPrev += 9;
   3306     }
   3307     nBlocksOut = i;
   3308 
   3309     /* window and overlap prev if prev longer that current */
   3310     for (; i < bc->nBlocksPrev; i++) {
   3311         prevWinIdx = bc->prevType;
   3312         if (i < bc->prevWinSwitch)
   3313             prevWinIdx = 0;
   3314         WinPrevious(xPrev, xPrevWin, prevWinIdx);
   3315 
   3316         nonZero = 0;
   3317         fiBit = i << 31;
   3318         for (j = 0; j < 9; j++) {
   3319             xp = xPrevWin[2 * j + 0] << 2; /* << 2 temp for scaling */
   3320             nonZero |= xp;
   3321             y[2 * j + 0][i] = xp;
   3322             mOut |= FASTABS(xp);
   3323 
   3324             /* frequency inversion on odd blocks/odd samples (flip sign if i odd, j odd) */
   3325             xp = xPrevWin[2 * j + 1] << 2;
   3326             xp = (xp ^ (fiBit >> 31)) + (i & 0x01);
   3327             nonZero |= xp;
   3328             y[2 * j + 1][i] = xp;
   3329             mOut |= FASTABS(xp);
   3330 
   3331             xPrev[j] = 0;
   3332         }
   3333         xPrev += 9;
   3334         if (nonZero)
   3335             nBlocksOut = i;
   3336     }
   3337 
   3338     /* clear rest of blocks */
   3339     for (; i < 32; i++) {
   3340         for (j = 0; j < 18; j++)
   3341             y[j][i] = 0;
   3342     }
   3343 
   3344     bc->gbOut = CLZ(mOut) - 1;
   3345 
   3346     return nBlocksOut;
   3347 }
   3348 
   3349 /***********************************************************************************************************************
   3350  * Function:    IMDCT
   3351  *
   3352  * Description: do alias reduction, inverse MDCT, overlap-add, and frequency inversion
   3353  *
   3354  * Inputs:      MP3DecInfo structure filled by UnpackFrameHeader(), UnpackSideInfo(),
   3355  *                UnpackScaleFactors(), and DecodeHuffman() (for this granule, channel)
   3356  *                includes PCM samples in overBuf (from last call to IMDCT) for OLA
   3357  *              index of current granule and channel
   3358  *
   3359  * Outputs:     PCM samples in outBuf, for input to subband transform
   3360  *              PCM samples in overBuf, for OLA next time
   3361  *              updated hi->nonZeroBound index for this channel
   3362  *
   3363  * Return:      0 on success,  -1 if null input pointers
   3364  **********************************************************************************************************************/
   3365 // a bit faster in RAM
   3366 /*__attribute__ ((section (".data")))*/
   3367 int IMDCT( int gr, int ch) {
   3368     int nBfly, blockCutoff;
   3369     BlockCount_t bc;
   3370 
   3371     /* m_SideInfo is an array of up to 4 structs, stored as gr0ch0, gr0ch1, gr1ch0, gr1ch1 */
   3372     /* anti-aliasing done on whole long blocks only
   3373      * for mixed blocks, nBfly always 1, except 3 for 8 kHz MPEG 2.5 (see sfBandTab)
   3374      *   nLongBlocks = number of blocks with (possibly) non-zero power
   3375      *   nBfly = number of butterflies to do (nLongBlocks - 1, unless no long blocks)
   3376      */
   3377     blockCutoff = m_SFBandTable.l[(m_MPEGVersion == MPEG1 ? 8 : 6)] / 18; /* same as 3* num short sfb's in spec */
   3378     if (m_SideInfoSub[gr][ch].blockType != 2) {
   3379         /* all long transforms */
   3380         int x=(m_HuffmanInfo->nonZeroBound[ch] + 7) / 18 + 1;
   3381         bc.nBlocksLong=(x<32 ? x : 32);
   3382         //bc.nBlocksLong = min((hi->nonZeroBound[ch] + 7) / 18 + 1, 32);
   3383         nBfly = bc.nBlocksLong - 1;
   3384     } else if (m_SideInfoSub[gr][ch].blockType == 2 && m_SideInfoSub[gr][ch].mixedBlock) {
   3385         /* mixed block - long transforms until cutoff, then short transforms */
   3386         bc.nBlocksLong = blockCutoff;
   3387         nBfly = bc.nBlocksLong - 1;
   3388     } else {
   3389         /* all short transforms */
   3390         bc.nBlocksLong = 0;
   3391         nBfly = 0;
   3392     }
   3393 
   3394     AntiAlias(m_HuffmanInfo->huffDecBuf[ch], nBfly);
   3395     int x=m_HuffmanInfo->nonZeroBound[ch];
   3396     int y=nBfly * 18 + 8;
   3397     m_HuffmanInfo->nonZeroBound[ch]=(x>y ? x: y);
   3398 
   3399     assert(m_HuffmanInfo->nonZeroBound[ch] <= m_MAX_NSAMP);
   3400 
   3401     /* for readability, use a struct instead of passing a million parameters to HybridTransform() */
   3402     bc.nBlocksTotal = (m_HuffmanInfo->nonZeroBound[ch] + 17) / 18;
   3403     bc.nBlocksPrev = m_IMDCTInfo->numPrevIMDCT[ch];
   3404     bc.prevType = m_IMDCTInfo->prevType[ch];
   3405     bc.prevWinSwitch = m_IMDCTInfo->prevWinSwitch[ch];
   3406     /* where WINDOW switches (not nec. transform) */
   3407     bc.currWinSwitch = (m_SideInfoSub[gr][ch].mixedBlock ? blockCutoff : 0);
   3408     bc.gbIn = m_HuffmanInfo->gb[ch];
   3409 
   3410     m_IMDCTInfo->numPrevIMDCT[ch] = HybridTransform(m_HuffmanInfo->huffDecBuf[ch], m_IMDCTInfo->overBuf[ch],
   3411             m_IMDCTInfo->outBuf[ch], &m_SideInfoSub[gr][ch], &bc);
   3412     m_IMDCTInfo->prevType[ch] = m_SideInfoSub[gr][ch].blockType;
   3413     m_IMDCTInfo->prevWinSwitch[ch] = bc.currWinSwitch; /* 0 means not a mixed block (either all short or all long) */
   3414     m_IMDCTInfo->gb[ch] = bc.gbOut;
   3415 
   3416     assert(m_IMDCTInfo->numPrevIMDCT[ch] <= m_NBANDS);
   3417 
   3418     /* output has gained 2 int bits */
   3419     return 0;
   3420 }
   3421 
   3422 /***********************************************************************************************************************
   3423  * S U B B A N D
   3424  **********************************************************************************************************************/
   3425 
   3426 /***********************************************************************************************************************
   3427  * Function:    Subband
   3428  *
   3429  * Description: do subband transform on all the blocks in one granule, all channels
   3430  *
   3431  * Inputs:      filled MP3DecInfo structure, after calling IMDCT for all channels
   3432  *              vbuf[ch] and vindex[ch] must be preserved between calls
   3433  *
   3434  * Outputs:     decoded PCM data, interleaved LRLRLR... if stereo
   3435  *
   3436  * Return:      0 on success,  -1 if null input pointers
   3437  **********************************************************************************************************************/
   3438 int Subband( short *pcmBuf) {
   3439     int b;
   3440     if (m_MP3DecInfo->nChans == 2) {
   3441         /* stereo */
   3442         for (b = 0; b < m_BLOCK_SIZE; b++) {
   3443             FDCT32(m_IMDCTInfo->outBuf[0][b], m_SubbandInfo->vbuf + 0 * 32, m_SubbandInfo->vindex,
   3444                     (b & 0x01), m_IMDCTInfo->gb[0]);
   3445             FDCT32(m_IMDCTInfo->outBuf[1][b], m_SubbandInfo->vbuf + 1 * 32, m_SubbandInfo->vindex,
   3446                     (b & 0x01), m_IMDCTInfo->gb[1]);
   3447             PolyphaseStereo(pcmBuf,
   3448                     m_SubbandInfo->vbuf + m_SubbandInfo->vindex + m_VBUF_LENGTH * (b & 0x01),
   3449                     polyCoef);
   3450             m_SubbandInfo->vindex = (m_SubbandInfo->vindex - (b & 0x01)) & 7;
   3451             pcmBuf += (2 * m_NBANDS);
   3452         }
   3453     } else {
   3454         /* mono */
   3455         for (b = 0; b < m_BLOCK_SIZE; b++) {
   3456             FDCT32(m_IMDCTInfo->outBuf[0][b], m_SubbandInfo->vbuf + 0 * 32, m_SubbandInfo->vindex,
   3457                     (b & 0x01), m_IMDCTInfo->gb[0]);
   3458             PolyphaseMono(pcmBuf,
   3459                     m_SubbandInfo->vbuf + m_SubbandInfo->vindex + m_VBUF_LENGTH * (b & 0x01),
   3460                     polyCoef);
   3461             m_SubbandInfo->vindex = (m_SubbandInfo->vindex - (b & 0x01)) & 7;
   3462             pcmBuf += m_NBANDS;
   3463         }
   3464     }
   3465 
   3466     return 0;
   3467 }
   3468 
   3469 /***********************************************************************************************************************
   3470  * D C T 3 2
   3471  **********************************************************************************************************************/
   3472 
   3473 /***********************************************************************************************************************
   3474  * Function:    FDCT32
   3475  *
   3476  * Description: Ken's highly-optimized 32-point DCT (radix-4 + radix-8)
   3477  *
   3478  * Inputs:      input buffer, length = 32 samples
   3479  *              require at least 6 guard bits in input vector x to avoid possibility
   3480  *                of overflow in internal calculations (see bbtest_imdct test app)
   3481  *              buffer offset and oddblock flag for polyphase filter input buffer
   3482  *              number of guard bits in input
   3483  *
   3484  * Outputs:     output buffer, data copied and interleaved for polyphase filter
   3485  *              no guarantees about number of guard bits in output
   3486  *
   3487  * Return:      none
   3488  *
   3489  * Notes:       number of muls = 4*8 + 12*4 = 80
   3490  *              final stage of DCT is hardcoded to shuffle data into the proper order
   3491  *                for the polyphase filterbank
   3492  *              fully unrolled stage 1, for max precision (scale the 1/cos() factors
   3493  *                differently, depending on magnitude)
   3494  *              guard bit analysis verified by exhaustive testing of all 2^32
   3495  *                combinations of max pos/max neg values in x[]
   3496  **********************************************************************************************************************/
/* D32FP(i, s1, s2): one butterfly of the first FDCT32 pass, operating in place on
 * buf[i], buf[15-i], buf[16+i] and buf[31-i].
 *
 * Not self-contained: expands inside a scope that provides the locals a0..a3, b0..b3,
 * the data pointer `buf` and the coefficient cursor `cptr`.
 * Each expansion consumes three coefficients from cptr (the third one is used twice).
 * The first product is always shifted left by 1, the second by s1, the last two by s2.
 */
#define D32FP(i, s1, s2) { \
    a0 = buf[i];			a3 = buf[31-i]; \
	a1 = buf[15-i];			a2 = buf[16+i]; \
    b0 = a0 + a3;			b3 = MULSHIFT32(*cptr++, a0 - a3) << 1;	\
	b1 = a1 + a2;			b2 = MULSHIFT32(*cptr++, a1 - a2) << (s1);	\
	buf[i] = b0 + b1;		buf[15-i] = MULSHIFT32(*cptr,   b0 - b1) << (s2); \
	buf[16+i] = b2 + b3;    buf[31-i] = MULSHIFT32(*cptr++, b3 - b2) << (s2); \
}

/* per-butterfly shift amounts for D32FP: entries 0..7 are s1 for i = 0..7, entries 8..15 are s2 for i = 0..7 */
static const uint8_t FDCT32s1s2[16] = {5,3,3,2,2,1,1,1, 1,1,1,1,1,2,2,4};
   3507 
/* 32-point DCT of buf[0..31], followed by a shuffle of the results into dest.
 *   buf      - 32 input values; overwritten (used as scratch) by the transform
 *   dest     - output buffer; every result is stored twice, at d[0] and d[8], with
 *              consecutive results 64 ints apart
 *   offset   - base offset into dest (callers in this file pass the vbuf index, 0..7)
 *   oddBlock - block parity; selects which half of dest (0 or m_VBUF_LENGTH) each group goes to
 *   gb       - guard bits available in buf; if fewer than 6 the input is scaled down first
 */
void FDCT32(int *buf, int *dest, int offset, int oddBlock, int gb) {
    int i, s, tmp, es;
    /* coefficient cursor: the two passes below read m_dcttab strictly in sequence */
    const int *cptr = (const int*)m_dcttab;
    int a0, a1, a2, a3, a4, a5, a6, a7;
    int b0, b1, b2, b3, b4, b5, b6, b7;
	int *d;

	/* scaling - ensure at least 6 guard bits for DCT
	 * (in practice this is already true 99% of time, so this code is
	 *  almost never triggered)
	 * es = number of bits the input is shifted down here; the outputs are
	 * clipped and shifted back up by es at the end of the function
	 */
	es = 0;
	if (gb < 6) {
		es = 6 - gb;
		for (i = 0; i < 32; i++)
			buf[i] >>= es;
	}

	/* first pass: 8 butterflies, 3 coefficients each
	 * (this loop's unsigned i shadows the function-level int i)
	 */
    for (unsigned i=0; i < 8; i++) {
        D32FP(i, FDCT32s1s2[0 + i], FDCT32s1s2[8 + i]);
    }

	/* second pass: four groups of 8 values, transformed in place;
	 * each group consumes 6 coefficients from cptr and then buf moves on by 8
	 */
	for (i = 4; i > 0; i--) {
		a0 = buf[0]; 	    a7 = buf[7];		a3 = buf[3];	    a4 = buf[4];
		b0 = a0 + a7;	    b7 = MULSHIFT32(*cptr++, a0 - a7) << 1;
		b3 = a3 + a4;	    b4 = MULSHIFT32(*cptr++, a3 - a4) << 3;
		a0 = b0 + b3;	    a3 = MULSHIFT32(*cptr,   b0 - b3) << 1;
		a4 = b4 + b7;		a7 = MULSHIFT32(*cptr++, b7 - b4) << 1;

		a1 = buf[1];	    a6 = buf[6];	    a2 = buf[2];	    a5 = buf[5];
		b1 = a1 + a6;	    b6 = MULSHIFT32(*cptr++, a1 - a6) << 1;
		b2 = a2 + a5;	    b5 = MULSHIFT32(*cptr++, a2 - a5) << 1;
		a1 = b1 + b2;		a2 = MULSHIFT32(*cptr,   b1 - b2) << 2;
		a5 = b5 + b6;	    a6 = MULSHIFT32(*cptr++, b6 - b5) << 2;

		b0 = a0 + a1;	    b1 = MULSHIFT32(m_COS4_0, a0 - a1) << 1;
		b2 = a2 + a3;	    b3 = MULSHIFT32(m_COS4_0, a3 - a2) << 1;
		buf[0] = b0;	    buf[1] = b1;
		buf[2] = b2 + b3;	buf[3] = b3;

		b4 = a4 + a5;	    b5 = MULSHIFT32(m_COS4_0, a4 - a5) << 1;
		b6 = a6 + a7;	    b7 = MULSHIFT32(m_COS4_0, a7 - a6) << 1;
		b6 += b7;
		buf[4] = b4 + b6;	buf[5] = b5 + b7;
		buf[6] = b5 + b6;	buf[7] = b7;

		buf += 8;
	}
	buf -= 32;	/* reset */

	/* output shuffle: three destination groups follow; every value is written to
	 * both d[0] and d[8], and d advances by 64 between consecutive values
	 */

	/* sample 0 - always delayed one block */
	d = dest + 64*16 + ((offset - oddBlock) & 7) + (oddBlock ? 0 : m_VBUF_LENGTH);
	s = buf[ 0];				d[0] = d[8] = s;

	/* samples 16 to 31 */
	d = dest + offset + (oddBlock ? m_VBUF_LENGTH  : 0);

	s = buf[ 1];				d[0] = d[8] = s;	d += 64;

	tmp = buf[25] + buf[29];
	s = buf[17] + tmp;			d[0] = d[8] = s;	d += 64;
	s = buf[ 9] + buf[13];		d[0] = d[8] = s;	d += 64;
	s = buf[21] + tmp;			d[0] = d[8] = s;	d += 64;

	tmp = buf[29] + buf[27];
	s = buf[ 5];				d[0] = d[8] = s;	d += 64;
	s = buf[21] + tmp;			d[0] = d[8] = s;	d += 64;
	s = buf[13] + buf[11];		d[0] = d[8] = s;	d += 64;
	s = buf[19] + tmp;			d[0] = d[8] = s;	d += 64;

	tmp = buf[27] + buf[31];
	s = buf[ 3];				d[0] = d[8] = s;	d += 64;
	s = buf[19] + tmp;			d[0] = d[8] = s;	d += 64;
	s = buf[11] + buf[15];		d[0] = d[8] = s;	d += 64;
	s = buf[23] + tmp;			d[0] = d[8] = s;	d += 64;

	tmp = buf[31];
	s = buf[ 7];				d[0] = d[8] = s;	d += 64;
	s = buf[23] + tmp;			d[0] = d[8] = s;	d += 64;
	s = buf[15];				d[0] = d[8] = s;	d += 64;
	s = tmp;					d[0] = d[8] = s;

	/* samples 16 to 1 (sample 16 used again) */
	d = dest + 16 + ((offset - oddBlock) & 7) + (oddBlock ? 0 : m_VBUF_LENGTH);

	s = buf[ 1];				d[0] = d[8] = s;	d += 64;

	tmp = buf[30] + buf[25];
	s = buf[17] + tmp;			d[0] = d[8] = s;	d += 64;
	s = buf[14] + buf[ 9];		d[0] = d[8] = s;	d += 64;
	s = buf[22] + tmp;			d[0] = d[8] = s;	d += 64;
	s = buf[ 6];				d[0] = d[8] = s;	d += 64;

	tmp = buf[26] + buf[30];
	s = buf[22] + tmp;			d[0] = d[8] = s;	d += 64;
	s = buf[10] + buf[14];		d[0] = d[8] = s;	d += 64;
	s = buf[18] + tmp;			d[0] = d[8] = s;	d += 64;
	s = buf[ 2];				d[0] = d[8] = s;	d += 64;

	tmp = buf[28] + buf[26];
	s = buf[18] + tmp;			d[0] = d[8] = s;	d += 64;
	s = buf[12] + buf[10];		d[0] = d[8] = s;	d += 64;
	s = buf[20] + tmp;			d[0] = d[8] = s;	d += 64;
	s = buf[ 4];				d[0] = d[8] = s;	d += 64;

	tmp = buf[24] + buf[28];
	s = buf[20] + tmp;			d[0] = d[8] = s;	d += 64;
	s = buf[ 8] + buf[12];		d[0] = d[8] = s;	d += 64;
	s = buf[16] + tmp;			d[0] = d[8] = s;

	/* this is so rarely invoked that it's not worth making two versions of the output
	 *   shuffle code (one for no shift, one for clip + variable shift) like in IMDCT
	 * here we just load, clip, shift, and store on the rare instances that es != 0
	 * (revisits the same three destination groups as above: 1 + 16 + 16 values;
	 *  each value is clipped to 31 - es bits so the shift back up by es stays in range)
	 */
	if (es) {
		d = dest + 64*16 + ((offset - oddBlock) & 7) + (oddBlock ? 0 : m_VBUF_LENGTH);
		s = d[0];	CLIP_2N(s, (31 - es));	d[0] = d[8] = (s << es);

		d = dest + offset + (oddBlock ? m_VBUF_LENGTH  : 0);
		for (i = 16; i <= 31; i++) {
			s = d[0];	CLIP_2N(s, (31 - es));	d[0] = d[8] = (s << es);	d += 64;
		}

		d = dest + 16 + ((offset - oddBlock) & 7) + (oddBlock ? 0 : m_VBUF_LENGTH);
		for (i = 15; i >= 0; i--) {
			s = d[0];	CLIP_2N(s, (31 - es));	d[0] = d[8] = (s << es);	d += 64;
		}
	}
}
   3639 
   3640 /***********************************************************************************************************************
   3641  * P O L Y P H A S E
   3642  **********************************************************************************************************************/
   3643 inline
   3644 short ClipToShort(int x, int fracBits){
   3645 
   3646     /* assumes you've already rounded (x += (1 << (fracBits-1))) */
   3647     x >>= fracBits;
   3648 
   3649 #ifndef __XTENSA__
   3650     /* Ken's trick: clips to [-32768, 32767] */
   3651     //ok vor generic case (fb)
   3652     int sign = x >> 31;
   3653     if (sign != (x >> 15))
   3654         x = sign ^ ((1 << 15) - 1);
   3655 
   3656     return (short)x;
   3657 #else
   3658     //this is better on xtensa (fb)
   3659     asm ("clamps %0, %1, 15" : "=a" (x) : "a" (x) : );
   3660     return x;
   3661 #endif
   3662 }
   3663 /***********************************************************************************************************************
   3664  * Function:    PolyphaseMono
   3665  *
   3666  * Description: filter one subband and produce 32 output PCM samples for one channel
   3667  *
   3668  * Inputs:      pointer to PCM output buffer
   3669  *              number of "extra shifts" (vbuf format = Q(DQ_FRACBITS_OUT-2))
   3670  *              pointer to start of vbuf (preserved from last call)
   3671  *              start of filter coefficient table (in proper, shuffled order)
   3672  *              no minimum number of guard bits is required for input vbuf
   3673  *                (see additional scaling comments below)
   3674  *
   3675  * Outputs:     32 samples of one channel of decoded PCM data, (i.e. Q16.0)
   3676  *
   3677  * Return:      none
   3678  **********************************************************************************************************************/
   3679 void PolyphaseMono(short *pcm, int *vbuf, const uint32_t *coefBase){
   3680     int i;
   3681     const uint32_t *coef;
   3682     int *vb1;
   3683     int vLo, vHi, c1, c2;
   3684     uint64_t sum1L, sum2L, rndVal;
   3685 
   3686     rndVal = (uint64_t)( 1ULL << ((m_DQ_FRACBITS_OUT - 2 - 2 - 15) - 1 + (32 - m_CSHIFT)) );
   3687 
   3688     /* special case, output sample 0 */
   3689     coef = coefBase;
   3690     vb1 = vbuf;
   3691     sum1L = rndVal;
   3692     for(int j=0; j<8; j++){
   3693         c1=*coef; coef++; c2=*coef; coef++; vLo=*(vb1+(j)); vHi=*(vb1+(23-(j))); // 0...7
   3694         sum1L=MADD64(sum1L, vLo, c1); sum1L=MADD64(sum1L, vHi, -c2);
   3695     }
   3696     *(pcm + 0) = ClipToShort((int)SAR64(sum1L, (32-m_CSHIFT)), m_DQ_FRACBITS_OUT - 2 - 2 - 15);
   3697 
   3698     /* special case, output sample 16 */
   3699     coef = coefBase + 256;
   3700     vb1 = vbuf + 64*16;
   3701     sum1L = rndVal;
   3702     for(int j=0; j<8; j++){
   3703         c1=*coef; coef++; vLo=*(vb1+(j)); sum1L = MADD64(sum1L, vLo,  c1); // 0...7
   3704     }
   3705     *(pcm + 16) = ClipToShort((int)SAR64(sum1L, (32-m_CSHIFT)), m_DQ_FRACBITS_OUT - 2 - 2 - 15);
   3706 
   3707     /* main convolution loop: sum1L = samples 1, 2, 3, ... 15   sum2L = samples 31, 30, ... 17 */
   3708     coef = coefBase + 16;
   3709     vb1 = vbuf + 64;
   3710     pcm++;
   3711 
   3712     /* right now, the compiler creates bad asm from this... */
   3713     for (i = 15; i > 0; i--) {
   3714         sum1L = sum2L = rndVal;
   3715         for(int j=0; j<8; j++){
   3716             c1=*coef; coef++; c2=*coef; coef++; vLo=*(vb1+(j)); vHi = *(vb1+(23-(j)));
   3717             sum1L=MADD64(sum1L, vLo,  c1); sum2L = MADD64(sum2L, vLo,  c2);
   3718             sum1L=MADD64(sum1L, vHi, -c2); sum2L = MADD64(sum2L, vHi,  c1);
   3719         }
   3720         vb1 += 64;
   3721         *(pcm)       = ClipToShort((int)SAR64(sum1L, (32-m_CSHIFT)), m_DQ_FRACBITS_OUT - 2 - 2 - 15);
   3722         *(pcm + 2*i) = ClipToShort((int)SAR64(sum2L, (32-m_CSHIFT)), m_DQ_FRACBITS_OUT - 2 - 2 - 15);
   3723         pcm++;
   3724     }
   3725 }
   3726 /***********************************************************************************************************************
   3727  * Function:    PolyphaseStereo
   3728  *
   3729  * Description: filter one subband and produce 32 output PCM samples for each channel
   3730  *
   3731  * Inputs:      pointer to PCM output buffer
   3732  *              number of "extra shifts" (vbuf format = Q(DQ_FRACBITS_OUT-2))
   3733  *              pointer to start of vbuf (preserved from last call)
   3734  *              start of filter coefficient table (in proper, shuffled order)
   3735  *              no minimum number of guard bits is required for input vbuf
   3736  *                (see additional scaling comments below)
   3737  *
   3738  * Outputs:     32 samples of two channels of decoded PCM data, (i.e. Q16.0)
   3739  *
   3740  * Return:      none
   3741  *
   3742  * Notes:       interleaves PCM samples LRLRLR...
   3743  **********************************************************************************************************************/
   3744 void PolyphaseStereo(short *pcm, int *vbuf, const uint32_t *coefBase){
   3745     int i;
   3746     const uint32_t *coef;
   3747     int *vb1;
   3748     int vLo, vHi, c1, c2;
   3749     uint64_t sum1L, sum2L, sum1R, sum2R, rndVal;
   3750 
   3751     rndVal = (uint64_t)( 1 << ((m_DQ_FRACBITS_OUT - 2 - 2 - 15) - 1 + (32 - m_CSHIFT)) );
   3752 
   3753     /* special case, output sample 0 */
   3754     coef = coefBase;
   3755     vb1 = vbuf;
   3756     sum1L = sum1R = rndVal;
   3757 
   3758     for(int j=0; j<8; j++){
   3759         c1=*coef; coef++; c2=*coef; coef++; vLo=*(vb1+(j)); vHi = *(vb1+(23-(j)));
   3760         sum1L=MADD64(sum1L, vLo,  c1); sum1L=MADD64(sum1L, vHi, -c2);
   3761         vLo=*(vb1+32+(j)); vHi=*(vb1+32+(23-(j)));
   3762         sum1R=MADD64(sum1R, vLo,  c1); sum1R=MADD64(sum1R, vHi, -c2); \
   3763     }
   3764     *(pcm + 0) = ClipToShort((int)SAR64(sum1L, (32-m_CSHIFT)), m_DQ_FRACBITS_OUT - 2 - 2 - 15);
   3765     *(pcm + 1) = ClipToShort((int)SAR64(sum1R, (32-m_CSHIFT)), m_DQ_FRACBITS_OUT - 2 - 2 - 15);
   3766 
   3767     /* special case, output sample 16 */
   3768     coef = coefBase + 256;
   3769     vb1 = vbuf + 64*16;
   3770     sum1L = sum1R = rndVal;
   3771 
   3772     for(int j=0; j<8; j++){
   3773         c1=*coef; coef++; vLo = *(vb1+(j)); sum1L = MADD64(sum1L, vLo,  c1);
   3774         vLo = *(vb1+32+(j)); sum1R = MADD64(sum1R, vLo,  c1);
   3775     }
   3776     *(pcm + 2*16 + 0) = ClipToShort((int)SAR64(sum1L, (32-m_CSHIFT)), m_DQ_FRACBITS_OUT - 2 - 2 - 15);
   3777     *(pcm + 2*16 + 1) = ClipToShort((int)SAR64(sum1R, (32-m_CSHIFT)), m_DQ_FRACBITS_OUT - 2 - 2 - 15);
   3778 
   3779     /* main convolution loop: sum1L = samples 1, 2, 3, ... 15   sum2L = samples 31, 30, ... 17 */
   3780     coef = coefBase + 16;
   3781     vb1 = vbuf + 64;
   3782     pcm += 2;
   3783 
   3784     /* right now, the compiler creates bad asm from this... */
   3785     for (i = 15; i > 0; i--) {
   3786         sum1L = sum2L = rndVal;
   3787         sum1R = sum2R = rndVal;
   3788 
   3789         for(int j=0; j<8; j++){
   3790             c1=*coef; coef++; c2=*coef; coef++; vLo=*(vb1+(j)); vHi = *(vb1+(23-(j)));
   3791             sum1L=MADD64(sum1L, vLo,  c1); sum2L=MADD64(sum2L, vLo,  c2);
   3792             sum1L=MADD64(sum1L, vHi, -c2); sum2L=MADD64(sum2L, vHi,  c1);
   3793             vLo=*(vb1+32+(j));  vHi=*(vb1+32+(23-(j)));
   3794             sum1R=MADD64(sum1R, vLo,  c1); sum2R=MADD64(sum2R, vLo,  c2);
   3795             sum1R=MADD64(sum1R, vHi, -c2); sum2R=MADD64(sum2R, vHi,  c1);
   3796         }
   3797         vb1 += 64;
   3798         *(pcm + 0)         = ClipToShort((int)SAR64(sum1L, (32-m_CSHIFT)), m_DQ_FRACBITS_OUT - 2 - 2 - 15);
   3799         *(pcm + 1)         = ClipToShort((int)SAR64(sum1R, (32-m_CSHIFT)), m_DQ_FRACBITS_OUT - 2 - 2 - 15);
   3800         *(pcm + 2*2*i + 0) = ClipToShort((int)SAR64(sum2L, (32-m_CSHIFT)), m_DQ_FRACBITS_OUT - 2 - 2 - 15);
   3801         *(pcm + 2*2*i + 1) = ClipToShort((int)SAR64(sum2R, (32-m_CSHIFT)), m_DQ_FRACBITS_OUT - 2 - 2 - 15);
   3802         pcm += 2;
   3803     }
   3804 }