/*
 * Copyright 2002-2004 The Apache Software Foundation.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * $Id: uniconv.cpp 176026 2004-09-08 13:57:07Z peiyongz $
 */

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <ctype.h>
#include <cunhc.h>
#include "ccsid.h"
#include "uniconv.h"

XERCES_CPP_NAMESPACE_BEGIN

// Size in bytes of the work buffer that is carved out of a conversion handle
// and passed to the conversion service. The value is parenthesized so the
// macro expands as a single operand inside any surrounding expression.
#define WORK_BUFFER_SIZE (16*1024)
// Length used for the DDA buffer of a character-conversion handle.
#define DDA_NEEDED CUNBCPRM_DDA_REQ
// Upper bound on how often a service call is repeated while it keeps
// reporting that its handle is not ready.
#define RETRY_THRESHOLD 10000

// Utility routine that copies a code set name from s to d, dropping every
// ' ', '_' and '-' and upper-casing everything else.
//
//    s   - NUL-terminated source name (must not be NULL)
//    d   - destination; must have room for max+1 bytes because a terminating
//          NUL is always written after the copied characters
//    max - maximum number of characters to store in d (excluding the NUL)
//
// Returns the number of characters consumed from s (its full length) when the
// stripped name fits, or -1 when it needs more than max characters. In the
// -1 case d holds the first max stripped characters, NUL-terminated.
static int stripNameCopy(const char *s,char *d,int max)
{
   int si=0;
   int di=0;

   for ( ; s[si] != '\0'; si++) {
      // Separators are skipped and never count against max, so trailing
      // separators cannot make a name that fits look too long.
      if ( (s[si] == ' ') || (s[si] == '_') || (s[si] == '-') )
         continue;

      // A real character that no longer fits: report truncation.
      if (di >= max) {
         d[di] = 0;
         return -1;
      }

      // toupper requires a value representable as unsigned char.
      d[di++] = (char) toupper((unsigned char) s[si]);
   }
   d[di] = 0;
   return si;
}

// This takes a name and does a lookup into the ccsid table (from ccsid.h)
// to find the corresponding ccsid. It also checks if the string ends in s390
// and returns that information to the caller.
// The lookup into the table is done via a binary search since we know that the
// table was nicely sorted for us.
static int getccsid(const char *s,int * is390)
{
   char tmpstr[_AE_MAX_CODESET_NAME_LENGTH];
   int start;
   int limit;
   int index;
   int result;
   int thelen;

   // Clean up the name....
   if (s == NULL)
      return -1;
   if ((thelen = stripNameCopy(s,tmpstr,_AE_MAX_CODESET_NAME_LENGTH-1)) == -1)
      return -1;

   // Check for the S390 string in the name
   *is390 = 0;
   if ( (strstr((char *)tmpstr, "S390")) != NULL )
      *is390 = 1;

   // Now lookup the name via a binary search
   start = 0;
   limit = _AE_NUM_OF_CODESETS;
   index = limit/2;
   while ( ((result=strcoll(tmpstr, CCSID_MAPPING[index].NAME)) != 0) &&
            (start < limit-1) ) {
      if (result < 0)
         limit = index;
      else
          start = index;
      index = (start+limit)/2;
   }

   if (result != 0 && start >= limit-1)
      return -1;

   return CCSID_MAPPING[index].CCSID;
}
// **********************************************************************
// These are the character conversion services
// **********************************************************************

// "Open" the conversion. Allocate memory to hold the handle which
// unicode services requires. Call unicode services with a 0 length
// so that it can initialize its handle.
// Note that unicode services must always be called in a loop since
// it could be busy reloading its tables.
// Parameters:
//    destenc - name of the target code set
//    srcenc  - name of the source code set
// Returns the handle (a malloc'ed block that uniconv_close frees), or
// (uniconv_t)-1 on failure. On failure errno is ENOENT when getccsid cannot
// resolve one of the names, EINVAL when the service call does not end with
// CUN_RC_OK, or whatever malloc left behind when the allocation fails.
//
// Layout of the allocated block:
//    [CUNBCPRM][0-7 pad bytes][DDA area, DDA_NEEDED bytes] ... [work buffer]
// The work buffer starts at sizeof(CUNBCPRM)+DDA_NEEDED+8, i.e. past the
// DDA area wherever the alignment padding puts it.
uniconv_t uniconv_open(const char *destenc, const char *srcenc) {
   CUNBCPRM  defparms = {CUNBCPRM_DEFAULT};
   CUNBCPRM * tmpp;
   void * handle_area;
   char * base;
   char * dda;
   size_t misalign;
   int srcis390;
   int destis390;
   int srcccsid;
   int destccsid;
   int retry_count;

   errno = 0;
   handle_area = malloc (sizeof(CUNBCPRM)+DDA_NEEDED+WORK_BUFFER_SIZE+8);
   if (handle_area==NULL)
      return (uniconv_t)-1;
   tmpp = (CUNBCPRM *) handle_area;
   // All offsets are computed with byte-pointer arithmetic so the full
   // address is kept regardless of how wide a pointer is.
   base = (char *) handle_area;

   // initialize the parm area with defaults, then start filling it
   // in with our values.
   memcpy(tmpp,&defparms,sizeof(defparms));
   tmpp->Src_Buf_Len= 0;

   // get the ccsids. The int results are tested directly so the check does
   // not depend on the type of the parameter-area fields.
   if ( ((srcccsid=getccsid(srcenc,&srcis390)) == -1) ||
        ((destccsid=getccsid(destenc,&destis390)) == -1) ) {
      // Release first, then set errno, so the reported value is ours.
      free(handle_area);
      errno=ENOENT;
      return (uniconv_t)-1;
   }
   tmpp->Src_CCSID=srcccsid;
   tmpp->Targ_CCSID=destccsid;

   tmpp->Wrk_Buf_Ptr=(void*) (base + sizeof(CUNBCPRM)+DDA_NEEDED +8);
   tmpp->Wrk_Buf_Len=WORK_BUFFER_SIZE;

   // Doubleword align the DDA area: round the address that follows the
   // parameter area up to the next multiple of 8.
   dda = base + sizeof(CUNBCPRM);
   misalign = ((size_t) dda) & 7;
   if (misalign != 0)
      dda += 8 - misalign;
   tmpp->DDA_Buf_Ptr=(void*) dda;
   tmpp->DDA_Buf_Len=DDA_NEEDED;

   // This flag tells the services to automatically refresh the handle if it
   // becomes invalid.
   tmpp->Flag1|=CUNBCPRM_REFRESH_AT_INV_HANDLE_START;
   // NOTE(review): presumably selects substitution for characters that
   // cannot be converted -- confirm against the CUNBCPRM documentation.
   tmpp->Flag1|=CUNBCPRM_SUB_ACTION_SUBSTITUTE;

   /* Determine which technique to use */
   if ( (srcis390) || (destis390) )
      // This technique causes it to swap LF and NL.
      memcpy(tmpp->Technique,"L       ",8);
   else
      memcpy(tmpp->Technique,"        ",8);

   // Retry if the services are busy reloading their tables. Src_Buf_Len is
   // 0, so this call only lets the service set up its handle.
   retry_count = 0;
   while (retry_count < RETRY_THRESHOLD) {
      CUNLCNV(tmpp);
      if (tmpp->Return_Code == CUN_RC_OK)
         break;
      else if ( (tmpp->Return_Code == CUN_RC_WARN) &&
                ( (tmpp->Reason_Code == CUN_RS_NO_HANDLE) ||
                  (tmpp->Reason_Code == CUN_RS_INV_HANDLE_NOSET) ||
                  (tmpp->Reason_Code == CUN_RS_INV_HANDLE_SET) ) )
         // Let it loop around again
         retry_count++;
      else
         break;
   }

   // Any outcome other than OK (including running out of retries) fails.
   if (tmpp->Return_Code != CUN_RC_OK) {
      free(handle_area);
      errno=EINVAL;
      handle_area = (uniconv_t)-1;
   }

   return handle_area;
}

// Close a character-conversion handle. All that is required for close is to
// free the handle buffer.
//
// Returns 0 on success. Returns -1 with errno set to EBADF when the handle
// is NULL or the (uniconv_t)-1 failure value; nothing is freed in that case.
int uniconv_close(uniconv_t handle_area) {
   errno = 0;
   // Test the pointer value itself; narrowing it to int first could
   // misjudge a valid handle where pointers are wider than int.
   if ( (handle_area == NULL) || (handle_area == (uniconv_t)-1) ) {
      errno=EBADF;
      return -1;
   }
   free(handle_area);
   return 0;
}

// This does the real conversion.
// Note that unicode services must always be called in a loop since
// it could be busy reloading its tables.
//
// Parameters:
//    cd           - handle returned by uniconv_open
//    inbuf        - in: start of the source data; out: source position
//                   reported back by the service
//    inbytesleft  - in: source length; out: remaining source length
//    outbuf       - in: start of the target buffer; out: target position
//                   reported back by the service
//    outbytesleft - in: target length; out: remaining target length
//
// Returns the number of source bytes consumed (initial *inbytesleft minus
// the final one). errno is 0 on full success. When the service reports
// CUN_RS_TRG_EXH errno is E2BIG, and when it reports CUN_RS_MBC_INCOMPLETE
// errno is EINVAL; in both cases the consumed byte count is still returned.
// Any other failure, or an invalid handle, returns -1 with errno EBADF.
// Except for the invalid-handle case, the four in/out values are updated
// even when the call fails.
int uniconv(uniconv_t cd, char **inbuf,  size_t *inbytesleft,
                          char **outbuf, size_t *outbytesleft) {
   CUNBCPRM * tmpp;
   size_t startinlen;
   int retry_count;

   errno = 0;

   // Validate the handle before touching any of the other arguments, and
   // compare the pointer itself rather than a value narrowed to int.
   if ( (cd == NULL) || (cd == (uniconv_t)-1) ) {
      errno=EBADF;
      return -1;
   }
   startinlen = *inbytesleft;

   // Fill in the parameter area with current values
   tmpp = (CUNBCPRM *) cd;
   tmpp->Src_Buf_Ptr = *inbuf;
   tmpp->Src_Buf_Len = *inbytesleft;
   tmpp->Targ_Buf_Ptr = *outbuf;
   tmpp->Targ_Buf_Len = *outbytesleft;

   // Retry if the services are busy reloading their tables.
   retry_count = 0;
   while (retry_count < RETRY_THRESHOLD) {
      CUNLCNV(tmpp);
      if (tmpp->Return_Code == CUN_RC_OK)
         break;
      else if ( (tmpp->Return_Code == CUN_RC_WARN) &&
                ( (tmpp->Reason_Code == CUN_RS_NO_HANDLE) ||
                  (tmpp->Reason_Code == CUN_RS_INV_HANDLE_NOSET) ||
                  (tmpp->Reason_Code == CUN_RS_INV_HANDLE_SET) ) )
         // Let it loop around again
         retry_count++;
      else
         break;
   }

   // Hand the positions and remaining lengths back to the caller.
   *inbuf        = (char *)tmpp->Src_Buf_Ptr;
   *inbytesleft  = tmpp->Src_Buf_Len;
   *outbuf       = (char *)tmpp->Targ_Buf_Ptr;
   *outbytesleft = tmpp->Targ_Buf_Len;

   if (tmpp->Return_Code != CUN_RC_OK) {
      if (tmpp->Reason_Code == CUN_RS_TRG_EXH)
         errno=E2BIG;
      else if (tmpp->Reason_Code == CUN_RS_MBC_INCOMPLETE)
         errno=EINVAL;
      else {
         errno=EBADF;
         return -1;
      }
   }
   return (int)(startinlen-*inbytesleft);
}

// **********************************************************************
// These are the case conversion services.
// **********************************************************************

// This "opens" the case conversion. It allocates the parameter area
// then does a dummy call to unicode services so that it can set up
// the handle.
// Note that unicode services must always be called in a loop since
// it could be busy reloading its tables.
//
//    direction - stored in Conv_Type of the parameter area; the callers in
//                this file pass CUNBAPRM_TO_UPPER or CUNBAPRM_TO_LOWER
//
// Returns the handle (a malloc'ed block holding the CUNBAPRM parameter area
// followed by its DDA area), or (uniconv_t)-1 on failure. When the service
// call does not end with CUN_RC_OK the block is freed and errno is EINVAL;
// when malloc fails errno is whatever malloc left behind.
static inline uniconv_t uniconv_case_open(unsigned char direction) {
   CUNBAPRM  defparms = {CUNBAPRM_DEFAULT};
   CUNBAPRM * tmpp;
   void * handle_area;
   unichar_t inchar = 0x61;
   unichar_t outchar;
   int retry_count = 0;

   errno = 0;
   handle_area = malloc (sizeof(CUNBAPRM)+CUNBAPRM_DDA_REQ);
   if (handle_area==NULL)
      return (uniconv_t)-1;
   tmpp = (CUNBAPRM *) handle_area;

   // initialize the parm area with defaults, then start filling it
   // in with our values.
   memcpy(tmpp,&defparms,sizeof(defparms));
   // The DDA area sits directly behind the parameter area. Byte-pointer
   // arithmetic keeps the full address whatever the pointer width.
   tmpp->DDA_Buf_Ptr=(void*) ((char *) handle_area + sizeof(CUNBAPRM));
   tmpp->DDA_Buf_Len=CUNBAPRM_DDA_REQ;
   // This flag tells the services to automatically refresh the handle if it
   // becomes invalid.
   tmpp->Flag1|=CUNBAPRM_REFRESH_AT_INV_HANDLE_START;

   // Dummy one-character conversion. The buffer pointers refer to locals of
   // this function; uniconv_caseit stores fresh pointers before each call.
   tmpp->Src_Buf_Ptr=&inchar;
   tmpp->Targ_Buf_Ptr=&outchar;
   tmpp->Targ_Buf_Len=sizeof(unichar_t);
   tmpp->Src_Buf_Len=sizeof(unichar_t);
   tmpp->Conv_Type=direction;

   // Retry if the services are busy reloading their tables.
   while (true) {
      CUNLASE ( tmpp );

      if (tmpp->Return_Code == CUN_RC_OK) {
         break;
      } else if ( (tmpp->Return_Code == CUN_RC_WARN) &&
                  ( (tmpp->Reason_Code == CUN_RS_NO_HANDLE) ||
                    (tmpp->Reason_Code == CUN_RS_INV_HANDLE_NOSET) ||
                    (tmpp->Reason_Code == CUN_RS_INV_HANDLE_SET) ) ) {
         // Let it loop around again, but not forever.
         retry_count++;
         if (retry_count > RETRY_THRESHOLD)
            break;
      } else {
         break;
      }
   }

   // Every failure path ends here with a non-OK return code, so errno is
   // set once, after the block has been released.
   if (tmpp->Return_Code != CUN_RC_OK) {
      free(handle_area);
      errno=EINVAL;
      handle_area = (uniconv_t)-1;
   }

   return handle_area;
}

// These are the actual external interfaces for the open function
// Open a case-conversion handle for the CUNBAPRM_TO_UPPER direction.
// The result is exactly what uniconv_case_open returns.
uniconv_t uniconv_toupper_open() {
   uniconv_t upper_handle = uniconv_case_open(CUNBAPRM_TO_UPPER);
   return upper_handle;
}
// Open a case-conversion handle for the CUNBAPRM_TO_LOWER direction.
// The result is exactly what uniconv_case_open returns.
uniconv_t uniconv_tolower_open() {
   uniconv_t lower_handle = uniconv_case_open(CUNBAPRM_TO_LOWER);
   return lower_handle;
}
// This closes the case conversion. All it does is free the handle buffer.
//
// Returns 0 on success. Returns -1 with errno set to EBADF when the handle
// is NULL or the (uniconv_t)-1 failure value; nothing is freed in that case.
int _uniconv_case_close(uniconv_t handle_area) {
   errno = 0;
   // Test the pointer value itself; narrowing it to int first could
   // misjudge a valid handle where pointers are wider than int.
   if ( (handle_area == NULL) || (handle_area == (uniconv_t)-1) ) {
      errno=EBADF;
      return -1;
   }
   free(handle_area);
   return 0;
}

// This does the actual case conversion. The direction is already
// stored in the handle buffer.
// Note that unicode services must always be called in a loop since
// it could be busy reloading its tables.
//
//    cd     - handle from uniconv_toupper_open / uniconv_tolower_open
//    inchar - the character to convert
//
// Returns the converted character with errno 0 on success. For an invalid
// handle errno is EBADF and (unichar_t)-1 is returned. When the service
// call fails or the retry limit is exceeded errno is ENOSYS and the result
// starts out as inchar, so the caller never receives an indeterminate value.
unichar_t uniconv_caseit (uniconv_t cd,unichar_t inchar) {
   // Pre-load the result with the input so every failure path returns a
   // defined value.
   unichar_t outchar = inchar;
   CUNBAPRM * tmpp;
   int retry_count = 0;

   errno = 0;
   // Compare the pointer itself rather than a value narrowed to int.
   if ( (cd == NULL) || (cd == (uniconv_t)-1) ) {
      errno=EBADF;
      return (unichar_t)-1;
   }

   // Point the parameter area at this call's input and output characters.
   tmpp = (CUNBAPRM *) cd;
   tmpp->Src_Buf_Ptr=&inchar;
   tmpp->Targ_Buf_Ptr=&outchar;
   tmpp->Targ_Buf_Len=sizeof(unichar_t);
   tmpp->Src_Buf_Len=sizeof(unichar_t);

   // Retry if the services are busy reloading their tables.
   while (true) {
      CUNLASE ( tmpp );

      if (tmpp->Return_Code == CUN_RC_OK) {
         break;
      }
      else if ( (tmpp->Return_Code == CUN_RC_WARN) &&
                ( (tmpp->Reason_Code == CUN_RS_NO_HANDLE) ||
                  (tmpp->Reason_Code == CUN_RS_INV_HANDLE_NOSET) ||
                  (tmpp->Reason_Code == CUN_RS_INV_HANDLE_SET) ) ) {
         // Let it loop around again, but not forever.
         retry_count++;
         if (retry_count > RETRY_THRESHOLD) {
            errno = ENOSYS;
            break;
         }
      } else {
         errno = ENOSYS;
         break;
      }
   }
   return outchar;
}

XERCES_CPP_NAMESPACE_END