/* * Copyright 2002-2004 The Apache Software Foundation. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* * $Id: uniconv.cpp 176026 2004-09-08 13:57:07Z peiyongz $ */ #include #include #include #include #include #include #include "ccsid.h" #include "uniconv.h" XERCES_CPP_NAMESPACE_BEGIN #define WORK_BUFFER_SIZE 16*1024 #define DDA_NEEDED CUNBCPRM_DDA_REQ #define RETRY_THRESHOLD 10000 // This is utility routine which strips '-', '_' and spaces from the name and // also upper cases the name. It also returns the length of the string. static int stripNameCopy(const char *s,char *d,int max) { int si=0; int di=0; while ( (s[si] != '\0') && (di < max) ) { if ( (s[si] == ' ') || (s[si] == '_') || (s[si] == '-') ) si++; else { d[di] = toupper(s[si]); si++;di++; } } d[di] = 0; if (s[si] != '\0') return -1; return si; } // This takes a name and does a lookup into the ccsid table (from ccsid.h) // to find the corresponding ccsid. It also checks if the string ends in s390 // and returns that information to the caller. // The lookup into the table is done via a binary search since we know that the // table was nicely sorted for us. static int getccsid(const char *s,int * is390) { char tmpstr[_AE_MAX_CODESET_NAME_LENGTH]; int start; int limit; int index; int result; int thelen; // Clean up the name.... if (s == NULL) return -1; if ((thelen = stripNameCopy(s,tmpstr,_AE_MAX_CODESET_NAME_LENGTH-1)) == -1) return -1; // Check for the S390 string in the name *is390 = 0; if ( (strstr((char *)tmpstr, "S390")) != NULL ) *is390 = 1; // Now lookup the name via a binary search start = 0; limit = _AE_NUM_OF_CODESETS; index = limit/2; while ( ((result=strcoll(tmpstr, CCSID_MAPPING[index].NAME)) != 0) && (start < limit-1) ) { if (result < 0) limit = index; else start = index; index = (start+limit)/2; } if (result != 0 && start >= limit-1) return -1; return CCSID_MAPPING[index].CCSID; } // ********************************************************************** // These are the character conversion services // ********************************************************************** // "Open" the conversion. Allocate memory to hold the handle which // unicode services requires. Call unicode services with a 0 length // so that it can initialize it's handle. // Note that unicode services must always be called in a loop since // it could be busy reloading its tables. uniconv_t uniconv_open(const char *destenc, const char *srcenc) { CUNBCPRM defparms = {CUNBCPRM_DEFAULT}; CUNBCPRM * tmpp; void * handle_area; char *cptr; int srcis390; int destis390; errno = 0; handle_area = malloc (sizeof(CUNBCPRM)+DDA_NEEDED+WORK_BUFFER_SIZE+8); tmpp = (CUNBCPRM *) handle_area; if (tmpp==NULL) return (uniconv_t)-1; // initialize the parm area with defaults, then start filling it // in with our values. memcpy(tmpp,&defparms,sizeof(defparms)); tmpp->Src_Buf_Len= 0; // get the ccsids. if ( ((tmpp->Src_CCSID=getccsid(srcenc,&srcis390)) == -1) || ((tmpp->Targ_CCSID=getccsid(destenc,&destis390)) == -1) ) { errno=ENOENT; free(handle_area); return (uniconv_t)-1; } tmpp->Wrk_Buf_Ptr=(void*) (((unsigned int) handle_area) + sizeof(CUNBCPRM)+DDA_NEEDED +8); tmpp->Wrk_Buf_Len=WORK_BUFFER_SIZE; // Doubleword align the DDA area tmpp->DDA_Buf_Ptr=(void*) ((unsigned int) handle_area + sizeof(CUNBCPRM) +7); tmpp->DDA_Buf_Ptr = (void*) ((unsigned int) tmpp->DDA_Buf_Ptr & ~7); tmpp->DDA_Buf_Len=DDA_NEEDED; // This flag tells the services to automatically refresh the handle if it // becomes invalid. tmpp->Flag1|=CUNBCPRM_REFRESH_AT_INV_HANDLE_START; tmpp->Flag1|=CUNBCPRM_SUB_ACTION_SUBSTITUTE; /* Determine which technique to use */ if ( (srcis390) || (destis390) ) // This technique causes it to swap LF and NL. memcpy(tmpp->Technique,"L ",8); else memcpy(tmpp->Technique," ",8); // Retry if the services are busy reloading their tables. int retry_count = 0; while (retry_count < RETRY_THRESHOLD) { CUNLCNV(tmpp); if (tmpp->Return_Code == CUN_RC_OK) break; else if ( (tmpp->Return_Code == CUN_RC_WARN) && ( (tmpp->Reason_Code == CUN_RS_NO_HANDLE) || (tmpp->Reason_Code == CUN_RS_INV_HANDLE_NOSET) || (tmpp->Reason_Code == CUN_RS_INV_HANDLE_SET) ) ) // Let it loop around again retry_count++; else break; } if (tmpp->Return_Code != CUN_RC_OK) { free(handle_area); errno=EINVAL; handle_area = (uniconv_t)-1; } return handle_area; } // All that is required for close is to free the handle buffer. int uniconv_close(uniconv_t handle_area) { errno = 0; if (((int)handle_area) <= 0) { errno=EBADF; return -1; } free(handle_area); return 0; } // This does the real conversion. // Note that unicode services must always be called in a loop since // it could be busy reloading its tables. int uniconv(uniconv_t cd, char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft) { CUNBCPRM * tmpp; size_t startinlen = *inbytesleft; size_t startoutlen = *outbytesleft; errno = 0; if (((int)cd) <= 0) { errno=EBADF; return -1; } // Fill in the parameter area with current values tmpp = (CUNBCPRM *) cd; tmpp->Src_Buf_Ptr = *inbuf; tmpp->Src_Buf_Len = *inbytesleft; tmpp->Targ_Buf_Ptr = *outbuf; tmpp->Targ_Buf_Len = *outbytesleft; // Retry if the services are busy reloading their tables. int retry_count = 0; while (retry_count < RETRY_THRESHOLD) { CUNLCNV(tmpp); if (tmpp->Return_Code == CUN_RC_OK) break; else if ( (tmpp->Return_Code == CUN_RC_WARN) && ( (tmpp->Reason_Code == CUN_RS_NO_HANDLE) || (tmpp->Reason_Code == CUN_RS_INV_HANDLE_NOSET) || (tmpp->Reason_Code == CUN_RS_INV_HANDLE_SET) ) ) // Let it loop around again retry_count++; else break; } *inbuf = (char *)tmpp->Src_Buf_Ptr; *inbytesleft = tmpp->Src_Buf_Len; *outbuf = (char *)tmpp->Targ_Buf_Ptr; *outbytesleft = tmpp->Targ_Buf_Len; if (tmpp->Return_Code != CUN_RC_OK) { if (tmpp->Reason_Code == CUN_RS_TRG_EXH) errno=E2BIG; else if (tmpp->Reason_Code == CUN_RS_MBC_INCOMPLETE) errno=EINVAL; else { errno=EBADF; return -1; } } return (startinlen-*inbytesleft); } // ********************************************************************** // These are the case conversion services. // ********************************************************************** // This "opens" the case conversion. It allocates the parameter area // then does a dummy call to unicode services so that it can set up // the handle. // Note that unicode services must always be called in a loop since // it could be busy reloading its tables. static inline uniconv_t uniconv_case_open(unsigned char direction) { CUNBAPRM defparms = {CUNBAPRM_DEFAULT}; CUNBAPRM * tmpp; void * handle_area; errno = 0; handle_area = malloc (sizeof(CUNBAPRM)+CUNBAPRM_DDA_REQ); tmpp = (CUNBAPRM *) handle_area; if (tmpp==NULL) return (uniconv_t)-1; // initialize the parm area with defaults, then start filling it // in with our values. memcpy(tmpp,&defparms,sizeof(defparms)); tmpp->DDA_Buf_Ptr=(void*) ((unsigned int) handle_area + sizeof(CUNBAPRM)); tmpp->DDA_Buf_Len=CUNBAPRM_DDA_REQ; // This flag tells the services to automatically refresh the handle if it // becomes invalid. tmpp->Flag1|=CUNBAPRM_REFRESH_AT_INV_HANDLE_START; unichar_t inchar = 0x61; unichar_t outchar; tmpp->Src_Buf_Ptr=&inchar; tmpp->Targ_Buf_Ptr=&outchar; tmpp->Targ_Buf_Len=sizeof(unichar_t); tmpp->Src_Buf_Len=sizeof(unichar_t); tmpp->Conv_Type=direction; // Retry if the services are busy reloading their tables. int retry_count = 0; while (true) { CUNLASE ( tmpp ); if (tmpp->Return_Code == CUN_RC_OK) { break; } else if ( (tmpp->Return_Code == CUN_RC_WARN) && ( (tmpp->Reason_Code == CUN_RS_NO_HANDLE) || (tmpp->Reason_Code == CUN_RS_INV_HANDLE_NOSET) || (tmpp->Reason_Code == CUN_RS_INV_HANDLE_SET) ) ) { // Let it loop around again retry_count++; if (retry_count > RETRY_THRESHOLD) { errno = ENOSYS; break; } } else { errno = ENOSYS; break; } } if (tmpp->Return_Code != CUN_RC_OK) { free(handle_area); errno=EINVAL; handle_area = (uniconv_t)-1; } return handle_area; } // These are the actual external interfaces for the open function uniconv_t uniconv_toupper_open() { return uniconv_case_open(CUNBAPRM_TO_UPPER); } uniconv_t uniconv_tolower_open() { return uniconv_case_open(CUNBAPRM_TO_LOWER); } // This closes the case conversion. All it does is free the handle buffer. int _uniconv_case_close(uniconv_t handle_area) { errno = 0; if (((int)handle_area) <= 0) { errno=EBADF; return -1; } free(handle_area); return 0; } // This does the actual case conversion. The direction is already // stored in the handle buffer. // Note that unicode services must always be called in a loop since // it could be busy reloading its tables. unichar_t uniconv_caseit (uniconv_t cd,unichar_t inchar) { unichar_t outchar; CUNBAPRM * tmpp; errno = 0; if (((int)cd) <= 0) { errno=EBADF; return -1; } tmpp = (CUNBAPRM *) cd; tmpp->Src_Buf_Ptr=&inchar; tmpp->Targ_Buf_Ptr=&outchar; tmpp->Targ_Buf_Len=sizeof(unichar_t); tmpp->Src_Buf_Len=sizeof(unichar_t); // Retry if the services are busy reloading their tables. int retry_count = 0; while (true) { CUNLASE ( tmpp ); if (tmpp->Return_Code == CUN_RC_OK) { break; } else if ( (tmpp->Return_Code == CUN_RC_WARN) && ( (tmpp->Reason_Code == CUN_RS_NO_HANDLE) || (tmpp->Reason_Code == CUN_RS_INV_HANDLE_NOSET) || (tmpp->Reason_Code == CUN_RS_INV_HANDLE_SET) ) ) { // Let it loop around again retry_count++; if (retry_count > RETRY_THRESHOLD) { errno = ENOSYS; break; } } else { errno = ENOSYS; break; } } return outchar; } XERCES_CPP_NAMESPACE_END