#include "cstrings.h"
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include <stdio.h>
#include <regex.h>
#include <new>
// Reverse lookup table for Base64 decoding: indexed by the character code of an
// input character, it yields that character's 6-bit value
// ('A'-'Z' -> 0..25, 'a'-'z' -> 26..51, '0'-'9' -> 52..61, '+' -> 62, '/' -> 63).
// Every other entry is 0, so characters outside the alphabet decode like 'A'.
// NOTE(review): decodeBase64() indexes this table with an unsigned char, so it
// presumably holds 256 entries - confirm the element count.
unsigned char b64_lookup[] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,62,0,0,0,63,52,53,54,55,56,57,58,59,60,61,0,0,0,0,0,0,0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,0,0,0,0,0,0,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
// Creates an empty core: no character buffer yet, both length fields zero,
// and a reference count of one for the creating owner.
CStringCore::CStringCore()
{
    m_cString = NULL;
    m_nInternalLength = 0;
    m_nLength = 0;
    m_nRefCount = 1;
}
// Releases the character buffer, if one was ever allocated.
CStringCore::~CStringCore()
{
    if ( m_cString == NULL ) {
        return;
    }
    free(m_cString);
}
// Computes the length of the string in UTF-8 characters rather than bytes
// (so e.g. an umlaut counts as 1, not 2).
// The algorithm is from an article by Kragen Sitaker: every byte that is not a
// continuation byte (bit pattern 10xxxxxx) starts a new character.
// It is rather fast; still, there are much faster versions out there.
// Returns 0 for an empty string.
size_t CStringCore::utf8_length() const
{
    size_t nCharacters = 0;
    // size_t counters: the former int counters were compared against the stored
    // length and would have overflowed on very long strings
    for (size_t nByte = 0; nByte < m_nLength; nByte++) {
        if ((m_cString[nByte] & 0xC0) != 0x80) {
            nCharacters++;
        }
    }
    return nCharacters;
}
// Gives direct access to the raw character buffer
// (NULL as long as no memory has been allocated for this core).
char *CStringCore::string()
{
    char *cBuffer = m_cString;
    return cBuffer;
}
void CStrings::prepareSize(size_t length)
{
fetchBuffer(length);
}
// Default constructor: starts out without any string core attached.
CStrings::CStrings()
{
    this->commonConstruct();
}
// Removes at most one newline character from the end of the string.
void CStrings::chomp()
{
    const char cNewline = '\n';
    trimRight(cNewline, 1);
}
void CStrings::commonConstruct()
{
m_pCore = NULL;
}
// Read-only access to the character at nIndex.
// An index at or beyond length() yields 0 instead of touching the buffer.
char CStrings::operator[](unsigned int nIndex) const
{
    if (!(nIndex < length())) {
        return 0;
    }
    return m_pCore->string()[nIndex];
}
// Compares with another CStrings by delegating to the C-string comparison.
bool CStrings::operator==(const CStrings& strData) const
{
    const char *cOther = (const char *)strData;
    return operator==(cOther);
}
// Assigns a C string. Passing NULL resets the string to empty.
// cData may point into this object's own buffer (e.g. a suffix of the current
// value); that case is handled explicitly because acquiring the buffer may
// move it.
void CStrings::operator=(const char *cData)
{
    if (cData == NULL) {
        unRef();
        setLength(0);
        if (m_pCore != NULL && m_pCore->string() != NULL) {
            *(m_pCore->string()) = '\0';
        }
        return;
    }
    // size_t instead of int: strlen() results must not be truncated
    const size_t nLength = strlen(cData);

    // Remember where cData sits inside our own buffer, if it does, so it can
    // be re-based after fetchBuffer() (which may reallocate or detach).
    const char *cOwn = NULL;
    if (m_pCore != NULL) {
        cOwn = m_pCore->string();
    }
    const bool bAliased = (cOwn != NULL && cData >= cOwn && cData <= cOwn+length());
    const size_t nOffset = bAliased ? (size_t)(cData-cOwn) : 0;

    fetchBuffer(nLength+1);
    char *cTarget = m_pCore->string();
    if (bAliased) {
        cData = cTarget+nOffset;
    }
    // memmove: source and destination may overlap when assigning a suffix of ourselves
    memmove(cTarget, cData, nLength);
    cTarget[nLength] = '\0';
    setLength(nLength);
}
// Constructs the string as the concatenation of two C strings.
CStrings::CStrings(const char *cData1, const char *cData2)
{
    this->commonConstruct();
    this->concatenate(cData1, cData2);
}
// Appends a single character, re-terminates the buffer and bumps the stored length by one.
void CStrings::operator+=(char cData)
{
    const size_t nOldLength = length();
    fetchBuffer(nOldLength+1);
    char *cBuffer = m_pCore->string();
    cBuffer[nOldLength] = cData;
    cBuffer[nOldLength+1] = '\0';
    setLength(nOldLength+1);
}
// Assigns a single character by routing it through the C-string assignment.
void CStrings::operator=(char cData)
{
    const char cSingle[2] = { cData, '\0' };
    *this = cSingle;
}
// Core concatenation routine: replaces the content with cData1 followed by cData2.
//   cData1, cData2   - NUL-terminated inputs; either may be NULL (treated as empty)
//   nLength1/2       - lengths of the inputs as computed by the caller; cData2 is
//                      placed at offset nLength1 and the stored length becomes
//                      nLength1+nLength2
void CStrings::concatenate(const char *cData1, unsigned int nLength1, const char *cData2, unsigned int nLength2)
{
    // size_t: the sum of two unsigned lengths must not be squeezed into an int
    const size_t nTotalLength = (size_t)nLength1+(size_t)nLength2;
    fetchBuffer(nTotalLength);
    char *cTarget = m_pCore->string();
    if (cData1) {
        strcpy(cTarget, cData1);
    }
    else {
        // Without a first part the buffer may still hold older text; terminate it
        // so that a NULL/NULL call cannot leave stale characters behind a length of 0.
        cTarget[0] = '\0';
    }
    if (cData2) {
        strcpy(cTarget+nLength1, cData2);
    }
    setLength(nTotalLength);
}
// Concatenates two C strings; a NULL pointer counts as a string of length zero.
void CStrings::concatenate(const char *cData1, const char *cData2)
{
    const int nFirst = (cData1 != NULL) ? (int)strlen(cData1) : 0;
    const int nSecond = (cData2 != NULL) ? (int)strlen(cData2) : 0;
    concatenate(cData1, nFirst, cData2, nSecond);
}
// Concatenates a CStrings and a C string; a NULL cData2 counts as empty.
void CStrings::concatenate(const CStrings &cData1, const char *cData2)
{
    const int nFirst = cData1.length();
    const int nSecond = (cData2 != NULL) ? (int)strlen(cData2) : 0;
    const char *cFirst = (const char *)cData1;
    concatenate(cFirst, nFirst, cData2, nSecond);
}
// Concatenates two CStrings, using their stored lengths.
void CStrings::concatenate(const CStrings &cData1, const CStrings &cData2)
{
    const char *cFirst = (const char *)cData1;
    const char *cSecond = (const char *)cData2;
    concatenate(cFirst, cData1.length(), cSecond, cData2.length());
}
// Concatenates a C string and a CStrings; a NULL cData1 counts as empty.
void CStrings::concatenate(const char *cData1, const CStrings &cData2)
{
    const int nSecond = cData2.length();
    const int nFirst = (cData1 != NULL) ? (int)strlen(cData1) : 0;
    const char *cSecond = (const char *)cData2;
    concatenate(cData1, nFirst, cSecond, nSecond);
}
// Conversion to a C string. Never yields NULL: without a core or without a
// buffer an empty string literal is returned instead.
CStrings::operator PCCHAR() const
{
    if ( m_pCore == NULL ) {
        return "";
    }
    if ( m_pCore->string() == NULL ) {
        return "";
    }
    return m_pCore->string();
}
void CStrings::unRef(bool bCopy)
{
if ( m_pCore == NULL ) {
m_pCore = new CStringCore;
}
else if ( m_pCore->refCount() > 1 ) {
//fprintf(stderr, "*** Refcount is %d\n", m_pCore->refCount()); //BUG
CStringCore *pOldCore = m_pCore;
m_pCore->decreaseRefCount();
m_pCore = new CStringCore;
if ( bCopy ) {
m_pCore->allocMemory(pOldCore->internalLength());
strcpy(m_pCore->string(), pOldCore->string());
m_pCore->setLength(pOldCore->length());
}
}
}
// Strips leading occurrences of c.
//   nNumber > 0  : remove at most that many characters
//   nNumber <= 0 : remove all leading occurrences
// An empty or unallocated string is left untouched.
void CStrings::trimLeft(char c, int nNumber)
{
    unRef(true);
    if (m_pCore == NULL) {
        return;
    }
    char *cStart = m_pCore->string();
    if (cStart == NULL || *cStart == '\0') {
        return;
    }
    // locate the first character that survives the trim
    const char *cKeep = cStart;
    while (*cKeep != '\0' && *cKeep == c) {
        cKeep++;
        if (nNumber > 0 && --nNumber == 0) {
            break;
        }
    }
    // shift the remainder (including its terminator) to the front
    const size_t nRemaining = strlen(cKeep);
    memmove(cStart, cKeep, nRemaining+1);
    setLength(nRemaining);
}
// Strips trailing occurrences of c.
//   nNumber > 0  : remove at most that many characters
//   nNumber <= 0 : remove all trailing occurrences
// The stored length is refreshed afterwards.
void CStrings::trimRight(char c, int nNumber)
{
    unRef(true);
    if (m_pCore == NULL) {
        return;
    }
    char *cStart = m_pCore->string();
    if (cStart == NULL) {
        setLength(0);
        return;
    }
    // Index-based scan down to and including position 0. The former pointer
    // loop stopped before looking at the first character (so "aa" trimmed to
    // "a") and could step the pointer in front of the buffer.
    size_t nEnd = strlen(cStart);
    while (nEnd > 0 && cStart[nEnd-1] == c) {
        cStart[--nEnd] = '\0';
        if (nNumber > 0 && --nNumber == 0) {
            break;
        }
    }
    setLength(nEnd);
}
// Assignment from another CStrings: both objects share the same core
// afterwards; the previously held core is released and deleted when its
// reference count drops to zero.
void CStrings::operator=(const CStrings &string)
{
    CStringCore *pNewCore = string.m_pCore;
    // Same core on both sides (this includes self-assignment): nothing to do.
    // Without this check a self-assignment with a count of 1 deleted the core
    // and then kept using it.
    if ( pNewCore == m_pCore ) {
        return;
    }
    if ( pNewCore != NULL ) {
        pNewCore->increaseRefCount();
    }
    if ( m_pCore != NULL ) {
        m_pCore->decreaseRefCount();
        if ( m_pCore->refCount() == 0 ) {
            delete m_pCore;
        }
    }
    m_pCore = pNewCore;
}
// Clears the string by assigning a NULL C string.
void CStrings::empty()
{
    const char *cNothing = NULL;
    *this = cNothing;
}
// Appends a C string; NULL is ignored.
// cData may point into this object's own buffer (e.g. s += (const char *)s);
// the pointer is re-based after the buffer has been acquired because
// fetchBuffer() may reallocate or detach it.
void CStrings::operator+=(const char *cData)
{
    if (cData == NULL) {
        return;
    }
    // size_t instead of int: strlen() results must not be truncated
    const size_t nAddLength = strlen(cData);
    const size_t nOwnLength = length();

    const char *cOwn = NULL;
    if (m_pCore != NULL) {
        cOwn = m_pCore->string();
    }
    const bool bAliased = (cOwn != NULL && cData >= cOwn && cData <= cOwn+nOwnLength);
    const size_t nOffset = bAliased ? (size_t)(cData-cOwn) : 0;

    fetchBuffer(nOwnLength+nAddLength);
    char *cTarget = m_pCore->string();
    if (bAliased) {
        cData = cTarget+nOffset;
    }
    // copy by known length and terminate explicitly; writing the first new
    // byte may overwrite the terminator of an aliased source
    memmove(cTarget+nOwnLength, cData, nAddLength);
    cTarget[nOwnLength+nAddLength] = '\0';
    setLength(nOwnLength+nAddLength);
}
// Appends another CStrings. Appending a string to itself (s += s) is supported.
void CStrings::operator+=(const CStrings &cData)
{
    const size_t nOwnLength = length();
    const size_t nAddLength = cData.length();
    fetchBuffer(nOwnLength+nAddLength);
    char *cTarget = m_pCore->string();
    // Taken only now: if cData is this very object, fetchBuffer() may have moved the buffer.
    const char *cSource = (const char *)cData;
    // Length-bounded copy instead of strcpy: for s += s the first byte written
    // replaces the source's terminator, which made strcpy run past the buffer.
    memcpy(cTarget+nOwnLength, cSource, nAddLength);
    cTarget[nOwnLength+nAddLength] = '\0';
    setLength(nOwnLength+nAddLength);
}
// Ensures the buffer can hold nLength characters plus the terminating NUL.
// The buffer only ever grows; a freshly created buffer starts as an empty string.
// (An earlier version over-allocated by about 12.5 %; that is no longer done.)
// Throws std::bad_alloc when the memory cannot be obtained; the existing
// buffer and its content stay valid in that case.
void CStringCore::allocMemory(size_t nLength)
{
    nLength++; // room for the terminator
    if (internalLength() >= nLength) {
        return;
    }
    const bool bInit = (m_cString == NULL);
    // Keep the old pointer until realloc has succeeded: on failure realloc
    // returns NULL and leaves the old block untouched.
    char *cGrown = (char *)realloc(m_cString, nLength);
    if (cGrown == NULL) {
        throw std::bad_alloc();
    }
    m_cString = cGrown;
    if (bInit) {
        *m_cString = '\0';
    }
    m_nInternalLength = nLength;
}
void CStrings::fetchBuffer(size_t nLength)
{
unRef(true);
m_pCore->allocMemory(nLength);
}
void CStrings::setLength(size_t nLength)
{
m_pCore->setLength(nLength);
}
// Drops this object's reference to the core and deletes the core once nobody uses it any more.
CStrings::~CStrings()
{
    if ( m_pCore == NULL ) {
        return;
    }
    m_pCore->decreaseRefCount();
    if ( m_pCore->refCount() != 0 ) {
        return;
    }
    delete m_pCore;
    m_pCore = NULL;
}
// Copy constructor: shares the core of S when S has one, otherwise stays without a core.
CStrings::CStrings(const CStrings &S)
{
    commonConstruct();
    if ( S.m_pCore != NULL ) {
        operator=(S);
    }
}
// Checks whether this string equals one of the given C strings.
// The argument list is walked until a NULL pointer is reached, so callers
// have to terminate it with NULL.
bool CStrings::oneOf(const char *strString, ...) const
{
    va_list argList;
    va_start(argList, strString);
    bool bMatch = false;
    for (const char *strCandidate = strString;
         strCandidate != NULL;
         strCandidate = va_arg(argList, const char *)) {
        if (*this == strCandidate) {
            bMatch = true;
            break;
        }
    }
    va_end(argList);
    return bMatch;
}
// printf-style formatting into this string.
// Starts with a 1024 byte scratch buffer and grows it until the output fits.
// If formatting fails, memory runs out or cFormat is NULL, the string becomes
// empty (formerly the content was undefined in these cases).
void CStrings::format(const char *cFormat, ...)
{
    if (cFormat == NULL) {
        *this = "";
        return;
    }
    size_t nSize = 1024;
    char *cBuffer = NULL;
    bool bFormatted = false;
    for (;;) {
        char *cGrown = (char *)realloc(cBuffer, nSize);
        if (cGrown == NULL) {
            break; // out of memory; cBuffer (if any) is still valid and freed below
        }
        cBuffer = cGrown;
        // the argument list has to be restarted for every formatting attempt
        va_list argptr;
        va_start(argptr,cFormat);
#ifndef WIN32
        int nNewsize = vsnprintf(cBuffer,nSize,cFormat,argptr);
#else
        int nNewsize = _vsnprintf(cBuffer,nSize,cFormat,argptr);
#endif
        va_end(argptr);
        if (nNewsize >= 0 && (size_t)nNewsize < nSize) {
            bFormatted = true; // output and terminator fit
            break;
        }
        if (nNewsize >= 0) {
            // the required size is known: retry once with exactly enough room
            nSize = (size_t)nNewsize+1;
            continue;
        }
#ifdef WIN32
        // _vsnprintf reports truncation as a negative value without telling the
        // needed size, so grow geometrically up to a sanity limit
        if (nSize >= ((size_t)1 << 30)) {
            break;
        }
        nSize *= 2;
#else
        break; // a negative result from vsnprintf is a genuine formatting error
#endif
    }
    if (bFormatted) {
        *this = cBuffer;
    }
    else {
        *this = "";
    }
    free(cBuffer);
}
// Returns a new CCatString holding this string followed by cData.
CCatString CStrings::operator+(const char *cData) const
{
    CCatString strResult;
    strResult.concatenate(*this, cData);
    return strResult;
}
// Appends cData to this concatenation result in place and returns it, so that
// chains like a + b + c keep extending the same object.
CCatString &CCatString::operator+(const char *cData)
{
    operator+=(cData);
    return *this;
}
// Assignment from a C string, forwarded to the CStrings implementation.
void CCatString::operator=(const char *cData)
{
    this->CStrings::operator=(cData);
}
// Appends a C string, forwarded to the CStrings implementation.
void CCatString::operator+=(const char *cData)
{
    this->CStrings::operator+=(cData);
}
// Appends a CStrings, forwarded to the CStrings implementation.
void CCatString::operator+=(const CStrings &cData)
{
    this->CStrings::operator+=(cData);
}
// Default constructor; nothing to set up beyond what the base class constructor does.
CCatString::CCatString()
{
    // intentionally empty
}
// Free operator so that a C string can stand on the left-hand side: cData1 + strData2.
CCatString operator+(const char *cData1, const CStrings &strData2)
{
    CCatString strResult;
    strResult.concatenate(cData1, strData2);
    return strResult;
}
// Appends strData to this concatenation result in place and returns it.
CCatString &CCatString::operator+(const CStrings &strData)
{
    operator+=(strData);
    return *this;
}
// Returns a new CCatString holding this string followed by strData.
CCatString CStrings::operator+(const CStrings &strData) const
{
    CCatString strResult;
    strResult.concatenate(*this, strData);
    return strResult;
}
// Compares with a C string.
// A NULL argument equals an empty string; a string without core or buffer
// equals "" only.
bool CStrings::operator==(const char *strData) const
{
    if (strData == NULL) {
        return (length() == 0);
    }
    const char *strOwn = NULL;
    if (m_pCore != NULL) {
        strOwn = m_pCore->string();
    }
    if (strOwn == NULL) {
        return (*strData == '\0');
    }
    // cheap pre-check: differing first characters settle the matter without strcmp
    if (*strOwn != *strData) {
        return false;
    }
    return (strcmp(strOwn, strData) == 0);
}
// Free operator so that a C string can stand on the left-hand side of ==.
bool operator==(const char *cData1, const CStrings &strData2)
{
    const bool bEqual = (strData2 == cData1);
    return bEqual;
}
// Constructs the string from a C string (NULL gives an empty string).
CStrings::CStrings(const char *cString)
{
    this->commonConstruct();
    *this = cString;
}
// Constructs the string from at most nLength characters of cString.
CStrings::CStrings(const char *cString, size_t nLength)
{
    this->commonConstruct();
    this->stringCopy(cString, nLength);
}
// Replaces the content with at most nLength characters of the NUL-terminated
// string cString. A NULL cString yields an empty string.
void CStrings::stringCopy(const char *cString, size_t nLength)
{
    // NULL has to be handled before anything looks at the characters
    // (formerly strlen() ran first, so the NULL branch could never be reached).
    if (cString == NULL) {
        *this = "";
        return;
    }
    // Clamp before allocating: with a huge nLength (e.g. (size_t)-1) the old
    // "nLength+1" wrapped around to a tiny buffer that was then overrun.
    const size_t nAvailable = strlen(cString);
    if (nAvailable < nLength) {
        nLength = nAvailable;
    }
    fetchBuffer(nLength+1);
    char *cTarget = m_pCore->string();
    memcpy(cTarget, cString, nLength);
    cTarget[nLength] = '\0';
    setLength(nLength);
}
// Returns the first nPos characters as a new string (empty when there is no core).
CStrings CStrings::left(size_t nPos) const
{
    CStrings strPrefix;
    if (m_pCore == NULL) {
        return strPrefix;
    }
    strPrefix.stringCopy(m_pCore->string(), nPos);
    return strPrefix;
}
// Returns the last nPos characters as a new string.
// If nPos exceeds the length, the whole string is returned.
CStrings CStrings::right(size_t nPos) const
{
    CStrings NeuString;
    if (m_pCore == NULL) {
        return NeuString;
    }
    const size_t nOwnLength = length();
    // Compare the numbers before doing pointer arithmetic: the old code first
    // formed "buffer + length - nPos", which lies in front of the buffer
    // whenever nPos is too large.
    if (nPos > nOwnLength) {
        NeuString = *this;
        return NeuString;
    }
    const char *cBase = m_pCore->string();
    if (cBase != NULL) {
        NeuString = cBase+(nOwnLength-nPos);
    }
    else {
        NeuString = (const char *)NULL; // no buffer: result is an empty string
    }
    return NeuString;
}
// Returns the characters from index nPosStart up to and including index nPosEnd.
//   - nPosStart at or beyond the end, or nPosEnd < nPosStart: empty string
//   - nPosEnd beyond the end: everything from nPosStart on
// Note: for (0, (size_t)-1) the character count wraps around to 0 and the
// result is empty; this is kept as it was for backward compatibility.
CStrings CStrings::mid(size_t nPosStart, size_t nPosEnd) const
{
    CStrings NeuString;
    if (m_pCore == NULL) {
        return NeuString;
    }
    const size_t nOwnLength = length();
    // (the former "< 0" / ">= 0" tests on size_t values were always false/true and are gone)
    if (nPosStart >= nOwnLength) {
        NeuString = "";
        return NeuString;
    }
    if (nPosEnd < nPosStart) {
        return NeuString;
    }
    size_t nCount = nPosEnd-nPosStart+1;
    // never ask for more than what is left; an oversized count could overflow
    // the size computation further down the call chain
    if (nCount > nOwnLength-nPosStart) {
        nCount = nOwnLength-nPosStart;
    }
    NeuString.stringCopy(m_pCore->string()+nPosStart, nCount);
    return NeuString;
}
// Returns the index of the last occurrence of cData within the stored length,
// or (size_t)-1 if the character does not occur.
size_t CStrings::findReverse(char cData) const
{
    if (m_pCore != NULL) {
        const char *cStart = m_pCore->string();
        if (cStart != NULL) {
            // Count down with an unsigned index offset by one; this avoids the
            // int truncation of the length and never forms a pointer in front
            // of the buffer.
            for (size_t nIndex = length(); nIndex > 0; nIndex--) {
                if (cStart[nIndex-1] == cData) {
                    return nIndex-1;
                }
            }
        }
    }
    return (size_t)-1;
}
// Returns the index of the first occurrence of cData, or (size_t)-1 if the
// character does not occur (or the string has no buffer).
// As before, searching for '\0' finds the terminator, i.e. yields the string length.
size_t CStrings::find(char cData) const
{
    if (m_pCore != NULL) {
        const char *cStart = m_pCore->string();
        if (cStart != NULL) {
            // strchr is the standard replacement for the legacy index() call
            const char *cFound = strchr(cStart, cData);
            if (cFound != NULL) {
                return cFound-cStart;
            }
        }
    }
    return (size_t)-1;
}
// Returns everything from index nStart to the end of the string.
CStrings CStrings::substr(int nStart) const
{
    const size_t nRemaining = length()-nStart;
    return substr(nStart, nRemaining);
}
// Returns up to nLength characters starting at index nStart.
//   - a negative nStart is treated as 0
//   - a start at or beyond the end, or nLength == 0, gives an empty string
//   - nLength is cut down to what is available
CStrings CStrings::substr(int nStart, size_t nLength) const
{
    if (nStart < 0) {
        nStart = 0;
    }
    const size_t nBegin = (size_t)nStart;
    const size_t nOwnLength = length();
    if (nBegin >= nOwnLength || nLength < 1) {
        return CStrings();
    }
    if (nLength > nOwnLength-nBegin) {
        nLength = nOwnLength-nBegin;
    }
    // Build the result straight from our buffer. The former variable-length
    // stack array is not standard C++ and could exhaust the stack on long strings.
    return CStrings(((const char *)*this)+nBegin, nLength);
}
// Replaces every occurrence of strOldString with strNewString.
// The search continues behind each inserted replacement, so a replacement that
// contains the search text is not replaced again.
// A NULL or empty strOldString leaves the string unchanged (an empty search
// text used to match forever); a NULL strNewString deletes the occurrences.
void CStrings::replace(const char *strOldString, const char *strNewString)
{
    if (strOldString == NULL || *strOldString == '\0') {
        return;
    }
    if (strNewString == NULL) {
        strNewString = "";
    }
    unRef(true);
    const size_t nOldLength = strlen(strOldString);
    const size_t nNewLength = strlen(strNewString);
    const char *strPosition = strstr((const char *)*this, strOldString);
    while (strPosition != NULL) {
        const size_t nLeft = strPosition - (const char *)*this;
        // rebuild as: text before the match + replacement + text after the match
        *this = left(nLeft)+strNewString+(strPosition+nOldLength);
        strPosition = strstr(((const char *)*this)+nLeft+nNewLength, strOldString);
    }
}
void CStrings::set(const char *pBuffer, size_t nLength)
{
char buffer[nLength+1];
buffer[nLength] = 0;
strncpy(buffer, pBuffer, nLength);
*this = buffer;
}
void CStrings::setAt(int nIndex, char c)
{
if (nIndex >= 0 && nIndex < length()) {
(m_pCore->m_cString)[nIndex] = c;
}
}
// Tests whether the string matches the POSIX extended regular expression strRegex.
// Returns false for a NULL pattern, an empty string, a pattern that does not
// compile, or no match.
bool CStrings::matchRegex(const char *strRegex) const
{
    if (strRegex == NULL || length() == 0) {
        return false;
    }
    regex_t preg;
    if (regcomp(&preg, strRegex, REG_EXTENDED|REG_NOSUB) != 0) {
        // Something was wrong with the regex. This is not reported for now
        // (maybe later on by means of exceptions?)
        return false;
    }
    // With REG_NOSUB no match positions are reported, so no match array is
    // needed (the former 1000-entry stack array was never filled).
    const int nResult = regexec(&preg, (const char *)*this, 0, NULL, 0);
    regfree(&preg);
    return (nResult == 0);
}
bool CStrings::matchRegex(const char *strRegex, CSimpleObjectList<const CStrings> &list, bool bSubstrings) const
{
int cflags;
if (bSubstrings) {
cflags = REG_EXTENDED;
}
else {
cflags = REG_EXTENDED|REG_NOSUB;
}
int maxsubmatches = 30;
regex_t preg;
regmatch_t pmatch[maxsubmatches];
int eflags=0;
if (strRegex == NULL || length() == 0) {
return false;
}
if (!regcomp(&preg, strRegex, cflags)) {
if (!regexec(&preg, (const char *)*this, maxsubmatches-1, pmatch, eflags)) {
for (int i = 0; i < (bSubstrings ? maxsubmatches : 1); i++) { // index 0 stands for the whole matched expression
if (pmatch[i].rm_so >= 0) {
CStrings strMatch = mid(pmatch[i].rm_so, pmatch[i].rm_eo-1);
list += strMatch;
}
else {
i = maxsubmatches;
}
}
}
else {
regfree(&preg);
return false;
}
}
else {
// Actually, something was wrong with the regex. We won't handle this for now though... (maybe later on by means of exceptions?)
return false;
}
regfree(&preg);
return true;
}
// Returns the Base64 encoding of this string (alphabet A-Z a-z 0-9 + /,
// padded with '=' to a multiple of four characters).
CStrings CStrings::encodeBase64() const
{
    static const char cAlphabet[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
    const unsigned char *pInput = (const unsigned char *)(const char *)*this;
    int nRemaining = length();
    CStrings strEncoded;
    strEncoded.prepareSize(nRemaining*2);
    unsigned char cSextets[4];

    // complete groups: three input bytes become four output characters
    while (nRemaining >= 3) {
        cSextets[0] = (pInput[0] & 0xfc) >> 2;
        cSextets[1] = ((pInput[0] & 0x03) << 4) + ((pInput[1] & 0xf0) >> 4);
        cSextets[2] = ((pInput[1] & 0x0f) << 2) + ((pInput[2] & 0xc0) >> 6);
        cSextets[3] = pInput[2] & 0x3f;
        for (int k = 0; k < 4; k++) {
            strEncoded += cAlphabet[cSextets[k]];
        }
        pInput += 3;
        nRemaining -= 3;
    }

    // trailing one or two bytes: pad the group with zero bytes, emit one more
    // character than there are bytes and fill up with '='
    if (nRemaining > 0) {
        const unsigned char cFirst = pInput[0];
        const unsigned char cSecond = (nRemaining > 1) ? pInput[1] : 0;
        const unsigned char cThird = 0;
        cSextets[0] = (cFirst & 0xfc) >> 2;
        cSextets[1] = ((cFirst & 0x03) << 4) + ((cSecond & 0xf0) >> 4);
        cSextets[2] = ((cSecond & 0x0f) << 2) + ((cThird & 0xc0) >> 6);
        cSextets[3] = cThird & 0x3f;
        for (int k = 0; k < nRemaining+1; k++) {
            strEncoded += cAlphabet[cSextets[k]];
        }
        for (int nPad = nRemaining; nPad < 3; nPad++) {
            strEncoded += '=';
        }
    }
    return strEncoded;
}
CStrings CStrings::decodeBase64() const
{
//CStrings b64_charset("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/");
CStrings ret;
const char * str = (const char *)*this;
int in_len = length();
int i = 0;
int j = 0;
int in_ = 0;
unsigned char block_4[4];
unsigned char block_3[3];
char buffer[in_len];
bzero(buffer, in_len);
int currentindex = 0;
while (in_len-- && (str[in_] != '=')) {
block_4[i++] = str[in_];
in_++;
if (i == 4) {
for ( i = 0; i < 4; i++ ){
//block_4[i] = b64_charset.find(block_4[i]);
block_4[i] = b64_lookup[block_4[i]];
}
block_3[0] = (block_4[0] << 2) + ((block_4[1] & 0x30) >> 4);
block_3[1] = ((block_4[1] & 0xf) << 4) + ((block_4[2] & 0x3c) >> 2);
block_3[2] = ((block_4[2] & 0x3) << 6) + block_4[3];
for (i = 0; i < 3; i++ ) {
//ret += block_3[i];
buffer[currentindex++] = block_3[i];
}
i = 0;
}
}
if(i) {
for (j = i; j < 4; j++) {
block_4[j] = 0;
}
for (j = 0; j < 4; j++) {
//block_4[j] = b64_charset.find(block_4[j]);
block_4[j] = b64_lookup[block_4[j]];
}
block_3[0] = (block_4[0] << 2) + ((block_4[1] & 0x30) >> 4);
block_3[1] = ((block_4[1] & 0xf) << 4) + ((block_4[2] & 0x3c) >> 2);
block_3[2] = ((block_4[2] & 0x3) << 6) + block_4[3];
for (j = 0; j < i - 1; j++) {
//ret += block_3[j];
buffer[currentindex++] = block_3[j];
}
}
ret = buffer;
return ret;
}
// Checks whether this string is valid UTF-8 as defined by RFC 3629.
// Returns 0 if it is. Otherwise returns the 1-based position of the offending
// byte; for a sequence that is cut off at the end of the string this is the
// string length plus one.
// Besides the general byte structure this also rejects overlong encodings
// (lead bytes C0/C1, E0 80..9F, F0 80..8F), UTF-16 surrogates (ED A0..BF) and
// code points above U+10FFFF (F4 90.. and lead bytes F5..FF).
int CStringCore::check_utf8() const
{
    const size_t len = m_nLength;
    const unsigned char *str = (const unsigned char *)m_cString;
    size_t i = 0;
    while (i < len) {
        const unsigned char lead = str[i];
        size_t continuation_bytes = 0;
        // permitted range for the byte directly after the lead byte; some lead
        // bytes narrow it, every later continuation byte is 80..BF again
        unsigned char lowest = 0x80;
        unsigned char highest = 0xBF;
        if (lead <= 0x7F) {
            continuation_bytes = 0;
        }
        else if (lead >= 0xC2 && lead <= 0xDF) { // C0 and C1 could only encode overlong forms
            continuation_bytes = 1;
        }
        else if (lead >= 0xE0 && lead <= 0xEF) {
            continuation_bytes = 2;
            if (lead == 0xE0) {
                lowest = 0xA0;  // below that: overlong
            }
            else if (lead == 0xED) {
                highest = 0x9F; // above that: surrogates U+D800..U+DFFF
            }
        }
        else if (lead >= 0xF0 && lead <= 0xF4) { // F4 is the upper limit because of RFC 3629
            continuation_bytes = 3;
            if (lead == 0xF0) {
                lowest = 0x90;  // below that: overlong
            }
            else if (lead == 0xF4) {
                highest = 0x8F; // above that: beyond U+10FFFF
            }
        }
        else {
            return (int)(i + 1);
        }
        i++;
        while (i < len
               && continuation_bytes > 0
               && str[i] >= lowest
               && str[i] <= highest) {
            i += 1;
            continuation_bytes -= 1;
            lowest = 0x80;
            highest = 0xBF;
        }
        if (continuation_bytes != 0) {
            return (int)(i + 1);
        }
    }
    return 0;
}