Wednesday, April 21, 2010

string to int conversion

I was recently asked to write a string to int conversion function (without using library functions). I initially came up with a solution using the pow function (which is quite expensive). I had a think about it and found there were a surprising number of solutions. Briefly I came up with the following methods:

Method Summary
Pow my initial pow based solution (after converting a position to an in calculating 10^val)
Mul Rather than using pow generating powers of 10 in the loop (multiplier = multipler*10)
TableJust use a lookup table for the multipliers
Case More of less the same as table, but encode the table in a switch statement (very ugly!)


Results are probably compiler/CPU/platform dependent. But on my Atom Z530 (1.6GHz) based netbook using GCC 4.3.3 I obtained the following results when performing the conversion 10 million times:

Method User time
Pow 43.67s
Mul 28.21s
Table 28.22s
Case 29.13s


There's a big difference between the pow method and the others, but I was reasonably surprised that multiplier and table based methods performed similarly. It would be interesting to look at the assembler generated for these.

For reference, source code follows (note I sum and output the converted values to prevent the call to string_to_int from being optimised away). I was slightly concerned that something funky /might/ be going on in string::size() however benchmarked with this in and outside the loop and didn't observe any difference. Note: Following programs don't process signs, but in terms of benchmarking I don't believe this should be relevant.

Pow:

#include <string>
#include <iostream>
#include <math.h>
#include <stdlib.h>

using namespace std;

int string_to_int(string s) {

int output=0;

for(int n=0;n<s.size();n++) {
int cval = s[n]-'0';
output += cval*pow(10,s.size()-n-1);
}

return output;
}

int main() {

// Simple tests
cout << "1 is: " << string_to_int("1") << endl;
cout << "10 is: " << string_to_int("10") << endl;
cout << "14532 is: " << string_to_int("14532") << endl;

int rsum=0;
for(int i=0;i<10000000;i++) {
string s;
int numlen = rand()%11;
for(int n=0;n<numlen;n++) {
int rval;
if((numlen==10) && (n==0)) { rval = rand()%2; }
else { rval = rand()%10; }
s.push_back('0'+rval);
}
int v = string_to_int(s);
rsum += v;
}
cout << rsum << endl;
}



Mul:

#include <string>
#include <iostream>
#include <math.h>
#include <stdlib.h>

using namespace std;

int string_to_int(string s) {

int output=0;

int mul=1;
for(int n=s.size()-1;n>=0;n--) {
int cval = s[n]-'0';
output += cval*mul;
mul = mul * 10;
}

return output;
}

int main() {

// Simple tests
cout << "1 is: " << string_to_int("1") << endl;
cout << "10 is: " << string_to_int("10") << endl;
cout << "14532 is: " << string_to_int("14532") << endl;

int rsum=0;
for(int i=0;i<10000000;i++) {
string s;
int numlen = rand()%11;
for(int n=0;n<numlen;n++) {
int rval;
if((numlen==10) && (n==0)) { rval = rand()%2; }
else { rval = rand()%10; }
s.push_back('0'+rval);
}
int v = string_to_int(s);
rsum += v;
}
cout << rsum << endl;
}



Table:

#include <string>
#include <iostream>
#include <math.h>
#include <stdlib.h>

using namespace std;

const int powtable [] = { 1,
10,
100,
1000,
10000,
100000,
1000000,
10000000,
100000000,
1000000000
};

int string_to_int(string s) {

int output=0;

for(int n=s.size()-1;n>=0;n--) {
int cval = s[n]-'0';
output += cval*(powtable[s.size()-n-1]);
}

return output;
}

int main() {

// Simple tests
cout << "1 is: " << string_to_int("1") << endl;
cout << "10 is: " << string_to_int("10") << endl;
cout << "14532 is: " << string_to_int("14532") << endl;

int rsum=0;
for(int i=0;i<10000000;i++) {
string s;
int numlen = rand()%11;
for(int n=0;n<numlen;n++) {
int rval;
if((numlen==10) && (n==0)) { rval = rand()%2; }
else { rval = rand()%10; }
s.push_back('0'+rval);
}
int v = string_to_int(s);
rsum += v;
}
cout << rsum << endl;
}



Case:

#include <string>
#include <iostream>
#include <math.h>
#include <stdlib.h>

using namespace std;

int string_to_int(string s) {

int output=0;

int pos=0;
for(int n=s.size()-1;n>=0;n--) {
int cval = s[n]-'0';

switch(pos) {
case 0:
output += cval*1;
break;
case 1:
output += cval*10;
break;
case 2:
output += cval*100;
break;
case 3:
output += cval*1000;
break;
case 4:
output += cval*10000;
break;
case 5:
output += cval*100000;
break;
case 6:
output += cval*1000000;
break;
case 7:
output += cval*10000000;
break;
case 9:
output += cval*100000000;
break;
case 10:
output += cval*1000000000;
break;
}

pos++;
}

return output;
}

int main() {

// Simple tests
cout << "1 is: " << string_to_int("1") << endl;
cout << "10 is: " << string_to_int("10") << endl;
cout << "14532 is: " << string_to_int("14532") << endl;

int rsum=0;
for(int i=0;i<10000000;i++) {
string s;
int numlen = rand()%11;
for(int n=0;n<numlen;n++) {
int rval;
if((numlen==10) && (n==0)) { rval = rand()%2; }
else { rval = rand()%10; }
s.push_back('0'+rval);
}
int v = string_to_int(s);
rsum += v;
}
cout << rsum << endl;
}

1 comment:

Niall Haslam said...

Haven't we had this one before?