Wednesday, April 21, 2010

string to int conversion

I was recently asked to write a string to int conversion function (without using library functions). I initially came up with a solution using the pow function (which is quite expensive). I had a think about it and found there were a surprising number of solutions. Briefly I came up with the following methods:

Method Summary
Pow my initial pow based solution (after converting a position to an in calculating 10^val)
Mul Rather than using pow generating powers of 10 in the loop (multiplier = multipler*10)
TableJust use a lookup table for the multipliers
Case More of less the same as table, but encode the table in a switch statement (very ugly!)


Results are probably compiler/CPU/platform dependent. But on my Atom Z530 (1.6GHz) based netbook using GCC 4.3.3 I obtained the following results when performing the conversion 10 million times:

Method User time
Pow 43.67s
Mul 28.21s
Table 28.22s
Case 29.13s


There's a big difference between the pow method and the others, but I was reasonably surprised that multiplier and table based methods performed similarly. It would be interesting to look at the assembler generated for these.

For reference, source code follows (note I sum and output the converted values to prevent the call to string_to_int from being optimised away). I was slightly concerned that something funky /might/ be going on in string::size() however benchmarked with this in and outside the loop and didn't observe any difference. Note: Following programs don't process signs, but in terms of benchmarking I don't believe this should be relevant.

Pow:

#include <string>
#include <iostream>
#include <math.h>
#include <stdlib.h>

using namespace std;

int string_to_int(string s) {

int output=0;

for(int n=0;n<s.size();n++) {
int cval = s[n]-'0';
output += cval*pow(10,s.size()-n-1);
}

return output;
}

int main() {

// Simple tests
cout << "1 is: " << string_to_int("1") << endl;
cout << "10 is: " << string_to_int("10") << endl;
cout << "14532 is: " << string_to_int("14532") << endl;

int rsum=0;
for(int i=0;i<10000000;i++) {
string s;
int numlen = rand()%11;
for(int n=0;n<numlen;n++) {
int rval;
if((numlen==10) && (n==0)) { rval = rand()%2; }
else { rval = rand()%10; }
s.push_back('0'+rval);
}
int v = string_to_int(s);
rsum += v;
}
cout << rsum << endl;
}



Mul:

#include <string>
#include <iostream>
#include <math.h>
#include <stdlib.h>

using namespace std;

int string_to_int(string s) {

int output=0;

int mul=1;
for(int n=s.size()-1;n>=0;n--) {
int cval = s[n]-'0';
output += cval*mul;
mul = mul * 10;
}

return output;
}

int main() {

// Simple tests
cout << "1 is: " << string_to_int("1") << endl;
cout << "10 is: " << string_to_int("10") << endl;
cout << "14532 is: " << string_to_int("14532") << endl;

int rsum=0;
for(int i=0;i<10000000;i++) {
string s;
int numlen = rand()%11;
for(int n=0;n<numlen;n++) {
int rval;
if((numlen==10) && (n==0)) { rval = rand()%2; }
else { rval = rand()%10; }
s.push_back('0'+rval);
}
int v = string_to_int(s);
rsum += v;
}
cout << rsum << endl;
}



Table:

#include <string>
#include <iostream>
#include <math.h>
#include <stdlib.h>

using namespace std;

const int powtable [] = { 1,
10,
100,
1000,
10000,
100000,
1000000,
10000000,
100000000,
1000000000
};

int string_to_int(string s) {

int output=0;

for(int n=s.size()-1;n>=0;n--) {
int cval = s[n]-'0';
output += cval*(powtable[s.size()-n-1]);
}

return output;
}

int main() {

// Simple tests
cout << "1 is: " << string_to_int("1") << endl;
cout << "10 is: " << string_to_int("10") << endl;
cout << "14532 is: " << string_to_int("14532") << endl;

int rsum=0;
for(int i=0;i<10000000;i++) {
string s;
int numlen = rand()%11;
for(int n=0;n<numlen;n++) {
int rval;
if((numlen==10) && (n==0)) { rval = rand()%2; }
else { rval = rand()%10; }
s.push_back('0'+rval);
}
int v = string_to_int(s);
rsum += v;
}
cout << rsum << endl;
}



Case:

#include <string>
#include <iostream>
#include <math.h>
#include <stdlib.h>

using namespace std;

int string_to_int(string s) {

int output=0;

int pos=0;
for(int n=s.size()-1;n>=0;n--) {
int cval = s[n]-'0';

switch(pos) {
case 0:
output += cval*1;
break;
case 1:
output += cval*10;
break;
case 2:
output += cval*100;
break;
case 3:
output += cval*1000;
break;
case 4:
output += cval*10000;
break;
case 5:
output += cval*100000;
break;
case 6:
output += cval*1000000;
break;
case 7:
output += cval*10000000;
break;
case 9:
output += cval*100000000;
break;
case 10:
output += cval*1000000000;
break;
}

pos++;
}

return output;
}

int main() {

// Simple tests
cout << "1 is: " << string_to_int("1") << endl;
cout << "10 is: " << string_to_int("10") << endl;
cout << "14532 is: " << string_to_int("14532") << endl;

int rsum=0;
for(int i=0;i<10000000;i++) {
string s;
int numlen = rand()%11;
for(int n=0;n<numlen;n++) {
int rval;
if((numlen==10) && (n==0)) { rval = rand()%2; }
else { rval = rand()%10; }
s.push_back('0'+rval);
}
int v = string_to_int(s);
rsum += v;
}
cout << rsum << endl;
}

Tuesday, April 20, 2010

Floating point precision

I was recently asked a question about the precision of floating point numbers. As most people know the representation used by most computers to store real numbers does not have continuous precision across the number line.

I thought it would be interesting therefore to plot the density of numbers across the number line. This is a nice illustration of the fact that numbers near 0 have higher precision that large numbers. The following plots are for 32bit floats (this was on a 64bit Snow Leopard Mac):




The following code was used to generate the data, which was plotted with Gnuplot:

#include <iostream>
#include <limits>

using namespace std;

int main() {


//float smallest = 0.00000000000000000000001;
float smallest = 0.00000001;

float block_size = 10;
for(float range_start=-100000;range_start<100000;range_start+=block_size) {
float range_end = range_start+block_size;

size_t different_n=0;
float n;
for(float n=range_start;n<range_end;) {

float new_n=n;
for(float i=1;new_n <= n;i++) {
new_n = n + (smallest*i);
}
n = new_n;
different_n++;
}

cout << range_start+((range_end-range_start)/2) << " " << different_n << endl;
}
}