String Splitting Revisited Part II
In the last part there was a demo of how to split a string by a delimiter, but the delimiter could only be a single-character string (e.g. “,”). Now, what if you came across data separated by a delimiter that has more than one character (e.g. two dashes, “--”)? I guess that means we have to change our function.
Here is the new function; it is very similar to part one’s function.
/*
 * Split str into the pieces separated by delimiter.
 *
 * delimiter: separator string; may be longer than one character.
 * str:       string to split; it is not modified.
 *
 * Returns a heap-allocated, NULL-terminated array of heap-allocated strings.
 * Empty pieces are kept, so a leading or trailing delimiter yields an empty
 * string at that position. When the delimiter does not occur in str (or the
 * delimiter is empty) the array holds a single copy of str.
 * The caller owns the result: free every element, then the array itself.
 * Returns NULL if either argument is NULL or an allocation fails.
 */
char ** split(char *delimiter, char *str) {
    if (delimiter == NULL || str == NULL) {
        return NULL;
    }

    size_t delimiter_size = strlen(delimiter);
    size_t count = 1;

    /*
     * Count the pieces up front so the array is allocated once at its final
     * size. An empty delimiter would match everywhere, so it is treated as
     * "never found" to avoid looping forever.
     */
    if (delimiter_size > 0) {
        const char *scan = str;
        while ((scan = strstr(scan, delimiter)) != NULL) {
            count++;
            scan += delimiter_size;
        }
    }

    /* One extra zeroed slot acts as the NULL terminator. */
    char **elements = calloc(count + 1, sizeof *elements);
    if (elements == NULL) {
        return NULL;
    }

    const char *start = str;
    for (size_t e = 0; e < count; e++) {
        /* The last piece runs to the end of the string. */
        int last = (e + 1 == count);
        const char *stop = last ? start + strlen(start) : strstr(start, delimiter);
        size_t length = (size_t) (stop - start);

        elements[e] = malloc(length + 1);
        if (elements[e] == NULL) {
            /* Undo the partial result so nothing leaks on failure. */
            while (e > 0) {
                free(elements[--e]);
            }
            free(elements);
            return NULL;
        }
        memcpy(elements[e], start, length);
        elements[e][length] = '\0';

        if (!last) {
            start = stop + delimiter_size;
        }
    }

    return elements;
}
char **elements = (char **) calloc(1, sizeof(char));
char **_elements = NULL;
/*
* x to track strpos offset in *str and e for element tick and current delimiter position, i for var
* start for the start of the substr and stop for the end
*/
int x = 0, e = 0, d = 0, next_d = -1, start, stop;
int str_size = (int) strlen(str), delimiter_size = (int) strlen(delimiter);
while ((d = strpos(delimiter, str, x)) != -1) {
if ((next_d = strpos(delimiter, str, x + 1)) == -1) next_d = str_size;
if (x == 0 && strcmp(substr(str, 0, delimiter_size), delimiter) != 0 && e == 0) {
/* lets not skip the first group */
next_d = d, d = 0, start = d, stop = next_d;
} else if (x == 0 && strcmp(substr(str, 0, delimiter_size), delimiter) == 0 && e == 0) {
/* this runs if the delimiter is the first character in the search */
next_d = delimiter_size, start = 0, stop = 0;
} else {
/* increment d so that we can start from the proper pos in substr */
x++, start = d + delimiter_size, stop = (next_d - (d + delimiter_size));
}
elements[e] = (char *) calloc(next_d - d, sizeof(char));
strcpy(elements[e], substr(str, start, stop));
_elements = realloc(elements, (e + 2) * sizeof(char *));
elements = _elements;
e++;
}
return elements;
}
And the example uses:
/********************************************************************
* Name: test.c
* Author: rashaud
* Date: 05/05/2010
* License: GNU GPL <http://www.gnu.org/licenses/>
* Description: <description>
********************************************************************/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <xstdlib.h>
/*
 * Demo: split one string on a single-character delimiter and another on a
 * two-character delimiter, printing the first two pieces of each result.
 */
int main(void) {
    system("clear");

    char *text = "This,is,split,by,a,comma,delimiter!";
    char **s = split(",", text);
    printf("%s %s\n", s[0], s[1]);

    printf("\n");

    char *text1 = "This--is--split--by--a--two-dash--delimiter!";
    char **t = split("--", text1);
    /* Print the two-dash result (t), not the comma result again. */
    printf("%s %s\n", t[0], t[1]);

    return 0;
}
* Name: test.c
* Author: rashaud
* Date: 05/05/2010
* License: GNU GPL <http://www.gnu.org/licenses/>
* Description: <description>
********************************************************************/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <xstdlib.h>
/*
 * Demo: split one string on a single-character delimiter and another on a
 * two-character delimiter, printing the first two pieces of each result.
 */
int main(void) {
    system("clear");

    char *text = "This,is,split,by,a,comma,delimiter!";
    char **s = split(",", text);
    printf("%s %s\n", s[0], s[1]);

    printf("\n");

    char *text1 = "This--is--split--by--a--two-dash--delimiter!";
    char **t = split("--", text1);
    /* Print the two-dash result (t), not the comma result again. */
    printf("%s %s\n", t[0], t[1]);

    return 0;
}
This has now been added (or re-added) to the eXtended C Standard Library. To use it, you will need to get the most up-to-date code from the SVN repo.
Enjoy,
Rashaud
Up next is… “Now that we have split the string, let us rejoin it!”
Categories: C/C++