/*
** $Id: regexp.c,v 1.4 2008/12/18 04:18:43 dredd Exp $
**
** $Source: /cvsroot/swlpc/swlpc/modules/regexp.c,v $
** $Revision: 1.4 $
** $Date: 2008/12/18 04:18:43 $
** $State: Exp $
**
** Author: Geoff Wong, 1999
**
** See the file "Copying" distributed with this file.
**
** An interface to the GNU regular expression library.
*/
#include <stdlib.h>
#include <string.h>
#include <regex.h>
#include "stack.h"
/*
* Name: lpc_reg_index (defined below, after lpc_regerror)
* Purpose: return the first index of a pat in string (or array?) if
* there is a match, else -1
* Ignores the first 'ignore' number of matches
*/
#define MAX_BUF 1024
/*
 * Name: lpc_regerror
 * Purpose: common failure path for the regexp efuns. Translates the
 * regex error 'code' into text with regerror(), frees the
 * compiled pattern 'pat' and reports "<code>: <text>"
 * through efun_error().
 * Returns: the number -1.
 * Note: 'pat' is released here, so callers must not regfree() it
 * again after taking this path.
 */
Val * lpc_regerror(int code, regex_t * pat)
{
    char message[MAX_BUF];

    /* Fetch the text first: regerror() still needs the pattern. */
    regerror(code, pat, message, sizeof(message));

    /*
     * NOTE(review): the original author marked this regfree() with
     * "CHECK:". It is also reached after a failed regcomp(), where the
     * state of 'pat' may not be well defined - confirm against the
     * regex implementation in use.
     */
    regfree(pat);

    efun_error("%d: %s", code, message);
    return make_number(-1);
}
/*
 * Name: lpc_reg_index
 * Purpose: find where the regular expression 'match' occurs in 'str',
 * skipping the first 'ignore' occurrences.
 * Returns: the zero-based offset in 'str' of the selected match, or
 * the number -1 when 'str' has no such match.
 * Errors: a pattern that does not compile, or a regexec() failure
 * other than "no match", is reported via lpc_regerror().
 * Notes: matches are taken left to right and do not overlap.
 * A negative 'ignore' is treated as 0.
 */
Val * lpc_reg_index(Val * str, Val * match, Val * ignore)
{
    regex_t rpat;
    regmatch_t pmatch[1];
    const char * subject = str->u.string->str;
    int skip = ignore->u.number;
    int offset = 0;
    int err;

    err = regcomp(&rpat, match->u.string->str, REG_EXTENDED);
    if (err) return lpc_regerror(err, &rpat);

    if (skip < 0) skip = 0;

    for (;;)
    {
        /* Once past the start of the subject, '^' must no longer match. */
        err = regexec(&rpat, subject + offset, 1, pmatch,
                      offset > 0 ? REG_NOTBOL : 0);
        if (err == REG_NOMATCH)
        {
            /* Running out of matches is a normal result, not an error. */
            regfree(&rpat);
            return make_number(-1);
        }
        if (err) return lpc_regerror(err, &rpat);

        if (skip == 0) break;
        skip--;

        if (pmatch[0].rm_eo > pmatch[0].rm_so)
        {
            /* Resume right after the match; offsets are relative to 'offset'. */
            offset += (int) pmatch[0].rm_eo;
        }
        else
        {
            /* Empty match: step over one character so the scan advances. */
            if (subject[offset + pmatch[0].rm_eo] == '\0')
            {
                regfree(&rpat);
                return make_number(-1);
            }
            offset += (int) pmatch[0].rm_eo + 1;
        }
    }

    regfree(&rpat);
    return make_number(offset + (int) pmatch[0].rm_so);
}
/*
 * Name: lpc_reg_rindex
 * Purpose: intended to return the last index of 'pat' in 'str', or -1
 * when there is no match, ignoring the first 'ignore'
 * matches counted backwards from the end of the string.
 * Note: not implemented - the arguments are unused and every call
 * returns the number -1.
 */
Val * lpc_reg_rindex(Val * str, Val * pat, Val * ignore)
{
    Val * not_found = make_number(-1);

    return not_found;
}
/*
 * Name: lpc_reg_index_all
 * Purpose: collect the start offsets of every match of the regular
 * expression 'match' in 'str'.
 * Returns: an array of numbers, one zero-based offset per match in
 * left-to-right order; an empty array when nothing matches.
 * Errors: a pattern that does not compile, or a regexec() failure
 * other than "no match", is reported via lpc_regerror().
 * Running out of memory is reported via efun_error().
 * Notes: matches do not overlap. After an empty match the scan
 * steps over one character so that it always terminates.
 */
Val * lpc_reg_index_all(Val * str, Val * match)
{
    regex_t rpat;
    regmatch_t pmatch[1];
    const char * subject = str->u.string->str;
    int * starts = NULL;
    int count = 0, cap = 0;
    int offset = 0;
    int err, i;
    Val * ret;

    err = regcomp(&rpat, match->u.string->str, REG_EXTENDED);
    if (err) return lpc_regerror(err, &rpat);

    for (;;)
    {
        /* Once past the start of the subject, '^' must no longer match. */
        err = regexec(&rpat, subject + offset, 1, pmatch,
                      offset > 0 ? REG_NOTBOL : 0);
        if (err == REG_NOMATCH) break;
        if (err)
        {
            /* Release our buffer first; lpc_regerror() frees the pattern. */
            free(starts);
            return lpc_regerror(err, &rpat);
        }

        if (count == cap)
        {
            int ncap = cap ? cap * 2 : 16;
            int * grown = realloc(starts, (size_t) ncap * sizeof *grown);

            if (grown == NULL)
            {
                free(starts);
                regfree(&rpat);
                efun_error("regexp: out of memory");
                return make_number(-1);
            }
            starts = grown;
            cap = ncap;
        }

        /* regexec() offsets are relative to where this scan started. */
        starts[count++] = offset + (int) pmatch[0].rm_so;

        if (pmatch[0].rm_eo > pmatch[0].rm_so)
        {
            offset += (int) pmatch[0].rm_eo;
        }
        else
        {
            /* Empty match: stop at end of string, else skip one character. */
            if (subject[offset + pmatch[0].rm_eo] == '\0') break;
            offset += (int) pmatch[0].rm_eo + 1;
        }
    }
    regfree(&rpat);

    ret = allocate_array(count);
    for (i = 0; i < count; i++)
    {
        assign_value(&ret->u.vec->item[i], make_number(starts[i]));
    }
    free(starts);
    return ret;
}
/*
 * Name: lpc_reg_split
 * Purpose: split 'str' at every match of the regular expression
 * 'match', performing at most 'max' splits.
 * Returns: an array of strings: the text before each delimiter
 * followed by the remaining text, i.e. (splits + 1)
 * elements. A subject without any delimiter yields a
 * one-element array holding the whole string.
 * Errors: a pattern that does not compile, or a regexec() failure
 * other than "no match", is reported via lpc_regerror().
 * Running out of memory is reported via efun_error().
 * Notes: max == 0 means "as many as possible"; the number of
 * splits is always capped at MAX_SPLIT. A negative max
 * performs no split. Empty matches are not treated as
 * delimiters.
 */
#define MAX_SPLIT 1024
Val * lpc_reg_split(Val * str, Val * match, Val * max)
{
    regex_t rpat;
    regmatch_t pmatch[1];
    const char * subject = str->u.string->str;
    int so[MAX_SPLIT];      /* absolute start offset of each delimiter */
    int eo[MAX_SPLIT];      /* absolute end offset of each delimiter */
    int num = 0, pos = 0, start = 0;
    int err, m, i, len, total;
    char * buf;
    Val * ret;

    err = regcomp(&rpat, match->u.string->str, REG_EXTENDED);
    if (err) return lpc_regerror(err, &rpat);

    /* Never collect more delimiters than so[]/eo[] can hold. */
    m = max->u.number;
    if (m == 0 || m > MAX_SPLIT) m = MAX_SPLIT;

    while (num < m)
    {
        /* Once past the start of the subject, '^' must no longer match. */
        err = regexec(&rpat, subject + pos, 1, pmatch,
                      pos > 0 ? REG_NOTBOL : 0);
        if (err == REG_NOMATCH) break;
        if (err) return lpc_regerror(err, &rpat);

        if (pmatch[0].rm_eo == pmatch[0].rm_so)
        {
            /* Empty match: not a delimiter; skip one character and retry. */
            if (subject[pos + pmatch[0].rm_eo] == '\0') break;
            pos += (int) pmatch[0].rm_eo + 1;
            continue;
        }

        /* regexec() offsets are relative to 'pos'; store absolute ones. */
        so[num] = pos + (int) pmatch[0].rm_so;
        eo[num] = pos + (int) pmatch[0].rm_eo;
        pos = eo[num];
        num++;
    }
    regfree(&rpat);

    total = (int) strlen(subject);
    buf = malloc((size_t) total + 1);
    if (buf == NULL)
    {
        efun_error("regexp: out of memory");
        return make_number(-1);
    }

    ret = allocate_array(num + 1);
    for (i = 0; i <= num; i++)
    {
        /* Piece i ends at delimiter i, the last piece at the end of input. */
        len = (i < num ? so[i] : total) - start;
        memcpy(buf, subject + start, (size_t) len);
        buf[len] = '\0';
        assign_value(&ret->u.vec->item[i], make_string(buf));
        if (i < num) start = eo[i];
    }
    free(buf);
    return ret;
}
/*
 * Name: lpc_reg_sub
 * Purpose: replace matches of the regular expression 'match' in
 * 'str' with the string 'repl', performing at most 'max'
 * replacements.
 * Returns: a new string with the replacements applied; a copy of
 * 'str' when nothing matched.
 * Errors: a pattern that does not compile, or a regexec() failure
 * other than "no match", is reported via lpc_regerror().
 * Running out of memory is reported via efun_error().
 * Notes: max == 0 means "as many as possible"; the number of
 * replacements is always capped at MAX_SPLIT. A negative
 * max performs no replacement. 'repl' is inserted
 * literally. Empty matches are not replaced.
 */
Val * lpc_reg_sub(Val * str, Val * match, Val * repl, Val * max)
{
    regex_t rpat;
    regmatch_t pmatch[1];
    const char * subject = str->u.string->str;
    const char * with = repl->u.string->str;
    int so[MAX_SPLIT];      /* absolute start offset of each match */
    int eo[MAX_SPLIT];      /* absolute end offset of each match */
    int num = 0, pos = 0;
    int err, m, i;
    size_t slen, rlen, removed = 0;
    size_t out = 0, upto = 0, seg;
    char * newstr;
    Val * ret;

    err = regcomp(&rpat, match->u.string->str, REG_EXTENDED);
    if (err) return lpc_regerror(err, &rpat);

    /* Never collect more matches than so[]/eo[] can hold. */
    m = max->u.number;
    if (m == 0 || m > MAX_SPLIT) m = MAX_SPLIT;

    while (num < m)
    {
        /* Once past the start of the subject, '^' must no longer match. */
        err = regexec(&rpat, subject + pos, 1, pmatch,
                      pos > 0 ? REG_NOTBOL : 0);
        if (err == REG_NOMATCH) break;
        if (err) return lpc_regerror(err, &rpat);

        if (pmatch[0].rm_eo == pmatch[0].rm_so)
        {
            /* Empty match: leave it alone; skip one character and retry. */
            if (subject[pos + pmatch[0].rm_eo] == '\0') break;
            pos += (int) pmatch[0].rm_eo + 1;
            continue;
        }

        /* regexec() offsets are relative to 'pos'; store absolute ones. */
        so[num] = pos + (int) pmatch[0].rm_so;
        eo[num] = pos + (int) pmatch[0].rm_eo;
        removed += (size_t) (eo[num] - so[num]);
        pos = eo[num];
        num++;
    }
    regfree(&rpat);

    slen = strlen(subject);
    rlen = strlen(with);
    newstr = malloc(slen - removed + (size_t) num * rlen + 1);
    if (newstr == NULL)
    {
        efun_error("regexp: out of memory");
        return make_number(-1);
    }

    for (i = 0; i < num; i++)
    {
        /* Untouched text before match i, then the replacement. */
        seg = (size_t) so[i] - upto;
        memcpy(newstr + out, subject + upto, seg);
        out += seg;
        memcpy(newstr + out, with, rlen);
        out += rlen;
        upto = (size_t) eo[i];
    }

    /* Whatever follows the last match is carried over unchanged. */
    memcpy(newstr + out, subject + upto, slen - upto);
    out += slen - upto;
    newstr[out] = '\0';

    ret = make_string(newstr);
    free(newstr);
    return ret;
}
/*
 * Name: lpc_reg_match
 * Purpose: given an array of strings check which strings match
 * the given pattern
 * Returns: a new array holding copies of the strings which matched,
 * in the order they appear in the original array.
 * Elements that are not strings are skipped.
 * Errors: a pattern that does not compile, or a regexec() failure
 * other than "no match", is reported via lpc_regerror().
 * Running out of memory is reported via efun_error().
 * Notes: 'a' is used as an array without checking its type.
 */
Val * lpc_reg_match(Val * pattern, Val * a)
{
    regex_t prog_pat;
    struct vector *arr;
    int *element_match;
    int matches = 0;
    int i, p, len, err;
    Val *ret;

    err = regcomp(&prog_pat, pattern->u.string->str, REG_EXTENDED);
    if (err) return lpc_regerror(err, &prog_pat);

    arr = a->u.vec;
    if (arr->size == 0)
    {
        /* nothing to match against */
        regfree(&prog_pat);
        return allocate_array(0);
    }

    /*
     * The size of the result is only known after every element has
     * been tested, so remember the verdict per element first and build
     * the result array in a second pass. calloc() starts all flags at 0.
     */
    element_match = calloc((size_t) arr->size, sizeof *element_match);
    if (element_match == NULL)
    {
        regfree(&prog_pat);
        efun_error("regexp: out of memory");
        return make_number(-1);
    }

    for (i = 0; i < arr->size; i++)
    {
        if (arr->item[i].type != T_STRING) continue;

        /* Only the yes/no answer matters, so no match offsets are requested. */
        err = regexec(&prog_pat, arr->item[i].u.string->str, 0, NULL, 0);
        if (err == REG_NOMATCH) continue;
        if (err)
        {
            /* Release our buffer first; lpc_regerror() frees the pattern. */
            free(element_match);
            return lpc_regerror(err, &prog_pat);
        }

        element_match[i] = 1;
        matches++;
    }
    regfree(&prog_pat);

    /* Ok, we know which ones match. Now put them into a new array. */
    ret = allocate_array(matches);
    for (i = 0, p = 0; p < matches; i++)
    {
        if (!element_match[i]) continue;

        len = arr->item[i].u.string->length;
        assign_value(&ret->u.vec->item[p],
                     make_nstring(arr->item[i].u.string->str, len));
        p++;
    }

    free(element_match);
    return ret;
}
/*
 * Name: lpc_escape
 * Purpose: intended to escape a string so that all non-alphanumeric
 * chars are backslashed.
 * Note: not implemented - 'str' is unused and every call returns
 * Const(0).
 */
Val * lpc_escape(Val * str)
{
    Val * placeholder = Const(0);

    return placeholder;
}