Project

General

Profile

« Previous | Next » 

Revision 2407

Added by stephane over 13 years ago

UTILS_G
- PCRE (regex)
- - add _pcreEasyMatch, _pcreEasySplit, _pcreEasyReplace
- - add _pcreNormalMatch, _pcreNormalSplit

View differences:

scol_glib_pregex.c
21 21
For other information, please contact us at http://www.scolring.org/
22 22
*/
23 23

  
24
#ifdef __cplusplus
25
#error This source file is not C++ but rather C. Please use a C-compiler
26
#endif
24 27

  
28

  
29

  
25 30
#include "../include/scol_glib_pregex.h"
31

  
32
/* http://library.gnome.org/devel/glib/unstable/glib-Perl-compatible-regular-expressions.html */
33

  
34
#if ((defined _WIN32) || (defined __WIN32__))
35
cbmachine ww;
36
#endif
37
mmachine  mm;
38

  
39
/*
40
    Easy interface
41

  
42
    - SCOL_pcreEasyMatch
43
    - SCOL_pcreEasySplit
44
    - SCOL_pcreEasyReplace
45
*/
46

  
47

  
48
/**
49
 * \brief Scans for a match in string for pattern
50
 * \param : S : pattern : the regular expression
51
 * \param : S : string : the string to scan for matches
52
 * \return : I : 1 if matched, else 0 (or nil if string or pattern is nil)
53
 *
54
 * The compile options for the regular expression are at 0
55
 * The match options ara at 0 too
56
 */
57
int SCOL_pcreEasyMatch (mmachine m)
58
{
59
    int mpattern, mstring;
60
    gchar *pattern, *string;
61

  
62
    MMechostr (MSKDEBUG, "SCOL_pcreEasyMatch : entering\n");
63

  
64
    mstring = MTOP (MMpull (m));
65
    mpattern = MTOP (MMpull (m));
66

  
67
    if((mstring == NIL) || (mpattern == NIL))
68
    {
69
        MMechostr (0, "SCOL_pcreEasyMatch error : an argument is nil");
70
        MMpush (m, NIL);
71
        return 0;
72
    }
73
    pattern = SCOLUTF8 (MMstartstr (m, mpattern), MMsizestr (m, mpattern));
74
    string = SCOLUTF8 (MMstartstr (m, mstring), MMsizestr (m, mstring));
75

  
76
    MMpush (m, ITOM (g_regex_match_simple (pattern, string, 0, 0)));
77
    g_free (pattern);
78
    g_free (string);
79
    return 0;
80
}
81

  
82
/**
83
 * \brief Breaks the string on the pattern, and returns an list of the tokens.
84
 * \param : S : pattern : the regular expression
85
 * \param : S : string : the string to scan for matches
86
 * \return : [S r1] : a list of substrings or nil
87
 *
88
 * The compile options for the regular expression are at 0
89
 * The match options ara at 0 too
90
 */
91
int SCOL_pcreEasySplit (mmachine m)
92
{
93
    int mpattern, mstring;
94
    int i = 0;
95
    gchar *pattern, *string;
96
    gchar **result;
97

  
98
    MMechostr (MSKDEBUG, "SCOL_pcreEsaySplit : entering\n");
99

  
100
    mstring = MTOP (MMpull (m));
101
    mpattern = MTOP (MMpull (m));
102

  
103
    if (mstring == NIL)
104
    {
105
        MMechostr (0, "SCOL_pcreEsaySplit error : an argument is nil");
106
        MMpush (m, NIL);
107
        return 0;
108
    }
109
    if (mpattern == NIL)
110
    {
111
        Mpushstrbloc (m, MMstartstr (m, mstring));
112
        MMpush (m, NIL);
113
        MMpush (m, ITOM (2));
114
        MBdeftab (m);
115
        return 0;
116
    }
117

  
118
    pattern = SCOLUTF8 (MMstartstr (m, mpattern), MMsizestr (m, mpattern));
119
    string = SCOLUTF8 (MMstartstr (m, mstring), MMsizestr (m, mstring));
120

  
121
    result = g_regex_split_simple (pattern, string, 0, 0);
122
    while (result[i] != NULL)
123
    {
124
        Mpushstrbloc (m, UTF8SCOL (result[i], strlen (result[i])));
125
        i++;
126
    }
127
    g_strfreev (result);
128
    g_free (pattern);
129
    g_free (string);
130
    MMpush (m, NIL);
131
    for (; i > 0; i--)
132
    {
133
        MMpush (m, ITOM (2));
134
        MBdeftab (m);
135
    }
136
    return 0;
137
}
138

  
139
/**
140
 * \brief Replaces all occurrences of the pattern in string with a replacement text
141
 * \param S : pattern
142
 * \param S : string
143
 * \param S : replacement text
144
 * \return S : new string containing the replacements
145
 */
146
int SCOL_pcreEasyReplace (mmachine m)
147
{
148
    int mpattern, mstring, mreplace;
149
    GRegex *regex;
150
    gchar *result;
151
    gchar *pattern, *string, *replace;
152

  
153
    MMechostr (MSKDEBUG, "SCOL_pcreEasyReplace : entering\n");
154

  
155
    mreplace = MTOP (MMpull (m));
156
    mstring = MTOP (MMpull (m));
157
    mpattern = MTOP (MMpull (m));
158

  
159
    if(mstring == NIL)
160
    {
161
        MMechostr (0, "SCOL_pcreEasyReplace error : an argument is nil");
162
        MMpush (m, NIL);
163
        return 0;
164
    }
165
    if ((mreplace == NIL) || (mpattern == NIL))
166
    {
167
        Mpushstrbloc (m, MMstartstr (m, mstring));
168
        return 0;
169
    }
170

  
171
    pattern = SCOLUTF8 (MMstartstr (m, mpattern), MMsizestr (m, mpattern));
172
    string = SCOLUTF8 (MMstartstr (m, mstring), MMsizestr (m, mstring));
173
    replace = SCOLUTF8 (MMstartstr (m, mreplace), MMsizestr (m, mreplace));
174

  
175
    regex = scol_pcre_regex_new (pattern, 0, 0);
176
    result = g_regex_replace (regex, string, strlen (string), 0, replace, 0, NULL);
177
    result = UTF8SCOL (result, strlen (result));
178
    Mpushstrbloc (m, result);
179
    g_free (result);
180
    g_regex_unref (regex);
181
    g_free (pattern);
182
    g_free (string);
183
    g_free (replace);
184
    return 0;
185
}
186

  
187

  
188
/*
189
    Normal interface
190

  
191
    - SCOL_pcreNormalMatch
192
    - SCOL_pcreNormalSplit
193
*/
194

  
195
/**
 * \brief Searches string for the matches of pattern, with explicit options
 *
 * Stack arguments, in call order: pattern (S), string (S), compile flags (I),
 * start position (I), match flags (I), algorithm (I).
 * The result is pushed by scol_pcre_match_standard or scol_pcre_match_DFA;
 * nil is pushed when the string or the pattern is nil.
 *
 * A negative start position is treated as 0, nil flags as 0, and any
 * algorithm other than PCRE_MATCH_DFA selects the standard matcher.
 */
int SCOL_pcreNormalMatch (mmachine m)
{
    int algo, match_flags, start, compile_flags, str_ref, pat_ref;
    gchar *utf8_pattern, *utf8_string;

    MMechostr (MSKDEBUG, "SCOL_pcreNormalMatch : entering\n");

    /* Arguments come off the stack in reverse order.
       NOTE(review): both flag values are used as pulled, without MTOI -
       confirm this matches how the flag constants are stored. */
    algo = MTOI (MMpull (m));
    match_flags = MMpull (m);
    start = MTOI (MMpull (m));
    compile_flags = MMpull (m);
    str_ref = MTOP (MMpull (m));
    pat_ref = MTOP (MMpull (m));

    if ((pat_ref == NIL) || (str_ref == NIL))
    {
        MMechostr (0, "SCOL_pcreNormalMatch error : string or pattern is nil\n");
        MMpush (m, NIL);
        return 0;
    }

    /* Normalise the optional arguments. */
    start = (start < 0) ? 0 : start;
    compile_flags = (compile_flags == NIL) ? 0 : compile_flags;
    match_flags = (match_flags == NIL) ? 0 : match_flags;

    utf8_pattern = SCOLUTF8 (MMstartstr (m, pat_ref), MMsizestr (m, pat_ref));
    utf8_string = SCOLUTF8 (MMstartstr (m, str_ref), MMsizestr (m, str_ref));

    if (algo == PCRE_MATCH_DFA)
        scol_pcre_match_DFA (m, utf8_pattern, utf8_string, compile_flags, match_flags, start);
    else
        scol_pcre_match_standard (m, utf8_pattern, utf8_string, compile_flags, match_flags, start);

    g_free (utf8_pattern);
    g_free (utf8_string);
    return 0;
}
240

  
241
/**
 * \brief Breaks the string on the pattern, with explicit options
 *
 * Stack arguments, in call order: pattern (S), string (S), compile flags (I),
 * start position (I), match flags (I), maximum number of tokens (I).
 * \return : [S r1] : the list of tokens, or nil if the string or the pattern
 * is nil, if the pattern cannot be compiled or if the split fails.
 *
 * A negative start position is treated as 0 and nil flags as 0.
 */
int SCOL_pcreNormalSplit (mmachine m)
{
    int mpattern, mstring, mcompile, mstart, mmax, mmatch;
    int i = 0;
    gchar *pattern, *string;
    GRegex *regex = NULL;
    GError *error = NULL;
    gchar **result = NULL;

    MMechostr (MSKDEBUG, "SCOL_pcreNormalSplit : entering\n");

    /* Arguments come off the stack in reverse order. */
    mmax = MTOI (MMpull (m));
    mmatch = MMpull (m);
    mstart = MTOI (MMpull (m));
    mcompile = MMpull (m);
    mstring = MTOP (MMpull (m));
    mpattern = MTOP (MMpull (m));

    if ((mstring == NIL) || (mpattern == NIL))
    {
        MMechostr (0, "SCOL_pcreNormalSplit error : string or pattern is nil\n");
        MMpush (m, NIL);
        return 0;
    }

    if (mstart < 0)
        mstart = 0;
    if (mcompile == NIL)
        mcompile = 0;
    if (mmatch == NIL)
        mmatch = 0;

    pattern = SCOLUTF8 (MMstartstr (m, mpattern), MMsizestr (m, mpattern));
    string = SCOLUTF8 (MMstartstr (m, mstring), MMsizestr (m, mstring));

    /* Only split when both conversions succeeded and the pattern compiled. */
    if ((pattern != NULL) && (string != NULL))
        regex = scol_pcre_regex_new (pattern, mcompile, mmatch);
    if (regex != NULL)
    {
        result = g_regex_split_full (regex, string, strlen (string), mstart, mmatch, mmax, &error);
        g_regex_unref (regex);
    }

    if ((error != NULL) || (result == NULL))
    {
        /* Failure: push a single nil and leave. The previous code fell
           through, dereferenced the null result and pushed a second value. */
        MMechostr (0, "SCOL_pcreNormalSplit error : %s\n",
                   (error != NULL) ? error->message : "the pattern or the string can not be used");
        if (error != NULL)
            g_error_free (error);
        g_strfreev (result);
        g_free (pattern);
        g_free (string);
        MMpush (m, NIL);
        return 0;
    }

    while (result[i] != NULL)
    {
        /* NOTE(review): assumes UTF8SCOL returns a newly allocated copy (or
           NULL on failure), as its use with g_free in SCOL_pcreEasyReplace
           suggests - confirm against the macro definition. */
        gchar *token = UTF8SCOL (result[i], strlen (result[i]));

        if (token != NULL)
        {
            Mpushstrbloc (m, token);
            g_free (token);
        }
        else
        {
            /* Keep one stack slot per token so the list is built correctly. */
            MMpush (m, NIL);
        }
        i++;
    }
    g_strfreev (result);
    g_free (pattern);
    g_free (string);

    /* Chain the i pushed tokens into a list terminated by nil. */
    MMpush (m, NIL);
    for (; i > 0; i--)
    {
        MMpush (m, ITOM (2));
        MBdeftab (m);
    }
    return 0;
}
302

  
303
/**
 * \brief Placeholder for the "normal" replace function (not implemented)
 *
 * Does not touch the stack and always returns 0. It does not appear in the
 * glib_pcre_fun table of this file.
 */
int SCOL_pcreNormalReplace (mmachine m)
{
    (void) m;   /* unused until the function is implemented */
    return 0;
}
307

  
308

  
309

  
310

  
311
/*
312
    Internal functions
313
    - scol_pcre_regex_new
314
        GRegex * scol_pcre_regex_new (const gchar *, GRegexCompileFlags, GRegexMatchFlags);
315
        Create a new regular expression
316

  
317
    - scol_pcre_match_standard
318
        void scol_pcre_match_standard (mmachine, const gchar *, const gchar *, GRegexCompileFlags, GRegexMatchFlags, gint)
319

  
320
    - scol_pcre_match_DFA
321
        void scol_pcre_match_DFA (mmachine m, const gchar *pattern, const gchar *string, GRegexCompileFlags compile, GRegexMatchFlags match, gint start)
322

  
323
*/
324

  
325
/**
 * \brief Compiles a regular expression
 * \param pattern : the regular expression
 * \param compile : compile options
 * \param match : match options stored with the compiled expression
 * \return the new GRegex (to release with g_regex_unref), or NULL if the
 * pattern can not be compiled
 *
 * The compilation error, if any, is written to the log instead of being
 * silently dropped.
 */
GRegex * scol_pcre_regex_new (const gchar * pattern, GRegexCompileFlags compile, GRegexMatchFlags match)
{
    GError *error = NULL;
    GRegex *regex;

    regex = g_regex_new (pattern, compile, match, &error);
    if (error != NULL)
    {
        MMechostr (0, "scol_pcre_regex_new error : %s\n", error->message);
        g_error_free (error);
    }
    return regex;
}
329

  
330

  
331
/**
 * \brief Pushes the list of the matches of pattern in string (standard algorithm)
 * \param m : the Scol machine
 * \param pattern : the regular expression
 * \param string : the string to scan
 * \param compile : compile options
 * \param match : match options
 * \param start : position in string where the scan starts
 *
 * For each match a tuple [matched text, start position, end position] is
 * pushed; the tuples are then chained into a list terminated by nil. An empty
 * list (nil) is pushed when nothing matches or when the pattern can not be
 * compiled.
 */
void scol_pcre_match_standard (mmachine m, const gchar *pattern, const gchar *string, GRegexCompileFlags compile, GRegexMatchFlags match, gint start)
{
    GRegex *regex = NULL;
    GMatchInfo *match_info = NULL;  /* stays NULL if GLib rejects the call */
    GError *error = NULL;
    gchar *word;
    int i, n = 0;
    gint pos1 = -1, pos2 = -1;

    if ((pattern != NULL) && (string != NULL))
        regex = scol_pcre_regex_new (pattern, compile, match);
    if (regex == NULL)
    {
        /* Without a compiled expression match_info would never be set. */
        MMechostr (0, "scol_pcre_match_standard error : the pattern or the string can not be used\n");
        MMpush (m, NIL);
        return;
    }

    g_regex_match_full (regex, string, strlen (string), start, match, &match_info, &error);
    while ((match_info != NULL) && g_match_info_matches (match_info))
    {
        word = g_match_info_fetch (match_info, 0);
        g_match_info_fetch_pos (match_info, 0, &pos1, &pos2);
        /* NOTE(review): the text is pushed without UTF8SCOL, unlike the split
           functions - confirm whether this is intended. */
        Mpushstrbloc (m, word);
        MMpush (m, ITOM (pos1));
        MMpush (m, ITOM (pos2));
        MMpush (m, ITOM (3));
        MBdeftab (m);
        g_free (word);
        pos1 = -1; pos2 = -1;
        g_match_info_next (match_info, &error);
        n++;
    }

    g_match_info_free (match_info);
    g_regex_unref (regex);
    if (error != NULL)
    {
        MMechostr (0, "scol_pcre_match_standard error : %s\n", error->message);
        g_error_free (error);
    }

    /* Chain the n pushed tuples into a list terminated by nil. */
    MMpush (m, NIL);
    for (i = 0; i < n; i++)
    {
        MMpush (m, ITOM (2));
        MBdeftab (m);
    }
    return;
}
374

  
375
/**
 * \brief Pushes the list of the matches of pattern in string (DFA algorithm)
 * \param m : the Scol machine
 * \param pattern : the regular expression
 * \param string : the string to scan
 * \param compile : compile options
 * \param match : match options
 * \param start : position in string where the scan starts
 *
 * For each match a tuple [matched text, start position, end position] is
 * pushed; the tuples are then chained into a list terminated by nil. An empty
 * list (nil) is pushed when nothing matches or when the pattern can not be
 * compiled.
 */
void scol_pcre_match_DFA (mmachine m, const gchar *pattern, const gchar *string, GRegexCompileFlags compile, GRegexMatchFlags match, gint start)
{
    GRegex *regex = NULL;
    GMatchInfo *match_info = NULL;  /* stays NULL if GLib rejects the call */
    GError *error = NULL;
    gchar *word;
    int i, n = 0;
    gint pos1 = -1, pos2 = -1;

    if ((pattern != NULL) && (string != NULL))
        regex = scol_pcre_regex_new (pattern, compile, match);
    if (regex == NULL)
    {
        /* Without a compiled expression match_info would never be set. */
        MMechostr (0, "scol_pcre_match_DFA error : the pattern or the string can not be used\n");
        MMpush (m, NIL);
        return;
    }

    g_regex_match_all_full (regex, string, strlen (string), start, match, &match_info, &error);
    /* NOTE(review): only entry 0 of each result is fetched; the other matches
       found at the same position by the DFA algorithm are presumably
       available through higher indices - confirm the intended behaviour. */
    while ((match_info != NULL) && g_match_info_matches (match_info))
    {
        word = g_match_info_fetch (match_info, 0);
        g_match_info_fetch_pos (match_info, 0, &pos1, &pos2);
        Mpushstrbloc (m, word);
        MMpush (m, ITOM (pos1));
        MMpush (m, ITOM (pos2));
        MMpush (m, ITOM (3));
        MBdeftab (m);
        g_free (word);
        pos1 = -1; pos2 = -1;
        g_match_info_next (match_info, &error);
        n++;
    }

    g_match_info_free (match_info);
    g_regex_unref (regex);
    if (error != NULL)
    {
        MMechostr (0, "scol_pcre_match_DFA error : %s\n", error->message);
        g_error_free (error);
    }

    /* Chain the n pushed tuples into a list terminated by nil. */
    MMpush (m, NIL);
    for (i = 0; i < n; i++)
    {
        MMpush (m, ITOM (2));
        MBdeftab (m);
    }
    return;
}
418

  
419

  
420

  
421

  
422
/* API definitions : */
423

  
424
/* Scol names of the package entries. The order must stay aligned with
   glib_pcre_fun, glib_pcre_narg and glib_pcre_type. */
char* glib_pcre_name[GLIB_PCRE_PKG_NB]=
{
    /* matching algorithms */
    "PCRE_MATCH_STANDARD",
    "PCRE_MATCH_DFA",

    /* match options */
    "PCRE_MATCH_ANCHORED",
    "PCRE_MATCH_NOTBOL",
    "PCRE_MATCH_NOTEOL",
    "PCRE_MATCH_NOTEMPTY",
    "PCRE_MATCH_PARTIAL",
    "PCRE_MATCH_NEWLINE_CR",
    "PCRE_MATCH_NEWLINE_LF",
    "PCRE_MATCH_NEWLINE_CRLF",
    "PCRE_MATCH_NEWLINE_ANY",

    /* compile options */
    "PCRE_REGEX_CASELESS",
    "PCRE_REGEX_MULTILINE",
    "PCRE_REGEX_DOTALL",
    "PCRE_REGEX_EXTENDED",
    "PCRE_REGEX_ANCHORED",
    "PCRE_REGEX_DOLLAR_ENDONLY",
    "PCRE_REGEX_UNGREEDY",
    "PCRE_REGEX_RAW",
    "PCRE_REGEX_NO_AUTO_CAPTURE",
    "PCRE_REGEX_OPTIMIZE",
    "PCRE_REGEX_DUPNAMES",
    "PCRE_REGEX_NEWLINE_CR",
    "PCRE_REGEX_NEWLINE_LF",
    "PCRE_REGEX_NEWLINE_CRLF",

    /* easy interface */
    "_pcreEasyMatch",
    "_pcreEasySplit",
    "_pcreEasyReplace",

    /* normal interface */
    "_pcreNormalMatch",
    "_pcreNormalSplit"
};
445

  
446
/* Value (for the constants) or C entry point (for the functions) of each
   package entry, in the same order as glib_pcre_name. */
int (*glib_pcre_fun[GLIB_PCRE_PKG_NB])(mmachine m)=
{
    /* matching algorithms */
    (bullshit) (1*2),           /* PCRE_MATCH_STANDARD */
    (bullshit) (2*2),           /* PCRE_MATCH_DFA */

    /* match options */
    (bullshit) (1<<4),          /* PCRE_MATCH_ANCHORED */
    (bullshit) (1<<7),          /* PCRE_MATCH_NOTBOL */
    (bullshit) (1<<8),          /* PCRE_MATCH_NOTEOL */
    (bullshit) (1<<10),         /* PCRE_MATCH_NOTEMPTY */
    (bullshit) (1<<15),         /* PCRE_MATCH_PARTIAL */
    (bullshit) (1<<20),         /* PCRE_MATCH_NEWLINE_CR */
    (bullshit) (1<<21),         /* PCRE_MATCH_NEWLINE_LF */
    (bullshit) (1<<20|1<<21),   /* PCRE_MATCH_NEWLINE_CRLF */
    (bullshit) (1<<22),         /* PCRE_MATCH_NEWLINE_ANY */

    /* compile options */
    (bullshit) (1<<0),          /* PCRE_REGEX_CASELESS */
    (bullshit) (1<<1),          /* PCRE_REGEX_MULTILINE */
    (bullshit) (1<<2),          /* PCRE_REGEX_DOTALL */
    (bullshit) (1<<3),          /* PCRE_REGEX_EXTENDED */
    (bullshit) (1<<4),          /* PCRE_REGEX_ANCHORED */
    (bullshit) (1<<5),          /* PCRE_REGEX_DOLLAR_ENDONLY */
    (bullshit) (1<<9),          /* PCRE_REGEX_UNGREEDY */
    (bullshit) (1<<11),         /* PCRE_REGEX_RAW */
    (bullshit) (1<<12),         /* PCRE_REGEX_NO_AUTO_CAPTURE */
    (bullshit) (1<<13),         /* PCRE_REGEX_OPTIMIZE */
    (bullshit) (1<<19),         /* PCRE_REGEX_DUPNAMES */
    (bullshit) (1<<20),         /* PCRE_REGEX_NEWLINE_CR */
    (bullshit) (1<<21),         /* PCRE_REGEX_NEWLINE_LF */
    (bullshit) (1<<20|1<<21),   /* PCRE_REGEX_NEWLINE_CRLF */

    /* easy interface */
    SCOL_pcreEasyMatch,
    SCOL_pcreEasySplit,
    SCOL_pcreEasyReplace,

    /* normal interface */
    SCOL_pcreNormalMatch,
    SCOL_pcreNormalSplit
};
467

  
468
int glib_pcre_narg[GLIB_PCRE_PKG_NB]=
469
{
470
    TYPVAR, TYPVAR,
471

  
472
    TYPVAR, TYPVAR, TYPVAR,
473
    TYPVAR, TYPVAR, TYPVAR,
474
    TYPVAR, TYPVAR, TYPVAR,
475

  
476
    TYPVAR, TYPVAR, TYPVAR,
477
    TYPVAR, TYPVAR, TYPVAR,
478
    TYPVAR, TYPVAR, TYPVAR,
479
    TYPVAR, TYPVAR, TYPVAR,
480
    TYPVAR, TYPVAR,
481

  
482
    2,
483
    2,
484
    3,
485

  
486
    6,
487
    6
488
};
489

  
490
/* Scol type of each package entry, in the same order as glib_pcre_name. */
char* glib_pcre_type[GLIB_PCRE_PKG_NB]=
{
    /* matching algorithms (2 entries) */
    "I",
    "I",

    /* match options (9 entries) */
    "I",
    "I",
    "I",
    "I",
    "I",
    "I",
    "I",
    "I",
    "I",

    /* compile options (14 entries) */
    "I",
    "I",
    "I",
    "I",
    "I",
    "I",
    "I",
    "I",
    "I",
    "I",
    "I",
    "I",
    "I",
    "I",

    /* easy interface */
    "fun [S S] I",                      /* _pcreEasyMatch */
    "fun [S S] [S r1]",                 /* _pcreEasySplit */
    "fun [S S S] S",                    /* _pcreEasyReplace */

    /* normal interface */
    "fun [S S I I I I] [[S I I] r1]",   /* _pcreNormalMatch */
    "fun [S S I I I I] [S r1]"          /* _pcreNormalSplit */
};
511

  
512
/**
513
 * \brief Load the Scol api
514
 */
515
int SCOLinitPcreClass (mmachine m)
516
{
517
    int k;
518

  
519
    MMechostr (0, "SCOLinitPcreClass : entering\n");
520

  
521
    k = PKhardpak (m, "PCREengine", GLIB_PCRE_PKG_NB, glib_pcre_name, glib_pcre_fun, glib_pcre_narg, glib_pcre_type);
522
    return k;
523
}
524

  
525

  
526
/**
527
 * \brief Load and free the regular expression library
528
 * Platforms supported : MS Windows and GNU / Linux
529
 */
530

  
531
/* Common release step: only writes a message to the log. Always returns 0. */
int PcreRelease ()
{
    MMechostr (0, "\nPCRE library released !\n");

    return 0;
}
536

  
537
#if ((defined _WIN32) || (defined __WIN32__))
538

  
539
/* MS Windows entry point: stores both machine handles, initialises the
   plugin and registers the package. Returns the value of SCOLinitPcreClass
   (0 when the registration succeeded). */
__declspec (dllexport) int SCOLloadPCRE (mmachine m, cbmachine w)
{
    int status;

    mm = m;
    ww = w;

    MMechostr (MSKDEBUG, "\nPCRE library loading .... !\n");
    SCOLinitplugin (w);

    status = SCOLinitPcreClass (m);
    if (status == 0)
        MMechostr(MSKDEBUG, "\nPCRE library loaded !\n");
    return status;
}
551

  
552
__declspec (dllexport) int SCOLfreePCRE ()
553
{
554
    PcreRelease ();
555
    return 0;
556
}
557

  
558

  
559

  
560

  
561

  
562
/* Version GNU / Linux */
563
#elif ((defined linux) || (defined __linux))
564

  
565
/* GNU / Linux entry point: stores the machine handle and registers the
   package. Returns the value of SCOLinitPcreClass (0 when the registration
   succeeded). */
int SCOLloadPCRE (mmachine m)
{
    int status;

    mm = m;

    MMechostr (MSKDEBUG, "\nPCRE library loading !\n");

    status = SCOLinitPcreClass (m);
    if (status == 0)
        MMechostr (MSKDEBUG, "\nPCRE library loaded !\n");
    return status;
}
576

  
577
int SCOLfreePCRE ()
578
{
579
    MMechostr(MSKDEBUG, "\nPCRE library release !\n");
580

  
581
    PcreRelease ();
582
    return 0;
583
}
584

  
585
#else
586
#error no platform supported
587
#endif

Also available in: Unified diff