Project

General

Profile

1 2401 stephane
/*
2
This source file is part of Scol
3
For the latest info, see http://www.scolring.org
4
5
Copyright (c) 2010 Stephane Bisaro, aka Iri <iri@irizone.net>
6
7
This program is free software; you can redistribute it and/or modify it under
8
the terms of the GNU Lesser General Public License as published by the Free Software
9
Foundation; either version 2 of the License, or (at your option) any later
10
version.
11
12
This program is distributed in the hope that it will be useful, but WITHOUT
13
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14
FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
15
16
You should have received a copy of the GNU Lesser General Public License along with
17
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
18
Place - Suite 330, Boston, MA 02111-1307, USA, or go to
19
http://www.gnu.org/copyleft/lesser.txt
20
21
For other information, please contact us at http://www.scolring.org/
22
*/
23
24 2407 stephane
#ifdef __cplusplus
25
#error This source file is not C++ but rather C. Please use a C-compiler
26
#endif
27 2401 stephane
28 2407 stephane
29
30 2401 stephane
#include "../include/scol_glib_pregex.h"
31 2407 stephane
32
/* http://library.gnome.org/devel/glib/unstable/glib-Perl-compatible-regular-expressions.html */
33
34
#if ((defined _WIN32) || (defined __WIN32__))
35
cbmachine ww;
36
#endif
37
mmachine  mm;
38
39
/*
40
    Easy interface
41
42
    - SCOL_pcreEasyMatch
43
    - SCOL_pcreEasySplit
44
    - SCOL_pcreEasyReplace
45
*/
46
47
48
/**
49
 * \brief Scans for a match in string for pattern
50
 * \param : S : pattern : the regular expression
51
 * \param : S : string : the string to scan for matches
52
 * \return : I : 1 if matched, else 0 (or nil if string or pattern is nil)
53
 *
54
 * The compile options for the regular expression are at 0
55
 * The match options ara at 0 too
56
 */
57
int SCOL_pcreEasyMatch (mmachine m)
58
{
59
    int mpattern, mstring;
60
    gchar *pattern, *string;
61
62
    MMechostr (MSKDEBUG, "SCOL_pcreEasyMatch : entering\n");
63
64
    mstring = MTOP (MMpull (m));
65
    mpattern = MTOP (MMpull (m));
66
67
    if((mstring == NIL) || (mpattern == NIL))
68
    {
69
        MMechostr (0, "SCOL_pcreEasyMatch error : an argument is nil");
70
        MMpush (m, NIL);
71
        return 0;
72
    }
73
    pattern = SCOLUTF8 (MMstartstr (m, mpattern), MMsizestr (m, mpattern));
74
    string = SCOLUTF8 (MMstartstr (m, mstring), MMsizestr (m, mstring));
75
76
    MMpush (m, ITOM (g_regex_match_simple (pattern, string, 0, 0)));
77
    g_free (pattern);
78
    g_free (string);
79
    return 0;
80
}
81
82
/**
83
 * \brief Breaks the string on the pattern, and returns an list of the tokens.
84
 * \param : S : pattern : the regular expression
85
 * \param : S : string : the string to scan for matches
86
 * \return : [S r1] : a list of substrings or nil
87
 *
88
 * The compile options for the regular expression are at 0
89
 * The match options ara at 0 too
90
 */
91
int SCOL_pcreEasySplit (mmachine m)
92
{
93
    int mpattern, mstring;
94
    int i = 0;
95
    gchar *pattern, *string;
96
    gchar **result;
97
98
    MMechostr (MSKDEBUG, "SCOL_pcreEsaySplit : entering\n");
99
100
    mstring = MTOP (MMpull (m));
101
    mpattern = MTOP (MMpull (m));
102
103
    if (mstring == NIL)
104
    {
105
        MMechostr (0, "SCOL_pcreEsaySplit error : an argument is nil");
106
        MMpush (m, NIL);
107
        return 0;
108
    }
109
    if (mpattern == NIL)
110
    {
111
        Mpushstrbloc (m, MMstartstr (m, mstring));
112
        MMpush (m, NIL);
113
        MMpush (m, ITOM (2));
114
        MBdeftab (m);
115
        return 0;
116
    }
117
118
    pattern = SCOLUTF8 (MMstartstr (m, mpattern), MMsizestr (m, mpattern));
119
    string = SCOLUTF8 (MMstartstr (m, mstring), MMsizestr (m, mstring));
120
121
    result = g_regex_split_simple (pattern, string, 0, 0);
122
    while (result[i] != NULL)
123
    {
124
        Mpushstrbloc (m, UTF8SCOL (result[i], strlen (result[i])));
125
        i++;
126
    }
127
    g_strfreev (result);
128
    g_free (pattern);
129
    g_free (string);
130
    MMpush (m, NIL);
131
    for (; i > 0; i--)
132
    {
133
        MMpush (m, ITOM (2));
134
        MBdeftab (m);
135
    }
136
    return 0;
137
}
138
139
/**
140
 * \brief Replaces all occurrences of the pattern in string with a replacement text
141
 * \param S : pattern
142
 * \param S : string
143
 * \param S : replacement text
144
 * \return S : new string containing the replacements
145
 */
146
int SCOL_pcreEasyReplace (mmachine m)
147
{
148
    int mpattern, mstring, mreplace;
149
    GRegex *regex;
150
    gchar *result;
151
    gchar *pattern, *string, *replace;
152
153
    MMechostr (MSKDEBUG, "SCOL_pcreEasyReplace : entering\n");
154
155
    mreplace = MTOP (MMpull (m));
156
    mstring = MTOP (MMpull (m));
157
    mpattern = MTOP (MMpull (m));
158
159
    if(mstring == NIL)
160
    {
161
        MMechostr (0, "SCOL_pcreEasyReplace error : an argument is nil");
162
        MMpush (m, NIL);
163
        return 0;
164
    }
165
    if ((mreplace == NIL) || (mpattern == NIL))
166
    {
167
        Mpushstrbloc (m, MMstartstr (m, mstring));
168
        return 0;
169
    }
170
171
    pattern = SCOLUTF8 (MMstartstr (m, mpattern), MMsizestr (m, mpattern));
172
    string = SCOLUTF8 (MMstartstr (m, mstring), MMsizestr (m, mstring));
173
    replace = SCOLUTF8 (MMstartstr (m, mreplace), MMsizestr (m, mreplace));
174
175
    regex = scol_pcre_regex_new (pattern, 0, 0);
176
    result = g_regex_replace (regex, string, strlen (string), 0, replace, 0, NULL);
177
    result = UTF8SCOL (result, strlen (result));
178
    Mpushstrbloc (m, result);
179
    g_free (result);
180
    g_regex_unref (regex);
181
    g_free (pattern);
182
    g_free (string);
183
    g_free (replace);
184
    return 0;
185
}
186
187
188
/*
189
    Normal interface
190
191
    - SCOL_pcreNormalMatch
192
    - SCOL_pcreNormalSplit
193
*/
194
195
/**
 * \brief Scans a string for the matches of a pattern, with explicit options
 *
 * Declared in glib_pcre_type as "fun [S S I I I I] [[S I I] r1]". The
 * arguments, in declaration order, are : pattern, string, compile flags,
 * start position, match flags, algorithm.
 *
 * - nil is pushed if string or pattern is nil
 * - a negative start position is treated as 0
 * - nil compile flags or match flags are treated as 0
 * - any algorithm other than PCRE_MATCH_DFA selects the standard matcher
 *
 * The result is pushed by scol_pcre_match_standard or scol_pcre_match_DFA.
 * The C return value is always 0.
 */
int SCOL_pcreNormalMatch (mmachine m)
{
    int algo, match_flags, start, compile_flags, str_ref, pat_ref;
    gchar *utf8_pattern, *utf8_subject;

    MMechostr (MSKDEBUG, "SCOL_pcreNormalMatch : entering\n");

    /* Arguments are pulled in reverse order; the two flag words are kept as
       pulled (no MTOI) */
    algo = MTOI (MMpull (m));
    match_flags = MMpull (m);
    start = MTOI (MMpull (m));
    compile_flags = MMpull (m);
    str_ref = MTOP (MMpull (m));
    pat_ref = MTOP (MMpull (m));

    if ((pat_ref == NIL) || (str_ref == NIL))
    {
        MMechostr (0, "SCOL_pcreNormalMatch error : string or pattern is nil\n");
        MMpush (m, NIL);
        return 0;
    }

    start = (start < 0) ? 0 : start;
    compile_flags = (compile_flags == NIL) ? 0 : compile_flags;
    match_flags = (match_flags == NIL) ? 0 : match_flags;

    utf8_pattern = SCOLUTF8 (MMstartstr (m, pat_ref), MMsizestr (m, pat_ref));
    utf8_subject = SCOLUTF8 (MMstartstr (m, str_ref), MMsizestr (m, str_ref));

    if (algo == PCRE_MATCH_DFA)
    {
        scol_pcre_match_DFA (m, utf8_pattern, utf8_subject, compile_flags, match_flags, start);
    }
    else
    {
        scol_pcre_match_standard (m, utf8_pattern, utf8_subject, compile_flags, match_flags, start);
    }

    g_free (utf8_pattern);
    g_free (utf8_subject);
    return 0;
}
240
241
/**
 * \brief Breaks the string on the pattern, with explicit options
 *
 * Declared in glib_pcre_type as "fun [S S I I I I] [S r1]". The arguments, in
 * declaration order, are : pattern, string, compile flags, start position,
 * match flags, maximum number of tokens.
 *
 * - a negative start position is treated as 0, one past the end of the
 *   string is brought back to the end
 * - nil compile flags or match flags are treated as 0
 *
 * \return : [S r1] : the list of substrings, or nil if string or pattern is
 *           nil, if the pattern is invalid or if the split fails. Exactly one
 *           value is pushed in every case.
 *
 * The C return value is always 0.
 */
int SCOL_pcreNormalSplit (mmachine m)
{
    int mpattern, mstring, mcompile, mstart, mmax, mmatch;
    int i = 0;
    gchar *pattern, *string, *token;
    GRegex *regex;
    GError *error = NULL;
    gchar **result = NULL;
    gsize length;

    MMechostr (MSKDEBUG, "SCOL_pcreNormalSplit : entering\n");

    mmax = MTOI (MMpull (m));
    mmatch = MMpull (m);
    mstart = MTOI (MMpull (m));
    mcompile = MMpull (m);
    mstring = MTOP (MMpull (m));
    mpattern = MTOP (MMpull (m));

    if ((mstring == NIL) || (mpattern == NIL))
    {
        MMechostr (0, "SCOL_pcreNormalSplit error : string or pattern is nil\n");
        MMpush (m, NIL);
        return 0;
    }

    if (mstart < 0)
        mstart = 0;
    if (mcompile == NIL)
        mcompile = 0;
    if (mmatch == NIL)
        mmatch = 0;

    pattern = SCOLUTF8 (MMstartstr (m, mpattern), MMsizestr (m, mpattern));
    string = SCOLUTF8 (MMstartstr (m, mstring), MMsizestr (m, mstring));

    /* The regex is NULL when the pattern does not compile; string is tested
       because strlen is applied to it */
    regex = scol_pcre_regex_new (pattern, mcompile, mmatch);
    if ((regex != NULL) && (string != NULL))
    {
        /* A start offset beyond the end of the subject is meaningless */
        length = strlen (string);
        if ((gsize) mstart > length)
            mstart = (int) length;
        result = g_regex_split_full (regex, string, length, mstart, mmatch, mmax, &error);
    }
    if (regex != NULL)
        g_regex_unref (regex);
    g_free (pattern);
    g_free (string);

    /* On failure, push a single nil and stop : there is no array to walk */
    if ((error != NULL) || (result == NULL))
    {
        if (error != NULL)
        {
            MMechostr (0, "SCOL_pcreNormalSplit error : %s\n", error->message);
            g_error_free (error);
        }
        else
            MMechostr (0, "SCOL_pcreNormalSplit error : invalid pattern\n");
        g_strfreev (result);
        MMpush (m, NIL);
        return 0;
    }

    while (result[i] != NULL)
    {
        token = UTF8SCOL (result[i], strlen (result[i]));
        Mpushstrbloc (m, token);
        /* The converted copy is no longer needed once pushed. UTF8SCOL is
           defined outside this file : the comparison avoids a double free
           should it ever hand back its own argument */
        if (token != result[i])
            g_free (token);
        i++;
    }
    g_strfreev (result);

    /* Chain the i pushed strings into a list, nil being the tail */
    MMpush (m, NIL);
    for (; i > 0; i--)
    {
        MMpush (m, ITOM (2));
        MBdeftab (m);
    }
    return 0;
}
302
303
/**
 * \brief Replaces the occurrences of the pattern in string, with explicit options
 *
 * Declared in glib_pcre_type as "fun [S S S I I I] S". The arguments, in
 * declaration order, are : pattern, string, replacement text, compile flags,
 * start position, match flags.
 *
 * - a negative start position is treated as 0
 * - nil compile flags or match flags are treated as 0
 *
 * \return S : new string containing the replacements. The string itself if
 *         pattern or replacement is nil. nil if string is nil, if the pattern
 *         is invalid or if the replacement fails. Exactly one value is pushed
 *         in every case.
 *
 * The C return value is always 0.
 */
int SCOL_pcreNormalReplace (mmachine m)
{
    int mpattern, mstring, mreplace, mcompile, mstart, mmatch;
    gchar *pattern, *string, *replace, *converted;
    gchar *result = NULL;
    GRegex *regex;
    GError *error = NULL;

    MMechostr (MSKDEBUG, "SCOL_pcreNormalReplace : entering\n");

    mmatch = MMpull (m);
    mstart = MTOI (MMpull (m));
    mcompile = MMpull (m);
    mreplace = MTOP (MMpull (m));
    mstring = MTOP (MMpull (m));
    mpattern = MTOP (MMpull (m));

    if (mstring == NIL)
    {
        MMechostr (0, "SCOL_pcreNormalReplace error : string is nil\n");
        MMpush (m, NIL);
        return 0;
    }
    if ((mreplace == NIL) || (mpattern == NIL))
    {
        Mpushstrbloc (m, MMstartstr (m, mstring));
        return 0;
    }

    if (mstart < 0)
        mstart = 0;
    if (mcompile == NIL)
        mcompile = 0;
    if (mmatch == NIL)
        mmatch = 0;

    pattern = SCOLUTF8 (MMstartstr (m, mpattern), MMsizestr (m, mpattern));
    string = SCOLUTF8 (MMstartstr (m, mstring), MMsizestr (m, mstring));
    replace = SCOLUTF8 (MMstartstr (m, mreplace), MMsizestr (m, mreplace));

    /* The regex is NULL when the pattern does not compile; string is tested
       because strlen is applied to it */
    regex = scol_pcre_regex_new (pattern, mcompile, mmatch);
    if ((regex != NULL) && (string != NULL))
        result = g_regex_replace (regex, string, strlen (string), mstart, replace, mmatch, &error);
    if (regex != NULL)
        g_regex_unref (regex);

    g_free (pattern);
    g_free (string);
    g_free (replace);

    /* On failure, push a single nil and stop : there is no string to convert */
    if ((error != NULL) || (result == NULL))
    {
        if (error != NULL)
        {
            MMechostr (0, "SCOL_pcreNormalReplace error : %s\n", error->message);
            g_error_free (error);
        }
        else
            MMechostr (0, "SCOL_pcreNormalReplace error : the replacement failed\n");
        g_free (result);
        MMpush (m, NIL);
        return 0;
    }

    /* Both the GLib result and its converted copy have to be released.
       UTF8SCOL is defined outside this file : the comparison avoids a double
       free should it ever hand back its own argument */
    converted = UTF8SCOL (result, strlen (result));
    Mpushstrbloc (m, converted);
    if (converted != result)
        g_free (converted);
    g_free (result);
    return 0;
}
357
358
359
360
361
/*
362
    Internal functions
363
    - scol_pcre_regex_new
364
        GRegex * scol_pcre_regex_new (const gchar *, GRegexCompileFlags, GRegexMatchFlags);
365
        Create a new regular expression
366
367
    - scol_pcre_match_standard
368
        void scol_pcre_match_standard (mmachine, const gchar *, const gchar *, GRegexCompileFlags, GRegexMatchFlags, gint)
369
370
    - scol_pcre_match_DFA
371
        void scol_pcre_match_DFA (mmachine m, const gchar *pattern, const gchar *string, GRegexCompileFlags compile, GRegexMatchFlags match, gint start)
372
373
*/
374
375
/**
 * \brief Compiles a regular expression
 * \param pattern : the regular expression
 * \param compile : compile options for the regular expression
 * \param match : match options for the regular expression
 * \return the new GRegex, to be released with g_regex_unref, or NULL if the
 *         pattern cannot be compiled. The reason of a failure is written to
 *         the log instead of being silently dropped.
 */
GRegex * scol_pcre_regex_new (const gchar * pattern, GRegexCompileFlags compile, GRegexMatchFlags match)
{
    GError *error = NULL;
    GRegex *regex;

    regex = g_regex_new (pattern, compile, match, &error);
    if (error != NULL)
    {
        MMechostr (0, "scol_pcre_regex_new error : %s\n", error->message);
        g_error_free (error);
    }
    return regex;
}
379
380
381
/**
 * \brief Pushes the list of the matches found by g_regex_match_full
 * \param m : the machine whose stack receives the result
 * \param pattern : the regular expression
 * \param string : the string to scan
 * \param compile : compile options
 * \param match : match options
 * \param start : start position in string
 *
 * Exactly one value is pushed : a list with one [text start end] tuple per
 * match (positions as reported by g_match_info_fetch_pos), or nil if nothing
 * matched or if the pattern cannot be compiled.
 *
 * NOTE(review): the matched text is pushed as returned by GLib, without the
 * UTF8SCOL conversion applied to the results of the split and replace
 * functions of this file - confirm this is intended.
 */
void scol_pcre_match_standard (mmachine m, const gchar *pattern, const gchar *string, GRegexCompileFlags compile, GRegexMatchFlags match, gint start)
{
    GRegex *regex;
    GMatchInfo *match_info = NULL;
    GError *error = NULL;
    gchar *word;
    int i, n = 0;
    gint pos1, pos2;

    regex = scol_pcre_regex_new (pattern, compile, match);
    if (regex == NULL)
    {
        /* Without a regex, GLib leaves match_info untouched : stop here
           rather than walk an unset pointer */
        MMechostr (0, "scol_pcre_match_standard error : invalid pattern\n");
        MMpush (m, NIL);
        return;
    }

    g_regex_match_full (regex, string, strlen (string), start, match, &match_info, &error);
    while ((match_info != NULL) && g_match_info_matches (match_info))
    {
        pos1 = -1;
        pos2 = -1;
        word = g_match_info_fetch (match_info, 0);
        g_match_info_fetch_pos (match_info, 0, &pos1, &pos2);

        /* Build the [text start end] tuple of this match */
        Mpushstrbloc (m, word);
        MMpush (m, ITOM (pos1));
        MMpush (m, ITOM (pos2));
        MMpush (m, ITOM (3));
        MBdeftab (m);
        g_free (word);
        n++;

        g_match_info_next (match_info, &error);
    }

    g_match_info_free (match_info);
    g_regex_unref (regex);
    if (error != NULL)
    {
        MMechostr (0, "scol_pcre_match_standard error : %s\n", error->message);
        g_error_free (error);
    }

    /* Chain the n pushed tuples into a list, nil being the tail */
    MMpush (m, NIL);
    for (i = 0; i < n; i++)
    {
        MMpush (m, ITOM (2));
        MBdeftab (m);
    }
    return;
}
424
425
/**
 * \brief Pushes the list of the matches found by g_regex_match_all_full
 * \param m : the machine whose stack receives the result
 * \param pattern : the regular expression
 * \param string : the string to scan
 * \param compile : compile options
 * \param match : match options
 * \param start : start position in string
 *
 * Exactly one value is pushed : a list with one [text start end] tuple per
 * match (positions as reported by g_match_info_fetch_pos), or nil if nothing
 * matched or if the pattern cannot be compiled.
 *
 * NOTE(review): the matched text is pushed as returned by GLib, without the
 * UTF8SCOL conversion applied to the results of the split and replace
 * functions of this file - confirm this is intended.
 */
void scol_pcre_match_DFA (mmachine m, const gchar *pattern, const gchar *string, GRegexCompileFlags compile, GRegexMatchFlags match, gint start)
{
    GRegex *regex;
    GMatchInfo *match_info = NULL;
    GError *error = NULL;
    gchar *word;
    int i, n = 0;
    gint pos1, pos2;

    regex = scol_pcre_regex_new (pattern, compile, match);
    if (regex == NULL)
    {
        /* Without a regex, GLib leaves match_info untouched : stop here
           rather than walk an unset pointer */
        MMechostr (0, "scol_pcre_match_DFA error : invalid pattern\n");
        MMpush (m, NIL);
        return;
    }

    g_regex_match_all_full (regex, string, strlen (string), start, match, &match_info, &error);
    while ((match_info != NULL) && g_match_info_matches (match_info))
    {
        pos1 = -1;
        pos2 = -1;
        word = g_match_info_fetch (match_info, 0);
        g_match_info_fetch_pos (match_info, 0, &pos1, &pos2);

        /* Build the [text start end] tuple of this match */
        Mpushstrbloc (m, word);
        MMpush (m, ITOM (pos1));
        MMpush (m, ITOM (pos2));
        MMpush (m, ITOM (3));
        MBdeftab (m);
        g_free (word);
        n++;

        g_match_info_next (match_info, &error);
    }

    g_match_info_free (match_info);
    g_regex_unref (regex);
    if (error != NULL)
    {
        MMechostr (0, "scol_pcre_match_DFA error : %s\n", error->message);
        g_error_free (error);
    }

    /* Chain the n pushed tuples into a list, nil being the tail */
    MMpush (m, NIL);
    for (i = 0; i < n; i++)
    {
        MMpush (m, ITOM (2));
        MBdeftab (m);
    }
    return;
}
468
469
470
471
472
/* API definitions : */
473
474
/* Names of the package entries handed to PKhardpak by SCOLinitPcreClass.
   Entry k here goes with entry k of glib_pcre_fun, glib_pcre_narg and
   glib_pcre_type. */
char* glib_pcre_name[GLIB_PCRE_PKG_NB]=
{
    /* algorithm selectors */
    "PCRE_MATCH_STANDARD",
    "PCRE_MATCH_DFA",

    /* PCRE_MATCH_* constants */
    "PCRE_MATCH_ANCHORED",
    "PCRE_MATCH_NOTBOL",
    "PCRE_MATCH_NOTEOL",
    "PCRE_MATCH_NOTEMPTY",
    "PCRE_MATCH_PARTIAL",
    "PCRE_MATCH_NEWLINE_CR",
    "PCRE_MATCH_NEWLINE_LF",
    "PCRE_MATCH_NEWLINE_CRLF",
    "PCRE_MATCH_NEWLINE_ANY",

    /* PCRE_REGEX_* constants */
    "PCRE_REGEX_CASELESS",
    "PCRE_REGEX_MULTILINE",
    "PCRE_REGEX_DOTALL",
    "PCRE_REGEX_EXTENDED",
    "PCRE_REGEX_ANCHORED",
    "PCRE_REGEX_DOLLAR_ENDONLY",
    "PCRE_REGEX_UNGREEDY",
    "PCRE_REGEX_RAW",
    "PCRE_REGEX_NO_AUTO_CAPTURE",
    "PCRE_REGEX_OPTIMIZE",
    "PCRE_REGEX_DUPNAMES",
    "PCRE_REGEX_NEWLINE_CR",
    "PCRE_REGEX_NEWLINE_LF",
    "PCRE_REGEX_NEWLINE_CRLF",

    /* functions */
    "_pcreEasyMatch",
    "_pcreEasySplit",
    "_pcreEasyReplace",
    "_pcreNormalMatch",
    "_pcreNormalSplit",
    "_pcreNormalReplace"
};
496
497
/* Value or C entry point of each package entry, in the order of
   glib_pcre_name. Constants are stored as integers cast to the function
   pointer type.
   NOTE(review): the two algorithm selectors are written as N*2 and read back
   with MTOI in SCOL_pcreNormalMatch, whereas the flag constants are written
   as plain bit masks and read back without MTOI - confirm this asymmetry is
   intended, in particular for the odd value (1<<0). */
int (*glib_pcre_fun[GLIB_PCRE_PKG_NB])(mmachine m)=
{
    (bullshit) (1*2),           /* PCRE_MATCH_STANDARD */
    (bullshit) (2*2),           /* PCRE_MATCH_DFA */

    (bullshit) (1<<4),          /* PCRE_MATCH_ANCHORED */
    (bullshit) (1<<7),          /* PCRE_MATCH_NOTBOL */
    (bullshit) (1<<8),          /* PCRE_MATCH_NOTEOL */
    (bullshit) (1<<10),         /* PCRE_MATCH_NOTEMPTY */
    (bullshit) (1<<15),         /* PCRE_MATCH_PARTIAL */
    (bullshit) (1<<20),         /* PCRE_MATCH_NEWLINE_CR */
    (bullshit) (1<<21),         /* PCRE_MATCH_NEWLINE_LF */
    (bullshit) (1<<20|1<<21),   /* PCRE_MATCH_NEWLINE_CRLF */
    (bullshit) (1<<22),         /* PCRE_MATCH_NEWLINE_ANY */

    (bullshit) (1<<0),          /* PCRE_REGEX_CASELESS */
    (bullshit) (1<<1),          /* PCRE_REGEX_MULTILINE */
    (bullshit) (1<<2),          /* PCRE_REGEX_DOTALL */
    (bullshit) (1<<3),          /* PCRE_REGEX_EXTENDED */
    (bullshit) (1<<4),          /* PCRE_REGEX_ANCHORED */
    (bullshit) (1<<5),          /* PCRE_REGEX_DOLLAR_ENDONLY */
    (bullshit) (1<<9),          /* PCRE_REGEX_UNGREEDY */
    (bullshit) (1<<11),         /* PCRE_REGEX_RAW */
    (bullshit) (1<<12),         /* PCRE_REGEX_NO_AUTO_CAPTURE */
    (bullshit) (1<<13),         /* PCRE_REGEX_OPTIMIZE */
    (bullshit) (1<<19),         /* PCRE_REGEX_DUPNAMES */
    (bullshit) (1<<20),         /* PCRE_REGEX_NEWLINE_CR */
    (bullshit) (1<<21),         /* PCRE_REGEX_NEWLINE_LF */
    (bullshit) (1<<20|1<<21),   /* PCRE_REGEX_NEWLINE_CRLF */

    SCOL_pcreEasyMatch,
    SCOL_pcreEasySplit,
    SCOL_pcreEasyReplace,
    SCOL_pcreNormalMatch,
    SCOL_pcreNormalSplit,
    SCOL_pcreNormalReplace
};
519
520
int glib_pcre_narg[GLIB_PCRE_PKG_NB]=
521
{
522
    TYPVAR, TYPVAR,
523
524
    TYPVAR, TYPVAR, TYPVAR,
525
    TYPVAR, TYPVAR, TYPVAR,
526
    TYPVAR, TYPVAR, TYPVAR,
527
528
    TYPVAR, TYPVAR, TYPVAR,
529
    TYPVAR, TYPVAR, TYPVAR,
530
    TYPVAR, TYPVAR, TYPVAR,
531
    TYPVAR, TYPVAR, TYPVAR,
532
    TYPVAR, TYPVAR,
533
534
    2,
535
    2,
536
    3,
537
538
    6,
539 2415 stephane
    6,
540 2407 stephane
    6
541
};
542
543
/* Per-entry type string, in the order of glib_pcre_name : "I" for the 25
   constants, a "fun [...] ..." signature for the 6 functions. */
char* glib_pcre_type[GLIB_PCRE_PKG_NB]=
{
    /* PCRE_MATCH_STANDARD, PCRE_MATCH_DFA */
    "I",
    "I",

    /* PCRE_MATCH_* constants (9) */
    "I",
    "I",
    "I",
    "I",
    "I",
    "I",
    "I",
    "I",
    "I",

    /* PCRE_REGEX_* constants (14) */
    "I",
    "I",
    "I",
    "I",
    "I",
    "I",
    "I",
    "I",
    "I",
    "I",
    "I",
    "I",
    "I",
    "I",

    "fun [S S] I",                      /* _pcreEasyMatch */
    "fun [S S] [S r1]",                 /* _pcreEasySplit */
    "fun [S S S] S",                    /* _pcreEasyReplace */
    "fun [S S I I I I] [[S I I] r1]",   /* _pcreNormalMatch */
    "fun [S S I I I I] [S r1]",         /* _pcreNormalSplit */
    "fun [S S S I I I] S"               /* _pcreNormalReplace */
};
565
566
/**
567
 * \brief Load the Scol api
568
 */
569
int SCOLinitPcreClass (mmachine m)
570
{
571
    int k;
572
573
    MMechostr (0, "SCOLinitPcreClass : entering\n");
574
575
    k = PKhardpak (m, "PCREengine", GLIB_PCRE_PKG_NB, glib_pcre_name, glib_pcre_fun, glib_pcre_narg, glib_pcre_type);
576
    return k;
577
}
578
579
580
/**
581
 * \brief Load and free the regular expression library
582
 * Platforms supported : MS Windows and GNU / Linux
583
 */
584
585
/**
 * \brief Logs that the PCRE library has been released
 * \return always 0
 */
int PcreRelease ()
{
    const int status = 0;

    MMechostr (0, "\nPCRE library released !\n");
    return status;
}
590
591
#if ((defined _WIN32) || (defined __WIN32__))
592
593
/**
 * \brief Windows entry point : loads the PCRE library
 * \param m : the machine, kept in the global mm
 * \param w : kept in the global ww and passed to SCOLinitplugin
 * \return 0 on success, else the non-zero value returned by SCOLinitPcreClass
 */
__declspec (dllexport) int SCOLloadPCRE (mmachine m, cbmachine w)
{
    int status;

    mm = m;
    ww = w;

    MMechostr (MSKDEBUG, "\nPCRE library loading .... !\n");
    SCOLinitplugin (w);

    status = SCOLinitPcreClass (m);
    if (status == 0)
    {
        /* Only reported when the package registration succeeded */
        MMechostr(MSKDEBUG, "\nPCRE library loaded !\n");
    }
    return status;
}
605
606
__declspec (dllexport) int SCOLfreePCRE ()
607
{
608
    PcreRelease ();
609
    return 0;
610
}
611
612
613
614
615
616
/* Version GNU / Linux */
617
#elif ((defined linux) || (defined __linux))
618
619
/**
 * \brief GNU / Linux entry point : loads the PCRE library
 * \param m : the machine, kept in the global mm
 * \return 0 on success, else the non-zero value returned by SCOLinitPcreClass
 */
int SCOLloadPCRE (mmachine m)
{
    int status;

    mm = m;
    MMechostr (MSKDEBUG, "\nPCRE library loading !\n");

    status = SCOLinitPcreClass (m);
    if (status == 0)
    {
        /* Only reported when the package registration succeeded */
        MMechostr (MSKDEBUG, "\nPCRE library loaded !\n");
    }
    return status;
}
630
631
int SCOLfreePCRE ()
632
{
633
    MMechostr(MSKDEBUG, "\nPCRE library release !\n");
634
635
    PcreRelease ();
636
    return 0;
637
}
638
639
#else
640
#error no platform supported
641
#endif