Project

General

Profile

1 2401 stephane
/*
2
This source file is part of Scol
3
For the latest info, see http://www.scolring.org
4
5
Copyright (c) 2010 Stephane Bisaro, aka Iri <iri@irizone.net>
6
7
This program is free software; you can redistribute it and/or modify it under
8
the terms of the GNU Lesser General Public License as published by the Free Software
9
Foundation; either version 2 of the License, or (at your option) any later
10
version.
11
12
This program is distributed in the hope that it will be useful, but WITHOUT
13
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14
FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
15
16
You should have received a copy of the GNU Lesser General Public License along with
17
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
18
Place - Suite 330, Boston, MA 02111-1307, USA, or go to
19
http://www.gnu.org/copyleft/lesser.txt
20
21
For other information, please contact us via http://www.scolring.org/
22
*/
23
24 2407 stephane
#ifdef __cplusplus
25
#error This source file is not C++ but rather C. Please use a C-compiler
26
#endif
27 2401 stephane
28 2407 stephane
29
30 2401 stephane
#include "../include/scol_glib_pregex.h"
31 2407 stephane
32
/* http://library.gnome.org/devel/glib/unstable/glib-Perl-compatible-regular-expressions.html */
33
34
#if ((defined _WIN32) || (defined __WIN32__))
35
cbmachine ww;
36
#endif
37
mmachine  mm;
38
39
/*
40
    Easy interface
41
42
    - SCOL_pcreEasyMatch
43
    - SCOL_pcreEasySplit
44
    - SCOL_pcreEasyReplace
45
*/
46
47
48
/**
49
 * \brief Scans for a match in string for pattern
50
 * \param : S : pattern : the regular expression
51
 * \param : S : string : the string to scan for matches
52
 * \return : I : 1 if matched, else 0 (or nil if string or pattern is nil)
53
 *
54
 * The compile options for the regular expression are at 0
55
 * The match options ara at 0 too
56
 */
57
int SCOL_pcreEasyMatch (mmachine m)
58
{
59
    int mpattern, mstring;
60
    gchar *pattern, *string;
61
62
    MMechostr (MSKDEBUG, "SCOL_pcreEasyMatch : entering\n");
63
64
    mstring = MTOP (MMpull (m));
65
    mpattern = MTOP (MMpull (m));
66
67
    if((mstring == NIL) || (mpattern == NIL))
68
    {
69
        MMechostr (0, "SCOL_pcreEasyMatch error : an argument is nil");
70
        MMpush (m, NIL);
71
        return 0;
72
    }
73
    pattern = SCOLUTF8 (MMstartstr (m, mpattern), MMsizestr (m, mpattern));
74
    string = SCOLUTF8 (MMstartstr (m, mstring), MMsizestr (m, mstring));
75
76
    MMpush (m, ITOM (g_regex_match_simple (pattern, string, 0, 0)));
77
    g_free (pattern);
78
    g_free (string);
79
    return 0;
80
}
81
82
/**
83
 * \brief Breaks the string on the pattern, and returns an list of the tokens.
84
 * \param : S : pattern : the regular expression
85
 * \param : S : string : the string to scan for matches
86
 * \return : [S r1] : a list of substrings or nil
87
 *
88
 * The compile options for the regular expression are at 0
89
 * The match options ara at 0 too
90
 */
91
int SCOL_pcreEasySplit (mmachine m)
92
{
93
    int mpattern, mstring;
94
    int i = 0;
95
    gchar *pattern, *string;
96
    gchar **result;
97
98
    MMechostr (MSKDEBUG, "SCOL_pcreEsaySplit : entering\n");
99
100
    mstring = MTOP (MMpull (m));
101
    mpattern = MTOP (MMpull (m));
102
103
    if (mstring == NIL)
104
    {
105
        MMechostr (0, "SCOL_pcreEsaySplit error : an argument is nil");
106
        MMpush (m, NIL);
107
        return 0;
108
    }
109
    if (mpattern == NIL)
110
    {
111
        Mpushstrbloc (m, MMstartstr (m, mstring));
112
        MMpush (m, NIL);
113
        MMpush (m, ITOM (2));
114
        MBdeftab (m);
115
        return 0;
116
    }
117
118
    pattern = SCOLUTF8 (MMstartstr (m, mpattern), MMsizestr (m, mpattern));
119
    string = SCOLUTF8 (MMstartstr (m, mstring), MMsizestr (m, mstring));
120
121
    result = g_regex_split_simple (pattern, string, 0, 0);
122
    while (result[i] != NULL)
123
    {
124
        Mpushstrbloc (m, UTF8SCOL (result[i], strlen (result[i])));
125
        i++;
126
    }
127
    g_strfreev (result);
128
    g_free (pattern);
129
    g_free (string);
130
    MMpush (m, NIL);
131
    for (; i > 0; i--)
132
    {
133
        MMpush (m, ITOM (2));
134
        MBdeftab (m);
135
    }
136
    return 0;
137
}
138
139
/**
140
 * \brief Replaces all occurrences of the pattern in string with a replacement text
141
 * \param S : pattern
142
 * \param S : string
143
 * \param S : replacement text
144
 * \return S : new string containing the replacements
145
 */
146
int SCOL_pcreEasyReplace (mmachine m)
147
{
148
    int mpattern, mstring, mreplace;
149
    GRegex *regex;
150
    gchar *result;
151
    gchar *pattern, *string, *replace;
152
153
    MMechostr (MSKDEBUG, "SCOL_pcreEasyReplace : entering\n");
154
155
    mreplace = MTOP (MMpull (m));
156
    mstring = MTOP (MMpull (m));
157
    mpattern = MTOP (MMpull (m));
158
159
    if(mstring == NIL)
160
    {
161
        MMechostr (0, "SCOL_pcreEasyReplace error : an argument is nil");
162
        MMpush (m, NIL);
163
        return 0;
164
    }
165
    if ((mreplace == NIL) || (mpattern == NIL))
166
    {
167
        Mpushstrbloc (m, MMstartstr (m, mstring));
168
        return 0;
169
    }
170
171
    pattern = SCOLUTF8 (MMstartstr (m, mpattern), MMsizestr (m, mpattern));
172
    string = SCOLUTF8 (MMstartstr (m, mstring), MMsizestr (m, mstring));
173
    replace = SCOLUTF8 (MMstartstr (m, mreplace), MMsizestr (m, mreplace));
174
175
    regex = scol_pcre_regex_new (pattern, 0, 0);
176
    result = g_regex_replace (regex, string, strlen (string), 0, replace, 0, NULL);
177
    result = UTF8SCOL (result, strlen (result));
178
    Mpushstrbloc (m, result);
179
    g_free (result);
180
    g_regex_unref (regex);
181
    g_free (pattern);
182
    g_free (string);
183
    g_free (replace);
184
    return 0;
185
}
186
187
188
/*
189
    Normal interface
190
191
    - SCOL_pcreNormalMatch
192
    - SCOL_pcreNormalSplit
193
*/
194
195
/**
 * \brief Scans a string for the matches of a pattern, with options
 * \param : S : pattern : the regular expression
 * \param : S : string : the string to scan for matches
 * \param : I : compile options (nil is treated as 0)
 * \param : I : start position (a negative value is treated as 0)
 * \param : I : match options (nil is treated as 0)
 * \param : I : algorithm : PCRE_MATCH_DFA, any other value selects PCRE_MATCH_STANDARD
 * \return : [[S I I] r1] : the list pushed by the matching helper, or nil
 * if string or pattern is nil
 *
 * NOTE(review): the compile and match options are used as raw stack words
 * (they are not decoded with MTOI, unlike the start position and the
 * algorithm). Presumably intentional, to be confirmed against the way the
 * flag constants are registered.
 */
int SCOL_pcreNormalMatch (mmachine m)
{
    int algo, match_flags, start_pos, compile_flags, subject_ref, pattern_ref;
    gchar *utf8_pattern, *utf8_subject;

    MMechostr (MSKDEBUG, "SCOL_pcreNormalMatch : entering\n");

    /* Arguments are pulled in reverse order of the Scol prototype. */
    algo = MTOI (MMpull (m));
    match_flags = MMpull (m);
    start_pos = MTOI (MMpull (m));
    compile_flags = MMpull (m);
    subject_ref = MTOP (MMpull (m));
    pattern_ref = MTOP (MMpull (m));

    if ((pattern_ref == NIL) || (subject_ref == NIL))
    {
        MMechostr (0, "SCOL_pcreNormalMatch error : string or pattern is nil\n");
        MMpush (m, NIL);
        return 0;
    }

    /* Normalise the optional arguments. */
    start_pos = (start_pos < 0) ? 0 : start_pos;
    compile_flags = (compile_flags == NIL) ? 0 : compile_flags;
    match_flags = (match_flags == NIL) ? 0 : match_flags;

    utf8_pattern = SCOLUTF8 (MMstartstr (m, pattern_ref), MMsizestr (m, pattern_ref));
    utf8_subject = SCOLUTF8 (MMstartstr (m, subject_ref), MMsizestr (m, subject_ref));

    /* The helper pushes the result on the stack; standard matching is the
       default for every algorithm value other than PCRE_MATCH_DFA. */
    if (algo == PCRE_MATCH_DFA)
        scol_pcre_match_DFA (m, utf8_pattern, utf8_subject, compile_flags, match_flags, start_pos);
    else
        scol_pcre_match_standard (m, utf8_pattern, utf8_subject, compile_flags, match_flags, start_pos);

    g_free (utf8_pattern);
    g_free (utf8_subject);
    return 0;
}
240
241
/**
 * \brief Breaks the string on the pattern, with options
 * \param : S : pattern : the regular expression
 * \param : S : string : the string to split
 * \param : I : compile options (nil is treated as 0)
 * \param : I : start position (a negative value is treated as 0)
 * \param : I : match options (nil is treated as 0)
 * \param : I : maximum number of tokens
 * \return : [S r1] : the list of substrings, in order, or nil if string or
 * pattern is nil or if the split fails
 */
int SCOL_pcreNormalSplit (mmachine m)
{
    int mpattern, mstring, mcompile, mstart, mmax, mmatch;
    int i = 0;
    gchar *pattern, *string;
    GRegex *regex = NULL;
    GError *error = NULL;
    gchar **result = NULL;

    MMechostr (MSKDEBUG, "SCOL_pcreNormalSplit : entering\n");

    /* Arguments are pulled in reverse order of the Scol prototype. */
    mmax = MTOI (MMpull (m));
    mmatch = MMpull (m);
    mstart = MTOI (MMpull (m));
    mcompile = MMpull (m);
    mstring = MTOP (MMpull (m));
    mpattern = MTOP (MMpull (m));

    if ((mstring == NIL) || (mpattern == NIL))
    {
        MMechostr (0, "SCOL_pcreNormalSplit error : string or pattern is nil\n");
        MMpush (m, NIL);
        return 0;
    }

    if (mstart < 0)
        mstart = 0;
    if (mcompile == NIL)
        mcompile = 0;
    if (mmatch == NIL)
        mmatch = 0;

    pattern = SCOLUTF8 (MMstartstr (m, mpattern), MMsizestr (m, mpattern));
    string = SCOLUTF8 (MMstartstr (m, mstring), MMsizestr (m, mstring));

    /* The regex is NULL when the pattern does not compile: it must be
       neither used nor unreferenced in that case. */
    if ((pattern != NULL) && (string != NULL))
        regex = scol_pcre_regex_new (pattern, mcompile, mmatch);
    if (regex != NULL)
    {
        result = g_regex_split_full (regex, string, strlen (string), mstart, mmatch, mmax, &error);
        g_regex_unref (regex);
    }
    g_free (pattern);
    g_free (string);

    /* On failure exactly one value (nil) is pushed and the function stops
       here, instead of going on with a NULL array. */
    if ((result == NULL) || (error != NULL))
    {
        MMechostr (0, "SCOL_pcreNormalSplit error : %s\n",
                   (error != NULL) ? error->message : "invalid pattern or string");
        if (error != NULL)
            g_error_free (error);
        g_strfreev (result);
        MMpush (m, NIL);
        return 0;
    }

    for (i = 0; result[i] != NULL; i++)
    {
        /* The converted token is a temporary buffer, released once pushed.
           NOTE(review): assumes UTF8SCOL returns a g_free-able buffer - confirm. */
        gchar *token = UTF8SCOL (result[i], strlen (result[i]));
        Mpushstrbloc (m, token);
        g_free (token);
    }
    g_strfreev (result);

    /* Chain the i pushed strings into a list of 2-element tuples. */
    MMpush (m, NIL);
    for (; i > 0; i--)
    {
        MMpush (m, ITOM (2));
        MBdeftab (m);
    }
    return 0;
}
302
303
/**
 * \brief Placeholder, not implemented yet
 *
 * The body does nothing: the stack is left untouched and 0 is returned.
 */
int SCOL_pcreNormalReplace (mmachine m)
{
    (void) m;   /* unused for now */
    return 0;
}
307
308
309
310
311
/*
312
    Internal functions
313
    - scol_pcre_regex_new
314
        GRegex * scol_pcre_regex_new (const gchar *, GRegexCompileFlags, GRegexMatchFlags);
315
        Create a new regular expression
316
317
    - scol_pcre_match_standard
318
        void scol_pcre_match_standard (mmachine, const gchar *, const gchar *, GRegexCompileFlags, GRegexMatchFlags, gint)
319
320
    - scol_pcre_match_DFA
321
        void scol_pcre_match_DFA (mmachine m, const gchar *pattern, const gchar *string, GRegexCompileFlags compile, GRegexMatchFlags match, gint start)
322
323
*/
324
325
/**
 * \brief Compiles a regular expression
 * \param pattern : the regular expression (UTF-8)
 * \param compile : compile options
 * \param match : match options
 * \return the new GRegex (to release with g_regex_unref), or NULL if the
 * pattern can not be compiled. The reason is written to the log.
 */
GRegex * scol_pcre_regex_new (const gchar * pattern, GRegexCompileFlags compile, GRegexMatchFlags match)
{
    GError *error = NULL;
    GRegex *regex;

    regex = g_regex_new (pattern, compile, match, &error);
    if (error != NULL)
    {
        /* Report why the compilation failed instead of dropping the error. */
        MMechostr (0, "scol_pcre_regex_new error : %s\n", error->message);
        g_error_free (error);
    }
    return regex;
}
329
330
331
/**
 * \brief Pushes the list of the matches found with the standard algorithm
 * \param m : the Scol machine
 * \param pattern : the regular expression (UTF-8)
 * \param string : the string to scan (UTF-8)
 * \param compile : compile options
 * \param match : match options
 * \param start : start position in the string
 *
 * Exactly one value is pushed on the stack: a list of tuples
 * [matched text, start position, end position], or nil if nothing matched
 * or if the pattern can not be compiled.
 *
 * NOTE(review): the matched text is pushed as it is (UTF-8), it is not
 * converted back with UTF8SCOL as the split functions do - confirm this
 * is intended.
 */
void scol_pcre_match_standard (mmachine m, const gchar *pattern, const gchar *string, GRegexCompileFlags compile, GRegexMatchFlags match, gint start)
{
    GRegex *regex = NULL;
    GMatchInfo *match_info = NULL;
    GError *error = NULL;
    gchar *word;
    int i, n = 0;
    gint pos1, pos2;

    if ((pattern != NULL) && (string != NULL))
        regex = scol_pcre_regex_new (pattern, compile, match);

    /* Without a compiled regex there is no match info to read: return the
       empty list instead of working on uninitialised pointers. */
    if (regex == NULL)
    {
        MMechostr (0, "scol_pcre_match_standard error : invalid pattern or string\n");
        MMpush (m, NIL);
        return;
    }

    g_regex_match_full (regex, string, strlen (string), start, match, &match_info, &error);
    while (g_match_info_matches (match_info))
    {
        pos1 = -1;
        pos2 = -1;
        word = g_match_info_fetch (match_info, 0);
        g_match_info_fetch_pos (match_info, 0, &pos1, &pos2);

        /* One tuple [text start end] per match. */
        Mpushstrbloc (m, word);
        MMpush (m, ITOM (pos1));
        MMpush (m, ITOM (pos2));
        MMpush (m, ITOM (3));
        MBdeftab (m);
        g_free (word);
        n++;

        g_match_info_next (match_info, &error);
    }

    g_match_info_free (match_info);
    g_regex_unref (regex);
    if (error != NULL)
    {
        MMechostr (0, "scol_pcre_match_standard error : %s\n", error->message);
        g_error_free (error);
    }

    /* Chain the n pushed tuples into a list. */
    MMpush (m, NIL);
    for (i = 0; i < n; i++)
    {
        MMpush (m, ITOM (2));
        MBdeftab (m);
    }
    return;
}
374
375
/**
 * \brief Pushes the list of the matches found with the DFA algorithm
 * \param m : the Scol machine
 * \param pattern : the regular expression (UTF-8)
 * \param string : the string to scan (UTF-8)
 * \param compile : compile options
 * \param match : match options
 * \param start : start position in the string
 *
 * Same as the standard variant, except that the first scan is done with
 * g_regex_match_all_full. Exactly one value is pushed on the stack: a list
 * of tuples [matched text, start position, end position], or nil if nothing
 * matched or if the pattern can not be compiled.
 *
 * NOTE(review): the matched text is pushed as it is (UTF-8), it is not
 * converted back with UTF8SCOL as the split functions do - confirm this
 * is intended.
 */
void scol_pcre_match_DFA (mmachine m, const gchar *pattern, const gchar *string, GRegexCompileFlags compile, GRegexMatchFlags match, gint start)
{
    GRegex *regex = NULL;
    GMatchInfo *match_info = NULL;
    GError *error = NULL;
    gchar *word;
    int i, n = 0;
    gint pos1, pos2;

    if ((pattern != NULL) && (string != NULL))
        regex = scol_pcre_regex_new (pattern, compile, match);

    /* Without a compiled regex there is no match info to read: return the
       empty list instead of working on uninitialised pointers. */
    if (regex == NULL)
    {
        MMechostr (0, "scol_pcre_match_DFA error : invalid pattern or string\n");
        MMpush (m, NIL);
        return;
    }

    g_regex_match_all_full (regex, string, strlen (string), start, match, &match_info, &error);
    while (g_match_info_matches (match_info))
    {
        pos1 = -1;
        pos2 = -1;
        word = g_match_info_fetch (match_info, 0);
        g_match_info_fetch_pos (match_info, 0, &pos1, &pos2);

        /* One tuple [text start end] per match. */
        Mpushstrbloc (m, word);
        MMpush (m, ITOM (pos1));
        MMpush (m, ITOM (pos2));
        MMpush (m, ITOM (3));
        MBdeftab (m);
        g_free (word);
        n++;

        g_match_info_next (match_info, &error);
    }

    g_match_info_free (match_info);
    g_regex_unref (regex);
    if (error != NULL)
    {
        MMechostr (0, "scol_pcre_match_DFA error : %s\n", error->message);
        g_error_free (error);
    }

    /* Chain the n pushed tuples into a list. */
    MMpush (m, NIL);
    for (i = 0; i < n; i++)
    {
        MMpush (m, ITOM (2));
        MBdeftab (m);
    }
    return;
}
418
419
420
421
422
/* API definitions : */
423
424
/* Scol names of the package entries. The four glib_pcre_* tables are
   parallel: the entry at a given index in each of them describes the same
   constant or function. */
char* glib_pcre_name[GLIB_PCRE_PKG_NB]=
{
    /* matching algorithms */
    "PCRE_MATCH_STANDARD",
    "PCRE_MATCH_DFA",

    /* match options */
    "PCRE_MATCH_ANCHORED",
    "PCRE_MATCH_NOTBOL",
    "PCRE_MATCH_NOTEOL",
    "PCRE_MATCH_NOTEMPTY",
    "PCRE_MATCH_PARTIAL",
    "PCRE_MATCH_NEWLINE_CR",
    "PCRE_MATCH_NEWLINE_LF",
    "PCRE_MATCH_NEWLINE_CRLF",
    "PCRE_MATCH_NEWLINE_ANY",

    /* compile options */
    "PCRE_REGEX_CASELESS",
    "PCRE_REGEX_MULTILINE",
    "PCRE_REGEX_DOTALL",
    "PCRE_REGEX_EXTENDED",
    "PCRE_REGEX_ANCHORED",
    "PCRE_REGEX_DOLLAR_ENDONLY",
    "PCRE_REGEX_UNGREEDY",
    "PCRE_REGEX_RAW",
    "PCRE_REGEX_NO_AUTO_CAPTURE",
    "PCRE_REGEX_OPTIMIZE",
    "PCRE_REGEX_DUPNAMES",
    "PCRE_REGEX_NEWLINE_CR",
    "PCRE_REGEX_NEWLINE_LF",
    "PCRE_REGEX_NEWLINE_CRLF",

    /* easy interface */
    "_pcreEasyMatch",
    "_pcreEasySplit",
    "_pcreEasyReplace",

    /* normal interface */
    "_pcreNormalMatch",
    "_pcreNormalSplit"
};
445
446
/* Values of the package entries, in the same order as glib_pcre_name.
   A constant is stored as an integer cast to the function pointer type;
   a function is stored as its C entry point.
   The option values are the bit values of the GLib GRegexMatchFlags and
   GRegexCompileFlags enumerations.
   NOTE(review): the constants are stored as raw words - confirm how
   PKhardpak decodes them for TYPVAR entries. */
int (*glib_pcre_fun[GLIB_PCRE_PKG_NB])(mmachine m)=
{
    /* matching algorithms */
    (bullshit) (1 * 2),                     /* PCRE_MATCH_STANDARD */
    (bullshit) (2 * 2),                     /* PCRE_MATCH_DFA */

    /* match options */
    (bullshit) (1 << 4),                    /* PCRE_MATCH_ANCHORED */
    (bullshit) (1 << 7),                    /* PCRE_MATCH_NOTBOL */
    (bullshit) (1 << 8),                    /* PCRE_MATCH_NOTEOL */
    (bullshit) (1 << 10),                   /* PCRE_MATCH_NOTEMPTY */
    (bullshit) (1 << 15),                   /* PCRE_MATCH_PARTIAL */
    (bullshit) (1 << 20),                   /* PCRE_MATCH_NEWLINE_CR */
    (bullshit) (1 << 21),                   /* PCRE_MATCH_NEWLINE_LF */
    (bullshit) ((1 << 20) | (1 << 21)),     /* PCRE_MATCH_NEWLINE_CRLF */
    (bullshit) (1 << 22),                   /* PCRE_MATCH_NEWLINE_ANY */

    /* compile options */
    (bullshit) (1 << 0),                    /* PCRE_REGEX_CASELESS */
    (bullshit) (1 << 1),                    /* PCRE_REGEX_MULTILINE */
    (bullshit) (1 << 2),                    /* PCRE_REGEX_DOTALL */
    (bullshit) (1 << 3),                    /* PCRE_REGEX_EXTENDED */
    (bullshit) (1 << 4),                    /* PCRE_REGEX_ANCHORED */
    (bullshit) (1 << 5),                    /* PCRE_REGEX_DOLLAR_ENDONLY */
    (bullshit) (1 << 9),                    /* PCRE_REGEX_UNGREEDY */
    (bullshit) (1 << 11),                   /* PCRE_REGEX_RAW */
    (bullshit) (1 << 12),                   /* PCRE_REGEX_NO_AUTO_CAPTURE */
    (bullshit) (1 << 13),                   /* PCRE_REGEX_OPTIMIZE */
    (bullshit) (1 << 19),                   /* PCRE_REGEX_DUPNAMES */
    (bullshit) (1 << 20),                   /* PCRE_REGEX_NEWLINE_CR */
    (bullshit) (1 << 21),                   /* PCRE_REGEX_NEWLINE_LF */
    (bullshit) ((1 << 20) | (1 << 21)),     /* PCRE_REGEX_NEWLINE_CRLF */

    /* easy interface */
    SCOL_pcreEasyMatch,
    SCOL_pcreEasySplit,
    SCOL_pcreEasyReplace,

    /* normal interface */
    SCOL_pcreNormalMatch,
    SCOL_pcreNormalSplit
};
467
468
int glib_pcre_narg[GLIB_PCRE_PKG_NB]=
469
{
470
    TYPVAR, TYPVAR,
471
472
    TYPVAR, TYPVAR, TYPVAR,
473
    TYPVAR, TYPVAR, TYPVAR,
474
    TYPVAR, TYPVAR, TYPVAR,
475
476
    TYPVAR, TYPVAR, TYPVAR,
477
    TYPVAR, TYPVAR, TYPVAR,
478
    TYPVAR, TYPVAR, TYPVAR,
479
    TYPVAR, TYPVAR, TYPVAR,
480
    TYPVAR, TYPVAR,
481
482
    2,
483
    2,
484
    3,
485
486
    6,
487
    6
488
};
489
490
/* Scol type of each package entry, in the same order as glib_pcre_name:
   "I" for the integer constants, a "fun [arguments] result" signature for
   the functions. */
char* glib_pcre_type[GLIB_PCRE_PKG_NB]=
{
    /* matching algorithms (2) */
    "I",
    "I",

    /* match options (9) */
    "I",
    "I",
    "I",
    "I",
    "I",
    "I",
    "I",
    "I",
    "I",

    /* compile options (14) */
    "I",
    "I",
    "I",
    "I",
    "I",
    "I",
    "I",
    "I",
    "I",
    "I",
    "I",
    "I",
    "I",
    "I",

    /* easy interface */
    "fun [S S] I",                          /* _pcreEasyMatch */
    "fun [S S] [S r1]",                     /* _pcreEasySplit */
    "fun [S S S] S",                        /* _pcreEasyReplace */

    /* normal interface */
    "fun [S S I I I I] [[S I I] r1]",       /* _pcreNormalMatch */
    "fun [S S I I I I] [S r1]"              /* _pcreNormalSplit */
};
511
512
/**
513
 * \brief Load the Scol api
514
 */
515
int SCOLinitPcreClass (mmachine m)
516
{
517
    int k;
518
519
    MMechostr (0, "SCOLinitPcreClass : entering\n");
520
521
    k = PKhardpak (m, "PCREengine", GLIB_PCRE_PKG_NB, glib_pcre_name, glib_pcre_fun, glib_pcre_narg, glib_pcre_type);
522
    return k;
523
}
524
525
526
/**
527
 * \brief Load and free the regular expression library
528
 * Supported platforms: MS Windows and GNU / Linux
529
 */
530
531
/**
 * \brief Common part of the library release: only writes a message to the log
 * \return always 0
 */
int PcreRelease ()
{
    const int status = 0;

    MMechostr (0, "\nPCRE library released !\n");
    return status;
}
536
537
#if ((defined _WIN32) || (defined __WIN32__))
538
539
/**
 * \brief MS Windows entry point: loads the library
 * \param m : the Scol machine, stored in the global mm
 * \param w : the callback machine, stored in the global ww
 * \return 0 if success, otherwise the error code of SCOLinitPcreClass
 *
 * NOTE(review): a message is logged before SCOLinitplugin is called; the
 * statement order is kept as it was - confirm that logging is already
 * available at that point.
 */
__declspec (dllexport) int SCOLloadPCRE (mmachine m, cbmachine w)
{
    int status;

    ww = w;
    mm = m;

    MMechostr (MSKDEBUG, "\nPCRE library loading .... !\n");
    SCOLinitplugin (w);

    status = SCOLinitPcreClass (m);
    if (status == 0)
        MMechostr(MSKDEBUG, "\nPCRE library loaded !\n");

    return status;
}
551
552
__declspec (dllexport) int SCOLfreePCRE ()
553
{
554
    PcreRelease ();
555
    return 0;
556
}
557
558
559
560
561
562
/* Version GNU / Linux */
563
#elif ((defined linux) || (defined __linux))
564
565
/**
 * \brief GNU / Linux entry point: loads the library
 * \param m : the Scol machine, stored in the global mm
 * \return 0 if success, otherwise the error code of SCOLinitPcreClass
 */
int SCOLloadPCRE (mmachine m)
{
    int status;

    mm = m;

    MMechostr (MSKDEBUG, "\nPCRE library loading !\n");

    status = SCOLinitPcreClass (m);
    if (status == 0)
        MMechostr (MSKDEBUG, "\nPCRE library loaded !\n");

    return status;
}
576
577
int SCOLfreePCRE ()
578
{
579
    MMechostr(MSKDEBUG, "\nPCRE library release !\n");
580
581
    PcreRelease ();
582
    return 0;
583
}
584
585
#else
586
#error no platform supported
587
#endif