Project

General

Profile

1
/*
This source file is part of Scol
For the latest info, see http://www.scolring.org

Copyright (c) 2010 Stephane Bisaro, aka Iri <iri@irizone.net>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License as published by the Free Software
Foundation; either version 2 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place - Suite 330, Boston, MA 02111-1307, USA, or go to
http://www.gnu.org/copyleft/lesser.txt

For other information, please contact us from http://www.scolring.org/
*/
23

    
24
#ifdef __cplusplus
25
#error This source file is not C++ but rather C. Please use a C-compiler
26
#endif
27

    
28

    
29

    
30
#include "../include/scol_glib_pregex.h"
31

    
32
/* http://library.gnome.org/devel/glib/unstable/glib-Perl-compatible-regular-expressions.html */
33

    
34
#if ((defined _WIN32) || (defined __WIN32__))
35
cbmachine ww;
36
#endif
37
mmachine  mm;
38

    
39
/*
40
    Easy interface
41

    
42
    - SCOL_pcreEasyMatch
43
    - SCOL_pcreEasySplit
44
    - SCOL_pcreEasyReplace
45
*/
46

    
47

    
48
/**
49
 * \brief Scans for a match in string for pattern
50
 * \param : S : pattern : the regular expression
51
 * \param : S : string : the string to scan for matches
52
 * \return : I : 1 if matched, else 0 (or nil if string or pattern is nil)
53
 *
54
 * The compile options for the regular expression are at 0
55
 * The match options ara at 0 too
56
 */
57
int SCOL_pcreEasyMatch (mmachine m)
58
{
59
    int mpattern, mstring;
60
    gchar *pattern, *string;
61

    
62
    MMechostr (MSKDEBUG, "SCOL_pcreEasyMatch : entering\n");
63

    
64
    mstring = MTOP (MMpull (m));
65
    mpattern = MTOP (MMpull (m));
66

    
67
    if((mstring == NIL) || (mpattern == NIL))
68
    {
69
        MMechostr (0, "SCOL_pcreEasyMatch error : an argument is nil");
70
        MMpush (m, NIL);
71
        return 0;
72
    }
73
    pattern = SCOLUTF8 (MMstartstr (m, mpattern), MMsizestr (m, mpattern));
74
    string = SCOLUTF8 (MMstartstr (m, mstring), MMsizestr (m, mstring));
75

    
76
    MMpush (m, ITOM (g_regex_match_simple (pattern, string, 0, 0)));
77
    g_free (pattern);
78
    g_free (string);
79
    return 0;
80
}
81

    
82
/**
83
 * \brief Breaks the string on the pattern, and returns an list of the tokens.
84
 * \param : S : pattern : the regular expression
85
 * \param : S : string : the string to scan for matches
86
 * \return : [S r1] : a list of substrings or nil
87
 *
88
 * The compile options for the regular expression are at 0
89
 * The match options ara at 0 too
90
 */
91
int SCOL_pcreEasySplit (mmachine m)
92
{
93
    int mpattern, mstring;
94
    int i = 0;
95
    gchar *pattern, *string;
96
    gchar **result;
97

    
98
    MMechostr (MSKDEBUG, "SCOL_pcreEsaySplit : entering\n");
99

    
100
    mstring = MTOP (MMpull (m));
101
    mpattern = MTOP (MMpull (m));
102

    
103
    if (mstring == NIL)
104
    {
105
        MMechostr (0, "SCOL_pcreEsaySplit error : an argument is nil");
106
        MMpush (m, NIL);
107
        return 0;
108
    }
109
    if (mpattern == NIL)
110
    {
111
        Mpushstrbloc (m, MMstartstr (m, mstring));
112
        MMpush (m, NIL);
113
        MMpush (m, ITOM (2));
114
        MBdeftab (m);
115
        return 0;
116
    }
117

    
118
    pattern = SCOLUTF8 (MMstartstr (m, mpattern), MMsizestr (m, mpattern));
119
    string = SCOLUTF8 (MMstartstr (m, mstring), MMsizestr (m, mstring));
120

    
121
    result = g_regex_split_simple (pattern, string, 0, 0);
122
    while (result[i] != NULL)
123
    {
124
        Mpushstrbloc (m, UTF8SCOL (result[i], strlen (result[i])));
125
        i++;
126
    }
127
    g_strfreev (result);
128
    g_free (pattern);
129
    g_free (string);
130
    MMpush (m, NIL);
131
    for (; i > 0; i--)
132
    {
133
        MMpush (m, ITOM (2));
134
        MBdeftab (m);
135
    }
136
    return 0;
137
}
138

    
139
/**
140
 * \brief Replaces all occurrences of the pattern in string with a replacement text
141
 * \param S : pattern
142
 * \param S : string
143
 * \param S : replacement text
144
 * \return S : new string containing the replacements
145
 */
146
int SCOL_pcreEasyReplace (mmachine m)
147
{
148
    int mpattern, mstring, mreplace;
149
    GRegex *regex;
150
    gchar *result;
151
    gchar *pattern, *string, *replace;
152

    
153
    MMechostr (MSKDEBUG, "SCOL_pcreEasyReplace : entering\n");
154

    
155
    mreplace = MTOP (MMpull (m));
156
    mstring = MTOP (MMpull (m));
157
    mpattern = MTOP (MMpull (m));
158

    
159
    if(mstring == NIL)
160
    {
161
        MMechostr (0, "SCOL_pcreEasyReplace error : an argument is nil");
162
        MMpush (m, NIL);
163
        return 0;
164
    }
165
    if ((mreplace == NIL) || (mpattern == NIL))
166
    {
167
        Mpushstrbloc (m, MMstartstr (m, mstring));
168
        return 0;
169
    }
170

    
171
    pattern = SCOLUTF8 (MMstartstr (m, mpattern), MMsizestr (m, mpattern));
172
    string = SCOLUTF8 (MMstartstr (m, mstring), MMsizestr (m, mstring));
173
    replace = SCOLUTF8 (MMstartstr (m, mreplace), MMsizestr (m, mreplace));
174

    
175
    regex = scol_pcre_regex_new (pattern, 0, 0);
176
    result = g_regex_replace (regex, string, strlen (string), 0, replace, 0, NULL);
177
    result = UTF8SCOL (result, strlen (result));
178
    Mpushstrbloc (m, result);
179
    g_free (result);
180
    g_regex_unref (regex);
181
    g_free (pattern);
182
    g_free (string);
183
    g_free (replace);
184
    return 0;
185
}
186

    
187

    
188
/*
    Normal interface

    - SCOL_pcreNormalMatch
    - SCOL_pcreNormalSplit
    - SCOL_pcreNormalReplace
*/
194

    
195
/**
 * \brief Scans a string for the matches of a pattern, with explicit options
 * \param : S : pattern : the regular expression
 * \param : S : string : the string to scan for matches
 * \param : I : compile flags (nil is treated as 0)
 * \param : I : start position (a negative value is treated as 0)
 * \param : I : match flags (nil is treated as 0)
 * \param : I : algorithm : PCRE_MATCH_DFA selects the DFA matcher, any other
 *              value selects the standard one
 * \return : [[S I I] r1] : the list built by the selected matcher, or nil if
 *           string or pattern is nil
 */
int SCOL_pcreNormalMatch (mmachine m)
{
    int algo, match_flags, start, compile_flags, subject_ref, regex_ref;
    gchar *utf8_regex, *utf8_subject;

    MMechostr (MSKDEBUG, "SCOL_pcreNormalMatch : entering\n");

    /* Arguments leave the stack in reverse order.
       NOTE(review): both flag words are used exactly as pulled (no MTOI),
       unlike start and algo - presumably on purpose, to be confirmed against
       the way the flag constants are registered. */
    algo = MTOI (MMpull (m));
    match_flags = MMpull (m);
    start = MTOI (MMpull (m));
    compile_flags = MMpull (m);
    subject_ref = MTOP (MMpull (m));
    regex_ref = MTOP (MMpull (m));

    if ((regex_ref == NIL) || (subject_ref == NIL))
    {
        MMechostr (0, "SCOL_pcreNormalMatch error : string or pattern is nil\n");
        MMpush (m, NIL);
        return 0;
    }

    /* Normalise the optional arguments. */
    start = (start < 0) ? 0 : start;
    compile_flags = (compile_flags == NIL) ? 0 : compile_flags;
    match_flags = (match_flags == NIL) ? 0 : match_flags;
    if (algo != PCRE_MATCH_DFA)
        algo = PCRE_MATCH_STANDARD;

    utf8_regex = SCOLUTF8 (MMstartstr (m, regex_ref), MMsizestr (m, regex_ref));
    utf8_subject = SCOLUTF8 (MMstartstr (m, subject_ref), MMsizestr (m, subject_ref));

    /* The matcher pushes the result on the stack itself. */
    if (algo != PCRE_MATCH_STANDARD)
        scol_pcre_match_DFA (m, utf8_regex, utf8_subject, compile_flags, match_flags, start);
    else
        scol_pcre_match_standard (m, utf8_regex, utf8_subject, compile_flags, match_flags, start);

    g_free (utf8_regex);
    g_free (utf8_subject);
    return 0;
}
240

    
241
/**
 * \brief Breaks the string on the pattern, with explicit options
 * \param : S : pattern : the regular expression
 * \param : S : string : the string to split
 * \param : I : compile flags (nil is treated as 0)
 * \param : I : start position (a negative value is treated as 0)
 * \param : I : match flags (nil is treated as 0)
 * \param : I : maximum number of tokens, handed to g_regex_split_full
 * \return : [S r1] : the list of tokens, or nil if string or pattern is nil,
 *           if the pattern does not compile or if the split fails
 *
 * Exactly one value is pushed on the stack, whatever the outcome.
 */
int SCOL_pcreNormalSplit (mmachine m)
{
    int mpattern, mstring, mcompile, mstart, mmax, mmatch;
    int i;
    gchar *pattern, *string;
    GRegex *regex = NULL;
    GError *error = NULL;
    gchar **result = NULL;

    MMechostr (MSKDEBUG, "SCOL_pcreNormalSplit : entering\n");

    /* Arguments leave the stack in reverse order; the two flag words are
       used exactly as pulled (no MTOI). */
    mmax = MTOI (MMpull (m));
    mmatch = MMpull (m);
    mstart = MTOI (MMpull (m));
    mcompile = MMpull (m);
    mstring = MTOP (MMpull (m));
    mpattern = MTOP (MMpull (m));

    if ((mstring == NIL) || (mpattern == NIL))
    {
        MMechostr (0, "SCOL_pcreNormalSplit error : string or pattern is nil\n");
        MMpush (m, NIL);
        return 0;
    }

    if (mstart < 0)
        mstart = 0;
    if (mcompile == NIL)
        mcompile = 0;
    if (mmatch == NIL)
        mmatch = 0;

    pattern = SCOLUTF8 (MMstartstr (m, mpattern), MMsizestr (m, mpattern));
    string = SCOLUTF8 (MMstartstr (m, mstring), MMsizestr (m, mstring));

    /* SCOLUTF8 is defined outside this file; a NULL result is treated here as
       a failed conversion. */
    if ((pattern != NULL) && (string != NULL))
        regex = scol_pcre_regex_new (pattern, mcompile, mmatch);

    if (regex != NULL)
    {
        result = g_regex_split_full (regex, string, strlen (string), mstart, mmatch, mmax, &error);
        g_regex_unref (regex);
    }
    else
    {
        MMechostr (0, "SCOL_pcreNormalSplit error : invalid regular expression\n");
    }
    g_free (pattern);
    g_free (string);

    if (error != NULL)
    {
        MMechostr (0, "SCOL_pcreNormalSplit error : %s\n", error->message);
        g_error_free (error);
        g_strfreev (result);
        result = NULL;
    }

    /* Every failure ends here, so nil is pushed once and nothing else. */
    if (result == NULL)
    {
        MMpush (m, NIL);
        return 0;
    }

    for (i = 0; result[i] != NULL; i++)
    {
        /* The converted token is released once pushed, as SCOL_pcreEasyReplace
           does with the buffer it gets from UTF8SCOL. */
        gchar *token = UTF8SCOL (result[i], strlen (result[i]));

        Mpushstrbloc (m, token);
        g_free (token);
    }
    g_strfreev (result);

    /* Chain the i pushed tokens into a list, starting from the nil tail. */
    MMpush (m, NIL);
    for (; i > 0; i--)
    {
        MMpush (m, ITOM (2));
        MBdeftab (m);
    }
    return 0;
}
302

    
303
/**
 * \brief Replaces all occurrences of the pattern in string, with explicit options
 * \param : S : pattern : the regular expression
 * \param : S : string : the string to work on
 * \param : S : replacement text
 * \param : I : compile flags (nil is treated as 0)
 * \param : I : start position (a negative value is treated as 0)
 * \param : I : match flags (nil is treated as 0)
 * \return : S : the new string; the unchanged string if the pattern or the
 *           replacement is nil; nil if the string is nil, if the pattern does
 *           not compile or if the replacement fails
 *
 * Exactly one value is pushed on the stack, whatever the outcome.
 */
int SCOL_pcreNormalReplace (mmachine m)
{
    int mpattern, mstring, mreplace, mcompile, mstart, mmatch;
    gchar *pattern, *string, *replace;
    gchar *utf8_result = NULL;
    gchar *result = NULL;
    GRegex *regex = NULL;
    GError *error = NULL;

    MMechostr (MSKDEBUG, "SCOL_pcreNormalReplace : entering\n");

    /* Arguments leave the stack in reverse order; the two flag words are
       used exactly as pulled (no MTOI). */
    mmatch = MMpull (m);
    mstart = MTOI (MMpull (m));
    mcompile = MMpull (m);
    mreplace = MTOP (MMpull (m));
    mstring = MTOP (MMpull (m));
    mpattern = MTOP (MMpull (m));

    if (mstring == NIL)
    {
        MMechostr (0, "SCOL_pcreNormalReplace error : string is nil\n");
        MMpush (m, NIL);
        return 0;
    }
    if ((mreplace == NIL) || (mpattern == NIL))
    {
        /* Nothing to replace (or nothing to replace with): echo the input. */
        Mpushstrbloc (m, MMstartstr (m, mstring));
        return 0;
    }

    if (mstart < 0)
        mstart = 0;
    if (mcompile == NIL)
        mcompile = 0;
    if (mmatch == NIL)
        mmatch = 0;

    pattern = SCOLUTF8 (MMstartstr (m, mpattern), MMsizestr (m, mpattern));
    string = SCOLUTF8 (MMstartstr (m, mstring), MMsizestr (m, mstring));
    replace = SCOLUTF8 (MMstartstr (m, mreplace), MMsizestr (m, mreplace));

    /* SCOLUTF8 is defined outside this file; a NULL result is treated here as
       a failed conversion. */
    if ((pattern != NULL) && (string != NULL) && (replace != NULL))
        regex = scol_pcre_regex_new (pattern, mcompile, mmatch);

    if (regex != NULL)
    {
        utf8_result = g_regex_replace (regex, string, strlen (string), mstart, replace, mmatch, &error);
        g_regex_unref (regex);
    }
    else
    {
        MMechostr (0, "SCOL_pcreNormalReplace error : invalid regular expression\n");
    }
    g_free (pattern);
    g_free (string);
    g_free (replace);

    if (error != NULL)
    {
        MMechostr (0, "SCOL_pcreNormalReplace error : %s\n", error->message);
        g_error_free (error);
        g_free (utf8_result);
        utf8_result = NULL;
    }

    if (utf8_result != NULL)
    {
        /* The UTF-8 buffer and its converted copy are two distinct
           allocations: release the first one as soon as it is converted. */
        result = UTF8SCOL (utf8_result, strlen (utf8_result));
        g_free (utf8_result);
    }

    /* Every failure ends here, so nil is pushed once and nothing else. */
    if (result == NULL)
    {
        MMpush (m, NIL);
        return 0;
    }

    Mpushstrbloc (m, result);
    g_free (result);
    return 0;
}
357

    
358

    
359

    
360

    
361
/*
    Internal functions

    - scol_pcre_regex_new
        GRegex * scol_pcre_regex_new (const gchar *, GRegexCompileFlags, GRegexMatchFlags);
        Create a new regular expression

    - scol_pcre_match_standard
        void scol_pcre_match_standard (mmachine, const gchar *, const gchar *, GRegexCompileFlags, GRegexMatchFlags, gint)

    - scol_pcre_match_DFA
        void scol_pcre_match_DFA (mmachine m, const gchar *pattern, const gchar *string, GRegexCompileFlags compile, GRegexMatchFlags match, gint start)
*/
374

    
375
/**
 * \brief Compiles a regular expression
 * \param pattern : the regular expression
 * \param compile : compile options handed to g_regex_new
 * \param match : match options handed to g_regex_new
 * \return the value returned by g_regex_new: the compiled expression, or NULL
 *         when the pattern could not be compiled
 *
 * A compilation error is reported through MMechostr instead of being
 * silently discarded. A returned expression must be released by the caller
 * with g_regex_unref.
 */
GRegex * scol_pcre_regex_new (const gchar * pattern, GRegexCompileFlags compile, GRegexMatchFlags match)
{
    GError *error = NULL;
    GRegex *regex;

    regex = g_regex_new (pattern, compile, match, &error);
    if (error != NULL)
    {
        MMechostr (0, "scol_pcre_regex_new error : %s\n", error->message);
        g_error_free (error);
    }
    return regex;
}
379

    
380

    
381
/**
 * \brief Collects the successive matches of pattern in string (standard algorithm)
 * \param m : the Scol machine whose stack receives the result
 * \param pattern : the regular expression
 * \param string : the string to scan
 * \param compile : compile options
 * \param match : match options
 * \param start : position in string where the scan begins
 *
 * Pushes one value on the stack: a list with one tuple
 * [matched text, start position, end position] per match. The list is nil
 * when nothing matches, when string is NULL or when the pattern does not
 * compile.
 *
 * NOTE(review): the matched text is pushed as returned by GLib, without the
 * UTF8SCOL conversion applied by the split and replace functions of this
 * file, and the positions are those reported by GLib for the string it was
 * given - confirm that this is what callers expect.
 */
void scol_pcre_match_standard (mmachine m, const gchar *pattern, const gchar *string, GRegexCompileFlags compile, GRegexMatchFlags match, gint start)
{
    GRegex *regex = NULL;
    GMatchInfo *match_info = NULL;
    GError *error = NULL;
    gboolean found;
    int i, n = 0;

    if (string != NULL)
        regex = scol_pcre_regex_new (pattern, compile, match);

    /* Without a compiled expression GLib would leave match_info untouched:
       answer the empty list right away. */
    if (regex == NULL)
    {
        MMechostr (0, "scol_pcre_match_standard error : invalid pattern or string\n");
        MMpush (m, NIL);
        return;
    }

    found = g_regex_match_full (regex, string, strlen (string), start, match, &match_info, &error);
    while (found)
    {
        gchar *word = g_match_info_fetch (match_info, 0);
        gint pos1 = -1, pos2 = -1;

        g_match_info_fetch_pos (match_info, 0, &pos1, &pos2);

        /* Build the tuple [text, start, end] for this match. */
        Mpushstrbloc (m, word);
        MMpush (m, ITOM (pos1));
        MMpush (m, ITOM (pos2));
        MMpush (m, ITOM (3));
        MBdeftab (m);
        g_free (word);
        n++;

        /* Stops on the last match as well as on a matching error. */
        found = g_match_info_next (match_info, &error);
    }

    g_match_info_free (match_info);
    g_regex_unref (regex);
    if (error != NULL)
    {
        MMechostr (0, "scol_pcre_match_standard error : %s\n", error->message);
        g_error_free (error);
    }

    /* Chain the n pushed tuples into a list, starting from the nil tail. */
    MMpush (m, NIL);
    for (i = 0; i < n; i++)
    {
        MMpush (m, ITOM (2));
        MBdeftab (m);
    }
    return;
}
424

    
425
/**
 * \brief Collects the matches of pattern in string (DFA algorithm)
 * \param m : the Scol machine whose stack receives the result
 * \param pattern : the regular expression
 * \param string : the string to scan
 * \param compile : compile options
 * \param match : match options
 * \param start : position in string where the scan begins
 *
 * Pushes one value on the stack: a list with one tuple
 * [matched text, start position, end position] per match reported by
 * g_regex_match_all_full. The list is nil when nothing matches, when string
 * is NULL or when the pattern does not compile.
 *
 * The matches of a DFA scan are all held by the same GMatchInfo and are
 * read by index, from 0 to g_match_info_get_match_count - 1;
 * g_match_info_next is not used to walk them.
 *
 * NOTE(review): the matched text is pushed as returned by GLib, without the
 * UTF8SCOL conversion applied by the split and replace functions of this
 * file - confirm that this is what callers expect.
 */
void scol_pcre_match_DFA (mmachine m, const gchar *pattern, const gchar *string, GRegexCompileFlags compile, GRegexMatchFlags match, gint start)
{
    GRegex *regex = NULL;
    GMatchInfo *match_info = NULL;
    GError *error = NULL;
    gint count = 0;
    int i, n = 0;

    if (string != NULL)
        regex = scol_pcre_regex_new (pattern, compile, match);

    /* Without a compiled expression GLib would leave match_info untouched:
       answer the empty list right away. */
    if (regex == NULL)
    {
        MMechostr (0, "scol_pcre_match_DFA error : invalid pattern or string\n");
        MMpush (m, NIL);
        return;
    }

    if (g_regex_match_all_full (regex, string, strlen (string), start, match, &match_info, &error))
        count = g_match_info_get_match_count (match_info);

    for (i = 0; i < count; i++)
    {
        gchar *word = g_match_info_fetch (match_info, i);
        gint pos1 = -1, pos2 = -1;

        g_match_info_fetch_pos (match_info, i, &pos1, &pos2);

        /* Build the tuple [text, start, end] for this match. */
        Mpushstrbloc (m, word);
        MMpush (m, ITOM (pos1));
        MMpush (m, ITOM (pos2));
        MMpush (m, ITOM (3));
        MBdeftab (m);
        g_free (word);
        n++;
    }

    g_match_info_free (match_info);
    g_regex_unref (regex);
    if (error != NULL)
    {
        MMechostr (0, "scol_pcre_match_DFA error : %s\n", error->message);
        g_error_free (error);
    }

    /* Chain the n pushed tuples into a list, starting from the nil tail. */
    MMpush (m, NIL);
    for (i = 0; i < n; i++)
    {
        MMpush (m, ITOM (2));
        MBdeftab (m);
    }
    return;
}
468

    
469

    
470

    
471

    
472
/* API definitions : */
473

    
474
/* Names of the package entries handed to PKhardpak. The entry order is shared
   with glib_pcre_fun, glib_pcre_narg and glib_pcre_type. */
char* glib_pcre_name[GLIB_PCRE_PKG_NB]=
{
    /* matching algorithms */
    "PCRE_MATCH_STANDARD",
    "PCRE_MATCH_DFA",

    /* match flags */
    "PCRE_MATCH_ANCHORED",
    "PCRE_MATCH_NOTBOL",
    "PCRE_MATCH_NOTEOL",
    "PCRE_MATCH_NOTEMPTY",
    "PCRE_MATCH_PARTIAL",
    "PCRE_MATCH_NEWLINE_CR",
    "PCRE_MATCH_NEWLINE_LF",
    "PCRE_MATCH_NEWLINE_CRLF",
    "PCRE_MATCH_NEWLINE_ANY",

    /* compile flags */
    "PCRE_REGEX_CASELESS",
    "PCRE_REGEX_MULTILINE",
    "PCRE_REGEX_DOTALL",
    "PCRE_REGEX_EXTENDED",
    "PCRE_REGEX_ANCHORED",
    "PCRE_REGEX_DOLLAR_ENDONLY",
    "PCRE_REGEX_UNGREEDY",
    "PCRE_REGEX_RAW",
    "PCRE_REGEX_NO_AUTO_CAPTURE",
    "PCRE_REGEX_OPTIMIZE",
    "PCRE_REGEX_DUPNAMES",
    "PCRE_REGEX_NEWLINE_CR",
    "PCRE_REGEX_NEWLINE_LF",
    "PCRE_REGEX_NEWLINE_CRLF",

    /* easy interface */
    "_pcreEasyMatch",
    "_pcreEasySplit",
    "_pcreEasyReplace",

    /* normal interface */
    "_pcreNormalMatch",
    "_pcreNormalSplit",
    "_pcreNormalReplace"
};
496

    
497
/* Values of the package entries, in the order of glib_pcre_name: a constant
   value for the TYPVAR entries, the C function for the others.
   NOTE(review): the constants go through the project's `bullshit` cast,
   presumably a function-pointer typedef - confirm in the project headers. */
int (*glib_pcre_fun[GLIB_PCRE_PKG_NB])(mmachine m)=
{
    (bullshit) (1*2),           /* PCRE_MATCH_STANDARD */
    (bullshit) (2*2),           /* PCRE_MATCH_DFA */

    (bullshit) (1<<4),          /* PCRE_MATCH_ANCHORED */
    (bullshit) (1<<7),          /* PCRE_MATCH_NOTBOL */
    (bullshit) (1<<8),          /* PCRE_MATCH_NOTEOL */
    (bullshit) (1<<10),         /* PCRE_MATCH_NOTEMPTY */
    (bullshit) (1<<15),         /* PCRE_MATCH_PARTIAL */
    (bullshit) (1<<20),         /* PCRE_MATCH_NEWLINE_CR */
    (bullshit) (1<<21),         /* PCRE_MATCH_NEWLINE_LF */
    (bullshit) (1<<20|1<<21),   /* PCRE_MATCH_NEWLINE_CRLF */
    (bullshit) (1<<22),         /* PCRE_MATCH_NEWLINE_ANY */

    (bullshit) (1<<0),          /* PCRE_REGEX_CASELESS */
    (bullshit) (1<<1),          /* PCRE_REGEX_MULTILINE */
    (bullshit) (1<<2),          /* PCRE_REGEX_DOTALL */
    (bullshit) (1<<3),          /* PCRE_REGEX_EXTENDED */
    (bullshit) (1<<4),          /* PCRE_REGEX_ANCHORED */
    (bullshit) (1<<5),          /* PCRE_REGEX_DOLLAR_ENDONLY */
    (bullshit) (1<<9),          /* PCRE_REGEX_UNGREEDY */
    (bullshit) (1<<11),         /* PCRE_REGEX_RAW */
    (bullshit) (1<<12),         /* PCRE_REGEX_NO_AUTO_CAPTURE */
    (bullshit) (1<<13),         /* PCRE_REGEX_OPTIMIZE */
    (bullshit) (1<<19),         /* PCRE_REGEX_DUPNAMES */
    (bullshit) (1<<20),         /* PCRE_REGEX_NEWLINE_CR */
    (bullshit) (1<<21),         /* PCRE_REGEX_NEWLINE_LF */
    (bullshit) (1<<20|1<<21),   /* PCRE_REGEX_NEWLINE_CRLF */

    SCOL_pcreEasyMatch,
    SCOL_pcreEasySplit,
    SCOL_pcreEasyReplace,

    SCOL_pcreNormalMatch,
    SCOL_pcreNormalSplit,
    SCOL_pcreNormalReplace
};
519

    
520
int glib_pcre_narg[GLIB_PCRE_PKG_NB]=
521
{
522
    TYPVAR, TYPVAR,
523

    
524
    TYPVAR, TYPVAR, TYPVAR,
525
    TYPVAR, TYPVAR, TYPVAR,
526
    TYPVAR, TYPVAR, TYPVAR,
527

    
528
    TYPVAR, TYPVAR, TYPVAR,
529
    TYPVAR, TYPVAR, TYPVAR,
530
    TYPVAR, TYPVAR, TYPVAR,
531
    TYPVAR, TYPVAR, TYPVAR,
532
    TYPVAR, TYPVAR,
533

    
534
    2,
535
    2,
536
    3,
537

    
538
    6,
539
    6,
540
    6
541
};
542

    
543
/* Scol type of each package entry, in the order of glib_pcre_name. */
char* glib_pcre_type[GLIB_PCRE_PKG_NB]=
{
    "I",    /* PCRE_MATCH_STANDARD */
    "I",    /* PCRE_MATCH_DFA */

    "I",    /* PCRE_MATCH_ANCHORED */
    "I",    /* PCRE_MATCH_NOTBOL */
    "I",    /* PCRE_MATCH_NOTEOL */
    "I",    /* PCRE_MATCH_NOTEMPTY */
    "I",    /* PCRE_MATCH_PARTIAL */
    "I",    /* PCRE_MATCH_NEWLINE_CR */
    "I",    /* PCRE_MATCH_NEWLINE_LF */
    "I",    /* PCRE_MATCH_NEWLINE_CRLF */
    "I",    /* PCRE_MATCH_NEWLINE_ANY */

    "I",    /* PCRE_REGEX_CASELESS */
    "I",    /* PCRE_REGEX_MULTILINE */
    "I",    /* PCRE_REGEX_DOTALL */
    "I",    /* PCRE_REGEX_EXTENDED */
    "I",    /* PCRE_REGEX_ANCHORED */
    "I",    /* PCRE_REGEX_DOLLAR_ENDONLY */
    "I",    /* PCRE_REGEX_UNGREEDY */
    "I",    /* PCRE_REGEX_RAW */
    "I",    /* PCRE_REGEX_NO_AUTO_CAPTURE */
    "I",    /* PCRE_REGEX_OPTIMIZE */
    "I",    /* PCRE_REGEX_DUPNAMES */
    "I",    /* PCRE_REGEX_NEWLINE_CR */
    "I",    /* PCRE_REGEX_NEWLINE_LF */
    "I",    /* PCRE_REGEX_NEWLINE_CRLF */

    "fun [S S] I",                      /* _pcreEasyMatch */
    "fun [S S] [S r1]",                 /* _pcreEasySplit */
    "fun [S S S] S",                    /* _pcreEasyReplace */

    "fun [S S I I I I] [[S I I] r1]",   /* _pcreNormalMatch */
    "fun [S S I I I I] [S r1]",         /* _pcreNormalSplit */
    "fun [S S S I I I] S"               /* _pcreNormalReplace */
};
565

    
566
/**
567
 * \brief Load the Scol api
568
 */
569
int SCOLinitPcreClass (mmachine m)
570
{
571
    int k;
572

    
573
    MMechostr (0, "SCOLinitPcreClass : entering\n");
574

    
575
    k = PKhardpak (m, "PCREengine", GLIB_PCRE_PKG_NB, glib_pcre_name, glib_pcre_fun, glib_pcre_narg, glib_pcre_type);
576
    return k;
577
}
578

    
579

    
580
/**
 * \brief Load and free the regular expression library
 * Supported platforms : MS Windows and GNU / Linux
 */
584

    
585
/**
 * \brief Common release step: only logs that the library is released
 * \return always 0
 */
int PcreRelease ()
{
    /* There is no state to tear down here, only the trace below. */
    MMechostr (0, "\nPCRE library released !\n");

    return 0;
}
590

    
591
#if ((defined _WIN32) || (defined __WIN32__))
592

    
593
/**
 * \brief MS Windows entry point : loads the library into the Scol machine
 * \param m : the Scol machine, kept in the global mm
 * \param w : the callback machine, kept in the global ww
 * \return 0 on success, else the non-zero value returned by SCOLinitPcreClass
 */
__declspec (dllexport) int SCOLloadPCRE (mmachine m, cbmachine w)
{
    int status;

    ww = w;
    mm = m;

    MMechostr (MSKDEBUG, "\nPCRE library loading .... !\n");
    SCOLinitplugin (w);

    status = SCOLinitPcreClass (m);
    if (status != 0)
        return status;

    MMechostr(MSKDEBUG, "\nPCRE library loaded !\n");
    return status;
}
605

    
606
__declspec (dllexport) int SCOLfreePCRE ()
607
{
608
    PcreRelease ();
609
    return 0;
610
}
611

    
612

    
613

    
614

    
615

    
616
/* Version GNU / Linux */
617
#elif ((defined linux) || (defined __linux))
618

    
619
/**
 * \brief GNU / Linux entry point : loads the library into the Scol machine
 * \param m : the Scol machine, kept in the global mm
 * \return 0 on success, else the non-zero value returned by SCOLinitPcreClass
 */
int SCOLloadPCRE (mmachine m)
{
    int status;

    mm = m;

    MMechostr (MSKDEBUG, "\nPCRE library loading !\n");

    status = SCOLinitPcreClass (m);
    if (status != 0)
        return status;

    MMechostr (MSKDEBUG, "\nPCRE library loaded !\n");
    return status;
}
630

    
631
int SCOLfreePCRE ()
632
{
633
    MMechostr(MSKDEBUG, "\nPCRE library release !\n");
634

    
635
    PcreRelease ();
636
    return 0;
637
}
638

    
639
#else
640
#error no platform supported
641
#endif
(2-2/2)