Project

General

Profile

1
/*
2
This source file is part of Scol
3
For the latest info, see http://www.scolring.org
4

    
5
Copyright (c) 2010 Stephane Bisaro, aka Iri <iri@irizone.net>
6

    
7
This program is free software; you can redistribute it and/or modify it under
8
the terms of the GNU Lesser General Public License as published by the Free Software
9
Foundation; either version 2 of the License, or (at your option) any later
10
version.
11

    
12
This program is distributed in the hope that it will be useful, but WITHOUT
13
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14
FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
15

    
16
You should have received a copy of the GNU Lesser General Public License along with
17
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
18
Place - Suite 330, Boston, MA 02111-1307, USA, or go to
19
http://www.gnu.org/copyleft/lesser.txt
20

    
21
For others informations, please contact us from http://www.scolring.org/
22
*/
23

    
24
#ifdef __cplusplus
25
#error This source file is not C++ but rather C. Please use a C-compiler
26
#endif
27

    
28

    
29

    
30
#include "../include/scol_glib_pregex.h"
31

    
32
/* http://library.gnome.org/devel/glib/unstable/glib-Perl-compatible-regular-expressions.html */
33

    
34
#if ((defined _WIN32) || (defined __WIN32__))
35
cbmachine ww;
36
#endif
37
mmachine  mm;
38

    
39
/*
40
    Easy interface
41

    
42
    - SCOL_pcreEasyMatch
43
    - SCOL_pcreEasySplit
44
    - SCOL_pcreEasyReplace
45
*/
46

    
47

    
48
/**
49
 * \brief Scans for a match in string for pattern
50
 * \param : S : pattern : the regular expression
51
 * \param : S : string : the string to scan for matches
52
 * \return : I : 1 if matched, else 0 (or nil if string or pattern is nil)
53
 *
54
 * The compile options for the regular expression are at 0
55
 * The match options ara at 0 too
56
 */
57
int SCOL_pcreEasyMatch (mmachine m)
58
{
59
    int mpattern, mstring;
60
    gchar *pattern, *string;
61

    
62
    MMechostr (MSKDEBUG, "SCOL_pcreEasyMatch : entering\n");
63

    
64
    mstring = MTOP (MMpull (m));
65
    mpattern = MTOP (MMpull (m));
66

    
67
    if((mstring == NIL) || (mpattern == NIL))
68
    {
69
        MMechostr (0, "SCOL_pcreEasyMatch error : an argument is nil");
70
        MMpush (m, NIL);
71
        return 0;
72
    }
73
    pattern = SCOLUTF8 (MMstartstr (m, mpattern), MMsizestr (m, mpattern));
74
    string = SCOLUTF8 (MMstartstr (m, mstring), MMsizestr (m, mstring));
75

    
76
    MMpush (m, ITOM (g_regex_match_simple (pattern, string, 0, 0)));
77
    g_free (pattern);
78
    g_free (string);
79
    return 0;
80
}
81

    
82
/**
83
 * \brief Breaks the string on the pattern, and returns an list of the tokens.
84
 * \param : S : pattern : the regular expression
85
 * \param : S : string : the string to scan for matches
86
 * \return : [S r1] : a list of substrings or nil
87
 *
88
 * The compile options for the regular expression are at 0
89
 * The match options ara at 0 too
90
 */
91
int SCOL_pcreEasySplit (mmachine m)
92
{
93
    int mpattern, mstring;
94
    int i = 0;
95
    gchar *pattern, *string;
96
    gchar **result;
97

    
98
    MMechostr (MSKDEBUG, "SCOL_pcreEsaySplit : entering\n");
99

    
100
    mstring = MTOP (MMpull (m));
101
    mpattern = MTOP (MMpull (m));
102

    
103
    if (mstring == NIL)
104
    {
105
        MMechostr (0, "SCOL_pcreEsaySplit error : an argument is nil");
106
        MMpush (m, NIL);
107
        return 0;
108
    }
109
    if (mpattern == NIL)
110
    {
111
        Mpushstrbloc (m, MMstartstr (m, mstring));
112
        MMpush (m, NIL);
113
        MMpush (m, ITOM (2));
114
        MBdeftab (m);
115
        return 0;
116
    }
117

    
118
    pattern = SCOLUTF8 (MMstartstr (m, mpattern), MMsizestr (m, mpattern));
119
    string = SCOLUTF8 (MMstartstr (m, mstring), MMsizestr (m, mstring));
120

    
121
    result = g_regex_split_simple (pattern, string, 0, 0);
122
    while (result[i] != NULL)
123
    {
124
        Mpushstrbloc (m, UTF8SCOL (result[i], strlen (result[i])));
125
        i++;
126
    }
127
    g_strfreev (result);
128
    g_free (pattern);
129
    g_free (string);
130
    MMpush (m, NIL);
131
    for (; i > 0; i--)
132
    {
133
        MMpush (m, ITOM (2));
134
        MBdeftab (m);
135
    }
136
    return 0;
137
}
138

    
139
/**
140
 * \brief Replaces all occurrences of the pattern in string with a replacement text
141
 * \param S : pattern
142
 * \param S : string
143
 * \param S : replacement text
144
 * \return S : new string containing the replacements
145
 */
146
int SCOL_pcreEasyReplace (mmachine m)
147
{
148
    int mpattern, mstring, mreplace;
149
    GRegex *regex;
150
    gchar *result;
151
    gchar *pattern, *string, *replace;
152

    
153
    MMechostr (MSKDEBUG, "SCOL_pcreEasyReplace : entering\n");
154

    
155
    mreplace = MTOP (MMpull (m));
156
    mstring = MTOP (MMpull (m));
157
    mpattern = MTOP (MMpull (m));
158

    
159
    if(mstring == NIL)
160
    {
161
        MMechostr (0, "SCOL_pcreEasyReplace error : an argument is nil");
162
        MMpush (m, NIL);
163
        return 0;
164
    }
165
    if ((mreplace == NIL) || (mpattern == NIL))
166
    {
167
        Mpushstrbloc (m, MMstartstr (m, mstring));
168
        return 0;
169
    }
170

    
171
    pattern = SCOLUTF8 (MMstartstr (m, mpattern), MMsizestr (m, mpattern));
172
    string = SCOLUTF8 (MMstartstr (m, mstring), MMsizestr (m, mstring));
173
    replace = SCOLUTF8 (MMstartstr (m, mreplace), MMsizestr (m, mreplace));
174

    
175
    regex = scol_pcre_regex_new (pattern, 0, 0);
176
    result = g_regex_replace (regex, string, strlen (string), 0, replace, 0, NULL);
177
    result = UTF8SCOL (result, strlen (result));
178
    Mpushstrbloc (m, result);
179
    g_free (result);
180
    g_regex_unref (regex);
181
    g_free (pattern);
182
    g_free (string);
183
    g_free (replace);
184
    return 0;
185
}
186

    
187

    
188
/*
189
    Normal interface
190

    
191
    - SCOL_pcreNormalMatch
192
    - SCOL_pcreNormalSplit
193
*/
194

    
195
/**
 * \brief Scans a string for matches, with explicit options
 * \param : S : pattern : the regular expression
 * \param : S : string : the string to scan for matches
 * \param : I : compile flags (nil is read as 0)
 * \param : I : start position (a negative value or nil is read as 0)
 * \param : I : match flags (nil is read as 0)
 * \param : I : algorithm : PCRE_MATCH_DFA, anything else selects PCRE_MATCH_STANDARD
 * \return : [[S I I] r1] : list built by scol_pcre_match_standard or
 *           scol_pcre_match_DFA, or nil if string or pattern is nil
 */
int SCOL_pcreNormalMatch (mmachine m)
{
    int algo, match_flags, start, compile_flags, mstring, mpattern;
    gchar *utf8_pattern, *utf8_string;

    MMechostr (MSKDEBUG, "SCOL_pcreNormalMatch : entering\n");

    /* Arguments are pulled in reverse order. */
    algo = MTOI (MMpull (m));
    /* NOTE(review): both flag words are used as pulled, without MTOI - confirm
       this is what the values stored in the flag table expect. */
    match_flags = MMpull (m);
    start = MTOI (MMpull (m));
    compile_flags = MMpull (m);
    mstring = MTOP (MMpull (m));
    mpattern = MTOP (MMpull (m));

    if ((mstring == NIL) || (mpattern == NIL))
    {
        MMechostr (0, "SCOL_pcreNormalMatch error : string or pattern is nil\n");
        MMpush (m, NIL);
        return 0;
    }

    /* Default values for the optional arguments. */
    start = (start < 0) ? 0 : start;
    compile_flags = (compile_flags == NIL) ? 0 : compile_flags;
    match_flags = (match_flags == NIL) ? 0 : match_flags;

    utf8_pattern = SCOLUTF8 (MMstartstr (m, mpattern), MMsizestr (m, mpattern));
    utf8_string = SCOLUTF8 (MMstartstr (m, mstring), MMsizestr (m, mstring));

    /* Each helper pushes the resulting list on the machine stack. */
    if (algo == PCRE_MATCH_DFA)
        scol_pcre_match_DFA (m, utf8_pattern, utf8_string, compile_flags, match_flags, start);
    else
        scol_pcre_match_standard (m, utf8_pattern, utf8_string, compile_flags, match_flags, start);

    g_free (utf8_pattern);
    g_free (utf8_string);
    return 0;
}
240

    
241
/**
 * \brief Breaks the string on the pattern, with explicit options
 * \param : S : pattern : the regular expression
 * \param : S : string : the string to split
 * \param : I : compile flags (nil is read as 0)
 * \param : I : start position (a negative value or nil is read as 0)
 * \param : I : match flags (nil is read as 0)
 * \param : I : maximum number of tokens, given to g_regex_split_full
 * \return : [S r1] : a list of substrings, or nil if string or pattern is nil,
 *           if the pattern can not be compiled or if the split fails
 */
int SCOL_pcreNormalSplit (mmachine m)
{
    int mpattern, mstring, mcompile, mstart, mmax, mmatch;
    int count = 0;
    gchar *pattern, *string;
    GRegex *regex;
    GError *error = NULL;
    gchar **tokens = NULL;

    MMechostr (MSKDEBUG, "SCOL_pcreNormalSplit : entering\n");

    /* Arguments are pulled in reverse order. */
    mmax = MTOI (MMpull (m));
    mmatch = MMpull (m);
    mstart = MTOI (MMpull (m));
    mcompile = MMpull (m);
    mstring = MTOP (MMpull (m));
    mpattern = MTOP (MMpull (m));

    if ((mstring == NIL) || (mpattern == NIL))
    {
        MMechostr (0, "SCOL_pcreNormalSplit error : string or pattern is nil\n");
        MMpush (m, NIL);
        return 0;
    }

    if (mstart < 0)
        mstart = 0;
    if (mcompile == NIL)
        mcompile = 0;
    if (mmatch == NIL)
        mmatch = 0;

    pattern = SCOLUTF8 (MMstartstr (m, mpattern), MMsizestr (m, mpattern));
    string = SCOLUTF8 (MMstartstr (m, mstring), MMsizestr (m, mstring));

    /* scol_pcre_regex_new() returns NULL when the pattern is invalid. */
    regex = scol_pcre_regex_new (pattern, mcompile, mmatch);
    if (regex != NULL)
    {
        if (string != NULL)
            tokens = g_regex_split_full (regex, string, strlen (string), mstart, mmatch, mmax, &error);
        g_regex_unref (regex);
    }
    g_free (pattern);
    g_free (string);

    /* On any failure exactly one value (nil) is pushed and the function
       stops here: there is no token array to walk. */
    if ((error != NULL) || (tokens == NULL))
    {
        if (error != NULL)
        {
            MMechostr (0, "SCOL_pcreNormalSplit error : %s\n", error->message);
            g_error_free (error);
        }
        else
            MMechostr (0, "SCOL_pcreNormalSplit error : the pattern can not be compiled\n");
        g_strfreev (tokens);
        MMpush (m, NIL);
        return 0;
    }

    /* Push every token, converted back from UTF-8. */
    for (count = 0; tokens[count] != NULL; count++)
    {
        /* The converted buffer is released once it has been pushed, as
           SCOL_pcreEasyReplace does with its own converted result. */
        gchar *converted = UTF8SCOL (tokens[count], strlen (tokens[count]));

        /* NOTE(review): falls back on the UTF-8 token if the conversion
           yields NULL - confirm that UTF8SCOL can fail that way. */
        Mpushstrbloc (m, (converted != NULL) ? converted : tokens[count]);
        g_free (converted);
    }
    g_strfreev (tokens);

    /* Chain the pushed tokens into a list, terminated by nil. */
    MMpush (m, NIL);
    for (; count > 0; count--)
    {
        MMpush (m, ITOM (2));
        MBdeftab (m);
    }
    return 0;
}
302

    
303
/**
 * \brief Placeholder, not implemented yet
 * \return 0
 *
 * The machine is left untouched. This function has no entry in the
 * glib_pcre_* tables of this file.
 */
int SCOL_pcreNormalReplace (mmachine m)
{
    (void) m;   /* unused as long as the function is a stub */
    return 0;
}
307

    
308

    
309

    
310

    
311
/*
    Internal functions

    - scol_pcre_regex_new
        GRegex * scol_pcre_regex_new (const gchar *, GRegexCompileFlags, GRegexMatchFlags);
        Create a new regular expression

    - scol_pcre_match_standard
        void scol_pcre_match_standard (mmachine, const gchar *, const gchar *, GRegexCompileFlags, GRegexMatchFlags, gint)

    - scol_pcre_match_DFA
        void scol_pcre_match_DFA (mmachine m, const gchar *pattern, const gchar *string, GRegexCompileFlags compile, GRegexMatchFlags match, gint start)

*/
324

    
325
/**
 * \brief Compiles a regular expression
 * \param pattern : the regular expression
 * \param compile : compile options
 * \param match : match options
 * \return the value returned by g_regex_new
 */
GRegex * scol_pcre_regex_new (const gchar * pattern, GRegexCompileFlags compile, GRegexMatchFlags match)
{
    GRegex *compiled;

    /* No GError is requested here: the error argument is NULL. */
    compiled = g_regex_new (pattern, compile, match, NULL);
    return compiled;
}
329

    
330

    
331
/**
 * \brief Pushes on the machine the list of all matches found with the standard algorithm
 * \param m : the Scol machine
 * \param pattern : the regular expression
 * \param string : the string to scan
 * \param compile : compile options
 * \param match : match options
 * \param start : start position, given to g_regex_match_full
 *
 * Exactly one value is pushed: a list of tuples [matched text, start, end],
 * or nil when there is no match or when the pattern can not be compiled.
 * The positions are those given by g_match_info_fetch_pos on the string
 * received here.
 */
void scol_pcre_match_standard (mmachine m, const gchar *pattern, const gchar *string, GRegexCompileFlags compile, GRegexMatchFlags match, gint start)
{
    GRegex *regex;
    GMatchInfo *match_info = NULL;
    GError *error = NULL;
    int i, n = 0;

    /* scol_pcre_regex_new() returns NULL when the pattern is invalid;
       without a regex there is no match info to walk. */
    regex = scol_pcre_regex_new (pattern, compile, match);
    if ((regex == NULL) || (string == NULL))
    {
        MMechostr (0, "scol_pcre_match_standard error : invalid pattern or string\n");
        if (regex != NULL)
            g_regex_unref (regex);
        MMpush (m, NIL);
        return;
    }

    g_regex_match_full (regex, string, strlen (string), start, match, &match_info, &error);

    /* g_match_info_next() must not be called with an error already set. */
    while ((error == NULL) && g_match_info_matches (match_info))
    {
        gint pos1 = -1, pos2 = -1;
        gchar *word = g_match_info_fetch (match_info, 0);

        g_match_info_fetch_pos (match_info, 0, &pos1, &pos2);

        if (word != NULL)
        {
            /* Convert back from UTF-8, as the split and replace functions do.
               NOTE(review): falls back on the UTF-8 text if the conversion
               yields NULL - confirm that UTF8SCOL can fail that way. */
            gchar *converted = UTF8SCOL (word, strlen (word));

            Mpushstrbloc (m, (converted != NULL) ? converted : word);
            g_free (converted);
        }
        else
            MMpush (m, NIL);

        /* Build the tuple [text, start, end]. */
        MMpush (m, ITOM (pos1));
        MMpush (m, ITOM (pos2));
        MMpush (m, ITOM (3));
        MBdeftab (m);
        g_free (word);

        g_match_info_next (match_info, &error);
        n++;
    }

    g_match_info_free (match_info);
    g_regex_unref (regex);
    if (error != NULL)
    {
        MMechostr (0, "scol_pcre_match_standard error : %s\n", error->message);
        g_error_free (error);
    }

    /* Chain the n tuples into a list, terminated by nil. */
    MMpush (m, NIL);
    for (i = 0; i < n; i++)
    {
        MMpush (m, ITOM (2));
        MBdeftab (m);
    }
    return;
}
374

    
375
/**
 * \brief Pushes on the machine the list of all matches found with the DFA algorithm
 * \param m : the Scol machine
 * \param pattern : the regular expression
 * \param string : the string to scan
 * \param compile : compile options
 * \param match : match options
 * \param start : start position, given to g_regex_match_all_full
 *
 * Exactly one value is pushed: a list of tuples [matched text, start, end],
 * or nil when there is no match or when the pattern can not be compiled.
 * The positions are those given by g_match_info_fetch_pos on the string
 * received here.
 */
void scol_pcre_match_DFA (mmachine m, const gchar *pattern, const gchar *string, GRegexCompileFlags compile, GRegexMatchFlags match, gint start)
{
    GRegex *regex;
    GMatchInfo *match_info = NULL;
    GError *error = NULL;
    int i, n = 0;
    gint k, count = 0;

    /* scol_pcre_regex_new() returns NULL when the pattern is invalid;
       without a regex there is no match info to read. */
    regex = scol_pcre_regex_new (pattern, compile, match);
    if ((regex == NULL) || (string == NULL))
    {
        MMechostr (0, "scol_pcre_match_DFA error : invalid pattern or string\n");
        if (regex != NULL)
            g_regex_unref (regex);
        MMpush (m, NIL);
        return;
    }

    g_regex_match_all_full (regex, string, strlen (string), start, match, &match_info, &error);

    /* The results of g_regex_match_all_full() are read by index, from 0 to
       g_match_info_get_match_count() - 1. g_match_info_next() is not used
       here: the GLib documentation describes index-based retrieval for the
       DFA results. */
    if ((error == NULL) && g_match_info_matches (match_info))
        count = g_match_info_get_match_count (match_info);

    for (k = 0; k < count; k++)
    {
        gint pos1 = -1, pos2 = -1;
        gchar *word = g_match_info_fetch (match_info, k);

        g_match_info_fetch_pos (match_info, k, &pos1, &pos2);

        if (word != NULL)
        {
            /* Convert back from UTF-8, as the split and replace functions do.
               NOTE(review): falls back on the UTF-8 text if the conversion
               yields NULL - confirm that UTF8SCOL can fail that way. */
            gchar *converted = UTF8SCOL (word, strlen (word));

            Mpushstrbloc (m, (converted != NULL) ? converted : word);
            g_free (converted);
        }
        else
            MMpush (m, NIL);

        /* Build the tuple [text, start, end]. */
        MMpush (m, ITOM (pos1));
        MMpush (m, ITOM (pos2));
        MMpush (m, ITOM (3));
        MBdeftab (m);
        g_free (word);
        n++;
    }

    g_match_info_free (match_info);
    g_regex_unref (regex);
    if (error != NULL)
    {
        MMechostr (0, "scol_pcre_match_DFA error : %s\n", error->message);
        g_error_free (error);
    }

    /* Chain the n tuples into a list, terminated by nil. */
    MMpush (m, NIL);
    for (i = 0; i < n; i++)
    {
        MMpush (m, ITOM (2));
        MBdeftab (m);
    }
    return;
}
418

    
419

    
420

    
421

    
422
/* API definitions : */
423

    
424
char* glib_pcre_name[GLIB_PCRE_PKG_NB]=
425
{
426
    "PCRE_MATCH_STANDARD", "PCRE_MATCH_DFA",
427

    
428
    "PCRE_MATCH_ANCHORED", "PCRE_MATCH_NOTBOL", "PCRE_MATCH_NOTEOL",
429
    "PCRE_MATCH_NOTEMPTY", "PCRE_MATCH_PARTIAL", "PCRE_MATCH_NEWLINE_CR",
430
    "PCRE_MATCH_NEWLINE_LF", "PCRE_MATCH_NEWLINE_CRLF", "PCRE_MATCH_NEWLINE_ANY",
431

    
432
    "PCRE_REGEX_CASELESS", "PCRE_REGEX_MULTILINE", "PCRE_REGEX_DOTALL",
433
    "PCRE_REGEX_EXTENDED", "PCRE_REGEX_ANCHORED", "PCRE_REGEX_DOLLAR_ENDONLY",
434
    "PCRE_REGEX_UNGREEDY", "PCRE_REGEX_RAW", "PCRE_REGEX_NO_AUTO_CAPTURE",
435
    "PCRE_REGEX_OPTIMIZE", "PCRE_REGEX_DUPNAMES", "PCRE_REGEX_NEWLINE_CR",
436
    "PCRE_REGEX_NEWLINE_LF", "PCRE_REGEX_NEWLINE_CRLF",
437

    
438
    "_pcreEasyMatch",
439
    "_pcreEasySplit",
440
    "_pcreEasyReplace",
441

    
442
    "_pcreNormalMatch",
443
    "_pcreNormalSplit"
444
};
445

    
446
int (*glib_pcre_fun[GLIB_PCRE_PKG_NB])(mmachine m)=
447
{
448
    (bullshit) (1*2), (bullshit) (2*2),
449

    
450
    (bullshit) (1<<4), (bullshit) (1<<7), (bullshit) (1<<8),
451
    (bullshit) (1<<10), (bullshit) (1<<15), (bullshit) (1<<20),
452
    (bullshit) (1<<21), (bullshit) (1<<20|1<<21), (bullshit) (1<<22),
453

    
454
    (bullshit) (1<<0), (bullshit) (1<<1), (bullshit) (1<<2),
455
    (bullshit) (1<<3), (bullshit) (1<<4), (bullshit) (1<<5),
456
    (bullshit) (1<<9), (bullshit) (1<<11), (bullshit) (1<<12),
457
    (bullshit) (1<<13), (bullshit) (1<<19), (bullshit) (1<<20),
458
    (bullshit) (1<<21), (bullshit) (1<<20|1<<21),
459

    
460
    SCOL_pcreEasyMatch,
461
    SCOL_pcreEasySplit,
462
    SCOL_pcreEasyReplace,
463

    
464
    SCOL_pcreNormalMatch,
465
    SCOL_pcreNormalSplit
466
};
467

    
468
int glib_pcre_narg[GLIB_PCRE_PKG_NB]=
469
{
470
    TYPVAR, TYPVAR,
471

    
472
    TYPVAR, TYPVAR, TYPVAR,
473
    TYPVAR, TYPVAR, TYPVAR,
474
    TYPVAR, TYPVAR, TYPVAR,
475

    
476
    TYPVAR, TYPVAR, TYPVAR,
477
    TYPVAR, TYPVAR, TYPVAR,
478
    TYPVAR, TYPVAR, TYPVAR,
479
    TYPVAR, TYPVAR, TYPVAR,
480
    TYPVAR, TYPVAR,
481

    
482
    2,
483
    2,
484
    3,
485

    
486
    6,
487
    6
488
};
489

    
490
char* glib_pcre_type[GLIB_PCRE_PKG_NB]=
491
{
492
    "I", "I",
493

    
494
    "I", "I", "I",
495
    "I", "I", "I",
496
    "I", "I", "I",
497

    
498
    "I", "I", "I",
499
    "I", "I", "I",
500
    "I", "I", "I",
501
    "I", "I", "I",
502
    "I", "I",
503

    
504
    "fun [S S] I",
505
    "fun [S S] [S r1]",
506
    "fun [S S S] S",
507

    
508
    "fun [S S I I I I] [[S I I] r1]",
509
    "fun [S S I I I I] [S r1]"
510
};
511

    
512
/**
513
 * \brief Load the Scol api
514
 */
515
int SCOLinitPcreClass (mmachine m)
516
{
517
    int k;
518

    
519
    MMechostr (0, "SCOLinitPcreClass : entering\n");
520

    
521
    k = PKhardpak (m, "PCREengine", GLIB_PCRE_PKG_NB, glib_pcre_name, glib_pcre_fun, glib_pcre_narg, glib_pcre_type);
522
    return k;
523
}
524

    
525

    
526
/**
 * \brief Load and free the regular expression library
 * Platforms supported : MS Windows and GNU / Linux
 */

/**
 * \brief Common part of the release : only writes a message in the log
 * \return 0
 */
int PcreRelease ()
{
    MMechostr (0, "\nPCRE library released !\n");

    return 0;
}
536

    
537
#if ((defined _WIN32) || (defined __WIN32__))
538

    
539
/**
 * \brief Entry point called to load the library (MS Windows version)
 * \param m : the Scol machine, kept in the global mm
 * \param w : kept in the global ww and given to SCOLinitplugin
 * \return the value returned by SCOLinitPcreClass
 */
__declspec (dllexport) int SCOLloadPCRE (mmachine m, cbmachine w)
{
    int k;

    /* Keep both handles at file scope. */
    ww = w;
    mm = m;

    MMechostr (MSKDEBUG, "\nPCRE library loading .... !\n");
    SCOLinitplugin (w);

    k = SCOLinitPcreClass (m);
    if (k != 0)
        return k;   /* the "loaded" message is skipped in this case */

    MMechostr(MSKDEBUG, "\nPCRE library loaded !\n");
    return k;
}
551

    
552
__declspec (dllexport) int SCOLfreePCRE ()
553
{
554
    PcreRelease ();
555
    return 0;
556
}
557

    
558

    
559

    
560

    
561

    
562
/* Version GNU / Linux */
563
#elif ((defined linux) || (defined __linux))
564

    
565
/**
 * \brief Entry point called to load the library (GNU / Linux version)
 * \param m : the Scol machine, kept in the global mm
 * \return the value returned by SCOLinitPcreClass
 */
int SCOLloadPCRE (mmachine m)
{
    int k;

    /* Keep the machine at file scope. */
    mm = m;

    MMechostr (MSKDEBUG, "\nPCRE library loading !\n");

    k = SCOLinitPcreClass (m);
    if (k != 0)
        return k;   /* the "loaded" message is skipped in this case */

    MMechostr (MSKDEBUG, "\nPCRE library loaded !\n");
    return k;
}
576

    
577
int SCOLfreePCRE ()
578
{
579
    MMechostr(MSKDEBUG, "\nPCRE library release !\n");
580

    
581
    PcreRelease ();
582
    return 0;
583
}
584

    
585
#else
586
#error no platform supported
587
#endif
(2-2/2)