PIPS
lexer.l
Go to the documentation of this file.
1 /* $Id: lexer.l 23366 2017-02-03 16:21:23Z coelho $ */
2 
3 /******************** LEXICAL ANALYZER **************************
4 
5  Here are the lexical rules, based on the work of people from
6  Open Source Quality projects (http://osq.cs.berkeley.edu/), used
7  by the CCured source-to-source translator for C
8 
9 
10 *****************************************************************/
11 
12 /*(*
13  *
14  * Copyright (c) 2001-2003,
15  * George C. Necula <necula@cs.berkeley.edu>
16  * Scott McPeak <smcpeak@cs.berkeley.edu>
17  * Wes Weimer <weimer@cs.berkeley.edu>
18  * Ben Liblit <liblit@cs.berkeley.edu>
19  * All rights reserved.
20  *
21  * Redistribution and use in source and binary forms, with or without
22  * modification, are permitted provided that the following conditions are
23  * met:
24  *
25  * 1. Redistributions of source code must retain the above copyright
26  * notice, this list of conditions and the following disclaimer.
27  *
28  * 2. Redistributions in binary form must reproduce the above copyright
29  * notice, this list of conditions and the following disclaimer in the
30  * documentation and/or other materials provided with the distribution.
31  *
32  * 3. The names of the contributors may not be used to endorse or promote
33  * products derived from this software without specific prior written
34  * permission.
35  *
36  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
37  * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
38  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
39  * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
40  * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
41  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
42  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
43  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
44  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
45  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
46  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
47  *
48  *)
49 (* FrontC -- lexical analyzer
50 **
51 ** 1.0 3.22.99 Hugues Cassé First version.
52 ** 2.0 George Necula 12/12/00: Many extensions
53 *)*/
54 
55 %option noinput
56 %option nounput
57 %{
58 #ifdef HAVE_CONFIG_H
59  #include "pips_config.h"
60 #endif
61 #include <stdlib.h>
62 #include <stdio.h>
63 #include <string.h>
64 #include <ctype.h>
65 
66 #include "genC.h"
67 #include "linear.h"
68 #include "ri.h"
69 #include "ri-util.h"
70 #include "misc.h"
71 
72 #include "splitc.h"
73 #include "preprocessor.h"
74 
75 
76 int csplit_line_number = 1; /**< To be exported to the parser for
77  splitting the preprocessed file */
78 int user_line_number = 1; /**< To be exported to the parser for error
79  messages related to the user file */
80 
81 /* To track current file position: */
82 #define YY_USER_ACTION update_csplit_file_offset_information();
83 size_t current_csplit_file_offset = 0;
84 size_t csplit_file_offset_beginning = 0;
85 
86 /* To track nesting in brackets */
87 static int bracket_depth = 0;
88 
89 /* A state-machine to gather the trailing comments of a statement with an
90  heuristic to decide if some comments and spaces will go into the
91  previous or next top-level construct: */
92 enum gather_comment_state_t {
93  PUT_IN_NEXT_TOP_LEVEL_CONSTRUCT, /**< Normal mode: comments and spaces
94  are associated to the top-level
95  construct to encounter: */
96  GATHER_SPACES, /**< We gather only spaces in the previous top-level
97  construct */
98  GATHER_COMMENT, /**< We gather comments in the previous top-level construct */
99 };
100 /* Indeed it does not work since csplit_copy() is called from splitc.y
101  when a function definition is found and thus we cannot gather the
102  comment in it... :-( */
103 enum gather_comment_state_t
104  gather_comment_state_machine = PUT_IN_NEXT_TOP_LEVEL_CONSTRUCT;
105 
106 
/* Deliberately empty hook: its only purpose is to offer a convenient
   place to put a debugger breakpoint. */
static void gdb_marker()
{
  return;
}
112 
113 
114 /* Reinitialise global position numbers for a new file. */
115 void reset_csplit_line_number()
116 {
117  csplit_line_number = 1;
118  user_line_number = 1;
119  current_csplit_file_offset = 0;
120  csplit_file_offset_beginning = 0;
121  gather_comment_state_machine = PUT_IN_NEXT_TOP_LEVEL_CONSTRUCT;
122  bracket_depth = 0;
123 }
124 
125 static int csplit_current_beginning = 1000000;
126 static int user_current_beginning = 1000000;
127 
128 
129 /* If we encounter a possible function begin, just snapshot the position
130  of the function beginning: */
131 static void update_csplit_current_beginning() {
132  user_current_beginning = csplit_current_beginning > csplit_line_number?
133  user_line_number : user_current_beginning;
134  csplit_current_beginning = csplit_current_beginning > csplit_line_number?
135  csplit_line_number : csplit_current_beginning;
136  /* From now, the comments and spaces are associated to the current
137  top-level construct: */
138  gather_comment_state_machine = PUT_IN_NEXT_TOP_LEVEL_CONSTRUCT;
139 }
140 
141 
142 /* This function is called at the end of any top-level C construct: */
143 void reset_csplit_current_beginning()
144 {
145  int leaked = 0;
146 
147  csplit_current_beginning = 1000000;
148  user_current_beginning = 1000000;
149  csplit_is_static_p = false;
150 
151  /* Snapshot the current file position as a potential function begin: */
152  csplit_file_offset_beginning = current_csplit_file_offset;
153 
154  /* Aggregate following spaces and comments on the sameline to the
155  previous top-level construct: */
156  gather_comment_state_machine = GATHER_COMMENT;
157 
158 
159  if(!string_undefined_p(csplit_current_function_name))
160  free(csplit_current_function_name);
161  csplit_current_function_name = string_undefined;
162 
163  if(!string_undefined_p(csplit_current_function_name2))
164  free(csplit_current_function_name2);
165  csplit_current_function_name2 = string_undefined;
166 
167  if(!string_undefined_p(csplit_definite_function_name))
168  free(csplit_definite_function_name);
169  csplit_definite_function_name = string_undefined;
170 
171  if(!string_undefined_p(csplit_definite_function_signature))
172  free(csplit_definite_function_signature);
173  csplit_definite_function_signature = string_undefined;
174 
175  if((leaked=check_signature_balance())!=0) {
176  /* FI: I'm not ready to abort() here... */
177  pips_debug(5, "Likely memory leaks: %d\n", leaked);
178  }
179 }
180 
181 int get_csplit_current_beginning()
182 {
183  return csplit_current_beginning;
184 }
185 
186 
187 /* Get the current line number in the file to split from the user point of
188  view (before preprocessor expansion): */
189 int get_user_current_beginning() {
190  return user_current_beginning;
191 }
192 
193 
194 /* Get the file position in the file to split where the current function
195  begins: */
196 size_t get_csplit_file_offset_beginning() {
197  return csplit_file_offset_beginning;
198 }
199 
200 
201 /* Get the current file position in the file to split: */
202 size_t get_current_csplit_file_offset() {
203  return current_csplit_file_offset;
204 }
205 
206 
207 /* Function called each time a token is read to trac file position
208  information: */
209 void update_csplit_file_offset_information() {
210  current_csplit_file_offset += yyleng;
211 }
212 
213 
/* Count the line feeds in a string.
 *
 * @param s a NUL-terminated string, which is only read; it must not be
 *          NULL
 * @return the number of '\n' characters found in s
 */
static int lf_count(const char *s)
{
  int count = 0;
  const char *lf;

  /* Jump from one line feed to the next one with the standard library */
  for (lf = strchr(s, '\n'); lf != NULL; lf = strchr(lf + 1, '\n'))
    count++;

  return count;
}
225 ␌
226 /* The lexer cannot handle the ambiguity between named types and
227  *variables without extra-help.
228  */
229 
230 #define TOKEN_UNDEFINED (-1)
231 static int previous_keyword_token = TOKEN_UNDEFINED;
232 
233 /* This is going to be the previous token because LEXER_RETURN is not
234 used in case the lexer handles either a named type or a variable. */
235 #define LEXER_RETURN(t) return(previous_keyword_token=t)
236 
237 /* See if id is a keyword, a typedef or an identifier.
238  * Returns the token number for keywords and typedefs.
239  * Returns 0 for variable identifiers.
240  */
241 static int is_c_preprocessor_keyword_typedef(char * id)
242 {
243  /* No need to bother for scopes when dealing with C keywords,
244  * but do not take into account top-level typedefs which may be masked.
245  */
246  int t = is_c_keyword_typedef(id);
247  if(t==0 || t==TK_NAMED_TYPE) {
248  // id may be a keyword, but scopes must be used
249  string sn = get_preprocessor_current_scope();
250  string scoped_id = strdup(concatenate(id, "%", sn, NULL));
251  t = is_c_keyword_typedef(scoped_id);
252  free(scoped_id);
253  if(t == 0) {
254  int i, n = preprocessor_scope_number();
255  for(i=2; i<=n && t==0; i++) {
256  sn = get_preprocessor_nth_scope(i);
257  scoped_id = strdup(concatenate(id, "%", sn, NULL));
258  t = is_c_keyword_typedef(scoped_id);
259  ifdebug(1) {
260  if(t==TK_NAMED_TYPE)
261  fprintf(stderr, "Token \"%s\" identified as TK_NAMED_TYPE.\n",
262  scoped_id);
263  }
264  free(scoped_id);
265  }
266  /* Check again for a global typedef */
267  t = is_c_keyword_typedef(id);
268  }
269  }
270  /* FI: we *bet* here that "extern foo(t1, t2, t3);"
271  * with t1, t2 and t3 named type is not possible:-(
272  */
273 #define TK_TYPE_P(tk) \
274  ((tk)==TK_CHAR || (tk)==TK_INT || \
275  (tk)==TK_DOUBLE || (tk)==TK_FLOAT || (tk)==TK_COMPLEX || \
276  (tk)==TK_ENUM || (tk)==TK_STRUCT || (tk)==TK_UNION || \
277  (tk)==TK_SIGNED|| (tk)==TK_UNSIGNED|| (tk)==TK_LONG|| (tk)==TK_SHORT ||\
278  (tk)==TK_RETURN)
279  if(t==TK_NAMED_TYPE
280  && (TK_TYPE_P(previous_keyword_token)
281  ||previous_keyword_token==TK_NAMED_TYPE)) {
282  // Identifier
283  t = 0;
284  pips_debug(1, "Token \"%s\" is in fact assumed to be an identifier.\n",
285  id);
286  }
287  previous_keyword_token = t;
288  if(t==TK_STATIC && bracket_depth>0)
289  t = TK_STATIC_DIMENSION;
290  return t;
291 }
292 
293 %}
294 
295 /* To track file line number automatically: */
296 %option yylineno
297 
/* Flex name definitions used by the rules below: character classes
   first, then integer, floating-point and complex constants, then
   identifiers and the escape sequences allowed in character constants.
   A definition is referenced in a pattern as {name}. attribident is
   defined but no rule of this file references it. */
298 decdigit [0-9]
299 octdigit [0-7]
300 hexdigit [0-9a-fA-F]
301 letter [a-zA-Z]
302 usuffix [uU]
303 lsuffix (l|L|ll|LL)
304 intsuffix (({lsuffix})|({usuffix})|({usuffix}{lsuffix})|({lsuffix}{usuffix}))
305 hexprefix (0[xX])
306 intnum ({decdigit}+{intsuffix}?)
307 octnum (0{octdigit}+{intsuffix}?)
308 hexnum ({hexprefix}{hexdigit}+{intsuffix}?)
309 exponent ([eE][\+\-]?{decdigit}+)
310 fraction (\.{decdigit}+)
311 decfloat (({intnum}?{fraction})|({intnum}{exponent})|({intnum}?{fraction}{exponent})|({intnum}\.)|({intnum}\.{exponent}))
312 hexfraction (({hexdigit}*"."{hexdigit}+)|({hexdigit}+))
313 binexponent ([pP][\+\-]?{decdigit}+)
314 floatonlynum ({decfloat}{floatsuffix}?)
315 floatexponent ([pP][\+\-]?{floatonlynum})
316 hexfloat (({hexprefix}{hexfraction}{binexponent})|({hexprefix}{hexdigit}+{binexponent})|({hexprefix}{hexdigit}{floatexponent}))
317 floatsuffix [fFlL]
318 complexsuffix [ij]
319 floatnum (({decfloat}|{hexfloat}){floatsuffix}?)
320 complexnum (({decfloat}|{hexfloat})({complexsuffix}{floatsuffix}?|{floatsuffix}{complexsuffix}))
321 ident ({letter}|"_")({letter}|{decdigit}|"_")*
322 attribident (({letter}|"_")({letter}|{decdigit}|"_"|":"))
323 escape \\
324  /* From C norm A.1: */
325 hex_escape ({escape}x{hexdigit}{1,2})
326 wide_hex_escape ({escape}x{hexdigit}{1,8})
327 oct_escape ({escape}{octdigit}{1,3})
328 char_escape ({escape}[abfnrtv\'\\?"])
329 /* " */
330 universal-character-name ({escape}(u{hexdigit}{4}|U{hexdigit}{8}))
331 
332 
333 %%
334 
("/*"([^*]|("*"+[^/*]))*"*"+"/")|("//"("\\\n"|[^\n])*"\n"?) { // help emacs: */
  // A whole C comment, or a C++ comment with its backslash-continued
  // lines and its final newline if any.
  // Bug in the Flex 2.5.35 manual A.4.3, "/***/" fails. :-(
  int newlines = lf_count(yytext);

  /* The matched text may span several lines: keep both line counters
     up to date */
  csplit_line_number += newlines;
  user_line_number += newlines;

  pips_debug(5,"Comment \"%s\"\n",yytext);

  if (gather_comment_state_machine == GATHER_COMMENT) {
    /* This comment is associated to the previous top-level
       construct: */
    reset_csplit_current_beginning();
  }
  if (gather_comment_state_machine == GATHER_SPACES) {
    /* A comment is met while spaces were being gathered into the
       previous top-level construct: this comment goes into the next
       top-level construct. */
    gather_comment_state_machine = PUT_IN_NEXT_TOP_LEVEL_CONSTRUCT;
  }
  }
354 
^"#"("\\\n"|[^\n])* {
  /* A '#' at the beginning of a line, up to the end of the line, any
     backslashed newline included. It must be a line left by the
     preprocessor:
       # 1 blablabla.c
       #pragma blablabla
       #line nnn
   */
  pips_debug(5,"Pragma comment %s\n",yytext);
  gdb_marker();

  /* FC 2016-06-18: #pragma lines are left alone here; user_line_number
     is no longer decremented for them since that broke line numbering. */
  if (strncmp(yytext, "#pragma", 7) != 0) {
    /* The preprocessor seems to provide line information in pragmas
       which are not #line pragmas, so "#line" is not required. */
    size_t n = strlen(yytext);

    if (n >= 4) {
      int initial_C_line_number = -1;
      /* The file name is a part of yytext, so n bytes are enough */
      char include_file[n];
      int items = sscanf(yytext+1, "%d \"%[^\"]\"",
                         &initial_C_line_number, include_file);

      if (items < 1) {
        pips_user_warning("No line number in # pragma: \"%s\".\n", yytext);
        splitc_error("Ill. formated # pragma\n");
      }
      else {
        /* Minus one because the newline ending this very line is
           counted by the newline rule */
        user_line_number = initial_C_line_number-1;

        if (items == 2) {
          if (!current_file_path) {
            /* First file name met */
            current_file_path = strdup(include_file);
          }
          else if (!current_include_file_path) {
            current_include_file_path = strdup(include_file);
          }
          else if (strcmp(include_file, current_file_path) == 0) {
            /* Back to the first file: no include file is current */
            free(current_include_file_path);
            current_include_file_path = NULL;
          }
        }
      }
    }
  }
  }
392 
\n {
  /* One more line in both the preprocessed file and the user file */
  ++csplit_line_number;
  ++user_line_number;
  pips_debug(5, "New line %d, ext=%d, func=%d\n", csplit_line_number, csplit_is_external, csplit_is_function);

  if (gather_comment_state_machine == GATHER_COMMENT) {
    /* To be compatible with the old behaviour, newlines and what
       follows are meant to go to the next top-level construct, instead
       of switching to GATHER_SPACES: */
    gather_comment_state_machine = PUT_IN_NEXT_TOP_LEVEL_CONSTRUCT;
    /* Associate the current newline to the previous top-level
       construct.
       NOTE(review): reset_csplit_current_beginning() itself sets the
       state to GATHER_COMMENT, so the assignment just above does not
       survive this call. Confirm whether this is intended. */
    reset_csplit_current_beginning();
  }
  if (gather_comment_state_machine == GATHER_SPACES) {
    /* Add this newline to the previous top-level construct: */
    reset_csplit_current_beginning();
  }
  }
416 
[[:blank:]] {
  /* Eat up one blank character. Nothing is returned to the parser. */
  switch (gather_comment_state_machine) {
  case GATHER_COMMENT:
  case GATHER_SPACES:
    /* Add this space to the previous top-level construct: */
    reset_csplit_current_beginning();
    break;
  default:
    break;
  }
  }
428 
429 "_Pragma" {
  /* The _Pragma keyword is returned as a plain token: no value is
     stored in splitc_lval for it. */
430  LEXER_RETURN(TK_PRAGMA);
431  }
432 
433 \'([^\\]|{char_escape}|{oct_escape}|{hex_escape}|{universal-character-name})\' {
434  /* Have a look to Flex A.4.3 too... */
435  /* Escaped character constants. Their
436  syntax is understood in PIPS character
437  constant construtors. */
438  pips_debug(9,"TK_CHARCON: %s\n",yytext);
439  splitc_lval.string = strdup(yytext);
440  LEXER_RETURN(TK_CHARCON);
441  }
442 
443 L\'([^\\]|{char_escape}|{oct_escape}|{wide_hex_escape}|{universal-character-name})\' {
444  /* Escaped wide character constants. Their
445  syntax is understood in PIPS character
446  constant construtors. */
447  pips_debug(9,"TK_CHARCON wide character constant: %s\n",yytext);
448  splitc_lval.string = strdup(yytext);
449  LEXER_RETURN(TK_CHARCON);
450  }
451 
452 \"(\\\"|[^\"\\]|\\[^\"])*\" {
453  pips_debug(5,"TK_STRINGCON regular or wide string: %s\n",yytext);
454  /* the "world" in L"Hello, " "world" should be treated as
455  wide even though there's no L immediately preceding it */
456  splitc_lval.string = strdup(yytext);
457  LEXER_RETURN(TK_STRINGCON);
458  }
459 
460 L\"(\\\"|[^\"\\]|\\[^\"])*\" {
461  pips_debug(5,"TK_WSTRINGCON wide string: %s\n",yytext);
462  splitc_lval.string = strdup(yytext);
463  LEXER_RETURN(TK_WSTRINGCON);
464  }
465 {floatnum} {
  /* Numeric constants: each rule hands a fresh copy of the matched
     text to the parser through splitc_lval.string. */
466  pips_debug(5,"TK_FLOATCON %s\n",yytext);
467  splitc_lval.string = strdup(yytext);
468  LEXER_RETURN(TK_FLOATCON);
469  }
470 {complexnum} {
471  pips_debug(5,"TK_COMPLEXCON %s\n",yytext);
472  splitc_lval.string = strdup(yytext);
473  LEXER_RETURN(TK_COMPLEXCON);
474  }
475 {hexnum} {
  /* The three integer rules return the same TK_INTCON token; only the
     debug message tells hexadecimal, octal and other integers apart. */
476  pips_debug(5,"Hexnum TK_INTCON %s\n",yytext);
477  splitc_lval.string = strdup(yytext);
478  LEXER_RETURN(TK_INTCON);
479  }
480 {octnum} {
  /* Listed before {intnum}, which also matches these digits. */
481  pips_debug(5,"Octnum TK_INTCON %s\n",yytext);
482  splitc_lval.string = strdup(yytext);
483  LEXER_RETURN(TK_INTCON);
484  }
485 {intnum} {
486  pips_debug(5,"TK_INTCON %s\n",yytext);
487  splitc_lval.string = strdup(yytext);
488  LEXER_RETURN(TK_INTCON);
489  }
490 "!quit!" {
  /* Explicit end marker: scanning is stopped with yyterminate()
     instead of returning a token through LEXER_RETURN. */
491  pips_debug(5,"TK_EOF %s\n",yytext);
492  yyterminate();
493  }
494 "..." {
495  pips_debug(5,"TK_ELLIPSIS %s\n",yytext);
496  LEXER_RETURN(TK_ELLIPSIS);
497  }
498 "+=" {
  /* Compound assignment operators. All the operator rules below only
     trace the token and return it through LEXER_RETURN, which also
     records it in previous_keyword_token. */
499  pips_debug(5,"TK_PLUS_EQ %s\n",yytext);
500  LEXER_RETURN(TK_PLUS_EQ);
501  }
502 "-=" {
503  pips_debug(5,"TK_MINUS_EQ %s\n",yytext);
504  LEXER_RETURN(TK_MINUS_EQ);
505  }
506 "*=" {
507  pips_debug(5,"TK_STAR_EQ %s\n",yytext);
508  LEXER_RETURN(TK_STAR_EQ);
509  }
510 "/=" {
511  pips_debug(5,"TK_SLASH_EQ %s\n",yytext);
512  LEXER_RETURN(TK_SLASH_EQ);
513  }
514 "%=" {
515  pips_debug(5,"TK_PERCENT_EQ %s\n",yytext);
516  LEXER_RETURN(TK_PERCENT_EQ);
517  }
518 "|=" {
519  pips_debug(5,"TK_PIPE_EQ %s\n",yytext);
520  LEXER_RETURN(TK_PIPE_EQ);
521  }
522 "&=" {
523  pips_debug(5,"TK_AND_EQ %s\n",yytext);
524  LEXER_RETURN(TK_AND_EQ);
525  }
526 "^=" {
527  pips_debug(5,"TK_CIRC_EQ %s\n",yytext);
528  LEXER_RETURN(TK_CIRC_EQ);
529  }
530 "<<=" {
531  pips_debug(5,"TK_INF_INF_EQ %s\n",yytext);
532  LEXER_RETURN(TK_INF_INF_EQ);
533  }
534 ">>=" {
535  pips_debug(5,"TK_SUP_SUP_EQ %s\n",yytext);
536  LEXER_RETURN(TK_SUP_SUP_EQ);
537  }
538 "<<" {
  /* Shift operators, then comparison operators and plain assignment. */
539  pips_debug(5,"TK_INF_INF %s\n",yytext);
540  LEXER_RETURN(TK_INF_INF);
541  }
542 ">>" {
543  pips_debug(5,"TK_SUP_SUP %s\n",yytext);
544  LEXER_RETURN(TK_SUP_SUP);
545  }
546 "==" {
547  pips_debug(5,"TK_EQ_EQ %s\n",yytext);
548  LEXER_RETURN(TK_EQ_EQ);
549  }
550 "!=" {
551  pips_debug(5,"TK_EXCLAM_EQ %s\n",yytext);
552  LEXER_RETURN(TK_EXCLAM_EQ);
553  }
554 "<=" {
555  pips_debug(5,"TK_INF_EQ %s\n",yytext);
556  LEXER_RETURN(TK_INF_EQ);
557  }
558 ">=" {
559  pips_debug(5,"TK_SUP_EQ %s\n",yytext);
560  LEXER_RETURN(TK_SUP_EQ);
561  }
562 "=" {
563  pips_debug(5,"TK_EQ %s\n",yytext);
564  LEXER_RETURN(TK_EQ);
565  }
566 "<" {
567  pips_debug(5,"TK_INF %s\n",yytext);
568  LEXER_RETURN(TK_INF);
569  }
570 ">" {
571  pips_debug(5,"TK_SUP %s\n",yytext);
572  LEXER_RETURN(TK_SUP);
573  }
574 "++" {
  /* Increment, decrement, member access and arithmetic operators. */
575  pips_debug(5,"TK_PLUS_PLUS %s\n",yytext);
576  LEXER_RETURN(TK_PLUS_PLUS);
577  }
578 "--" {
579  pips_debug(5,"TK_MINUS_MINUS %s\n",yytext);
580  LEXER_RETURN(TK_MINUS_MINUS);
581  }
582 "->" {
583  pips_debug(5,"TK_ARROW %s\n",yytext);
584  LEXER_RETURN(TK_ARROW);
585  }
586 "+" {
587  pips_debug(5,"TK_PLUS %s\n",yytext);
588  LEXER_RETURN(TK_PLUS);
589  }
590 "-" {
591  pips_debug(5,"TK_MINUS %s\n",yytext);
592  LEXER_RETURN(TK_MINUS);
593  }
594 "*" {
595  pips_debug(5,"TK_STAR %s\n",yytext);
596  LEXER_RETURN(TK_STAR);
597  }
598 "/" {
599  pips_debug(5,"TK_SLASH %s\n",yytext);
600  LEXER_RETURN(TK_SLASH);
601  }
602 "%" {
603  pips_debug(5,"TK_PERCENT %s\n",yytext);
604  LEXER_RETURN(TK_PERCENT);
605  }
606 "!" {
  /* Logical and bitwise operators, then '?', ':' and '~'. */
607  pips_debug(5,"TK_EXCLAM %s\n",yytext);
608  LEXER_RETURN(TK_EXCLAM);
609  }
610 "&&" {
611  pips_debug(5,"TK_AND_AND %s\n",yytext);
612  LEXER_RETURN(TK_AND_AND);
613  }
614 "||" {
615  pips_debug(5,"TK_PIPE_PIPE %s\n",yytext);
616  LEXER_RETURN(TK_PIPE_PIPE);
617  }
618 "&" {
619  pips_debug(5,"TK_AND %s\n",yytext);
620  LEXER_RETURN(TK_AND);
621  }
622 "|" {
623  pips_debug(5,"TK_PIPE %s\n",yytext);
624  LEXER_RETURN(TK_PIPE);
625  }
626 "^" {
627  pips_debug(5,"TK_CIRC %s\n",yytext);
628  LEXER_RETURN(TK_CIRC);
629  }
630 "?" {
631  pips_debug(5,"TK_QUEST %s\n",yytext);
632  LEXER_RETURN(TK_QUEST);
633  }
634 ":" {
635  pips_debug(5,"TK_COLON %s\n",yytext);
636  LEXER_RETURN(TK_COLON);
637  }
638 "~" {
639  pips_debug(5,"TK_TILDE %s\n",yytext);
640  LEXER_RETURN(TK_TILDE);
641  }
642 "\{" {
  /* Braces, brackets, parentheses and punctuation. */
643  pips_debug(5,"TK_LBRACE %s\n",yytext);
644  LEXER_RETURN(TK_LBRACE);
645  }
646 "\}" {
647  pips_debug(5,"TK_RBRACE %s\n",yytext);
648  LEXER_RETURN(TK_RBRACE);
649  }
650 "[" {
  /* bracket_depth counts the brackets currently open: it is used to
     turn TK_STATIC into TK_STATIC_DIMENSION when "static" is met
     between brackets. */
651  pips_debug(5,"TK_LBRACKET %s\n",yytext);
652  bracket_depth++;
653  LEXER_RETURN(TK_LBRACKET);
654  }
655 "]" {
656  pips_debug(5,"TK_RBRACKET %s\n",yytext);
657  bracket_depth--;
658  LEXER_RETURN(TK_RBRACKET);
659  }
660 "(" {
661  pips_debug(5,"TK_LPAREN %s\n",yytext);
662  LEXER_RETURN(TK_LPAREN);
663  }
664 ")" {
665  pips_debug(5,"TK_RPAREN %s\n",yytext);
666  LEXER_RETURN(TK_RPAREN);
667  }
668 ";" {
669  pips_debug(5,"TK_SEMICOLON %s\n",yytext);
670  LEXER_RETURN(TK_SEMICOLON);
671  }
672 "," {
673  pips_debug(5,"TK_COMMA %s\n",yytext);
674  LEXER_RETURN(TK_COMMA);
675  }
676 "." {
677  pips_debug(5,"TK_DOT %s\n",yytext);
678  LEXER_RETURN(TK_DOT);
679  }
680 "sizeof" {
  /* Keywords with a rule of their own, listed before {ident} which
     matches the same text. */
681  pips_debug(5,"TK_SIZEOF %s\n",yytext);
682  LEXER_RETURN(TK_SIZEOF);
683  }
684 "__asm" {
685  pips_debug(5,"TK_ASM %s\n",yytext);
686  LEXER_RETURN(TK_ASM);
687  }
"__va_list" {
  /* Convert __va_list from bsd into __builtin_va_list: the token is
     looked up and passed to the parser under the builtin name.
     A local buffer holds that name. Assigning a strdup()'ed string to
     yytext, as done previously, leaked memory at each occurrence and
     overwrote the text pointer owned by the scanner. */
  char builtin_name[] = "__builtin_va_list";
  pips_debug(5,"TK_VA_LIST %s\n",yytext);
  int t = is_c_preprocessor_keyword_typedef(builtin_name);
  update_csplit_current_beginning();
  pips_assert("builtin valist found\n",t>0);
  pips_debug(5,"Keyword or typedef name: %s\n",builtin_name);
  /* The parser receives its own copy of the name */
  splitc_lval.string = strdup(builtin_name);
  /* previous_keyword_token has already been set by the lookup */
  return (t);
}
699 "__attribute__" {
700 
701 /* FI: when we change our minds and want to make these tokens, we must
702  * insert them in keyword_typedef_table.
703  */
704 
705  pips_debug(5,"TK_ATTRIBUTE %s\n",yytext);
706  LEXER_RETURN(TK_ATTRIBUTE);
707  }
708 "__extension__" {
  /* No token is returned by this rule and the three next ones: the
     keyword is dropped from the token stream after a warning. */
709  pips_user_warning("gcc extension keyword \"__extension__\" is ignored\n");
710  }
711 "__inline" {
712  pips_user_warning("gcc extension keyword \"__inline\" is ignored\n");
713  }
714 "__inline__" {
715  pips_user_warning("gcc extension keyword \"__inline__\" is ignored\n");
716  }
717 "__signed__" {
  /* NOTE(review): unlike the keywords above, __signed__ looks like a
     type specifier; confirm that dropping it is harmless for the
     parser. */
718  pips_user_warning("gcc extension keyword \"__signed__\" is ignored\n");
719  }
{ident} {
  /* C keywords, typedef names or identifiers */
  int t = is_c_preprocessor_keyword_typedef(yytext);
  update_csplit_current_beginning();

  if (t > 0) {
    /* LEXER_RETURN is not used here: the lookup above has already
       updated previous_keyword_token. */
    pips_debug(5,"Keyword or typedef name: %s\n",yytext);
    splitc_lval.string = strdup(yytext);
    return (t);
  }

  pips_debug(5,"TK_IDENT: %s\n",yytext);
  /* Might not work if a function returns a struct declared locally in
     the function declaration, or a pointer to such a structure */
  if (string_undefined_p(csplit_current_function_name)) {
    /* First identifier of the construct: candidate function name */
    csplit_current_function_name = strdup(yytext);
    pips_debug(5, "Temptative function name found: \"%s\"\n",
               csplit_current_function_name);
  }
  else {
    /* Any later identifier is kept as the second name, the previous
       second name being released first if there is one */
    if (string_undefined_p(csplit_current_function_name2)) {
      pips_debug(5, "Identifier \"%s\" ignored because of two previous identifiers \"%s\"\n",
                 yytext, csplit_current_function_name);
    }
    else {
      free(csplit_current_function_name2);
      pips_debug(5, "Identifier \"%s\" not ignored in spite of two previous identifiers \"%s\"\n",
                 yytext, csplit_current_function_name);
    }
    csplit_current_function_name2 = strdup(yytext);
  }

  splitc_lval.string = strdup(yytext);
  LEXER_RETURN(TK_IDENT);
  }
<<EOF>> {
  /* End of the input: tell the parser */
  pips_debug(5,"TK_EOF %s\n",yytext);
  LEXER_RETURN(TK_EOF);
  }
. {
  /* Any character that no rule above accepts: warn and go on, no
     token is returned. */
  csplit_parser_warning("Unrecognized character '%s'\n", yytext);
  if (*yytext=='#') {
    /* A '#' which is not at the beginning of a line */
    pips_user_warning("Preprocessor directives must have disappeared thanks to the C preprocessor.\n"
"Check the input code and/or modify the preprocessor options using environment variables PIPS_CPP.\n");
  }
  }
768 %%
769 
770 /* This function is renamed splitc_error(). It should probably reset more variables */
771 void splitc_error(const char * msg)
772 {
773  /* I should call csplit_parser_error() or the equivalent module */
774  string ifn = get_splitc_input_file_name();
775  string current_line = safe_read_nth_line(ifn, csplit_line_number);
776  /* pips_user_warning(
777  "C %s near \"%s\" at preprocessed line %d (user line %d):\n%s\n\n",
778  msg, yytext, csplit_line_number, user_line_number,
779  current_line); */
780  pips_user_warning(
781  "C %s near \"%s\" at line %d in file \"%s\":\n%s\n\n",
782  msg, yytext, user_line_number,
783  preprocessor_current_initial_file_name,
784  current_line);
785  free(current_line);
786  pips_user_error("Syntax error detected by PIPS C preprocessor.\n"
787  "Suggestions:\n"
788  " 1. check the legality of the source code with a production C compiler\n"
789  " 2. see if the issue is linked to a non-standard C feature\n"
790  " 3. see if the issue is a C feature unsupported by PIPS C parser\n"
791  " 4. see if the source code can be rewritten differently.\n");
792 }
793 
/* Called by the scanner when the end of the input is reached: always
   answer 1, i.e. there is no further input to scan. */
int yywrap()
{
  return 1;
}