PIPS
clex.l
Go to the documentation of this file.
1 /* $Id: clex.l 23412 2017-08-09 15:07:09Z irigoin $ */
2 
3 /******************** LEXICAL ANALYZER **************************
4 
5  Here are the lexical rules, based on the work of people from
6  Open Source Quality projects (http://osq.cs.berkeley.edu/), used
7  by the CCured source-to-source translator for C
8 
9 
10 *****************************************************************/
11 
12 /*(*
13  *
14  * Copyright (c) 2001-2003,
15  * George C. Necula <necula@cs.berkeley.edu>
16  * Scott McPeak <smcpeak@cs.berkeley.edu>
17  * Wes Weimer <weimer@cs.berkeley.edu>
18  * Ben Liblit <liblit@cs.berkeley.edu>
19  * All rights reserved.
20  *
21  * Redistribution and use in source and binary forms, with or without
22  * modification, are permitted provided that the following conditions are
23  * met:
24  *
25  * 1. Redistributions of source code must retain the above copyright
26  * notice, this list of conditions and the following disclaimer.
27  *
28  * 2. Redistributions in binary form must reproduce the above copyright
29  * notice, this list of conditions and the following disclaimer in the
30  * documentation and/or other materials provided with the distribution.
31  *
32  * 3. The names of the contributors may not be used to endorse or promote
33  * products derived from this software without specific prior written
34  * permission.
35  *
36  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
37  * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
38  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
39  * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
40  * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
41  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
42  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
43  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
44  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
45  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
46  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
47  *
48  *)
49 (* FrontC -- lexical analyzer
50 **
51 ** 1.0 3.22.99 Hugues Cassé First version.
52 ** 2.0 George Necula 12/12/00: Many extensions
53 *)*/
54 
55 %option nounput
56 %option noinput
57 
58 %{
59 #ifdef HAVE_CONFIG_H
60  #include "pips_config.h"
61 #endif
62 #include <stdlib.h>
63 #include <stdio.h>
64 #include <string.h>
65 #include <ctype.h>
66 
67 #include "genC.h"
68 #include "linear.h"
69 #include "ri.h"
70 #include "ri-util.h"
71 #include "misc.h"
72 
73 #include "c_syntax.h"
74 #include "cyacc.h"
75 
76 
77 /* To track the user line number, that is the one in the original user file */
78 static int C_line_number = UNDEFINED_C_LINE_NUMBER;
79 
80 /* The line increment is set to zero when header files are parsed. The
81  * goal is to reconstruct the line number in the user file and not the
82  * line number in the preprocessed file.
83  *
84  * This is performed in analyze_preprocessor_line() (util.c)
85  */
86 int C_line_increment = 1;
87 
88 /* To keep track of line intervals */
89 static int previous_c_lineno = UNDEFINED_C_LINE_NUMBER;
90 
91 int get_previous_c_lineno()
92 {
93  return previous_c_lineno;
94 }
95 
96 /* Use a stack to survive file inclusions, if any.
97 
98  I guess that it is currently not used since file inclusion is already
99  done by the preprocessor... RK
100 */
101 DEFINE_LOCAL_STACK(line_number, _int)
102 
103 /* To track the absolute line number in the file that is parsed
104  use the default flex yylineno that is renamed c_lineno here */
105 /* Use a stack to survive file inclusions, if any */
106 DEFINE_LOCAL_STACK(absolute_line_number, _int)
107 
108 /* Count the number of c in s. */
109 unsigned int character_occurences_in_string(string s, char c)
110 {
111  string p = string_undefined;
112  unsigned int count = 0;
113 
114  for(p=s; *p!='\0'; p++) {
115  count += (*p==c)? 1 : 0 ;
116  }
117  return count;
118 }
119 ␌
120 static int previous_C_line_number = UNDEFINED_C_LINE_NUMBER;
121 
122 int get_current_C_line_number(void)
123 {
124  /* FI: I assume that get_current_C_line_number is called only
125  * by some kind of make_statement()
126  *
127  * The PIPS preprocessed C function files contain a specific first line
128  * to convert the line number in the workspace unto a line number in the
129  * user source file.
130  */
131  previous_C_line_number = C_line_number;
132  previous_c_lineno = c_lineno;
133  return C_line_number;
134 }
135 
136 /* Should be called just before get_current_C_line_number */
137 int get_previous_C_line_number(void)
138 {
139  return previous_C_line_number;
140 }
141 
142 void set_current_C_line_number(void)
143 {
144  if (C_line_number == UNDEFINED_C_LINE_NUMBER) {
145  /* Initialize the user line number... */
146  C_line_number = FIRST_C_LINE_NUMBER;
147  // Rely on preprocessor pragma lines, which are not always present
148  // especially for code synthesized by PIPS...
149  // C_line_number = UNDEFINED_C_LINE_NUMBER;
150  previous_C_line_number = FIRST_C_LINE_NUMBER;
151  /* ... and the absolute line number in the current file */
152  c_lineno = FIRST_C_LINE_NUMBER;
153  /* The first line is used to indicate the line number in the user
154  * source file
155  */
156  previous_c_lineno = FIRST_C_LINE_NUMBER + 1;
157  }
158  else
159  pips_internal_error("C_line_number not resetted\n");
160 
161  C_line_increment = 1;
162 
163  /* Some check on it first? It should have been disallocated by reset_current_C_line_number() */
164  make_line_number_stack();
165  make_absolute_line_number_stack();
166 }
167 
168 /* The line number stack, designed for structured control structure, is
169  not used yet. */
170 void push_current_C_line_number(void)
171 {
172  int ln = get_current_C_line_number();
173 
174  line_number_push(ln);
175  absolute_line_number_push(c_lineno);
176 }
177 
178 int pop_current_C_line_number(void)
179 {
180  int ln = line_number_pop();
181  c_lineno = absolute_line_number_pop();
182 
183  return ln;
184 }
185 
186 void reset_current_C_line_number(void)
187 {
188  C_line_number = UNDEFINED_C_LINE_NUMBER;
189  previous_C_line_number = UNDEFINED_C_LINE_NUMBER;
190  c_lineno = FIRST_C_LINE_NUMBER;
191  previous_c_lineno = UNDEFINED_C_LINE_NUMBER;
192 
193  if(!line_number_empty_p()) {
194  pips_internal_error("Line number stack is not empty\n");
195  }
196  free_line_number_stack();
197  free_absolute_line_number_stack();
198 }
199 
200 void error_reset_current_C_line_number(void)
201 {
202  C_line_number = UNDEFINED_C_LINE_NUMBER;
203  previous_C_line_number = UNDEFINED_C_LINE_NUMBER;
204  c_lineno = FIRST_C_LINE_NUMBER;
205  previous_c_lineno = UNDEFINED_C_LINE_NUMBER;
206 
207  free_line_number_stack();
208  free_absolute_line_number_stack();
209 }
210 ␌
211 /* Comment management:
212 
213  - comments within declarations are misplaced (because there is no
214  corresponding attachment point)
215 
216  - comments for structured control structures such as "for", "switch",
217  "while",... are stacked so that we can get them back when building
218  the statement at the end of the statement;
219 
220  "do .... while()" is not handled properly; "else" cannot carry a
221  comment, because the "while" and "else" do not really exist in the
222  RI;
223 
224  - the comments for other statements are the current comments
225 
226  - end-of-line comments placed after a statement are stored as comment
227  for the next statement
228 
229  - end-of-line before comments that can be retained by the C parser are
230  gathered with the comment
231 
232  - comments appearing by the end of a block are lost (no NOP statement
233  is generated to carry them yet)
234 
235  - linefeed and comments inside a statement are collected as a comment
236  before statement (some weird impact on print-out may occur); same as
237  for declaration statements, but less frequent
238 
239  - some comments are still ignored, although available (to be implemented)
240 */
241 
242 static string C_current_comment = string_undefined;
243 /* To see if comments are collected within a statement or outside it is pretty easy to turn it on. It is more difficult to turn it off. */
244 static bool token_has_been_seen_p = false;
245 
246 void reset_token_has_been_seen_p() {token_has_been_seen_p = false;}
247 
248 DEFINE_LOCAL_STACK(comments, string)
249 
250 static int bracket_depth = -1;
251 
252 /* Return the current comment as a string to be freed by the caller and
253  reset the current comment. If the current comment is undefined, returns
254  a copy of the empty string, "".
255 
256  Reset also the current comment.
257  */
258 string get_current_C_comment(void)
259 {
260  string cc = C_current_comment;
261  C_current_comment = string_undefined;
262  if (cc != string_undefined) {
263  if (cc[0] == '\n') {
264  /* If the comment begins with a new-line, it is indeed the new-line
265  that ends the previous statement, so skip it. Quicker than strlen()
266  + memmove(): */
267  char * p = &cc[0];
268  do {
269  p[0] = p[1];
270  }
271  while (*p++ != '\0');
272  /* Note there won't be a memory leak since the orginal '\0' is in the
273  malloc() bloc to be free()ed anyway... */
274  }
275  /* If the comment is only an empty one, do not retain it: */
276  if (cc[0] == '\0') {
277  /* Do retain it to keep the statement data structure easy to use, allowing strdup() on its text fields */
278  //free(cc);
279  //cc = string_undefined;
280  ;
281  }
282  else {
283  /* Remove the trailing new-line if any since the RI is already
284  line-oriented at the comment level: This is already done above. */
285  char * last_newline = strrchr(cc, '\n');
286  if (last_newline != NULL && last_newline[1] == '\0') {
287  /* It is a trailing new-line: just get rid of it: */
288  //last_newline[0] = '\0';
289  ;
290  }
291  }
292  }
293  else
294  cc = strdup("");
295  /* pips_debug(3, "get_current_C_comment comment \"%s\"\n",
296  cc); */
297  // Too early
298  // token_has_been_seen_p = false;
299  return cc;
300 }
301 
302 /* Push the current C comment so that we can get it back when building the
303  statement later
304 
305  This reset the current comment through get_current_C_comment()
306 */
307 void push_current_C_comment(void)
308 {
309  string cc = get_current_C_comment();
310 
311  if (string_undefined_p(cc))
312  pips_debug(3, "empty comment pushed at line %d\n",
313  get_current_C_line_number());
314  else
315  pips_debug(3, "comment \"%s\" pushed at line %d\n", cc,
316  get_current_C_line_number());
317  comments_push(cc);
318 }
319 
320 /* Pop the current comment.
321 
322  @return the previous current comment.
323 
324  This is typically used at the end of a statement to be built.
325 
326  Note this do not set the current comment. Strange API...
327 */
328 string pop_current_C_comment(void)
329 {
330  string cc = comments_pop();
331  if (string_undefined_p(cc))
332  pips_debug(3, "empty comment popped at line %d\n",
333  get_current_C_line_number());
334  else
335  pips_debug(3, "comment \"%s\" popped at line %d\n", cc,
336  get_current_C_line_number());
337  return cc;
338 }
339 
340 /* Add a comment to the current one */
341 void update_C_comment(string a_comment)
342 {
343  /* Do not add LFs that appear within a statement */
344  bool is_LF_p = *a_comment=='\n' && *(a_comment+1)=='\000';
345  if(!(token_has_been_seen_p && a_comment!=NULL
346  && is_LF_p)) {
347  string new_comment = string_undefined;
348  // FI: seems to imply that concatenate accepts string_undefined as an argument...
349  int l = string_undefined_p(C_current_comment)? 0 : strlen(C_current_comment);
350  /* Do not concatenate two comments without a LF */
351  if(l>0 && *(C_current_comment+l-1)!='\n' && !is_LF_p)
352  new_comment = strdup(concatenate(C_current_comment, "\n", a_comment, NULL));
353  else
354  new_comment = strdup(concatenate(C_current_comment, a_comment, NULL));
355 
356  if (!string_undefined_p(C_current_comment))
357  free(C_current_comment);
358  C_current_comment = new_comment;
359  }
360  else {
361  static int c = 0;
362  c++;
363  pips_debug(8, "LF ignored: %d\n", c++);
364  }
365 
366  pips_debug(3,"update_C_comment %s\n",
367  C_current_comment==string_undefined?
368  "still undefined" : C_current_comment);
369 }
370 
371 /* Remove "extra_LF" trailing LF from C_current_comment if they can be
372  * found at the end of the comment string.
373  */
374 void remove_LFs_from_C_comment(int extra_LF)
375 {
376  if(!token_has_been_seen_p && C_current_comment != string_undefined) {
377  int l = strlen(C_current_comment);
378  int c = 0;
379  char * p = C_current_comment+l-1;
380  pips_debug(3,"begin: %s\n", C_current_comment);
381  // pips_assert("The comment string is longer than the number of LF to remove\n",
382  // extra_LF<=l);
383  if(extra_LF<=l) {
384  while(c<extra_LF && *p=='\n') {
385  c++;
386  *p = '\000';
387  p--;
388  }
389  if(c==extra_LF) { // We are fine
390  ;
391  }
392  else {
393  // Should be a pips_internal_warning()
394  pips_user_warning("%d extraneous LF left in comment\n", extra_LF-c);
395  }
396  }
397  pips_debug(3,"end: %s\n", C_current_comment);
398  }
399 }
400 
401 /* Discard a C comment because we don't know how to deal with it */
402 void discard_C_comment()
403 {
404  if(!string_undefined_p(C_current_comment)) {
405  if (character_occurences_in_string(C_current_comment, '\n')
406  == strlen(C_current_comment)) {
407  /* The comments are only made of '\n', just silently discarding them */
408  pips_debug(3,"The \\n are lost, so the code presentation may be wrong...\n");
409  }
410  else {
411  /*
412  pips_user_warning("Comment \"%s\" is lost at line %d, "
413  "probably because comments cannot be attached to declarations.\n",
414  C_current_comment, C_line_number);
415  */
416  pips_debug(8, "Comment \"%s\" is lost at line %d, "
417  "probably because comments cannot be attached to declarations.\n",
418  C_current_comment, C_line_number);
419  }
420  free(C_current_comment);
421  C_current_comment = string_undefined;
422  }
423 }
424 
425 /* reset and reset_error should be handled differently */
426 void reset_C_comment(bool is_compilation_unit_p)
427 {
428  if(!string_undefined_p(C_current_comment)) {
429  free(C_current_comment);
430  C_current_comment = string_undefined;
431  }
432  /* Comments in the compilation unit are lost because they are related only to
433  declarations and because comments on declarations are lost. Also, comments
434  located at the end of a block are lost, as we do not generate an extra NOP to carry them. */
435  if(!is_compilation_unit_p && !comments_empty_p()) {
436  int count = 0;
437  pips_user_warning("Comments stack is not empty (only meaningful comments are shown):\n");
438  while(!comments_empty_p()) {
439  string c = comments_pop();
440  count++;
441  if(strcmp(c, "\n")!=0)
442  fprintf(stderr, "Element %d: \"%s\"\n", count, c);
443  free(c);
444  }
445  /* pips_internal_error("Comments stack is not empty\n"); */
446  }
447  clear_C_comment();
448  free_comments_stack();
449  bracket_depth = -1;
450  token_has_been_seen_p = false;
451 }
452 
453 void error_reset_C_comment(bool is_compilation_unit_p __attribute__ ((__unused__)))
454 {
455  if(!string_undefined_p(C_current_comment)) {
456  free(C_current_comment);
457  C_current_comment = string_undefined;
458  }
459  clear_C_comment();
460  free_comments_stack();
461  bracket_depth = -1;
462  token_has_been_seen_p = false;
463 }
464 
465 void clear_C_comment()
466 {
467  if(!string_undefined_p(C_current_comment)) {
468  free(C_current_comment);
469  C_current_comment = string_undefined;
470  }
471  /* Comments in the compilation unit and outside of function bodies
472  are lost because they are related only to
473  declarations and because comments on declarations are lost.*/
474  if(!comments_empty_p()) {
475  int count = 0;
476  pips_debug(3, "Comments stack is not empty:\n");
477  while(!comments_empty_p()) {
478  string c = comments_pop();
479  count++;
480  pips_debug(3, "Element %d: \"%s\"\n",
481  count, string_undefined_p(c) ? "string undefined" : c);
482  if(!string_undefined_p(c))
483  free(c);
484  }
485  }
486  pips_assert("The comment stack is empty\n", comments_empty_p());
487 }
488 
489 void init_C_comment()
490 {
491  bracket_depth = 0;
492  if(!string_undefined_p(C_current_comment)) {
493  pips_internal_error("Missing reset for C_current_comment");
494  }
495  if(!stack_undefined_p(comments_stack) && !STACK_NULL_P(comments_stack) && !comments_empty_p()) {
496  pips_internal_error("Comment stack is not empty");
497  }
498  make_comments_stack();
499  token_has_been_seen_p = false;
500 }
501 ␌
/* Compatibility layer for BSD: map some BSD-internal names onto their
 * standard counterparts.
 *
 * @param pyytext address of the token text pointer. When the token is one
 * of the known BSD names, the pointer is redirected to a string literal
 * holding the standard name; otherwise it is left untouched.
 */
static void bsd_rewrite(char ** pyytext) {
  /* { BSD name, standard name } pairs, closed by a NULL sentinel. */
  char* aliases [][2] = {
    { "__stdinp", "stdin" },
    { "__stdoutp", "stdout" },
    { "__stderrp", "stderr" },
    { "__isnanl", "isnanl" },
    { "__isnanf", "isnanf" },
    { "__isnan", "isnan" },
    { NULL, NULL }
  };

  for (int i = 0; aliases[i][0] != NULL; i++) {
    if (same_string_p(*pyytext, aliases[i][0])) {
      *pyytext = aliases[i][1];
      return;
    }
  }
}
520 ␌
521 /* The lexer cannot handle the ambiguity between named types and
522  * variables without extra-help.
523  *
524  * This piece of code is copied from preprocessor/lexer.l.
525  */
526 
527 #define TOKEN_UNDEFINED (-1)
528 static int previous_keyword_token = TOKEN_UNDEFINED;
529 //static int penultimate_keyword_token = TOKEN_UNDEFINED;
530 
531 /* This is going to be the previous token because LEXER_RETURN is not
532 used in case the lexer handles either a named type or a variable. */
533 #define LEXER_RETURN(t) \
534  return(token_has_been_seen_p = true, \
535  /*penultimate_keyword_token=previous_keyword_token,*/ \
536  previous_keyword_token=t)
537 /* Some tokens, such as { and } do not mean that a statement has been entered */
538 #define SIMPLE_LEXER_RETURN(t) \
539  return( \
540  /*penultimate_keyword_token=previous_keyword_token,*/ \
541  previous_keyword_token=t)
542 
543 /* See if id is a keyword, a typedef or an identifier.
544  * Returns the token number for keywords and typedefs.
545  * Returns 0 for variable identifiers.
546  *
547  * The new typedefs are stored by c_parser_put_new_typedef().
548  */
549 static int is_c_parser_keyword_typedef(char * id)
550 {
551  token_has_been_seen_p = true;
552  /* No need to bother for scopes when dealing with C keywords,
553  * but do not take into account top-level typedefs which may be masked.
554  */
555  int t = is_c_keyword_typedef(id);
556  if(t==0 || t==TK_NAMED_TYPE) {
557  // id may be a keyword, but scopes must be used
558  //string sn = get_preprocessor_current_scope();
559  string sn = get_c_parser_current_scope();
560  string scoped_id = strdup(concatenate(id, "%", sn, NULL));
561  t = is_c_keyword_typedef(scoped_id);
562  free(scoped_id);
563  if(t == 0) {
564  // int i, n = preprocessor_scope_number();
565  int i, n = c_parser_number_of_scopes();
566  for(i=2; i<=n && t==0; i++) {
567  // sn = get_preprocessor_nth_scope(i);
568  sn = get_c_parser_nth_scope(i);
569  scoped_id = strdup(concatenate(id, "%", sn, NULL));
570  t = is_c_keyword_typedef(scoped_id);
571  ifdebug(1) {
572  if(t==TK_NAMED_TYPE)
573  fprintf(stderr, "Token \"%s\" identified as TK_NAMED_TYPE.\n",
574  scoped_id);
575  }
576  free(scoped_id);
577  }
578  /* Check again for a global typedef */
579  t = is_c_keyword_typedef(id);
580  }
581  }
582 #define TK_TYPE_P(tk) \
583  ((tk)==TK_CHAR || (tk)==TK_INT || \
584  (tk)==TK_DOUBLE || (tk)==TK_FLOAT || (tk)==TK_COMPLEX || \
585  (tk)==TK_ENUM || (tk)==TK_STRUCT || (tk)==TK_UNION || \
586  (tk)==TK_SIGNED|| (tk)==TK_UNSIGNED|| (tk)==TK_LONG|| (tk)==TK_SHORT ||\
587  (tk)==TK_RETURN)
588  if(t==TK_NAMED_TYPE
589  && (TK_TYPE_P(previous_keyword_token)
590  ||previous_keyword_token==TK_NAMED_TYPE)) {
591  //if(penultimate_keyword_token!=TK_TYPEDEF) {
592  // Identifier
593  t = 0;
594  pips_debug(1, "Token \"%s\" is in fact assumed to be an identifier.\n",
595  id);
596  //}
597  }
598  if(t==TK_STATIC && bracket_depth>0)
599  t = TK_STATIC_DIMENSION;
600  previous_keyword_token = t;
601  pips_debug(5, "Token \"%s\" is assumed to be %d.\n", id, t);
602  return t;
603 }
604 
605 %}
606 
607 
608 /* This should be used instead of the manual computation of
609  * C_line_number... but C_line_number is adjusted according to the
610  * preprocessor line pragmas.
611  */
612 %option yylineno
613 
614  /* To deal more cleanly with the comments:
615  %x COMMENT
616  */
617  /* To deal cleanly with complex pragma line escape, use a separate state: */
618 %x the_end_of_pragma
619 
620 decdigit [0-9]
621 octdigit [0-7]
622 hexdigit [0-9a-fA-F]
623 letter [a-zA-Z]
624 usuffix [uU]
625 lsuffix (l|L|ll|LL)
626 intsuffix (({lsuffix})|({usuffix})|({usuffix}{lsuffix})|({lsuffix}{usuffix}))
627 hexprefix (0[xX])
628 intnum ({decdigit}+{intsuffix}?)
629 octnum (0{octdigit}+{intsuffix}?)
630 hexnum ({hexprefix}{hexdigit}+{intsuffix}?)
631 exponent ([eE][\+\-]?{decdigit}+)
632 fraction (\.{decdigit}+)
633 decfloat (({intnum}?{fraction})|({intnum}{exponent})|({intnum}?{fraction}{exponent})|({intnum}\.)|({intnum}\.{exponent}))
634 hexfraction (({hexdigit}*"."{hexdigit}+)|({hexdigit}+))
635 binexponent ([pP][\+\-]?{decdigit}+)
636 floatonlynum ({decfloat}{floatsuffix}?)
637 floatexponent ([pP][\+\-]?{floatonlynum})
638 hexfloat (({hexprefix}{hexfraction}{binexponent})|({hexprefix}{hexdigit}+{binexponent})|({hexprefix}{hexdigit}{floatexponent}))
639 floatsuffix [fFlL]
640 complexsuffix [ij]
641 floatnum (({decfloat}|{hexfloat}){floatsuffix}?)
642 complexnum (({decfloat}|{hexfloat})({complexsuffix}{floatsuffix}?|{floatsuffix}{complexsuffix}))
643 ident ({letter}|"_")({letter}|{decdigit}|"_")*
644 attribident (({letter}|"_")({letter}|{decdigit}|"_"|":"))
645 escape \\
646  /* From C norm A.1: */
647 hex_escape ({escape}x{hexdigit}{1,2})
648 wide_hex_escape ({escape}x{hexdigit}{1,8})
649 oct_escape ({escape}{octdigit}{1,3})
650 char_escape ({escape}[abfnrtv\'\\?"])
651 /* " */
652 universal-character-name ({escape}(u{hexdigit}{4}|U{hexdigit}{8}))
653 
654 
655 %%
656 
657 <the_end_of_pragma>("\\\n"|[^\n])*"\n" {
658  /* Parse some potentially escaped line up to a normal new
659  line that is the end of a pragma: */
660  /* Return the pragma value without the final \n: */
661  c_lval.string = strndup(yytext, strlen(yytext) - 1);
662  C_line_number += C_line_increment * character_occurences_in_string(yytext, '\n');
663  BEGIN(INITIAL);
664  LEXER_RETURN(TK_PRAGMA);
665  }
666 
667 
668 ("/*"([^*]|("*"+[^/*]))*"*"+"/")|("//"("\\\n"|[^\n])*"\n"?) { // */
669 
670  pips_debug(9,"Comment %s\n",yytext);
671  C_line_number += C_line_increment * character_occurences_in_string(yytext, '\n');
672  update_C_comment(yytext);
673  }
674 
675 ^"#"("\\\n"|[ \t])*"pragma"([ \t])* {
676 
677  /* Look for a # followed by any whitespace and any
678  backslashed end-of line, followed by "pragma" up to the
679  end of line, dealing with any backslashed new lines: */
680  pips_debug(1, "#pragma found: \"%s\"\n", yytext);
681  /* To deal with complex line escape, put the end of the
682  analysis in another rule: */
683  BEGIN(the_end_of_pragma);
684  }
685 
686 ^"#"[ \t]*{decdigit}+[^\n]*"\n" {
687 
688  // The current comment may be adjusted if required
689  C_line_number = analyze_preprocessor_line(yytext, C_line_number);
690  }
691 
692 \n {
693  pips_debug(9,"New line %d, increment=%d, token_seen_p=%d\n", C_line_number, C_line_increment, token_has_been_seen_p);
694 
695  /* Gather a free "\n" as a comment to keep
696  user presentation information. If it is
697  in a place where we cannot store a
698  comment in the RI, it will be silently
699  lost by update_C_comment(). */
700  update_C_comment(yytext);
701  C_line_number += C_line_increment;
702  }
703 
704 [[:blank:]] {
705  /* Eat up whitespaces */
706  }
707 
708 "_Pragma" {
709  pips_debug(9,"_Pragma found in \"%s\"\n",yytext);
710  LEXER_RETURN(TK__Pragma);
711  }
712 
713 \'([^\\]|{char_escape}|{oct_escape}|{hex_escape}|{universal-character-name})\' {
714  /* Escaped character constants. Their
715  syntax is understood in PIPS character
716  constant construtors. */
717  pips_debug(9,"TK_CHARCON: %s\n",yytext);
718  c_lval.string = strdup(yytext);
719  LEXER_RETURN(TK_CHARCON);
720  }
721 
722 L\'([^\\]|{char_escape}|{oct_escape}|{wide_hex_escape}|{universal-character-name})\' {
723  /* Escaped wide character constants. Their
724  syntax is understood in PIPS character
725  constant construtors. */
726  pips_debug(9,"TK_CHARCON wide character constant: %s\n",yytext);
727  c_lval.string = strdup(yytext);
728  LEXER_RETURN(TK_CHARCON);
729  }
730 
731 \"(\\\"|[^\"\\]|\\[^\"])*\" {
732  pips_debug(9,"TK_STRINGCON regular or wide string: %s\n",yytext);
733  /* the "world" in L"Hello, " "world"
734  should be treated as wide even though
735  there's no L immediately preceding
736  it */
737  /* Indeed there is no special caracter
738  handling here since it is done
739  somewhere else in PIPS in the constant
740  string constructor. */
741  c_lval.string = strdup(yytext);
742  LEXER_RETURN(TK_STRINGCON);
743  }
744 
745 L\"(\\\"|[^\"\\]|\\[^\"])*\" {
746  pips_debug(9,"TK_WSTRINGCON wide string: %s\n",yytext);
747  c_lval.string = strdup(yytext);
748  LEXER_RETURN(TK_WSTRINGCON);
749  }
750 {floatnum} {
751  pips_debug(9,"TK_FLOATCON %s\n",yytext);
752  c_lval.string = strdup(yytext);
753  LEXER_RETURN(TK_FLOATCON);
754  }
755 {complexnum} {
756  pips_debug(5,"TK_COMPLEXCON %s\n",yytext);
757  c_lval.string = strdup(yytext);
758  LEXER_RETURN(TK_COMPLEXCON);
759  }
760 {hexnum} {
761  pips_debug(9,"Hexnum TK_INTCON %s\n",yytext);
762  c_lval.string = strdup(yytext);
763  LEXER_RETURN(TK_INTCON);
764  }
765 {octnum} {
766  pips_debug(9,"Octnum TK_INTCON %s\n",yytext);
767  c_lval.string = strdup(yytext);
768  LEXER_RETURN(TK_INTCON);
769  }
770 {intnum} {
771  pips_debug(9,"TK_INTCON %s\n",yytext);
772  c_lval.string = strdup(yytext);
773  LEXER_RETURN(TK_INTCON);
774  }
775 "!quit!" {
776  pips_debug(9,"TK_EOF %s\n",yytext);
777  yyterminate();
778  }
779 "..." {
780  pips_debug(9,"TK_ELLIPSIS %s\n",yytext);
781  LEXER_RETURN(TK_ELLIPSIS);
782  }
783 "+=" {
784  pips_debug(9,"TK_PLUS_EQ %s\n",yytext);
785  LEXER_RETURN(TK_PLUS_EQ);
786  }
787 "-=" {
788  pips_debug(9,"TK_MINUS_EQ %s\n",yytext);
789  LEXER_RETURN(TK_MINUS_EQ);
790  }
791 "*=" {
792  pips_debug(9,"TK_STAR_EQ %s\n",yytext);
793  LEXER_RETURN(TK_STAR_EQ);
794  }
795 "/=" {
796  pips_debug(9,"TK_SLASH_EQ %s\n",yytext);
797  LEXER_RETURN(TK_SLASH_EQ);
798  }
799 "%=" {
800  pips_debug(9,"TK_PERCENT_EQ %s\n",yytext);
801  LEXER_RETURN(TK_PERCENT_EQ);
802  }
803 "|=" {
804  pips_debug(9,"TK_PIPE_EQ %s\n",yytext);
805  LEXER_RETURN(TK_PIPE_EQ);
806  }
807 "&=" {
808  pips_debug(9,"TK_AND_EQ %s\n",yytext);
809  LEXER_RETURN(TK_AND_EQ);
810  }
811 "^=" {
812  pips_debug(9,"TK_CIRC_EQ %s\n",yytext);
813  LEXER_RETURN(TK_CIRC_EQ);
814  }
815 "<<=" {
816  pips_debug(9,"TK_INF_INF_EQ %s\n",yytext);
817  LEXER_RETURN(TK_INF_INF_EQ);
818  }
819 ">>=" {
820  pips_debug(9,"TK_SUP_SUP_EQ %s\n",yytext);
821  LEXER_RETURN(TK_SUP_SUP_EQ);
822  }
823 "<<" {
824  pips_debug(9,"TK_INF_INF %s\n",yytext);
825  LEXER_RETURN(TK_INF_INF);
826  }
827 ">>" {
828  pips_debug(9,"TK_SUP_SUP %s\n",yytext);
829  LEXER_RETURN(TK_SUP_SUP);
830  }
831 "==" {
832  pips_debug(9,"TK_EQ_EQ %s\n",yytext);
833  LEXER_RETURN(TK_EQ_EQ);
834  }
835 "!=" {
836  pips_debug(9,"TK_EXCLAM_EQ %s\n",yytext);
837  LEXER_RETURN(TK_EXCLAM_EQ);
838  }
839 "<=" {
840  pips_debug(9,"TK_INF_EQ %s\n",yytext);
841  LEXER_RETURN(TK_INF_EQ);
842  }
843 ">=" {
844  pips_debug(9,"TK_SUP_EQ %s\n",yytext);
845  LEXER_RETURN(TK_SUP_EQ);
846  }
847 "=" {
848  pips_debug(9,"TK_EQ %s\n",yytext);
849  LEXER_RETURN(TK_EQ);
850  }
851 "<" {
852  pips_debug(9,"TK_INF %s\n",yytext);
853  LEXER_RETURN(TK_INF);
854  }
855 ">" {
856  pips_debug(9,"TK_SUP %s\n",yytext);
857  LEXER_RETURN(TK_SUP);
858  }
859 "++" {
860  pips_debug(9,"TK_PLUS_PLUS %s\n",yytext);
861  LEXER_RETURN(TK_PLUS_PLUS);
862  }
863 "--" {
864  pips_debug(9,"TK_MINUS_MINUS %s\n",yytext);
865  LEXER_RETURN(TK_MINUS_MINUS);
866  }
867 "->" {
868  pips_debug(9,"TK_ARROW %s\n",yytext);
869  LEXER_RETURN(TK_ARROW);
870  }
871 "+" {
872  pips_debug(9,"TK_PLUS %s\n",yytext);
873  LEXER_RETURN(TK_PLUS);
874  }
875 "-" {
876  pips_debug(9,"TK_MINUS %s\n",yytext);
877  LEXER_RETURN(TK_MINUS);
878  }
879 "*" {
880  pips_debug(9,"TK_STAR %s\n",yytext);
881  LEXER_RETURN(TK_STAR);
882  }
883 "/" {
884  pips_debug(9,"TK_SLASH %s\n",yytext);
885  LEXER_RETURN(TK_SLASH);
886  }
887 "%" {
888  pips_debug(9,"TK_PERCENT %s\n",yytext);
889  LEXER_RETURN(TK_PERCENT);
890  }
891 "!" {
892  pips_debug(9,"TK_EXCLAM %s\n",yytext);
893  LEXER_RETURN(TK_EXCLAM);
894  }
895 "&&" {
896  pips_debug(9,"TK_AND_AND %s\n",yytext);
897  LEXER_RETURN(TK_AND_AND);
898  }
899 "||" {
900  pips_debug(9,"TK_PIPE_PIPE %s\n",yytext);
901  LEXER_RETURN(TK_PIPE_PIPE);
902  }
903 "&" {
904  pips_debug(9,"TK_AND %s\n",yytext);
905  LEXER_RETURN(TK_AND);
906  }
907 "|" {
908  pips_debug(9,"TK_PIPE %s\n",yytext);
909  LEXER_RETURN(TK_PIPE);
910  }
911 "^" {
912  pips_debug(9,"TK_CIRC %s\n",yytext);
913  LEXER_RETURN(TK_CIRC);
914  }
915 "?" {
916  pips_debug(9,"TK_QUEST %s\n",yytext);
917  LEXER_RETURN(TK_QUEST);
918  }
919 ":" {
920  pips_debug(9,"TK_COLON %s\n",yytext);
921  LEXER_RETURN(TK_COLON);
922  }
923 "~" {
924  pips_debug(9,"TK_TILDE %s\n",yytext);
925  LEXER_RETURN(TK_TILDE);
926  }
927 "\{" {
928  pips_debug(9,"TK_LBRACE %s\n",yytext);
929  // We do not know what to do for a block,
930  // but we would like to keep them for struct
931  // and union declaration
932  //discard_C_comment();
933  SIMPLE_LEXER_RETURN(TK_LBRACE);
934  }
935 "\}" {
936  pips_debug(9,"TK_RBRACE %s\n",yytext);
937  //discard_C_comment();
938  SIMPLE_LEXER_RETURN(TK_RBRACE);
939  }
940 "[" {
941  pips_debug(9,"TK_LBRACKET %s\n",yytext);
942  bracket_depth++;
943  LEXER_RETURN(TK_LBRACKET);
944  }
945 "]" {
946  pips_debug(9,"TK_RBRACKET %s\n",yytext);
947  bracket_depth--;
948  LEXER_RETURN(TK_RBRACKET);
949  }
950 "(" {
951  pips_debug(9,"TK_LPAREN %s\n",yytext);
952  LEXER_RETURN(TK_LPAREN);
953  }
954 ")" {
955  pips_debug(9,"TK_RPAREN %s\n",yytext);
956  LEXER_RETURN(TK_RPAREN);
957  }
958 ";" {
959  pips_debug(9,"TK_SEMICOLON %s\n",yytext);
960  /* discard_C_comment(); */
961  LEXER_RETURN(TK_SEMICOLON);
962  }
963 "," {
964  pips_debug(9,"TK_COMMA %s\n",yytext);
965  LEXER_RETURN(TK_COMMA);
966  }
967 "." {
968  pips_debug(9,"TK_DOT %s\n",yytext);
969  LEXER_RETURN(TK_DOT);
970  }
971 "sizeof" {
972  pips_debug(9,"TK_SIZEOF %s\n",yytext);
973  LEXER_RETURN(TK_SIZEOF);
974  }
975 "__asm" {
976  pips_debug(9,"TK_ASM %s\n",yytext);
977  LEXER_RETURN(TK_ASM);
978  }
"__va_list" {
    /* Convert __va_list from BSD into __builtin_va_list.
     *
     * The replacement name is kept in a local buffer: yytext belongs
     * to the scanner and is left alone, and a single copy is
     * allocated, for the parser, instead of leaking an extra
     * strdup() on each occurrence. */
    char builtin_name[] = "__builtin_va_list";
    pips_debug(5,"TK_VA_LIST %s\n",yytext);
    int t = is_c_keyword_typedef(builtin_name);
    pips_assert("builtin valist found\n",t>0);
    pips_debug(9,"Keyword or typedef name: %s\n",builtin_name);
    c_lval.string = strdup(builtin_name);
    LEXER_RETURN(t);
}
989 
990 "__extension__" {
991  pips_user_warning("gcc extension keyword \"__extension__\" is ignored\n");
992  }
993 "__inline" {
994  pips_user_warning("gcc extension keyword \"__inline\" is ignored\n");
995  }
996 "__inline__" {
997  pips_user_warning("gcc extension keyword \"__inline__\" is ignored\n");
998  }
999 "__signed__" {
1000  pips_user_warning("gcc extension keyword \"__signed__\" is ignored\n");
1001  }
1002 {ident} {
1003  bsd_rewrite(&yytext);
1004  /* C keywords or identifiers */
1005  int t = is_c_parser_keyword_typedef(yytext);
1006  if (t>0)
1007  {
1008  pips_debug(9,"Keyword or typedef name: %s\n",yytext);
1009  c_lval.string = strdup(yytext);
1010  /* Some comments are lost, but not especially in declarations
1011  because type keywords may appear in casts */
1012  if(t==TK_ELSE /*|| (t>=TK_CHAR && t <=TK_REGISTER)*/ )
1013  discard_C_comment();
1014  //No LEXER_RETURN in this case: beware!
1015  return(t);
1016  }
1017  else
1018  {
1019  pips_debug(9,"Identifier: %s\n",yytext);
1020  c_lval.string = strdup(yytext);
1021  /* push_current_C_comment(); */
1022  LEXER_RETURN(TK_IDENT);
1023  }
1024  }
1025 <<EOF>> {
1026  pips_debug(9,"TK_EOF %s\n",yytext);
1027  LEXER_RETURN(TK_EOF);
1028  }
1029 . {
1030  pips_user_warning("Unrecognized character %s\n",yytext);
1031  }
1032 %%
1033 
1034 /* The default function called when a parsing error happens */
1035 void c_error(char * msg)
1036 {
1037  user_warning("yyerror", "C %s near \"%s\" at user line %d (local line %d in the effective file)\n", msg, yytext, get_current_C_line_number(), c_lineno);
1038  CParserError("Parsing failed\n");
1039 }
1040 
1041 
1042 /* Mainly reset the line number and restart on c_in... */
1043 void c_reset_lex(void)
1044 {
1045  c_lineno = 1;
1046 #ifdef FLEX_SCANNER
1047  c_restart(c_in);
1048 #else
1049  /* reset lex... Might be better to read the whole file?
1050  */
1051  /* Huh... What is this ? RK */
1052  syn_sptr = syn_sbuf;
1053 # define MMNEWLINE 10
1054  syn_previous = MMNEWLINE;
1055 #endif
1056 }
1057 
#ifndef c_wrap
/* End-of-input hook, provided only when c_wrap is not already a macro.
 * It always returns 1; presumably this is the scanner's yywrap() under the
 * "c_" prefix, for which 1 means that there is no further input file --
 * TODO confirm against the build options. */
int c_wrap()
{
  return 1;
}
#endif