From 93ca0217a2aa3047c10518e991ab8578e90829e7 Mon Sep 17 00:00:00 2001 From: Sean Silva Date: Thu, 13 Dec 2012 01:10:46 +0000 Subject: [PATCH] docs: More reST conversion. Sorry for the large commit, but it is much faster to convert in batches. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@170067 91177308-0d34-0410-b5e6-96231b3b80d8 --- docs/AddressSanitizer.html | 171 ---- docs/AddressSanitizer.rst | 158 ++++ docs/AnalyzerRegions.html | 260 ------ docs/AnalyzerRegions.rst | 259 ++++++ docs/ClangPlugins.html | 170 ---- docs/ClangPlugins.rst | 149 +++ docs/ClangTools.html | 110 --- docs/ClangTools.rst | 91 ++ docs/HowToSetupToolingForLLVM.html | 212 ----- docs/HowToSetupToolingForLLVM.rst | 211 +++++ docs/IntroductionToTheClangAST.html | 139 --- docs/IntroductionToTheClangAST.rst | 135 +++ docs/JSONCompilationDatabase.html | 89 -- docs/JSONCompilationDatabase.rst | 85 ++ docs/LibASTMatchersTutorial.html | 533 ----------- docs/LibASTMatchersTutorial.rst | 532 +++++++++++ docs/PTHInternals.html | 179 ---- docs/PTHInternals.rst | 164 ++++ docs/RAVFrontendAction.html | 224 ----- docs/RAVFrontendAction.rst | 216 +++++ docs/UsersManual.html | 1338 --------------------------- docs/UsersManual.rst | 1238 +++++++++++++++++++++++++ docs/index.rst | 11 + 23 files changed, 3249 insertions(+), 3425 deletions(-) delete mode 100644 docs/AddressSanitizer.html create mode 100644 docs/AddressSanitizer.rst delete mode 100644 docs/AnalyzerRegions.html create mode 100644 docs/AnalyzerRegions.rst delete mode 100644 docs/ClangPlugins.html create mode 100644 docs/ClangPlugins.rst delete mode 100644 docs/ClangTools.html create mode 100644 docs/ClangTools.rst delete mode 100644 docs/HowToSetupToolingForLLVM.html create mode 100644 docs/HowToSetupToolingForLLVM.rst delete mode 100644 docs/IntroductionToTheClangAST.html create mode 100644 docs/IntroductionToTheClangAST.rst delete mode 100644 docs/JSONCompilationDatabase.html create mode 100644 docs/JSONCompilationDatabase.rst delete 
mode 100644 docs/LibASTMatchersTutorial.html create mode 100644 docs/LibASTMatchersTutorial.rst delete mode 100644 docs/PTHInternals.html create mode 100644 docs/PTHInternals.rst delete mode 100644 docs/RAVFrontendAction.html create mode 100644 docs/RAVFrontendAction.rst delete mode 100644 docs/UsersManual.html create mode 100644 docs/UsersManual.rst diff --git a/docs/AddressSanitizer.html b/docs/AddressSanitizer.html deleted file mode 100644 index 397eafc2d5..0000000000 --- a/docs/AddressSanitizer.html +++ /dev/null @@ -1,171 +0,0 @@ - - - - - - AddressSanitizer, a fast memory error detector - - - - - - - - -
- -

AddressSanitizer

- - -

Introduction

-AddressSanitizer is a fast memory error detector. -It consists of a compiler instrumentation module and a run-time library. -The tool can detect the following types of bugs: - -Typical slowdown introduced by AddressSanitizer is 2x. - -

How to build

-Follow the clang build instructions. -CMake build is supported.
- -

Usage

-Simply compile and link your program with -fsanitize=address flag.
-The AddressSanitizer run-time library should be linked to the final executable, -so make sure to use clang (not ld) for the final link step.
-When linking shared libraries, the AddressSanitizer run-time is not linked, -so -Wl,-z,defs may cause link errors (don't use it with AddressSanitizer).
- -To get a reasonable performance add -O1 or higher.
-To get nicer stack traces in error messages add --fno-omit-frame-pointer.
-To get perfect stack traces you may need to disable inlining (just use -O1) and tail call -elimination (-fno-optimize-sibling-calls). - -
-% cat example_UseAfterFree.cc
-int main(int argc, char **argv) {
-  int *array = new int[100];
-  delete [] array;
-  return array[argc];  // BOOM
-}
-
- -
-# Compile and link
-% clang -O1 -g -fsanitize=address -fno-omit-frame-pointer example_UseAfterFree.cc
-
-OR -
-# Compile
-% clang -O1 -g -fsanitize=address -fno-omit-frame-pointer -c example_UseAfterFree.cc
-# Link
-% clang -g -fsanitize=address example_UseAfterFree.o
-
- -If a bug is detected, the program will print an error message to stderr and exit with a -non-zero exit code. -Currently, AddressSanitizer does not symbolize its output, so you may need to use a -separate script to symbolize the result offline (this will be fixed in future). -
-% ./a.out 2> log
-% projects/compiler-rt/lib/asan/scripts/asan_symbolize.py / < log | c++filt
-==9442== ERROR: AddressSanitizer heap-use-after-free on address 0x7f7ddab8c084 at pc 0x403c8c bp 0x7fff87fb82d0 sp 0x7fff87fb82c8
-READ of size 4 at 0x7f7ddab8c084 thread T0
-    #0 0x403c8c in main example_UseAfterFree.cc:4
-    #1 0x7f7ddabcac4d in __libc_start_main ??:0
-0x7f7ddab8c084 is located 4 bytes inside of 400-byte region [0x7f7ddab8c080,0x7f7ddab8c210)
-freed by thread T0 here:
-    #0 0x404704 in operator delete[](void*) ??:0
-    #1 0x403c53 in main example_UseAfterFree.cc:4
-    #2 0x7f7ddabcac4d in __libc_start_main ??:0
-previously allocated by thread T0 here:
-    #0 0x404544 in operator new[](unsigned long) ??:0
-    #1 0x403c43 in main example_UseAfterFree.cc:2
-    #2 0x7f7ddabcac4d in __libc_start_main ??:0
-==9442== ABORTING
-
- -AddressSanitizer exits on the first detected error. This is by design. -One reason: it makes the generated code smaller and faster (both by ~5%). -Another reason: this makes fixing bugs unavoidable. With Valgrind, it is often -the case that users treat Valgrind warnings as false positives -(which they are not) and don't fix them. - - -

__has_feature(address_sanitizer)

-In some cases one may need to execute different code depending on whether -AddressSanitizer is enabled. -__has_feature -can be used for this purpose. -
-#if defined(__has_feature)
-# if __has_feature(address_sanitizer)
-  code that builds only under AddressSanitizer
-# endif
-#endif
-
- -

__attribute__((no_address_safety_analysis))

-Some code should not be instrumented by AddressSanitizer. -One may use the function attribute - - no_address_safety_analysis -to disable instrumentation of a particular function. -This attribute may not be supported by other compilers, so we suggest to -use it together with __has_feature(address_sanitizer). -Note: currently, this attribute will be lost if the function is inlined. - -

Supported Platforms

-AddressSanitizer is supported on - -Support for Linux ARM (and Android ARM) is in progress -(it may work, but is not guaranteed to). - - -

Limitations

- - - -

Current Status

-AddressSanitizer is fully functional on supported platforms starting from LLVM 3.1. -The test suite is integrated into CMake build and can be run with -make check-asan command. - -

More Information

-http://code.google.com/p/address-sanitizer. - - -
- - diff --git a/docs/AddressSanitizer.rst b/docs/AddressSanitizer.rst new file mode 100644 index 0000000000..0ee108bd9e --- /dev/null +++ b/docs/AddressSanitizer.rst @@ -0,0 +1,158 @@ +================ +AddressSanitizer +================ + +.. contents:: + :local: + +Introduction +============ + +AddressSanitizer is a fast memory error detector. It consists of a +compiler instrumentation module and a run-time library. The tool can +detect the following types of bugs: + +- Out-of-bounds accesses to heap, stack and globals +- Use-after-free +- Use-after-return (to some extent) +- Double-free, invalid free + +Typical slowdown introduced by AddressSanitizer is **2x**. + +How to build +============ + +Follow the `clang build instructions <../get_started.html>`_. CMake +build is supported. + +Usage +===== + +Simply compile and link your program with ``-fsanitize=address`` flag. +The AddressSanitizer run-time library should be linked to the final +executable, so make sure to use ``clang`` (not ``ld``) for the final +link step. +When linking shared libraries, the AddressSanitizer run-time is not +linked, so ``-Wl,-z,defs`` may cause link errors (don't use it with +AddressSanitizer). +To get a reasonable performance add ``-O1`` or higher. +To get nicer stack traces in error messages add +``-fno-omit-frame-pointer``. +To get perfect stack traces you may need to disable inlining (just use +``-O1``) and tail call elimination (``-fno-optimize-sibling-calls``). 
+ +:: + + % cat example_UseAfterFree.cc + int main(int argc, char **argv) { + int *array = new int[100]; + delete [] array; + return array[argc]; // BOOM + } + +:: + + # Compile and link + % clang -O1 -g -fsanitize=address -fno-omit-frame-pointer example_UseAfterFree.cc + +OR + +:: + + # Compile + % clang -O1 -g -fsanitize=address -fno-omit-frame-pointer -c example_UseAfterFree.cc + # Link + % clang -g -fsanitize=address example_UseAfterFree.o + +If a bug is detected, the program will print an error message to stderr +and exit with a non-zero exit code. Currently, AddressSanitizer does not +symbolize its output, so you may need to use a separate script to +symbolize the result offline (this will be fixed in future). + +:: + + % ./a.out 2> log + % projects/compiler-rt/lib/asan/scripts/asan_symbolize.py / < log | c++filt + ==9442== ERROR: AddressSanitizer heap-use-after-free on address 0x7f7ddab8c084 at pc 0x403c8c bp 0x7fff87fb82d0 sp 0x7fff87fb82c8 + READ of size 4 at 0x7f7ddab8c084 thread T0 + #0 0x403c8c in main example_UseAfterFree.cc:4 + #1 0x7f7ddabcac4d in __libc_start_main ??:0 + 0x7f7ddab8c084 is located 4 bytes inside of 400-byte region [0x7f7ddab8c080,0x7f7ddab8c210) + freed by thread T0 here: + #0 0x404704 in operator delete[](void*) ??:0 + #1 0x403c53 in main example_UseAfterFree.cc:4 + #2 0x7f7ddabcac4d in __libc_start_main ??:0 + previously allocated by thread T0 here: + #0 0x404544 in operator new[](unsigned long) ??:0 + #1 0x403c43 in main example_UseAfterFree.cc:2 + #2 0x7f7ddabcac4d in __libc_start_main ??:0 + ==9442== ABORTING + +AddressSanitizer exits on the first detected error. This is by design. +One reason: it makes the generated code smaller and faster (both by +~5%). Another reason: this makes fixing bugs unavoidable. With Valgrind, +it is often the case that users treat Valgrind warnings as false +positives (which they are not) and don't fix them. 
+ +\_\_has\_feature(address\_sanitizer) +------------------------------------ + +In some cases one may need to execute different code depending on +whether AddressSanitizer is enabled. +`\_\_has\_feature `_ +can be used for this purpose. + +:: + + #if defined(__has_feature) + # if __has_feature(address_sanitizer) + code that builds only under AddressSanitizer + # endif + #endif + +``__attribute__((no_address_safety_analysis))`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Some code should not be instrumented by AddressSanitizer. One may use +the function attribute +`no_address_safety_analysis `_ +to disable instrumentation of a particular function. This attribute may +not be supported by other compilers, so we suggest to use it together +with ``__has_feature(address_sanitizer)``. Note: currently, this +attribute will be lost if the function is inlined. + +Supported Platforms +=================== + +AddressSanitizer is supported on + +- Linux i386/x86\_64 (tested on Ubuntu 10.04 and 12.04). +- MacOS 10.6, 10.7 and 10.8 (i386/x86\_64). + +Support for Linux ARM (and Android ARM) is in progress (it may work, but +is not guaranteed too). + +Limitations +=========== + +- AddressSanitizer uses more real memory than a native run. Exact + overhead depends on the allocations sizes. The smaller the + allocations you make the bigger the overhead is. +- AddressSanitizer uses more stack memory. We have seen up to 3x + increase. +- On 64-bit platforms AddressSanitizer maps (but not reserves) 16+ + Terabytes of virtual address space. This means that tools like + ``ulimit`` may not work as usually expected. +- Static linking is not supported. + +Current Status +============== + +AddressSanitizer is fully functional on supported platforms starting +from LLVM 3.1. The test suite is integrated into CMake build and can be +run with ``make check-asan`` command. + +More Information +================ + +`http://code.google.com/p/address-sanitizer `_. 
diff --git a/docs/AnalyzerRegions.html b/docs/AnalyzerRegions.html deleted file mode 100644 index f9d3337920..0000000000 --- a/docs/AnalyzerRegions.html +++ /dev/null @@ -1,260 +0,0 @@ - - - -Static Analyzer Design Document: Memory Regions - - - -

Static Analyzer Design Document: Memory Regions

- -

Authors

- -

Ted Kremenek, kremenek at apple
-Zhongxing Xu, xuzhongzhing at gmail

- -

Introduction

- -

The path-sensitive analysis engine in libAnalysis employs an extensible API -for abstractly modeling the memory of an analyzed program. This API employs the -concept of "memory regions" to abstractly model chunks of program memory such as -program variables and dynamically allocated memory such as those returned from -'malloc' and 'alloca'. Regions are hierarchical, with subregions modeling -subtyping relationships, field and array offsets into larger chunks of memory, -and so on.

- -

The region API consists of two components:

- -
  • A taxonomy and representation of regions themselves within the analyzer -engine. The primary definitions and interfaces are described in MemRegion.h. -At the root of the region hierarchy is the class MemRegion with -specific subclasses refining the region concept for variables, heap allocated -memory, and so forth.
  • The modeling of binding of values to regions. For -example, modeling the value stored to a local variable x consists of -recording the binding between the region for x (which represents the -raw memory associated with x) and the value stored to x. This -binding relationship is captured with the notion of "symbolic -stores."
- -

Symbolic stores, which can be thought of as representing the relation -regions -> values, are implemented by subclasses of the -StoreManager class (Store.h). A -particular StoreManager implementation has complete flexibility concerning the -following: - -

    -
  • How to model the binding between regions and values
  • -
  • What bindings are recorded -
- -

Together, both points allow different StoreManagers to trade off between -different levels of analysis precision and scalability concerning the reasoning -of program memory. Meanwhile, the core path-sensitive engine makes no -assumptions about either point, and queries a StoreManager about the bindings -to a memory region through a generic interface that all StoreManagers share. If -a particular StoreManager cannot reason about the potential bindings of a given -memory region (e.g., 'BasicStoreManager' does not reason about fields -of structures) then the StoreManager can simply return 'unknown' (represented by -'UnknownVal') for a particular region-binding. This separation of -concerns not only isolates the core analysis engine from the details of -reasoning about program memory but also facilitates the option of a client of the -path-sensitive engine to easily swap in different StoreManager implementations -that internally reason about program memory in very different ways.

- -

The rest of this document is divided into two parts. We first discuss region -taxonomy and the semantics of regions. We then discuss the StoreManager -interface, and details of how the currently available StoreManager classes -implement region bindings.

- -

Memory Regions and Region Taxonomy

- -

Pointers

- -

Before talking about the memory regions, we would talk about the pointers -since memory regions are essentially used to represent pointer values.

- -

The pointer is a type of values. Pointer values have two semantic aspects. -One is its physical value, which is an address or location. The other is the -type of the memory object residing in the address.

- -

Memory regions are designed to abstract these two properties of the pointer. -The physical value of a pointer is represented by MemRegion pointers. The rvalue -type of the region corresponds to the type of the pointee object.

- -

One complication is that we could have different view regions on the same -memory chunk. They represent the same memory location, but have different -abstract location, i.e., MemRegion pointers. Thus we need to canonicalize the -abstract locations to get a unique abstract location for one physical -location.

- -

Furthermore, these different view regions may or may not represent memory -objects of different types. Some different types are semantically the same, -for example, 'struct s' and 'my_type' are the same type.

- -
-struct s;
-typedef struct s my_type;
-
- -

But char and int are not the same type in the code below:

- -
-void *p;
-int *q = (int*) p;
-char *r = (char*) p;
-
- -

Thus we need to canonicalize the MemRegion which is used in binding and -retrieving.

- -

Regions

-

Region is the entity used to model pointer values. A Region has the following -properties:

- -
    -
  • Kind
  • - -
  • ObjectType: the type of the object residing on the region.
  • - -
  • LocationType: the type of the pointer value that the region corresponds to. - Usually this is the pointer to the ObjectType. But sometimes we want to cache - this type explicitly, for example, for a CodeTextRegion.
  • - -
  • StartLocation
  • - -
  • EndLocation
  • -
- -

Symbolic Regions

- -

A symbolic region is a map of the concept of symbolic values into the domain -of regions. It is the way that we represent symbolic pointers. Whenever a -symbolic pointer value is needed, a symbolic region is created to represent -it.

- -

A symbolic region has no type. It wraps a SymbolData. But sometimes we have -type information associated with a symbolic region. For this case, a -TypedViewRegion is created to layer the type information on top of the symbolic -region. The reason we do not carry type information with the symbolic region is -that the symbolic regions can have no type. To be consistent, we don't let them -carry type information.

- -

Like a symbolic pointer, a symbolic region may be NULL, has unknown extent, -and represents a generic chunk of memory.

- -

NOTE: We plan not to use loc::SymbolVal in RegionStore and remove it - gradually.

- -

Symbolic regions get their rvalue types through the following ways:

- -
    -
  • Through the parameter or global variable that points to it, e.g.: -
    -void f(struct s* p) {
    -  ...
    -}
    -
    - -

    The symbolic region pointed to by p has type struct -s.

  • - -
  • Through explicit or implicit casts, e.g.: -
    -void f(void* p) {
    -  struct s* q = (struct s*) p;
    -  ...
    -}
    -
    -
  • -
- -

We attach the type information to the symbolic region lazily. For the first -case above, we create the TypedViewRegion only when the pointer is -actually used to access the pointee memory object, that is when the element or -field region is created. For the cast case, the TypedViewRegion is -created when visiting the CastExpr.

- -

The reason for doing lazy typing is that symbolic regions are sometimes only -used to do location comparison.

- -

Pointer Casts

- -

Pointer casts allow people to impose different 'views' onto a chunk of -memory.

- -

Usually we have two kinds of casts. One kind of cast casts down within the -type hierarchy. It imposes more specific views onto more generic memory regions. -The other kind of cast casts up within the type hierarchy. It strips away more -specific views on top of the more generic memory regions.

- -

We simulate the down casts by layering another TypedViewRegion on -top of the original region. We simulate the up casts by stripping away the top -TypedViewRegion. Down casts are usually simple. For up casts, if -there is no TypedViewRegion to be stripped, we return the original -region. If the underlying region is of a different type than the cast-to type, -we flag an error state.

- -

For toll-free bridging casts, we return the original region.

- -

We can set up a partial order for pointer types, with the most general type -void* at the top. The partial order forms a tree with void* as -its root node.

- -

Every MemRegion has a root position in the type tree. For example, -the pointee region of void *p has its root position at the root node of -the tree. VarRegion of int x has its root position at the 'int -type' node.

- -

TypedViewRegion is used to move the region down or up in the tree. -Moving down in the tree adds a TypedViewRegion. Moving up in the tree -removes a TypedViewRegion.

- -

Do we want to allow moving up beyond the root position? This happens -when:

 int x; void *p = &x; 
- -

The region of x has its root position at the 'int*' node. The cast to -void* moves that region up to the 'void*' node. I propose to not allow such -casts, and assign the region of x for p.

- -

Another non-ideal case is that people might cast to a non-generic pointer -from another non-generic pointer instead of first casting it back to the generic -pointer. Direct handling of this case would result in multiple layers of -TypedViewRegions. This enforces an incorrect semantic view to the region, -because we can only have one typed view on a region at a time. To avoid this -inconsistency, before casting the region, we strip the TypedViewRegion, then do -the cast. In summary, we only allow one layer of TypedViewRegion.

- -

Region Bindings

- -

The following region kinds are boundable: VarRegion, CompoundLiteralRegion, -StringRegion, ElementRegion, FieldRegion, and ObjCIvarRegion.

- -

When binding regions, we perform canonicalization on element regions and field -regions. This is because we can have different views on the same region, some -of which are essentially the same view with different sugar type names.

- -

To canonicalize a region, we get the canonical types for all TypedViewRegions -along the way up to the root region, and make new TypedViewRegions with those -canonical types.

- -

For Objective-C and C++, perhaps another canonicalization rule should be -added: for FieldRegion, the least derived class that has the field is used as -the type of the super region of the FieldRegion.

- -

All bindings and retrievings are done on the canonicalized regions.

- -

Canonicalization is transparent outside the region store manager, and more -specifically, is not visible outside the Bind() and Retrieve() methods. We don't need to -consider region canonicalization when doing pointer casts.

- -

Constraint Manager

- -

The constraint manager reasons about the abstract location of memory objects. -We can have different views on a region, but none of these views changes the -location of that object. Thus we should get the same abstract location for those -regions.

- - - diff --git a/docs/AnalyzerRegions.rst b/docs/AnalyzerRegions.rst new file mode 100644 index 0000000000..80b3882bc9 --- /dev/null +++ b/docs/AnalyzerRegions.rst @@ -0,0 +1,259 @@ +=============================================== +Static Analyzer Design Document: Memory Regions +=============================================== + +Authors: Ted Kremenek, ``kremenek at apple``, +Zhongxing Xu, ``xuzhongzhing at gmail`` + +Introduction +============ + +The path-sensitive analysis engine in libAnalysis employs an extensible +API for abstractly modeling the memory of an analyzed program. This API +employs the concept of "memory regions" to abstractly model chunks of +program memory such as program variables and dynamically allocated +memory such as those returned from 'malloc' and 'alloca'. Regions are +hierarchical, with subregions modeling subtyping relationships, field +and array offsets into larger chunks of memory, and so on. + +The region API consists of two components: + +- A taxonomy and representation of regions themselves within the + analyzer engine. The primary definitions and interfaces are described + in ``MemRegion.h``. At the root of the region hierarchy is the class + ``MemRegion`` with specific subclasses refining the region concept + for variables, heap allocated memory, and so forth. +- The modeling of binding of values to regions. For example, modeling + the value stored to a local variable ``x`` consists of recording the + binding between the region for ``x`` (which represents the raw memory + associated with ``x``) and the value stored to ``x``. This binding + relationship is captured with the notion of "symbolic stores." + +Symbolic stores, which can be thought of as representing the relation +``regions -> values``, are implemented by subclasses of the +``StoreManager`` class (``Store.h``). 
A particular StoreManager +implementation has complete flexibility concerning the following: + +- *How* to model the binding between regions and values +- *What* bindings are recorded + +Together, both points allow different StoreManagers to tradeoff between +different levels of analysis precision and scalability concerning the +reasoning of program memory. Meanwhile, the core path-sensitive engine +makes no assumptions about either points, and queries a StoreManager +about the bindings to a memory region through a generic interface that +all StoreManagers share. If a particular StoreManager cannot reason +about the potential bindings of a given memory region (e.g., +'``BasicStoreManager``' does not reason about fields of structures) then +the StoreManager can simply return 'unknown' (represented by +'``UnknownVal``') for a particular region-binding. This separation of +concerns not only isolates the core analysis engine from the details of +reasoning about program memory but also facilities the option of a +client of the path-sensitive engine to easily swap in different +StoreManager implementations that internally reason about program memory +in very different ways. + +The rest of this document is divided into two parts. We first discuss +region taxonomy and the semantics of regions. We then discuss the +StoreManager interface, and details of how the currently available +StoreManager classes implement region bindings. + +Memory Regions and Region Taxonomy +================================== + +Pointers +-------- + +Before talking about the memory regions, we would talk about the +pointers since memory regions are essentially used to represent pointer +values. + +The pointer is a type of values. Pointer values have two semantic +aspects. One is its physical value, which is an address or location. The +other is the type of the memory object residing in the address. + +Memory regions are designed to abstract these two properties of the +pointer. 
The physical value of a pointer is represented by MemRegion +pointers. The rvalue type of the region corresponds to the type of the +pointee object. + +One complication is that we could have different view regions on the +same memory chunk. They represent the same memory location, but have +different abstract location, i.e., MemRegion pointers. Thus we need to +canonicalize the abstract locations to get a unique abstract location +for one physical location. + +Furthermore, these different view regions may or may not represent +memory objects of different types. Some different types are semantically +the same, for example, 'struct s' and 'my\_type' are the same type. + +:: + + struct s; + typedef struct s my_type; + +But ``char`` and ``int`` are not the same type in the code below: + +:: + + void *p; + int *q = (int*) p; + char *r = (char*) p; + +Thus we need to canonicalize the MemRegion which is used in binding and +retrieving. + +Regions +------- + +Region is the entity used to model pointer values. A Region has the +following properties: + +- Kind +- ObjectType: the type of the object residing on the region. +- LocationType: the type of the pointer value that the region + corresponds to. Usually this is the pointer to the ObjectType. But + sometimes we want to cache this type explicitly, for example, for a + CodeTextRegion. +- StartLocation +- EndLocation + +Symbolic Regions +---------------- + +A symbolic region is a map of the concept of symbolic values into the +domain of regions. It is the way that we represent symbolic pointers. +Whenever a symbolic pointer value is needed, a symbolic region is +created to represent it. + +A symbolic region has no type. It wraps a SymbolData. But sometimes we +have type information associated with a symbolic region. For this case, +a TypedViewRegion is created to layer the type information on top of the +symbolic region. 
The reason we do not carry type information with the +symbolic region is that the symbolic regions can have no type. To be +consistent, we don't let them to carry type information. + +Like a symbolic pointer, a symbolic region may be NULL, has unknown +extent, and represents a generic chunk of memory. + +.. note:: + We plan not to use loc::SymbolVal in RegionStore and remove it + gradually. + +Symbolic regions get their rvalue types through the following ways: + +- Through the parameter or global variable that points to it, e.g.: + + :: + + void f(struct s* p) { + ... + } + + The symbolic region pointed to by ``p`` has type ``struct s``. + +- Through explicit or implicit casts, e.g.: + + :: + + void f(void* p) { + struct s* q = (struct s*) p; + ... + } + +We attach the type information to the symbolic region lazily. For the +first case above, we create the ``TypedViewRegion`` only when the +pointer is actually used to access the pointee memory object, that is +when the element or field region is created. For the cast case, the +``TypedViewRegion`` is created when visiting the ``CastExpr``. + +The reason for doing lazy typing is that symbolic regions are sometimes +only used to do location comparison. + +Pointer Casts +------------- + +Pointer casts allow people to impose different 'views' onto a chunk of +memory. + +Usually we have two kinds of casts. One kind of casts cast down with in +the type hierarchy. It imposes more specific views onto more generic +memory regions. The other kind of casts cast up with in the type +hierarchy. It strips away more specific views on top of the more generic +memory regions. + +We simulate the down casts by layering another ``TypedViewRegion`` on +top of the original region. We simulate the up casts by striping away +the top ``TypedViewRegion``. Down casts is usually simple. For up casts, +if the there is no ``TypedViewRegion`` to be stripped, we return the +original region. 
If the underlying region is of the different type than +the cast-to type, we flag an error state. + +For toll-free bridging casts, we return the original region. + +We can set up a partial order for pointer types, with the most general +type ``void*`` at the top. The partial order forms a tree with ``void*`` +as its root node. + +Every ``MemRegion`` has a root position in the type tree. For example, +the pointee region of ``void *p`` has its root position at the root node +of the tree. ``VarRegion`` of ``int x`` has its root position at the +'int type' node. + +``TypedViewRegion`` is used to move the region down or up in the tree. +Moving down in the tree adds a ``TypedViewRegion``. Moving up in the +tree removes a ``TypedViewRegion``. + +Do we want to allow moving up beyond the root position? This happens +when: + +:: + + int x; void *p = &x; + +The region of ``x`` has its root position at 'int\*' node. the cast to +void\* moves that region up to the 'void\*' node. I propose to not allow +such casts, and assign the region of ``x`` for ``p``. + +Another non-ideal case is that people might cast to a non-generic +pointer from another non-generic pointer instead of first casting it +back to the generic pointer. Direct handling of this case would result +in multiple layers of TypedViewRegions. This enforces an incorrect +semantic view to the region, because we can only have one typed view on +a region at a time. To avoid this inconsistency, before casting the +region, we strip the TypedViewRegion, then do the cast. In summary, we +only allow one layer of TypedViewRegion. + +Region Bindings +--------------- + +The following region kinds are boundable: VarRegion, +CompoundLiteralRegion, StringRegion, ElementRegion, FieldRegion, and +ObjCIvarRegion. + +When binding regions, we perform canonicalization on element regions and +field regions. 
This is because we can have different views on the same +region, some of which are essentially the same view with different sugar +type names. + +To canonicalize a region, we get the canonical types for all +TypedViewRegions along the way up to the root region, and make new +TypedViewRegions with those canonical types. + +For Objective-C and C++, perhaps another canonicalization rule should be +added: for FieldRegion, the least derived class that has the field is +used as the type of the super region of the FieldRegion. + +All bindings and retrievings are done on the canonicalized regions. + +Canonicalization is transparent outside the region store manager, and +more specifically, unaware outside the Bind() and Retrieve() method. We +don't need to consider region canonicalization when doing pointer cast. + +Constraint Manager +------------------ + +The constraint manager reasons about the abstract location of memory +objects. We can have different views on a region, but none of these +views changes the location of that object. Thus we should get the same +abstract location for those regions. diff --git a/docs/ClangPlugins.html b/docs/ClangPlugins.html deleted file mode 100644 index ed560fe840..0000000000 --- a/docs/ClangPlugins.html +++ /dev/null @@ -1,170 +0,0 @@ - - - -Clang Plugins - - - - - - - -
- -

Clang Plugins

-

Clang Plugins make it possible to run extra user defined actions during -a compilation. This document will provide a basic walkthrough of how to write -and run a Clang Plugin.

- - -

Introduction

- - -

Clang Plugins run FrontendActions over code. See the -FrontendAction tutorial on how to write a -FrontendAction using the RecursiveASTVisitor. In this tutorial, we'll -demonstrate how to write a simple clang plugin. -

- - -

Writing a PluginASTAction

- - -

The main difference from writing normal FrontendActions is that you can -handle plugin command line options. The -PluginASTAction base class declares a ParseArgs method which you have to -implement in your plugin. -

-
-  bool ParseArgs(const CompilerInstance &CI,
-                 const std::vector<std::string>& args) {
-    for (unsigned i = 0, e = args.size(); i != e; ++i) {
-      if (args[i] == "-some-arg") {
-        // Handle the command line argument.
-      }
-    }
-    return true;
-  }
-
- - -

Registering a plugin

- - -

A plugin is loaded from a dynamic library at runtime by the compiler. To register -a plugin in a library, use FrontendPluginRegistry::Add:

-
-  static FrontendPluginRegistry::Add<MyPlugin> X("my-plugin-name", "my plugin description");
-
- - -

Putting it all together

- - -

Let's look at an example plugin that prints top-level function names. -This example is also checked into the clang repository; please also take a look -at the latest checked in version of PrintFunctionNames.cpp.

-
-#include "clang/Frontend/FrontendPluginRegistry.h"
-#include "clang/AST/ASTConsumer.h"
-#include "clang/AST/AST.h"
-#include "clang/Frontend/CompilerInstance.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace clang;
-
-namespace {
-
-class PrintFunctionsConsumer : public ASTConsumer {
-public:
-  virtual bool HandleTopLevelDecl(DeclGroupRef DG) {
-    for (DeclGroupRef::iterator i = DG.begin(), e = DG.end(); i != e; ++i) {
-      const Decl *D = *i;
-      if (const NamedDecl *ND = dyn_cast<NamedDecl>(D))
-        llvm::errs() << "top-level-decl: \"" << ND->getNameAsString() << "\"\n";
-    }
-
-    return true;
-  }
-};
-
-class PrintFunctionNamesAction : public PluginASTAction {
-protected:
-  ASTConsumer *CreateASTConsumer(CompilerInstance &CI, llvm::StringRef) {
-    return new PrintFunctionsConsumer();
-  }
-
-  bool ParseArgs(const CompilerInstance &CI,
-                 const std::vector<std::string>& args) {
-    for (unsigned i = 0, e = args.size(); i != e; ++i) {
-      llvm::errs() << "PrintFunctionNames arg = " << args[i] << "\n";
-
-      // Example error handling.
-      if (args[i] == "-an-error") {
-        DiagnosticsEngine &D = CI.getDiagnostics();
-        unsigned DiagID = D.getCustomDiagID(
-          DiagnosticsEngine::Error, "invalid argument '" + args[i] + "'");
-        D.Report(DiagID);
-        return false;
-      }
-    }
-    if (args.size() && args[0] == "help")
-      PrintHelp(llvm::errs());
-
-    return true;
-  }
-  void PrintHelp(llvm::raw_ostream& ros) {
-    ros << "Help for PrintFunctionNames plugin goes here\n";
-  }
-
-};
-
-}
-
-static FrontendPluginRegistry::Add<PrintFunctionNamesAction>
-X("print-fns", "print function names");
-
- - -

Running the plugin

- - -

To run a plugin, the dynamic library containing the plugin registry must be -loaded via the -load command line option. This will load all plugins that are -registered, and you can select the plugins to run by specifying the -plugin -option. Additional parameters for the plugins can be passed with -plugin-arg-<plugin-name>.

- -

Note that those options must reach clang's cc1 process. There are two -ways to do so:

-
    -
  • -Directly call the parsing process by using the -cc1 option; this has the -downside of not configuring the default header search paths, so you'll need to -specify the full system path configuration on the command line. -
  • -
  • -Use clang as usual, but prefix all arguments to the cc1 process with -Xclang. -
  • -
-

For example, to run the print-function-names plugin over a source file in clang, -first build the plugin, and then call clang with the plugin from the source tree:

-
-  $ export BD=/path/to/build/directory
-  $ (cd $BD && make PrintFunctionNames )
-  $ clang++ -D_GNU_SOURCE -D_DEBUG -D__STDC_CONSTANT_MACROS \
-        -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -D_GNU_SOURCE \
-        -I$BD/tools/clang/include -Itools/clang/include -I$BD/include -Iinclude \
-        tools/clang/tools/clang-check/ClangCheck.cpp -fsyntax-only \
-        -Xclang -load -Xclang $BD/lib/PrintFunctionNames.so -Xclang \
-        -plugin -Xclang print-fns
-
- -

Also see the print-function-name plugin example's -README

- - - -
- - - diff --git a/docs/ClangPlugins.rst b/docs/ClangPlugins.rst new file mode 100644 index 0000000000..cf53817229 --- /dev/null +++ b/docs/ClangPlugins.rst @@ -0,0 +1,149 @@ +============= +Clang Plugins +============= + +Clang Plugins make it possible to run extra user defined actions during +a compilation. This document will provide a basic walkthrough of how to +write and run a Clang Plugin. + +Introduction +============ + +Clang Plugins run FrontendActions over code. See the :doc:`FrontendAction +tutorial ` on how to write a FrontendAction +using the RecursiveASTVisitor. In this tutorial, we'll demonstrate how +to write a simple clang plugin. + +Writing a PluginASTAction +========================= + +The main difference from writing normal FrontendActions is that you can +handle plugin command line options. The PluginASTAction base class +declares a ParseArgs method which you have to implement in your plugin. + +:: + + bool ParseArgs(const CompilerInstance &CI, + const std::vector& args) { + for (unsigned i = 0, e = args.size(); i != e; ++i) { + if (args[i] == "-some-arg") { + // Handle the command line argument. + } + } + return true; + } + +Registering a plugin +==================== + +A plugin is loaded from a dynamic library at runtime by the compiler. To +register a plugin in a library, use FrontendPluginRegistry::Add: + +:: + + static FrontendPluginRegistry::Add X("my-plugin-name", "my plugin description"); + +Putting it all together +======================= + +Let's look at an example plugin that prints top-level function names. +This example is also checked into the clang repository; please also take +a look at the latest `checked in version of +PrintFunctionNames.cpp `_. 
+ +:: + + #include "clang/Frontend/FrontendPluginRegistry.h" + #include "clang/AST/ASTConsumer.h" + #include "clang/AST/AST.h" + #include "clang/Frontend/CompilerInstance.h" + #include "llvm/Support/raw_ostream.h" + using namespace clang; + + namespace { + + class PrintFunctionsConsumer : public ASTConsumer { + public: + virtual bool HandleTopLevelDecl(DeclGroupRef DG) { + for (DeclGroupRef::iterator i = DG.begin(), e = DG.end(); i != e; ++i) { + const Decl *D = *i; + if (const NamedDecl *ND = dyn_cast(D)) + llvm::errs() << "top-level-decl: \"" << ND->getNameAsString() << "\"\n"; + } + + return true; + } + }; + + class PrintFunctionNamesAction : public PluginASTAction { + protected: + ASTConsumer *CreateASTConsumer(CompilerInstance &CI, llvm::StringRef) { + return new PrintFunctionsConsumer(); + } + + bool ParseArgs(const CompilerInstance &CI, + const std::vector& args) { + for (unsigned i = 0, e = args.size(); i != e; ++i) { + llvm::errs() << "PrintFunctionNames arg = " << args[i] << "\n"; + + // Example error handling. + if (args[i] == "-an-error") { + DiagnosticsEngine &D = CI.getDiagnostics(); + unsigned DiagID = D.getCustomDiagID( + DiagnosticsEngine::Error, "invalid argument '" + args[i] + "'"); + D.Report(DiagID); + return false; + } + } + if (args.size() && args[0] == "help") + PrintHelp(llvm::errs()); + + return true; + } + void PrintHelp(llvm::raw_ostream& ros) { + ros << "Help for PrintFunctionNames plugin goes here\n"; + } + + }; + + } + + static FrontendPluginRegistry::Add + X("print-fns", "print function names"); + +Running the plugin +================== + +To run a plugin, the dynamic library containing the plugin registry must +be loaded via the -load command line option. This will load all plugins +that are registered, and you can select the plugins to run by specifying +the -plugin option. Additional parameters for the plugins can be passed +with -plugin-arg-. + +Note that those options must reach clang's cc1 process. 
There are two +ways to do so: + +- Directly call the parsing process by using the -cc1 option; this has + the downside of not configuring the default header search paths, so + you'll need to specify the full system path configuration on the + command line. +- Use clang as usual, but prefix all arguments to the cc1 process with + -Xclang. + +For example, to run the print-function-names plugin over a source file +in clang, first build the plugin, and then call clang with the plugin +from the source tree: + +:: + + $ export BD=/path/to/build/directory + $ (cd $BD && make PrintFunctionNames ) + $ clang++ -D_GNU_SOURCE -D_DEBUG -D__STDC_CONSTANT_MACROS \ + -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -D_GNU_SOURCE \ + -I$BD/tools/clang/include -Itools/clang/include -I$BD/include -Iinclude \ + tools/clang/tools/clang-check/ClangCheck.cpp -fsyntax-only \ + -Xclang -load -Xclang $BD/lib/PrintFunctionNames.so -Xclang \ + -plugin -Xclang print-fns + +Also see the print-function-name plugin example's +`README `_ diff --git a/docs/ClangTools.html b/docs/ClangTools.html deleted file mode 100644 index 4de57bd218..0000000000 --- a/docs/ClangTools.html +++ /dev/null @@ -1,110 +0,0 @@ - - - -Clang Tools - - - - - - - -
- -

Clang Tools

-

Clang Tools are standalone command line (and potentially GUI) tools design -for use by C++ developers who are already using and enjoying Clang as their -compiler. These tools provide developer-oriented functionality such as fast -syntax checking, automatic formatting, refactoring, etc.

- -

Only a couple of the most basic and fundamental tools are kept in the primary -Clang Subversion project. The rest of the tools are kept in a side-project so -that developers who don't want or need to build them don't. If you want to get -access to the extra Clang Tools repository, simply check it out into the tools -tree of your Clang checkout and follow the usual process for building and -working with a combined LLVM/Clang checkout:

-
    -
  • With Subversion: -
      -
    • cd llvm/tools/clang/tools
    • -
    • svn co http://llvm.org/svn/llvm-project/clang-tools-extra/trunk - extra
    • -
    -
  • -
  • Or with Git: -
      -
    • cd llvm/tools/clang/tools
    • -
    • git clone http://llvm.org/git/clang-tools-extra.git extra
    • -
    -
  • -
- -

This document describes a high-level overview of the organization of Clang -Tools within the project as well as giving an introduction to some of the more -important tools. However, it should be noted that this document is currently -focused on Clang and Clang Tool developers, not on end users of these tools.

- - -

Clang Tools Organization

- - -

Clang Tools are CLI or GUI programs that are intended to be directly used by -C++ developers. That is they are not primarily for use by Clang -developers, although they are hopefully useful to C++ developers who happen to -work on Clang, and we try to actively dogfood their functionality. They are -developed in three components: the underlying infrastructure for building -a standalone tool based on Clang, core shared logic used by many different tools -in the form of refactoring and rewriting libraries, and the tools -themselves.

- -

The underlying infrastructure for Clang Tools is the -LibTooling platform. See its documentation for -much more detailed information about how this infrastructure works. The common -refactoring and rewriting toolkit-style library is also part of LibTooling -organizationally.

- -

A few Clang Tools are developed along side the core Clang libraries as -examples and test cases of fundamental functionality. However, most of the tools -are developed in a side repository to provide easy separation from the core -libraries. We intentionally do not support public libraries in the side -repository, as we want to carefully review and find good APIs for libraries as -they are lifted out of a few tools and into the core Clang library set.

- -

Regardless of which repository Clang Tools' code resides in, the development -process and practices for all Clang Tools are exactly those of Clang itself. -They are entirely within the Clang project, regardless of the version -control scheme.

- - - -

Core Clang Tools

- - -

The core set of Clang tools that are within the main repository are tools -that very specifically compliment, and allow use and testing of Clang -specific functionality.

- -

clang-check

-

This tool combines the LibTooling framework for running a Clang tool with the -basic Clang diagnostics by syntax checking specific files in a fast, command -line interface. It can also accept flags to re-display the diagnostics in -different formats with different flags, suitable for use driving an IDE or -editor. Furthermore, it can be used in fixit-mode to directly apply fixit-hints -offered by clang.

- -

FIXME: Link to user-oriented clang-check documentation.

- - -

Extra Clang Tools

- - -

As various categories of Clang Tools are added to the extra repository, -they'll be tracked here. The focus of this documentation is on the scope and -features of the tools for other tool developers; each tool should provide its -own user-focused documentation.

- -
- - - diff --git a/docs/ClangTools.rst b/docs/ClangTools.rst new file mode 100644 index 0000000000..f3bdb0742f --- /dev/null +++ b/docs/ClangTools.rst @@ -0,0 +1,91 @@ +=========== +Clang Tools +=========== + +Clang Tools are standalone command line (and potentially GUI) tools +designed for use by C++ developers who are already using and enjoying +Clang as their compiler. These tools provide developer-oriented +functionality such as fast syntax checking, automatic formatting, +refactoring, etc. + +Only a couple of the most basic and fundamental tools are kept in the +primary Clang Subversion project. The rest of the tools are kept in a +side-project so that developers who don't want or need to build them +don't. If you want to get access to the extra Clang Tools repository, +simply check it out into the tools tree of your Clang checkout and +follow the usual process for building and working with a combined +LLVM/Clang checkout: + +- With Subversion: + + - ``cd llvm/tools/clang/tools`` + - ``svn co http://llvm.org/svn/llvm-project/clang-tools-extra/trunk extra`` + +- Or with Git: + + - ``cd llvm/tools/clang/tools`` + - ``git clone http://llvm.org/git/clang-tools-extra.git extra`` + +This document describes a high-level overview of the organization of +Clang Tools within the project as well as giving an introduction to some +of the more important tools. However, it should be noted that this +document is currently focused on Clang and Clang Tool developers, not on +end users of these tools. + +Clang Tools Organization +======================== + +Clang Tools are CLI or GUI programs that are intended to be directly +used by C++ developers. That is, they are *not* primarily for use by +Clang developers, although they are hopefully useful to C++ developers +who happen to work on Clang, and we try to actively dogfood their +functionality.
They are developed in three components: the underlying +infrastructure for building a standalone tool based on Clang, core +shared logic used by many different tools in the form of refactoring and +rewriting libraries, and the tools themselves. + +The underlying infrastructure for Clang Tools is the +:doc:`LibTooling <LibTooling>` platform. See its documentation for much +more detailed information about how this infrastructure works. The +common refactoring and rewriting toolkit-style library is also part of +LibTooling organizationally. + +A few Clang Tools are developed alongside the core Clang libraries as +examples and test cases of fundamental functionality. However, most of +the tools are developed in a side repository to provide easy separation +from the core libraries. We intentionally do not support public +libraries in the side repository, as we want to carefully review and +find good APIs for libraries as they are lifted out of a few tools and +into the core Clang library set. + +Regardless of which repository Clang Tools' code resides in, the +development process and practices for all Clang Tools are exactly those +of Clang itself. They are entirely within the Clang *project*, +regardless of the version control scheme. + +Core Clang Tools +================ + +The core set of Clang tools that are within the main repository are +tools that very specifically complement, and allow use and testing of +*Clang* specific functionality. + +``clang-check`` +~~~~~~~~~~~~~~~ + +This tool combines the LibTooling framework for running a Clang tool +with the basic Clang diagnostics by syntax checking specific files in a +fast, command line interface. It can also accept flags to re-display the +diagnostics in different formats with different flags, suitable for use +driving an IDE or editor. Furthermore, it can be used in fixit-mode to +directly apply fixit-hints offered by clang. + +FIXME: Link to user-oriented clang-check documentation.
+ +Extra Clang Tools +================= + +As various categories of Clang Tools are added to the extra repository, +they'll be tracked here. The focus of this documentation is on the scope +and features of the tools for other tool developers; each tool should +provide its own user-focused documentation. diff --git a/docs/HowToSetupToolingForLLVM.html b/docs/HowToSetupToolingForLLVM.html deleted file mode 100644 index 022ed9ce9c..0000000000 --- a/docs/HowToSetupToolingForLLVM.html +++ /dev/null @@ -1,212 +0,0 @@ - - - -How To Setup Clang Tooling For LLVM - - - - - - - -
- -

How To Setup Clang Tooling For LLVM

-

Clang Tooling provides infrastructure to write tools that need syntactic and -semantic infomation about a program. This term also relates to a set of specific -tools using this infrastructure (e.g. clang-check). This document -provides information on how to set up and use Clang Tooling for the LLVM source -code.

- - - -

Introduction

- - -

Clang Tooling needs a compilation database to figure out specific build -options for each file. Currently it can create a compilation database from the -compilation_commands.json file, generated by CMake. When invoking -clang tools, you can either specify a path to a build directory using a command -line parameter -p or let Clang Tooling find this file in your -source tree. In either case you need to configure your build using CMake to use -clang tools.

- - -

Setup Clang Tooling Using CMake and Make

- - -

If you intend to use make to build LLVM, you should have CMake 2.8.6 or later -installed (can be found here).

-

First, you need to generate Makefiles for LLVM with CMake. You need to make -a build directory and run CMake from it:

-
-  mkdir your/build/directory
-  cd your/build/directory
-  cmake -DCMAKE_EXPORT_COMPILE_COMMANDS=ON path/to/llvm/sources
-
- -

If you want to use clang instead of GCC, you can add --DCMAKE_C_COMPILER=/path/to/clang - -DCMAKE_CXX_COMPILER=/path/to/clang++. -You can also use ccmake, which provides a curses interface to configure CMake -variables for lazy people.

- -

As a result, the new compile_commands.json file should appear in -the current directory. You should link it to the LLVM source tree so that Clang -Tooling is able to use it:

-
-  ln -s $PWD/compile_commands.json path/to/llvm/source/
-
- -

Now you are ready to build and test LLVM using make:

-
-  make check-all
-
- - -

Using Clang Tools

- - -

After you completed the previous steps, you are ready to run clang tools. If -you have a recent clang installed, you should have clang-check in -$PATH. Try to run it on any .cpp file inside the LLVM source tree:

-
-  clang-check tools/clang/lib/Tooling/CompilationDatabase.cpp
-
-

If you're using vim, it's convenient to have clang-check integrated. Put this -into your .vimrc:

-
-function! ClangCheckImpl(cmd)
-  if &autowrite | wall | endif
-  echo "Running " . a:cmd . " ..."
-  let l:output = system(a:cmd)
-  cexpr l:output
-  cwindow
-  let w:quickfix_title = a:cmd
-  if v:shell_error != 0
-    cc
-  endif
-  let g:clang_check_last_cmd = a:cmd
-endfunction
-
-function! ClangCheck()
-  let l:filename = expand('%')
-  if l:filename =~ '\.\(cpp\|cxx\|cc\|c\)$'
-    call ClangCheckImpl("clang-check " . l:filename)
-  elseif exists("g:clang_check_last_cmd")
-    call ClangCheckImpl(g:clang_check_last_cmd)
-  else
-    echo "Can't detect file's compilation arguments and no previous clang-check invocation!"
-  endif
-endfunction
-
-nmap <silent> <F5> :call ClangCheck()<CR><CR>
-
- -

When editing a .cpp/.cxx/.cc/.c file, hit F5 to reparse the file. In case -the current file has a different extension (for example, .h), F5 will re-run -the last clang-check invocation made from this vim instance (if any). The -output will go into the error window, which is opened automatically when -clang-check finds errors, and can be re-opened with :cope.

- -

Other clang-check options that can be useful when working with -clang AST:

-
    -
  • -ast-print - Build ASTs and then pretty-print them.
  • -
  • -ast-dump - Build ASTs and then debug dump them.
  • -
  • -ast-dump-filter=<string> - Use with - -ast-dump or -ast-print to dump/print - only AST declaration nodes having a certain substring in a qualified name. - Use -ast-list to list all filterable declaration node - names.
  • -
  • -ast-list - Build ASTs and print the list of declaration - node qualified names.
  • -
-

Examples:

-
-$ clang-check tools/clang/tools/clang-check/ClangCheck.cpp -ast-dump -ast-dump-filter ActionFactory::newASTConsumer
-Processing: tools/clang/tools/clang-check/ClangCheck.cpp.
-Dumping ::ActionFactory::newASTConsumer:
-clang::ASTConsumer *newASTConsumer() (CompoundStmt 0x44da290 </home/alexfh/local/llvm/tools/clang/tools/clang-check/ClangCheck.cpp:64:40, line:72:3>
-  (IfStmt 0x44d97c8 <line:65:5, line:66:45>
-    <<<NULL>>>
-      (ImplicitCastExpr 0x44d96d0 <line:65:9> '_Bool':'_Bool' <UserDefinedConversion>
-...
-$ clang-check tools/clang/tools/clang-check/ClangCheck.cpp -ast-print -ast-dump-filter ActionFactory::newASTConsumer
-Processing: tools/clang/tools/clang-check/ClangCheck.cpp.
-Printing <anonymous namespace>::ActionFactory::newASTConsumer:
-clang::ASTConsumer *newASTConsumer() {
-    if (this->ASTList.operator _Bool())
-        return clang::CreateASTDeclNodeLister();
-    if (this->ASTDump.operator _Bool())
-        return clang::CreateASTDumper(this->ASTDumpFilter);
-    if (this->ASTPrint.operator _Bool())
-        return clang::CreateASTPrinter(&llvm::outs(), this->ASTDumpFilter);
-    return new clang::ASTConsumer();
-}
-
- - -

(Experimental) Using Ninja Build System

- - -

Optionally you can use the Ninja build system instead of -make. It is aimed at making your builds faster. Currently this step will require -building Ninja from sources and using a development version of CMake.

-

To take advantage of using Clang Tools along with Ninja build you need at -least CMake 2.8.9. At the moment CMake 2.8.9 is still under development, so you -can get latest development sources and build it yourself:

-
-  git clone git://cmake.org/cmake.git
-  cd cmake
-  ./bootstrap
-  make
-  sudo make install
-
- -

Having the correct version of CMake, you can clone the Ninja git repository -and build Ninja from sources:

-
-  git clone git://github.com/martine/ninja.git
-  cd ninja/
-  ./bootstrap.py
-
-

This will result in a single binary ninja in the current -directory. It doesn't require installation and can just be copied to any -location inside $PATH, say /usr/local/bin/:

-
-  sudo cp ninja /usr/local/bin/
-  sudo chmod a+rx /usr/local/bin/ninja
-
-

After doing all of this, you'll need to generate Ninja build files for LLVM -with CMake. You need to make a build directory and run CMake from it:

-
-  mkdir your/build/directory
-  cd your/build/directory
-  cmake -G Ninja -DCMAKE_EXPORT_COMPILE_COMMANDS=ON path/to/llvm/sources
-
- -

If you want to use clang instead of GCC, you can add --DCMAKE_C_COMPILER=/path/to/clang - -DCMAKE_CXX_COMPILER=/path/to/clang++. -You can also use ccmake, which provides a curses interface to configure CMake -variables in an interactive manner.

- -

As a result, the new compile_commands.json file should appear in -the current directory. You should link it to the LLVM source tree so that Clang -Tooling is able to use it:

-
-  ln -s $PWD/compile_commands.json path/to/llvm/source/
-
- -

Now you are ready to build and test LLVM using Ninja:

-
-  ninja check-all
-
-

Other target names can be used in the same way as with make.

-
- - - diff --git a/docs/HowToSetupToolingForLLVM.rst b/docs/HowToSetupToolingForLLVM.rst new file mode 100644 index 0000000000..0c4cccafca --- /dev/null +++ b/docs/HowToSetupToolingForLLVM.rst @@ -0,0 +1,211 @@ +=================================== +How To Setup Clang Tooling For LLVM +=================================== + +Clang Tooling provides infrastructure to write tools that need syntactic +and semantic information about a program. This term also relates to a set +of specific tools using this infrastructure (e.g. ``clang-check``). This +document provides information on how to set up and use Clang Tooling for +the LLVM source code. + +Introduction +============ + +Clang Tooling needs a compilation database to figure out specific build +options for each file. Currently it can create a compilation database +from the ``compile_commands.json`` file, generated by CMake. When +invoking clang tools, you can either specify a path to a build directory +using a command line parameter ``-p`` or let Clang Tooling find this +file in your source tree. In either case you need to configure your +build using CMake to use clang tools. + +Setup Clang Tooling Using CMake and Make +======================================== + +If you intend to use make to build LLVM, you should have CMake 2.8.6 or +later installed (can be found `here `_). + +First, you need to generate Makefiles for LLVM with CMake. You need to +make a build directory and run CMake from it: + +:: + + mkdir your/build/directory + cd your/build/directory + cmake -DCMAKE_EXPORT_COMPILE_COMMANDS=ON path/to/llvm/sources + +If you want to use clang instead of GCC, you can add +``-DCMAKE_C_COMPILER=/path/to/clang -DCMAKE_CXX_COMPILER=/path/to/clang++``. +You can also use ccmake, which provides a curses interface to configure +CMake variables for lazy people. + +As a result, the new ``compile_commands.json`` file should appear in the +current directory.
You should link it to the LLVM source tree so that +Clang Tooling is able to use it: + +:: + + ln -s $PWD/compile_commands.json path/to/llvm/source/ + +Now you are ready to build and test LLVM using make: + +:: + + make check-all + +Using Clang Tools +================= + +After you completed the previous steps, you are ready to run clang +tools. If you have a recent clang installed, you should have +``clang-check`` in $PATH. Try to run it on any .cpp file inside the LLVM +source tree: + +:: + + clang-check tools/clang/lib/Tooling/CompilationDatabase.cpp + +If you're using vim, it's convenient to have clang-check integrated. Put +this into your .vimrc: + +:: + + function! ClangCheckImpl(cmd) + if &autowrite | wall | endif + echo "Running " . a:cmd . " ..." + let l:output = system(a:cmd) + cexpr l:output + cwindow + let w:quickfix_title = a:cmd + if v:shell_error != 0 + cc + endif + let g:clang_check_last_cmd = a:cmd + endfunction + + function! ClangCheck() + let l:filename = expand('%') + if l:filename =~ '\.\(cpp\|cxx\|cc\|c\)$' + call ClangCheckImpl("clang-check " . l:filename) + elseif exists("g:clang_check_last_cmd") + call ClangCheckImpl(g:clang_check_last_cmd) + else + echo "Can't detect file's compilation arguments and no previous clang-check invocation!" + endif + endfunction + + nmap :call ClangCheck() + +When editing a .cpp/.cxx/.cc/.c file, hit F5 to reparse the file. In +case the current file has a different extension (for example, .h), F5 +will re-run the last clang-check invocation made from this vim instance +(if any). The output will go into the error window, which is opened +automatically when clang-check finds errors, and can be re-opened with +``:cope``. + +Other ``clang-check`` options that can be useful when working with clang +AST: + +- ``-ast-print`` - Build ASTs and then pretty-print them. +- ``-ast-dump`` - Build ASTs and then debug dump them. 
+- ``-ast-dump-filter=`` - Use with ``-ast-dump`` or + ``-ast-print`` to dump/print only AST declaration nodes having a + certain substring in a qualified name. Use ``-ast-list`` to list all + filterable declaration node names. +- ``-ast-list`` - Build ASTs and print the list of declaration node + qualified names. + +Examples: + +:: + + $ clang-check tools/clang/tools/clang-check/ClangCheck.cpp -ast-dump -ast-dump-filter ActionFactory::newASTConsumer + Processing: tools/clang/tools/clang-check/ClangCheck.cpp. + Dumping ::ActionFactory::newASTConsumer: + clang::ASTConsumer *newASTConsumer() (CompoundStmt 0x44da290 + (IfStmt 0x44d97c8 + <<>> + (ImplicitCastExpr 0x44d96d0 '_Bool':'_Bool' + ... + $ clang-check tools/clang/tools/clang-check/ClangCheck.cpp -ast-print -ast-dump-filter ActionFactory::newASTConsumer + Processing: tools/clang/tools/clang-check/ClangCheck.cpp. + Printing ::ActionFactory::newASTConsumer: + clang::ASTConsumer *newASTConsumer() { + if (this->ASTList.operator _Bool()) + return clang::CreateASTDeclNodeLister(); + if (this->ASTDump.operator _Bool()) + return clang::CreateASTDumper(this->ASTDumpFilter); + if (this->ASTPrint.operator _Bool()) + return clang::CreateASTPrinter(&llvm::outs(), this->ASTDumpFilter); + return new clang::ASTConsumer(); + } + +(Experimental) Using Ninja Build System +======================================= + +Optionally you can use the `Ninja `_ +build system instead of make. It is aimed at making your builds faster. +Currently this step will require building Ninja from sources and using a +development version of CMake. + +To take advantage of using Clang Tools along with Ninja build you need +at least CMake 2.8.9. 
At the moment CMake 2.8.9 is still under +development, so you can get latest development sources and build it +yourself: + +:: + + git clone git://cmake.org/cmake.git + cd cmake + ./bootstrap + make + sudo make install + +Having the correct version of CMake, you can clone the Ninja git +repository and build Ninja from sources: + +:: + + git clone git://github.com/martine/ninja.git + cd ninja/ + ./bootstrap.py + +This will result in a single binary ``ninja`` in the current directory. +It doesn't require installation and can just be copied to any location +inside ``$PATH``, say ``/usr/local/bin/``: + +:: + + sudo cp ninja /usr/local/bin/ + sudo chmod a+rx /usr/local/bin/ninja + +After doing all of this, you'll need to generate Ninja build files for +LLVM with CMake. You need to make a build directory and run CMake from +it: + +:: + + mkdir your/build/directory + cd your/build/directory + cmake -G Ninja -DCMAKE_EXPORT_COMPILE_COMMANDS=ON path/to/llvm/sources + +If you want to use clang instead of GCC, you can add +``-DCMAKE_C_COMPILER=/path/to/clang -DCMAKE_CXX_COMPILER=/path/to/clang++``. +You can also use ccmake, which provides a curses interface to configure +CMake variables in an interactive manner. + +As a result, the new ``compile_commands.json`` file should appear in the +current directory. You should link it to the LLVM source tree so that +Clang Tooling is able to use it: + +:: + + ln -s $PWD/compile_commands.json path/to/llvm/source/ + +Now you are ready to build and test LLVM using Ninja: + +:: + + ninja check-all + +Other target names can be used in the same way as with make. diff --git a/docs/IntroductionToTheClangAST.html b/docs/IntroductionToTheClangAST.html deleted file mode 100644 index 28175dd81b..0000000000 --- a/docs/IntroductionToTheClangAST.html +++ /dev/null @@ -1,139 +0,0 @@ - - - -Introduction to the Clang AST - - - - - - - -
- -

Introduction to the Clang AST

-

This document gives a gentle introduction to the mysteries of the Clang AST. -It is targeted at developers who either want to contribute to Clang, or use -tools that work based on Clang's AST, like the AST matchers.

- - - -

Introduction

- - -

Clang's AST is different from ASTs produced by some other compilers in that it closely -resembles both the written C++ code and the C++ standard. For example, -parenthesis expressions and compile time constants are available in an unreduced -form in the AST. This makes Clang's AST a good fit for refactoring tools.

- -

Documentation for all Clang AST nodes is available via the generated -Doxygen. The doxygen online -documentation is also indexed by your favorite search engine, which will make -a search for clang and the AST node's class name usually turn up the doxygen -of the class you're looking for (for example, search for: clang ParenExpr).

- - -

Examining the AST

- - -

A good way to familiarize yourself with the Clang AST is to actually look -at it on some simple example code. Clang has builtin AST-dump modes, which -can be enabled with the flags -ast-dump and -ast-dump-xml. Note that -ast-dump-xml -currently only works with debug-builds of clang.

- -

Let's look at a simple example AST:

-
-# cat test.cc
-int f(int x) {
-  int result = (x / 42);
-  return result;
-}
-
-# Clang by default is a frontend for many tools; -cc1 tells it to directly
-# use the C++ compiler mode. -undef leaves out some internal declarations.
-$ clang -cc1 -undef -ast-dump-xml test.cc
-... cutting out internal declarations of clang ...
-<TranslationUnit ptr="0x4871160">
- <Function ptr="0x48a5800" name="f" prototype="true">
-  <FunctionProtoType ptr="0x4871de0" canonical="0x4871de0">
-   <BuiltinType ptr="0x4871250" canonical="0x4871250"/>
-   <parameters>
-    <BuiltinType ptr="0x4871250" canonical="0x4871250"/>
-   </parameters>
-  </FunctionProtoType>
-  <ParmVar ptr="0x4871d80" name="x" initstyle="c">
-   <BuiltinType ptr="0x4871250" canonical="0x4871250"/>
-  </ParmVar>
-  <Stmt>
-(CompoundStmt 0x48a5a38 <t2.cc:1:14, line:4:1>
-  (DeclStmt 0x48a59c0 <line:2:3, col:24>
-    0x48a58c0 "int result =
-      (ParenExpr 0x48a59a0 <col:16, col:23> 'int'
-        (BinaryOperator 0x48a5978 <col:17, col:21> 'int' '/'
-          (ImplicitCastExpr 0x48a5960 <col:17> 'int' <LValueToRValue>
-            (DeclRefExpr 0x48a5918 <col:17> 'int' lvalue ParmVar 0x4871d80 'x' 'int'))
-          (IntegerLiteral 0x48a5940 <col:21> 'int' 42)))")
-  (ReturnStmt 0x48a5a18 <line:3:3, col:10>
-    (ImplicitCastExpr 0x48a5a00 <col:10> 'int' <LValueToRValue>
-      (DeclRefExpr 0x48a59d8 <col:10> 'int' lvalue Var 0x48a58c0 'result' 'int'))))
-
-  </Stmt>
- </Function>
-</TranslationUnit>
-
-

In general, -ast-dump-xml dumps declarations in an XML-style format and -statements in an S-expression-style format. -The toplevel declaration in a translation unit is always the -translation unit declaration. -In this example, our first user written declaration is the -function declaration -of 'f'. The body of 'f' is a compound statement, -whose child nodes are a declaration statement -that declares our result variable, and the -return statement.

- - -

AST Context

- - -

All information about the AST for a translation unit is bundled up in the class -ASTContext. -It allows traversal of the whole translation unit starting from -getTranslationUnitDecl, -or to access Clang's table of identifiers -for the parsed translation unit.

- - -

AST Nodes

- - -

Clang's AST nodes are modeled on a class hierarchy that does not have a common -ancestor. Instead, there are multiple larger hierarchies for basic node types like -Decl and -Stmt. Many -important AST nodes derive from Type, -Decl, -DeclContext or -Stmt, -with some classes deriving from both Decl and DeclContext.

-

There are also a multitude of nodes in the AST that are not part of a -larger hierarchy, and are only reachable from specific other nodes, -like CXXBaseSpecifier. -

- -

Thus, to traverse the full AST, one starts from the TranslationUnitDecl -and then recursively traverses everything that can be reached from that node -- this information has to be encoded for each specific node type. This algorithm -is encoded in the RecursiveASTVisitor. -See the RecursiveASTVisitor tutorial.

- -

The two most basic nodes in the Clang AST are statements (Stmt) -and declarations (Decl). -Note that expressions (Expr) -are also statements in Clang's AST.

- -
- - - diff --git a/docs/IntroductionToTheClangAST.rst b/docs/IntroductionToTheClangAST.rst new file mode 100644 index 0000000000..a23fb42b71 --- /dev/null +++ b/docs/IntroductionToTheClangAST.rst @@ -0,0 +1,135 @@ +============================= +Introduction to the Clang AST +============================= + +This document gives a gentle introduction to the mysteries of the Clang +AST. It is targeted at developers who either want to contribute to +Clang, or use tools that work based on Clang's AST, like the AST +matchers. + +Introduction +============ + +Clang's AST is different from ASTs produced by some other compilers in +that it closely resembles both the written C++ code and the C++ +standard. For example, parenthesis expressions and compile time +constants are available in an unreduced form in the AST. This makes +Clang's AST a good fit for refactoring tools. + +Documentation for all Clang AST nodes is available via the generated +`Doxygen `_. The doxygen online +documentation is also indexed by your favorite search engine, which will +make a search for clang and the AST node's class name usually turn up +the doxygen of the class you're looking for (for example, search for: +clang ParenExpr). + +Examining the AST +================= + +A good way to familiarize yourself with the Clang AST is to actually look +at it on some simple example code. Clang has builtin AST-dump modes, +which can be enabled with the flags -ast-dump and -ast-dump-xml. Note +that -ast-dump-xml currently only works with debug-builds of clang. + +Let's look at a simple example AST: + +:: + + # cat test.cc + int f(int x) { + int result = (x / 42); + return result; + } + + # Clang by default is a frontend for many tools; -cc1 tells it to directly + # use the C++ compiler mode. -undef leaves out some internal declarations. + $ clang -cc1 -undef -ast-dump-xml test.cc + ... cutting out internal declarations of clang ...
+ + + + + + + + + + + + + (CompoundStmt 0x48a5a38 + (DeclStmt 0x48a59c0 + 0x48a58c0 "int result = + (ParenExpr 0x48a59a0 'int' + (BinaryOperator 0x48a5978 'int' '/' + (ImplicitCastExpr 0x48a5960 'int' + (DeclRefExpr 0x48a5918 'int' lvalue ParmVar 0x4871d80 'x' 'int')) + (IntegerLiteral 0x48a5940 'int' 42)))") + (ReturnStmt 0x48a5a18 + (ImplicitCastExpr 0x48a5a00 'int' + (DeclRefExpr 0x48a59d8 'int' lvalue Var 0x48a58c0 'result' 'int')))) + + + + + +In general, -ast-dump-xml dumps declarations in an XML-style format and +statements in an S-expression-style format. The toplevel declaration in +a translation unit is always the `translation unit +declaration `_. +In this example, our first user written declaration is the `function +declaration `_ +of 'f'. The body of 'f' is a `compound +statement `_, +whose child nodes are a `declaration +statement `_ +that declares our result variable, and the `return +statement `_. + +AST Context +=========== + +All information about the AST for a translation unit is bundled up in +the class +`ASTContext `_. +It allows traversal of the whole translation unit starting from +`getTranslationUnitDecl `_, +or to access Clang's `table of +identifiers `_ +for the parsed translation unit. + +AST Nodes +========= + +Clang's AST nodes are modeled on a class hierarchy that does not have a +common ancestor. Instead, there are multiple larger hierarchies for +basic node types like +`Decl `_ and +`Stmt `_. Many +important AST nodes derive from +`Type `_, +`Decl `_, +`DeclContext `_ +or `Stmt `_, with +some classes deriving from both Decl and DeclContext. + +There are also a multitude of nodes in the AST that are not part of a +larger hierarchy, and are only reachable from specific other nodes, like +`CXXBaseSpecifier `_. + +Thus, to traverse the full AST, one starts from the +`TranslationUnitDecl `_ +and then recursively traverses everything that can be reached from that +node - this information has to be encoded for each specific node type. 
+This algorithm is encoded in the +`RecursiveASTVisitor `_. +See the `RecursiveASTVisitor +tutorial `_. + +The two most basic nodes in the Clang AST are statements +(`Stmt `_) and +declarations +(`Decl `_). Note +that expressions +(`Expr `_) are +also statements in Clang's AST. diff --git a/docs/JSONCompilationDatabase.html b/docs/JSONCompilationDatabase.html deleted file mode 100644 index 2907194729..0000000000 --- a/docs/JSONCompilationDatabase.html +++ /dev/null @@ -1,89 +0,0 @@ - - - -JSON Compilation Database Format Specification - - - - - - - -
- -

JSON Compilation Database Format Specification

-

This document describes a format for specifying how to replay -single compilations independently of the build system.

- -

Background

-

Tools based on the C++ Abstract Syntax Tree need full information how to -parse a translation unit. Usually this information is implicitly -available in the build system, but running tools as part of -the build system is not necessarily the best solution: -

    -
  • Build systems are inherently change driven, so running multiple -tools over the same code base without changing the code does not fit -into the architecture of many build systems.
  • -
  • Figuring out whether things have changed is often an IO bound -process; this makes it hard to build low latency end user tools based -on the build system.
  • -
  • Build systems are inherently sequential in the build graph, for example -due to generated source code. While tools that run independently of the -build still need the generated source code to exist, running tools multiple -times over unchanging source does not require serialization of the runs -according to the build dependency graph.
  • -
-

- -

Supported Systems

-

Currently CMake (since 2.8.5) supports generation of compilation -databases for Unix Makefile builds (Ninja builds in the works) with the option -CMAKE_EXPORT_COMPILE_COMMANDS.

-

Clang's tooling interface supports reading compilation databases; see -the LibTooling documentation. libclang and its -python bindings also support this (since clang 3.2); see -CXCompilationDatabase.h.

- -

Format

-

A compilation database is a JSON file, which consists of an array of -"command objects", where each command object specifies one way a translation unit -is compiled in the project.

-

Each command object contains the translation unit's main file, the working -directory of the compile run and the actual compile command.

-

Example: -

-[
-  { "directory": "/home/user/llvm/build",
-    "command": "/usr/bin/clang++ -Irelative -DSOMEDEF='\"With spaces and quotes.\"' -c -o file.o file.cc",
-    "file": "file.cc" },
-  ...
-]
-
-The contracts for each field in the command object are: -
    -
  • directory: The working directory of the compilation. All paths specified -in the command or file fields must be either absolute or relative to -this directory.
  • -
  • file: The main translation unit source processed by this compilation step. -This is used by tools as the key into the compilation database. There can be multiple -command objects for the same file, for example if the same source file is -compiled with different configurations.
  • -
  • command: The compile command executed. After JSON unescaping, this must -be a valid command to rerun the exact compilation step for the translation unit in -the environment the build system uses. Parameters use shell quoting and shell escaping -of quotes, with '"' and '\' being the only special characters. Shell expansion is -not supported.
  • -
-

- -

Build System Integration

-

The convention is to name the file compile_commands.json and put it at the top -of the build directory. Clang tools are pointed to the top of the build directory -to detect the file and use the compilation database to parse C++ code in the source -tree.

- -
- - - diff --git a/docs/JSONCompilationDatabase.rst b/docs/JSONCompilationDatabase.rst new file mode 100644 index 0000000000..502557b50a --- /dev/null +++ b/docs/JSONCompilationDatabase.rst @@ -0,0 +1,85 @@ +============================================== +JSON Compilation Database Format Specification +============================================== + +This document describes a format for specifying how to replay single +compilations independently of the build system. + +Background +========== + +Tools based on the C++ Abstract Syntax Tree need full information how to +parse a translation unit. Usually this information is implicitly +available in the build system, but running tools as part of the build +system is not necessarily the best solution: + +- Build systems are inherently change driven, so running multiple tools + over the same code base without changing the code does not fit into + the architecture of many build systems. +- Figuring out whether things have changed is often an IO bound + process; this makes it hard to build low latency end user tools based + on the build system. +- Build systems are inherently sequential in the build graph, for + example due to generated source code. While tools that run + independently of the build still need the generated source code to + exist, running tools multiple times over unchanging source does not + require serialization of the runs according to the build dependency + graph. + +Supported Systems +================= + +Currently `CMake `_ (since 2.8.5) supports generation +of compilation databases for Unix Makefile builds (Ninja builds in the +works) with the option CMAKE\_EXPORT\_COMPILE\_COMMANDS. + +Clang's tooling interface supports reading compilation databases; see +the `LibTooling documentation `_. libclang and its +python bindings also support this (since clang 3.2); see +`CXCompilationDatabase.h `_. 
+ +Format +====== + +A compilation database is a JSON file, which consists of an array of +"command objects", where each command object specifies one way a +translation unit is compiled in the project. + +Each command object contains the translation unit's main file, the +working directory of the compile run and the actual compile command. + +Example: + +:: + + [ + { "directory": "/home/user/llvm/build", + "command": "/usr/bin/clang++ -Irelative -DSOMEDEF='\"With spaces and quotes.\"' -c -o file.o file.cc", + "file": "file.cc" }, + ... + ] + +The contracts for each field in the command object are: + +- **directory:** The working directory of the compilation. All paths + specified in the **command** or **file** fields must be either + absolute or relative to this directory. +- **file:** The main translation unit source processed by this + compilation step. This is used by tools as the key into the + compilation database. There can be multiple command objects for the + same file, for example if the same source file is compiled with + different configurations. +- **command:** The compile command executed. After JSON unescaping, + this must be a valid command to rerun the exact compilation step for + the translation unit in the environment the build system uses. + Parameters use shell quoting and shell escaping of quotes, with '"' + and '\\' being the only special characters. Shell expansion is not + supported. + +Build System Integration +======================== + +The convention is to name the file compile\_commands.json and put it at +the top of the build directory. Clang tools are pointed to the top of +the build directory to detect the file and use the compilation database +to parse C++ code in the source tree.
diff --git a/docs/LibASTMatchersTutorial.html b/docs/LibASTMatchersTutorial.html deleted file mode 100644 index 222ce70dfa..0000000000 --- a/docs/LibASTMatchersTutorial.html +++ /dev/null @@ -1,533 +0,0 @@ - - - -Tutorial for building tools using LibTooling and LibASTMatchers - - - - - - - -
- -

Tutorial for building tools using LibTooling and LibASTMatchers

-

This document is intended to show how to build a useful source-to-source -translation tool based on Clang's LibTooling. It -is explicitly aimed at people who are new to Clang, so all you should need is a -working knowledge of C++ and the command line.

- -

In order to work on the compiler, you need some basic knowledge of the -abstract syntax tree (AST). To this end, the reader is encouraged to skim the -Introduction -to the Clang AST

- - -

Step 0: Obtaining Clang

- -As Clang is part of the LLVM project, you'll need to download LLVM's source code -first. Both Clang and LLVM are maintained as Subversion repositories, but we'll -be accessing them through the git mirror. For further information, see the -getting started guide. - -
-  mkdir ~/clang-llvm && cd ~/clang-llvm
-  git clone http://llvm.org/git/llvm.git
-  cd llvm/tools
-  git clone http://llvm.org/git/clang.git
-
- -Next you need to obtain the CMake build system and Ninja build tool. You may -already have CMake installed, but current binary versions of CMake aren't built -with Ninja support. - -
-  cd ~/clang-llvm
-  git clone https://github.com/martine/ninja.git
-  cd ninja
-  git checkout release
-  ./bootstrap.py
-  sudo cp ninja /usr/bin/
-
-  cd ~/clang-llvm
-  git clone git://cmake.org/stage/cmake.git
-  cd cmake
-  git checkout next
-  ./bootstrap
-  make
-  sudo make install
-
- -

Okay. Now we'll build Clang!

- -
-  cd ~/clang-llvm
-  mkdir build && cd build
-  cmake -G Ninja ../llvm -DLLVM_BUILD_TESTS=ON  # Enable tests; default is off.
-  ninja
-  ninja check       # Test LLVM only.
-  ninja clang-test  # Test Clang only.
-  ninja install
-
- -

And we're live.

- -

All of the tests should pass, though there is a (very) small chance that you -can catch LLVM and Clang out of sync. Running 'git svn rebase' in both -the llvm and clang directories should fix any problems.

- -

Finally, we want to set Clang as its own compiler.

- -
-  cd ~/clang-llvm/build
-  ccmake ../llvm
-
- -

The second command will bring up a GUI for configuring Clang. You need to set -the entry for CMAKE_CXX_COMPILER. Press 't' to turn on -advanced mode. Scroll down to CMAKE_CXX_COMPILER, and set it to -/usr/bin/clang++, or wherever you installed it. Press 'c' to -configure, then 'g' to generate CMake's files.

- -

Finally, run ninja one last time, and you're done.

- - -

Step 1: Create a ClangTool

- -

Now that we have enough background knowledge, it's time to create the -simplest productive ClangTool in existence: a syntax checker. While this already -exists as clang-check, it's important to understand what's going -on.

- -

First, we'll need to create a new directory for our tool and tell CMake that -it exists. As this is not going to be a core clang tool, it will live in the -tools/extra repository.

- -
-  cd ~/clang-llvm/llvm/tools/clang
-  mkdir tools/extra/loop-convert
-  echo 'add_subdirectory(loop-convert)' >> tools/extra/CMakeLists.txt
-  vim tools/extra/loop-convert/CMakeLists.txt
-
- -CMakeLists.txt should have the following contents: -
-  set(LLVM_LINK_COMPONENTS support)
-  set(LLVM_USED_LIBS clangTooling clangBasic clangAST)
-
-  add_clang_executable(loop-convert
-    LoopConvert.cpp
-    )
-  target_link_libraries(loop-convert
-    clangTooling
-    clangBasic
-    clangASTMatchers
-    )
-
- -

With that done, Ninja will be able to compile our tool. Let's give it -something to compile! Put the following into -tools/extra/loop-convert/LoopConvert.cpp. A detailed explanation of why -the different parts are needed can be found in the -LibTooling documentation.

- -
-  // Declares clang::SyntaxOnlyAction.
-  #include "clang/Frontend/FrontendActions.h"
-  #include "clang/Tooling/CommonOptionsParser.h"
-  #include "clang/Tooling/Tooling.h"
-  // Declares llvm::cl::extrahelp.
-  #include "llvm/Support/CommandLine.h"
-
-  using namespace clang::tooling;
-  using namespace llvm;
-
-  // CommonOptionsParser declares HelpMessage with a description of the common
-  // command-line options related to the compilation database and input files.
-  // It's nice to have this help message in all tools.
-  static cl::extrahelp CommonHelp(CommonOptionsParser::HelpMessage);
-
-  // A help message for this specific tool can be added afterwards.
-  static cl::extrahelp MoreHelp("\nMore help text...");
-
-  int main(int argc, const char **argv) {
-    CommonOptionsParser OptionsParser(argc, argv);
-    ClangTool Tool(OptionsParser.GetCompilations(),
-                   OptionsParser.GetSourcePathList());
-    return Tool.run(newFrontendActionFactory<clang::SyntaxOnlyAction>());
-  }
-
- -

And that's it! You can compile our new tool by running ninja from the -build directory.

- -
-  cd ~/clang-llvm/build
-  ninja
-
- -

You should now be able to run the syntax checker, which is located in -~/clang-llvm/build/bin, on any source file. Try it!

- -
-  echo "int main() {}" > test.cpp
-  bin/loop-convert test.cpp --
-
- -

Note the two dashes after we specify the source file. The additional options -for the compiler are passed after the dashes rather than loading them from a -compilation database - there just aren't any options needed right now.

- - -

Intermezzo: Learn AST matcher basics

- -

Clang recently introduced the ASTMatcher -library to provide a simple, powerful, and concise way to describe specific -patterns in the AST. Implemented as a DSL powered by macros and templates (see -ASTMatchers.h if you're -curious), matchers offer the feel of algebraic data types common to functional -programming languages.

- -

For example, suppose you wanted to examine only binary operators. There is a -matcher to do exactly that, conveniently named binaryOperator. I'll -give you one guess what this matcher does:

- -
-  binaryOperator(hasOperatorName("+"), hasLHS(integerLiteral(equals(0))))
-
- -

Shockingly, it will match against addition expressions whose left hand side -is exactly the literal 0. It will not match against other forms of 0, such as -'\0' or NULL, but it will match against macros that expand to -0. The matcher will also not match against calls to the overloaded operator -'+', as there is a separate operatorCallExpr matcher to handle -overloaded operators.

- -

There are AST matchers to match all the different nodes of the AST, narrowing -matchers to only match AST nodes fulfilling specific criteria, and traversal -matchers to get from one kind of AST node to another. For a complete list of AST -matchers, take a look at the AST Matcher -References

- -

All matchers that are nouns describe entities in the AST and can be bound, -so that they can be referred to whenever a match is found. To do so, simply call -the method bind on these matchers, e.g.:

-
-  variable(hasType(isInteger())).bind("intvar")
-
- - -

Step 2: Using AST matchers

- -

Okay, on to using matchers for real. Let's start by defining a matcher which -will capture all for statements that define a new variable -initialized to zero. Let's start with matching all for loops:

- -
-  forStmt()
-
- -

Next, we want to specify that a single variable is declared in the first -portion of the loop, so we can extend the matcher to

-
-  forStmt(hasLoopInit(declStmt(hasSingleDecl(varDecl()))))
-
- -

Finally, we can add the condition that the variable is initialized to -zero.

-
-  forStmt(hasLoopInit(declStmt(hasSingleDecl(varDecl(
-    hasInitializer(integerLiteral(equals(0))))))))
-
- -

It is fairly easy to read and understand the matcher definition ("match -loops whose init portion declares a single variable which is initialized to the -integer literal 0"), but deciding that every piece is necessary is more -difficult. Note that this matcher will not match loops whose variables are -initialized to '\0', 0.0, NULL, or any form of zero -besides the integer 0.

- -

The last step is giving the matcher a name and binding the ForStmt -as we will want to do something with it:

-
-  StatementMatcher LoopMatcher =
-    forStmt(hasLoopInit(declStmt(hasSingleDecl(varDecl(
-      hasInitializer(integerLiteral(equals(0)))))))).bind("forLoop");
-
- -

Once you have defined your matchers, you will need to add a little more -scaffolding in order to run them. Matchers are paired with a -MatchCallback and registered with a MatchFinder object, then -run from a ClangTool. More code!

- -Add the following to LoopConvert.cpp: -
-  StatementMatcher LoopMatcher =
-    forStmt(hasLoopInit(declStmt(hasSingleDecl(varDecl(
-      hasInitializer(integerLiteral(equals(0)))))))).bind("forLoop");
-
-  class LoopPrinter : public MatchFinder::MatchCallback {
-  public :
-    virtual void run(const MatchFinder::MatchResult &Result) {
-    if (const ForStmt *FS = Result.Nodes.getNodeAs<clang::ForStmt>("forLoop"))
-      FS->dump();
-  };
-
- -And change main() to: -
-  int main(int argc, const char **argv) {
-    CommonOptionsParser OptionsParser(argc, argv);
-    ClangTool Tool(OptionsParser.GetCompilations(),
-                   OptionsParser.GetSourcePathList());
-
-    LoopPrinter Printer;
-    MatchFinder Finder;
-    Finder.addMatcher(LoopMatcher, &Printer);
-
-    return Tool.run(newFrontendActionFactory(&Finder));
-  }
-
- -

Now, you should be able to recompile and run the code to discover for loops. -Create a new file with a few examples, and test out our new handiwork:

- -
-  cd ~/clang-llvm/build/
-  ninja loop-convert
-  vim ~/test-files/simple-loops.cc
-  bin/loop-convert ~/test-files/simple-loops.cc
-
- - - - -

Step 3.5: More Complicated Matchers

- -

Our simple matcher is capable of discovering for loops, but we would still -need to filter out many more ourselves. We can do a good portion of the -remaining work with some cleverly chosen matchers, but first we need to decide -exactly which properties we want to allow.

- -

How can we characterize for loops over arrays which would be eligible for -translation to range-based syntax? Range based loops over arrays of size -N that:

-
    -
  • start at index 0
  • -
  • iterate consecutively
  • -
  • end at index N-1
  • -
- -

We already check for (1), so all we need to add is a check to the loop's -condition to ensure that the loop's index variable is compared against -N and another check to ensure that the increment step just increments -this same variable. The matcher for (2) is straightforward: require a pre- or -post-increment of the same variable declared in the init portion.

- -

Unfortunately, such a matcher is impossible to write. Matchers contain no -logic for comparing two arbitrary AST nodes and determining whether or not they -are equal, so the best we can do is matching more than we would like to allow, -and punting extra comparisons to the callback.

- -

In any case, we can start building this sub-matcher. We can require that the -increment step be a unary increment like this:

- -
-  hasIncrement(unaryOperator(hasOperatorName("++")))
-
- -

Specifying what is incremented introduces another quirk of Clang's AST: -Usages of variables are represented as DeclRefExpr's ("declaration -reference expressions") because they are expressions which refer to variable -declarations. To find a unaryOperator that refers to a specific -declaration, we can simply add a second condition to it:

-
-  hasIncrement(unaryOperator(
-    hasOperatorName("++"),
-    hasUnaryOperand(declRefExpr())))
-
- -

Furthermore, we can restrict our matcher to only match if the incremented -variable is an integer:

-
-  hasIncrement(unaryOperator(
-    hasOperatorName("++"),
-    hasUnaryOperand(declRefExpr(to(varDecl(hasType(isInteger())))))))
-
- -

And the last step will be to attach an identifier to this variable, so that -we can retrieve it in the callback:

-
-  hasIncrement(unaryOperator(
-    hasOperatorName("++"),
-    hasUnaryOperand(declRefExpr(to(
-      varDecl(hasType(isInteger())).bind("incrementVariable"))))))
-
- -

We can add this code to the definition of LoopMatcher and make sure -that our program, outfitted with the new matcher, only prints out loops that -declare a single variable initialized to zero and have an increment step -consisting of a unary increment of some variable.

- - - -

Now, we just need to add a matcher to check if the condition part of the -for loop compares a variable against the size of the array. There is -only one problem - we don't know which array we're iterating over without -looking at the body of the loop! We are again restricted to approximating the -result we want with matchers, filling in the details in the callback. So we -start with:

-
-  hasCondition(binaryOperator(hasOperatorName("<")))
-
- -

It makes sense to ensure that the left-hand side is a reference to a -variable, and that the right-hand side has integer type.

-
-  hasCondition(binaryOperator(
-    hasOperatorName("<"),
-    hasRHS(expr(hasType(isInteger()))),
-    hasLHS(declRefExpr(to(varDecl(hasType(isInteger())))))))
-
- - - -

Why? Because it doesn't work. Of the three loops provided in -test-files/simple.cpp, zero of them have a matching condition. A quick -look at the AST dump of the first for loop, produced by the previous iteration -of loop-convert, shows us the answer:

- -
-  (ForStmt 0x173b240
-    (DeclStmt 0x173afc8
-      0x173af50 "int i =
-        (IntegerLiteral 0x173afa8 'int' 0)")
-    <<>>
-    (BinaryOperator 0x173b060 '_Bool' '<'
-      (ImplicitCastExpr 0x173b030 'int' 
-        (DeclRefExpr 0x173afe0 'int' lvalue Var 0x173af50 'i' 'int'))
-      (ImplicitCastExpr 0x173b048 'int' 
-        (DeclRefExpr 0x173b008 'const int' lvalue Var 0x170fa80 'N' 'const int')))
-    (UnaryOperator 0x173b0b0 'int' lvalue prefix '++'
-      (DeclRefExpr 0x173b088 'int' lvalue Var 0x173af50 'i' 'int'))
-    (CompoundStatement …
-
- -

We already know that the declaration and increments both match, or this loop -wouldn't have been dumped. The culprit lies in the implicit cast applied to the -first operand (i.e. the LHS) of the less-than operator, an L-value to R-value -conversion applied to the expression referencing i. Thankfully, the -matcher library offers a solution to this problem in the form of -ignoringParenImpCasts, which instructs the matcher to ignore implicit -casts and parentheses before continuing to match. Adjusting the condition -operator will restore the desired match.

- -
-  hasCondition(binaryOperator(
-    hasOperatorName("<"),
-    hasLHS(ignoringParenImpCasts(declRefExpr(
-      to(varDecl(hasType(isInteger())))))),
-    hasRHS(expr(hasType(isInteger())))))
-
- -

After adding binds to the expressions we wished to capture and extracting the -identifier strings into variables, we have array-step-2 completed.

- - -

Step 4: Retrieving Matched Nodes

- -

So far, the matcher callback isn't very interesting: it just dumps the loop's -AST. At some point, we will need to make changes to the input source code. Next, -we'll work on using the nodes we bound in the previous step.

- -

The MatchFinder::run() callback takes a -MatchFinder::MatchResult& as its parameter. We're most interested in -its Context and Nodes members. Clang uses the -ASTContext class to represent contextual information about the AST, as -the name implies, though the most functionally important detail is that several -operations require an ASTContext* parameter. More immediately useful is -the set of matched nodes, and how we retrieve them.

- - - -

Since we bind three variables (identified by ConditionVarName, -InitVarName, and IncrementVarName), we can obtain the matched nodes by using the -getNodeAs() member function.

- -

In LoopActions.cpp:

-
-  #include "clang/AST/ASTContext.h"
-
-  void LoopPrinter::run(const MatchFinder::MatchResult &Result) {
-    ASTContext *Context = Result.Context;
-    const ForStmt *FS = Result.Nodes.getStmtAs<ForStmt>(LoopName);
-    // We do not want to convert header files!
-    if (!FS || !Context->getSourceManager().isFromMainFile(FS->getForLoc()))
-      return;
-    const VarDecl *IncVar = Result.Nodes.getNodeAs<VarDecl>(IncrementVarName);
-    const VarDecl *CondVar = Result.Nodes.getNodeAs<VarDecl>(ConditionVarName);
-    const VarDecl *InitVar = Result.Nodes.getNodeAs<VarDecl>(InitVarName);
-
- -

Now that we have the three variables, represented by their respective -declarations, let's make sure that they're all the same, using a helper function -I call areSameVariable().

-
-  if (!areSameVariable(IncVar, CondVar) || !areSameVariable(IncVar, InitVar))
-    return;
-  llvm::outs() << "Potential array-based loop discovered.\n";
-}
-
- -

If execution reaches the end of LoopPrinter::run(), we know that the -loop shell looks like

-
-  for (int i= 0; i < expr(); ++i) { ... }
-
- -

For now, we will just print a message explaining that we found a loop. The -next section will deal with recursively traversing the AST to discover all -changes needed.

- -

As a side note, here is the implementation of areSameVariable. Clang -associates a VarDecl with each variable to represent the variable's -declaration. Since the "canonical" form of each declaration is unique by -address, all we need to do is make sure neither ValueDecl (base class -of VarDecl) is NULL and compare the canonical Decls.

-
-  static bool areSameVariable(const ValueDecl *First, const ValueDecl *Second) {
-    return First && Second &&
-           First->getCanonicalDecl() == Second->getCanonicalDecl();
-  }
-
- -

It's not as trivial to test if two expressions are the same, though Clang has -already done the hard work for us by providing a way to canonicalize -expressions:

-
-  static bool areSameExpr(ASTContext* Context, const Expr *First,
-                          const Expr *Second) {
-    if (!First || !Second)
-      return false;
-    llvm::FoldingSetNodeID FirstID, SecondID;
-    First->Profile(FirstID, *Context, true);
-    Second->Profile(SecondID, *Context, true);
-    return FirstID == SecondID;
-  }
-
- - - -

This code relies on the comparison between two -llvm::FoldingSetNodeIDs. As the documentation for -Stmt::Profile() indicates, the Profile() member function -builds a description of a node in the AST, based on its properties, along with -those of its children. FoldingSetNodeID then serves as a hash we can -use to compare expressions. We will need areSameExpr later. Before you -run the new code on the additional loops added to test-files/simple.cpp, try to -figure out which ones will be considered potentially convertible.

- -
- - diff --git a/docs/LibASTMatchersTutorial.rst b/docs/LibASTMatchersTutorial.rst new file mode 100644 index 0000000000..21d26897f3 --- /dev/null +++ b/docs/LibASTMatchersTutorial.rst @@ -0,0 +1,532 @@ +=============================================================== +Tutorial for building tools using LibTooling and LibASTMatchers +=============================================================== + +This document is intended to show how to build a useful source-to-source +translation tool based on Clang's `LibTooling `_. It is +explicitly aimed at people who are new to Clang, so all you should need +is a working knowledge of C++ and the command line. + +In order to work on the compiler, you need some basic knowledge of the +abstract syntax tree (AST). To this end, the reader is incouraged to +skim the :doc:`Introduction to the Clang +AST ` + +Step 0: Obtaining Clang +======================= + +As Clang is part of the LLVM project, you'll need to download LLVM's +source code first. Both Clang and LLVM are maintained as Subversion +repositories, but we'll be accessing them through the git mirror. For +further information, see the `getting started +guide `_. + +:: + + mkdir ~/clang-llvm && cd ~/clang-llvm + git clone http://llvm.org/git/llvm.git + cd llvm/tools + git clone http://llvm.org/git/clang.git + +Next you need to obtain the CMake build system and Ninja build tool. You +may already have CMake installed, but current binary versions of CMake +aren't built with Ninja support. + +:: + + cd ~/clang-llvm + git clone https://github.com/martine/ninja.git + cd ninja + git checkout release + ./bootstrap.py + sudo cp ninja /usr/bin/ + + cd ~/clang-llvm + git clone git://cmake.org/stage/cmake.git + cd cmake + git checkout next + ./bootstrap + make + sudo make install + +Okay. Now we'll build Clang! + +:: + + cd ~/clang-llvm + mkdir build && cd build + cmake -G Ninja ../llvm -DLLVM_BUILD_TESTS=ON # Enable tests; default is off. + ninja + ninja check # Test LLVM only. 
+ ninja clang-test # Test Clang only. + ninja install + +And we're live. + +All of the tests should pass, though there is a (very) small chance that +you can catch LLVM and Clang out of sync. Running ``'git svn rebase'`` +in both the llvm and clang directories should fix any problems. + +Finally, we want to set Clang as its own compiler. + +:: + + cd ~/clang-llvm/build + ccmake ../llvm + +The second command will bring up a GUI for configuring Clang. You need +to set the entry for ``CMAKE_CXX_COMPILER``. Press ``'t'`` to turn on +advanced mode. Scroll down to ``CMAKE_CXX_COMPILER``, and set it to +``/usr/bin/clang++``, or wherever you installed it. Press ``'c'`` to +configure, then ``'g'`` to generate CMake's files. + +Finally, run ninja one last time, and you're done. + +Step 1: Create a ClangTool +========================== + +Now that we have enough background knowledge, it's time to create the +simplest productive ClangTool in existence: a syntax checker. While this +already exists as ``clang-check``, it's important to understand what's +going on. + +First, we'll need to create a new directory for our tool and tell CMake +that it exists. As this is not going to be a core clang tool, it will +live in the ``tools/extra`` repository. + +:: + + cd ~/clang-llvm/llvm/tools/clang + mkdir tools/extra/loop-convert + echo 'add_subdirectory(loop-convert)' >> tools/extra/CMakeLists.txt + vim tools/extra/loop-convert/CMakeLists.txt + +CMakeLists.txt should have the following contents: + +:: + + set(LLVM_LINK_COMPONENTS support) + set(LLVM_USED_LIBS clangTooling clangBasic clangAST) + + add_clang_executable(loop-convert + LoopConvert.cpp + ) + target_link_libraries(loop-convert + clangTooling + clangBasic + clangASTMatchers + ) + +With that done, Ninja will be able to compile our tool. Let's give it +something to compile! Put the following into +``tools/extra/loop-convert/LoopConvert.cpp``. 
A detailed explanation of +why the different parts are needed can be found in the `LibTooling +documentation `_. + +:: + + // Declares clang::SyntaxOnlyAction. + #include "clang/Frontend/FrontendActions.h" + #include "clang/Tooling/CommonOptionsParser.h" + #include "clang/Tooling/Tooling.h" + // Declares llvm::cl::extrahelp. + #include "llvm/Support/CommandLine.h" + + using namespace clang::tooling; + using namespace llvm; + + // CommonOptionsParser declares HelpMessage with a description of the common + // command-line options related to the compilation database and input files. + // It's nice to have this help message in all tools. + static cl::extrahelp CommonHelp(CommonOptionsParser::HelpMessage); + + // A help message for this specific tool can be added afterwards. + static cl::extrahelp MoreHelp("\nMore help text..."); + + int main(int argc, const char **argv) { + CommonOptionsParser OptionsParser(argc, argv); + ClangTool Tool(OptionsParser.GetCompilations(), + OptionsParser.GetSourcePathList()); + return Tool.run(newFrontendActionFactory()); + } + +And that's it! You can compile our new tool by running ninja from the +``build`` directory. + +:: + + cd ~/clang-llvm/build + ninja + +You should now be able to run the syntax checker, which is located in +``~/clang-llvm/build/bin``, on any source file. Try it! + +:: + + cat "void main() {}" > test.cpp + bin/loop-convert test.cpp -- + +Note the two dashes after we specify the source file. The additional +options for the compiler are passed after the dashes rather than loading +them from a compilation database - there just aren't any options needed +right now. + +Intermezzo: Learn AST matcher basics +==================================== + +Clang recently introduced the :doc:`ASTMatcher +library ` to provide a simple, powerful, and +concise way to describe specific patterns in the AST. 
Implemented as a +DSL powered by macros and templates (see +`ASTMatchers.h <../doxygen/ASTMatchers_8h_source.html>`_ if you're +curious), matchers offer the feel of algebraic data types common to +functional programming languages. + +For example, suppose you wanted to examine only binary operators. There +is a matcher to do exactly that, conveniently named ``binaryOperator``. +I'll give you one guess what this matcher does: + +:: + + binaryOperator(hasOperatorName("+"), hasLHS(integerLiteral(equals(0)))) + +Shockingly, it will match against addition expressions whose left hand +side is exactly the literal 0. It will not match against other forms of +0, such as ``'\0'`` or ``NULL``, but it will match against macros that +expand to 0. The matcher will also not match against calls to the +overloaded operator ``'+'``, as there is a separate ``operatorCallExpr`` +matcher to handle overloaded operators. + +There are AST matchers to match all the different nodes of the AST, +narrowing matchers to only match AST nodes fulfilling specific criteria, +and traversal matchers to get from one kind of AST node to another. For +a complete list of AST matchers, take a look at the `AST Matcher +References `_ + +All matcher that are nouns describe entities in the AST and can be +bound, so that they can be referred to whenever a match is found. To do +so, simply call the method ``bind`` on these matchers, e.g.: + +:: + + variable(hasType(isInteger())).bind("intvar") + +Step 2: Using AST matchers +========================== + +Okay, on to using matchers for real. Let's start by defining a matcher +which will capture all ``for`` statements that define a new variable +initialized to zero. 
Let's start with matching all ``for`` loops: + +:: + + forStmt() + +Next, we want to specify that a single variable is declared in the first +portion of the loop, so we can extend the matcher to + +:: + + forStmt(hasLoopInit(declStmt(hasSingleDecl(varDecl())))) + +Finally, we can add the condition that the variable is initialized to +zero. + +:: + + forStmt(hasLoopInit(declStmt(hasSingleDecl(varDecl( + hasInitializer(integerLiteral(equals(0)))))))) + +It is fairly easy to read and understand the matcher definition ("match +loops whose init portion declares a single variable which is initialized +to the integer literal 0"), but deciding that every piece is necessary +is more difficult. Note that this matcher will not match loops whose +variables are initialized to ``'\0'``, ``0.0``, ``NULL``, or any form of +zero besides the integer 0. + +The last step is giving the matcher a name and binding the ``ForStmt`` +as we will want to do something with it: + +:: + + StatementMatcher LoopMatcher = + forStmt(hasLoopInit(declStmt(hasSingleDecl(varDecl( + hasInitializer(integerLiteral(equals(0)))))))).bind("forLoop"); + +Once you have defined your matchers, you will need to add a little more +scaffolding in order to run them. Matchers are paired with a +``MatchCallback`` and registered with a ``MatchFinder`` object, then run +from a ``ClangTool``. More code! 
+ +Add the following to ``LoopConvert.cpp``: + +:: + + StatementMatcher LoopMatcher = + forStmt(hasLoopInit(declStmt(hasSingleDecl(varDecl( + hasInitializer(integerLiteral(equals(0)))))))).bind("forLoop"); + + class LoopPrinter : public MatchFinder::MatchCallback { + public : + virtual void run(const MatchFinder::MatchResult &Result) { + if (const ForStmt *FS = Result.Nodes.getNodeAs("forLoop")) + FS->dump(); + }; + +And change ``main()`` to: + +:: + + int main(int argc, const char **argv) { + CommonOptionsParser OptionsParser(argc, argv); + ClangTool Tool(OptionsParser.GetCompilations(), + OptionsParser.GetSourcePathList()); + + LoopPrinter Printer; + MatchFinder Finder; + Finder.addMatcher(LoopMatcher, &Printer); + + return Tool.run(newFrontendActionFactory(&Finder)); + } + +Now, you should be able to recompile and run the code to discover for +loops. Create a new file with a few examples, and test out our new +handiwork: + +:: + + cd ~/clang-llvm/llvm/llvm_build/ + ninja loop-convert + vim ~/test-files/simple-loops.cc + bin/loop-convert ~/test-files/simple-loops.cc + +Step 3.5: More Complicated Matchers +=================================== + +Our simple matcher is capable of discovering for loops, but we would +still need to filter out many more ourselves. We can do a good portion +of the remaining work with some cleverly chosen matchers, but first we +need to decide exactly which properties we want to allow. + +How can we characterize for loops over arrays which would be eligible +for translation to range-based syntax? Range based loops over arrays of +size ``N`` that: + +- start at index ``0`` +- iterate consecutively +- end at index ``N-1`` + +We already check for (1), so all we need to add is a check to the loop's +condition to ensure that the loop's index variable is compared against +``N`` and another check to ensure that the increment step just +increments this same variable. 
The matcher for (2) is straightforward: +require a pre- or post-increment of the same variable declared in the +init portion. + +Unfortunately, such a matcher is impossible to write. Matchers contain +no logic for comparing two arbitrary AST nodes and determining whether +or not they are equal, so the best we can do is matching more than we +would like to allow, and punting extra comparisons to the callback. + +In any case, we can start building this sub-matcher. We can require that +the increment step be a unary increment like this: + +:: + + hasIncrement(unaryOperator(hasOperatorName("++"))) + +Specifying what is incremented introduces another quirk of Clang's AST: +Usages of variables are represented as ``DeclRefExpr``'s ("declaration +reference expressions") because they are expressions which refer to +variable declarations. To find a ``unaryOperator`` that refers to a +specific declaration, we can simply add a second condition to it: + +:: + + hasIncrement(unaryOperator( + hasOperatorName("++"), + hasUnaryOperand(declRefExpr()))) + +Furthermore, we can restrict our matcher to only match if the +incremented variable is an integer: + +:: + + hasIncrement(unaryOperator( + hasOperatorName("++"), + hasUnaryOperand(declRefExpr(to(varDecl(hasType(isInteger()))))))) + +And the last step will be to attach an identifier to this variable, so +that we can retrieve it in the callback: + +:: + + hasIncrement(unaryOperator( + hasOperatorName("++"), + hasUnaryOperand(declRefExpr(to( + varDecl(hasType(isInteger())).bind("incrementVariable")))))) + +We can add this code to the definition of ``LoopMatcher`` and make sure +that our program, outfitted with the new matcher, only prints out loops +that declare a single variable initialized to zero and have an increment +step consisting of a unary increment of some variable. + +Now, we just need to add a matcher to check if the condition part of the +``for`` loop compares a variable against the size of the array. 
There is +only one problem - we don't know which array we're iterating over +without looking at the body of the loop! We are again restricted to +approximating the result we want with matchers, filling in the details +in the callback. So we start with: + +:: + + hasCondition(binaryOperator(hasOperatorName("<")) + +It makes sense to ensure that the left-hand side is a reference to a +variable, and that the right-hand side has integer type. + +:: + + hasCondition(binaryOperator( + hasOperatorName("<"), + hasRHS(expr(hasType(isInteger()))), + hasLHS(declRefExpr(to(varDecl(hasType(isInteger()))))))) + +Why? Because it doesn't work. Of the three loops provided in +``test-files/simple.cpp``, zero of them have a matching condition. A +quick look at the AST dump of the first for loop, produced by the +previous iteration of loop-convert, shows us the answer: + +:: + + (ForStmt 0x173b240 + (DeclStmt 0x173afc8 + 0x173af50 "int i = + (IntegerLiteral 0x173afa8 'int' 0)") + <<>> + (BinaryOperator 0x173b060 '_Bool' '<' + (ImplicitCastExpr 0x173b030 'int' + (DeclRefExpr 0x173afe0 'int' lvalue Var 0x173af50 'i' 'int')) + (ImplicitCastExpr 0x173b048 'int' + (DeclRefExpr 0x173b008 'const int' lvalue Var 0x170fa80 'N' 'const int'))) + (UnaryOperator 0x173b0b0 'int' lvalue prefix '++' + (DeclRefExpr 0x173b088 'int' lvalue Var 0x173af50 'i' 'int')) + (CompoundStatement … + +We already know that the declaration and increments both match, or this +loop wouldn't have been dumped. The culprit lies in the implicit cast +applied to the first operand (i.e. the LHS) of the less-than operator, +an L-value to R-value conversion applied to the expression referencing +``i``. Thankfully, the matcher library offers a solution to this problem +in the form of ``ignoringParenImpCasts``, which instructs the matcher to +ignore implicit casts and parentheses before continuing to match. +Adjusting the condition operator will restore the desired match. 
+ +:: + + hasCondition(binaryOperator( + hasOperatorName("<"), + hasLHS(expr(hasType(isInteger()))), + hasRHS(ignoringParenImpCasts(declRefExpr( + to(varDecl(hasType(isInteger())))))))) + +After adding binds to the expressions we wished to capture and +extracting the identifier strings into variables, we have array-step-2 +completed. + +Step 4: Retrieving Matched Nodes +================================ + +So far, the matcher callback isn't very interesting: it just dumps the +loop's AST. At some point, we will need to make changes to the input +source code. Next, we'll work on using the nodes we bound in the +previous step. + +The ``MatchFinder::run()`` callback takes a +``MatchFinder::MatchResult&`` as its parameter. We're most interested in +its ``Context`` and ``Nodes`` members. Clang uses the ``ASTContext`` +class to represent contextual information about the AST, as the name +implies, though the most functionally important detail is that several +operations require an ``ASTContext*`` parameter. More immediately useful +is the set of matched nodes, and how we retrieve them. + +Since we bind three variables (identified by ConditionVarName, +InitVarName, and IncrementVarName), we can obtain the matched nodes by +using the ``getNodeAs()`` member function. + +In ``LoopActions.cpp``: + +:: + + #include "clang/AST/ASTContext.h" + + void LoopPrinter::run(const MatchFinder::MatchResult &Result) { + ASTContext *Context = Result.Context; + const ForStmt *FS = Result.Nodes.getStmtAs(LoopName); + // We do not want to convert header files! 
+ if (!FS || !Context->getSourceManager().isFromMainFile(FS->getForLoc())) + return; + const VarDecl *IncVar = Result.Nodes.getNodeAs(IncrementVarName); + const VarDecl *CondVar = Result.Nodes.getNodeAs(ConditionVarName); + const VarDecl *InitVar = Result.Nodes.getNodeAs(InitVarName); + +Now that we have the three variables, represented by their respective +declarations, let's make sure that they're all the same, using a helper +function I call ``areSameVariable()``. + +:: + + if (!areSameVariable(IncVar, CondVar) || !areSameVariable(IncVar, InitVar)) + return; + llvm::outs() << "Potential array-based loop discovered.\n"; + } + +If execution reaches the end of ``LoopPrinter::run()``, we know that the +loop shell that looks like + +:: + + for (int i= 0; i < expr(); ++i) { ... } + +For now, we will just print a message explaining that we found a loop. +The next section will deal with recursively traversing the AST to +discover all changes needed. + +As a side note, here is the implementation of ``areSameVariable``. Clang +associates a ``VarDecl`` with each variable to represent the variable's +declaration. Since the "canonical" form of each declaration is unique by +address, all we need to do is make sure neither ``ValueDecl`` (base +class of ``VarDecl``) is ``NULL`` and compare the canonical Decls. 
+ +:: + + static bool areSameVariable(const ValueDecl *First, const ValueDecl *Second) { + return First && Second && + First->getCanonicalDecl() == Second->getCanonicalDecl(); + } + +It's not as trivial to test if two expressions are the same, though +Clang has already done the hard work for us by providing a way to +canonicalize expressions: + +:: + + static bool areSameExpr(ASTContext* Context, const Expr *First, + const Expr *Second) { + if (!First || !Second) + return false; + llvm::FoldingSetNodeID FirstID, SecondID; + First->Profile(FirstID, *Context, true); + Second->Profile(SecondID, *Context, true); + return FirstID == SecondID; + } + +This code relies on the comparison between two +``llvm::FoldingSetNodeIDs``. As the documentation for +``Stmt::Profile()`` indicates, the ``Profile()`` member function builds +a description of a node in the AST, based on its properties, along with +those of its children. ``FoldingSetNodeID`` then serves as a hash we can +use to compare expressions. We will need ``areSameExpr`` later. Before +you run the new code on the additional loops added to +test-files/simple.cpp, try to figure out which ones will be considered +potentially convertible. diff --git a/docs/PTHInternals.html b/docs/PTHInternals.html deleted file mode 100644 index b15f681606..0000000000 --- a/docs/PTHInternals.html +++ /dev/null @@ -1,179 +0,0 @@ - - - - Pretokenized Headers (PTH) - - - - - - - - -
- -

Pretokenized Headers (PTH)

- -

This document first describes the low-level -interface for using PTH and then briefly elaborates on its design and -implementation. If you are interested in the end-user view, please see the -User's Manual.

- - -

Using Pretokenized Headers with clang (Low-level Interface)

- -

The Clang compiler frontend, clang -cc1, supports three command line -options for generating and using PTH files.

- -

To generate PTH files using clang -cc1, use the option --emit-pth: - -

 $ clang -cc1 test.h -emit-pth -o test.h.pth 
- -

This option is transparently used by clang when generating PTH -files. Similarly, PTH files can be used as prefix headers using the --include-pth option:

- -
-  $ clang -cc1 -include-pth test.h.pth test.c -o test.s
-
- -

Alternatively, Clang's PTH files can be used as a raw "token-cache" -(or "content" cache) of the source included by the original header -file. This means that the contents of the PTH file are searched as substitutes -for any source files that are used by clang -cc1 to process a -source file. This is done by specifying the -token-cache -option:

- -
-  $ cat test.h
-  #include <stdio.h>
-  $ clang -cc1 -emit-pth test.h -o test.h.pth
-  $ cat test.c
-  #include "test.h"
-  $ clang -cc1 test.c -o test -token-cache test.h.pth
-
- -

In this example the contents of stdio.h (and the files it includes) -will be retrieved from test.h.pth, as the PTH file is being used in -this case as a raw cache of the contents of test.h. This is a low-level -interface used to both implement the high-level PTH interface as well as to -provide alternative means to use PTH-style caching.

- -

PTH Design and Implementation

- -

Unlike GCC's precompiled headers, which cache the full ASTs and preprocessor -state of a header file, Clang's pretokenized header files mainly cache the raw -lexer tokens that are needed to segment the stream of characters in a -source file into keywords, identifiers, and operators. Consequently, PTH mainly serves -to directly speed up the lexing and preprocessing of a source file, while -parsing and type-checking must be completely redone every time a PTH file is -used.

- -

Basic Design Tradeoffs

- -

In the long term there are plans to provide an alternate PCH implementation -for Clang that also caches the work for parsing and type checking the contents -of header files. The current implementation of PCH in Clang as pretokenized -header files was motivated by the following factors:

- -

    - -
  • Language independence: PTH files work with any language that -Clang's lexer can handle, including C, Objective-C, and (in the early stages) -C++. This means development on language features at the parsing level or above -(which is basically almost all interesting pieces) does not require PTH to be -modified.

  • - -
  • Simple design: Relatively speaking, PTH has a simple design and -implementation, making it easy to test. Further, because the machinery for PTH -resides at the lower-levels of the Clang library stack it is fairly -straightforward to profile and optimize.
  • -
- -

Further, compared to GCC's PCH implementation (which is the dominant -precompiled header file implementation that Clang can be directly compared -against) the PTH design in Clang yields several attractive features:

- -
    - -
  • Architecture independence: In contrast to GCC's PCH files (and -those of several other compilers), Clang's PTH files are architecture -independent, requiring only a single PTH file when building a program for -multiple architectures.

    - -

    For example, on Mac OS X one may wish to -compile a "universal binary" that runs on PowerPC, 32-bit Intel -(i386), and 64-bit Intel architectures. In contrast, GCC requires a PCH file for -each architecture, as the definitions of types in the AST are -architecture-specific. Since a Clang PTH file essentially represents a lexical -cache of header files, a single PTH file can be safely used when compiling for -multiple architectures. This can also reduce compile times because only a single -PTH file needs to be generated during a build instead of several.

  • - -
  • Reduced memory pressure: Similar to GCC, -Clang reads PTH files via the use of memory mapping (i.e., mmap). -Clang, however, memory maps PTH files as read-only, meaning that multiple -invocations of clang -cc1 can share the same pages in memory from a -memory-mapped PTH file. In comparison, GCC also memory maps its PCH files but -also modifies those pages in memory, incurring the copy-on-write costs. The -read-only nature of PTH can greatly reduce memory pressure for builds involving -multiple cores, thus improving overall scalability.

  • - -
  • Fast generation: PTH files can be generated in a small fraction -of the time needed to generate GCC's PCH files. Since PTH/PCH generation is a -serial operation that typically blocks progress during a build, faster -generation time leads to improved processor utilization with parallel builds on -multicore machines.

  • - -
- -

Despite these strengths, PTH's simple design suffers some algorithmic -handicaps compared to other PCH strategies such as those used by GCC. While PTH -can greatly speed up the processing time of a header file, the amount of work -required to process a header file is still roughly linear in the size of the -header file. In contrast, the amount of work done by GCC to process a -precompiled header is (theoretically) constant (the ASTs for the header are -literally memory mapped into the compiler). This means that the pieces of -the header file that are referenced by the source file including the header are -the only ones the compiler needs to process during actual compilation. While -GCC's particular implementation of PCH mitigates some of these algorithmic -strengths via the use of copy-on-write pages, the approach itself can -fundamentally dominate at an algorithmic level, especially when one considers -header files of arbitrary size.

- -

There are plans to potentially implement a complementary PCH implementation -for Clang based on the lazy deserialization of ASTs. This approach would -theoretically have the same constant-time algorithmic advantages just mentioned -but would also retain some of the strengths of PTH such as reduced memory -pressure (ideal for multi-core builds).

- -

Internal PTH Optimizations

- -

While the main optimization employed by PTH is to reduce lexing time of -header files by caching pre-lexed tokens, PTH also employs several other -optimizations to speed up the processing of header files:

- -
    - -
  • stat caching: PTH files cache information obtained via -calls to stat that clang -cc1 uses to resolve which files are -included by #include directives. This greatly reduces the overhead -involved in context-switching to the kernel to resolve included files.

  • - -
  • Fast skipping of #ifdef...#endif chains: -PTH files record the basic structure of nested preprocessor blocks. When the -condition of the preprocessor block is false, all of its tokens are immediately -skipped instead of requiring them to be handled by Clang's -preprocessor.

  • - -
- -
- - diff --git a/docs/PTHInternals.rst b/docs/PTHInternals.rst new file mode 100644 index 0000000000..d37b76736b --- /dev/null +++ b/docs/PTHInternals.rst @@ -0,0 +1,164 @@ +========================== +Pretokenized Headers (PTH) +========================== + +This document first describes the low-level interface for using PTH and +then briefly elaborates on its design and implementation. If you are +interested in the end-user view, please see the `User's +Manual `_. + +Using Pretokenized Headers with ``clang`` (Low-level Interface) +=============================================================== + +The Clang compiler frontend, ``clang -cc1``, supports three command line +options for generating and using PTH files. + +To generate PTH files using ``clang -cc1``, use the option +``-emit-pth``: + +:: + + $ clang -cc1 test.h -emit-pth -o test.h.pth + +This option is transparently used by ``clang`` when generating PTH +files. Similarly, PTH files can be used as prefix headers using the +``-include-pth`` option: + +:: + + $ clang -cc1 -include-pth test.h.pth test.c -o test.s + +Alternatively, Clang's PTH files can be used as a raw "token-cache" (or +"content" cache) of the source included by the original header file. +This means that the contents of the PTH file are searched as substitutes +for *any* source files that are used by ``clang -cc1`` to process a +source file. This is done by specifying the ``-token-cache`` option: + +:: + + $ cat test.h + #include + $ clang -cc1 -emit-pth test.h -o test.h.pth + $ cat test.c + #include "test.h" + $ clang -cc1 test.c -o test -token-cache test.h.pth + +In this example the contents of ``stdio.h`` (and the files it includes) +will be retrieved from ``test.h.pth``, as the PTH file is being used in +this case as a raw cache of the contents of ``test.h``. This is a +low-level interface used to both implement the high-level PTH interface +as well as to provide alternative means to use PTH-style caching. 
+ +PTH Design and Implementation +============================= + +Unlike GCC's precompiled headers, which cache the full ASTs and +preprocessor state of a header file, Clang's pretokenized header files +mainly cache the raw lexer *tokens* that are needed to segment the +stream of characters in a source file into keywords, identifiers, and +operators. Consequently, PTH mainly serves to directly speed up the +lexing and preprocessing of a source file, while parsing and +type-checking must be completely redone every time a PTH file is used. + +Basic Design Tradeoffs +~~~~~~~~~~~~~~~~~~~~~~ + +In the long term there are plans to provide an alternate PCH +implementation for Clang that also caches the work for parsing and type +checking the contents of header files. The current implementation of PCH +in Clang as pretokenized header files was motivated by the following +factors: + +**Language independence** + PTH files work with any language that + Clang's lexer can handle, including C, Objective-C, and (in the early + stages) C++. This means development on language features at the + parsing level or above (which is basically almost all interesting + pieces) does not require PTH to be modified. + +**Simple design** + Relatively speaking, PTH has a simple design and + implementation, making it easy to test. Further, because the + machinery for PTH resides at the lower-levels of the Clang library + stack it is fairly straightforward to profile and optimize. + +Further, compared to GCC's PCH implementation (which is the dominant +precompiled header file implementation that Clang can be directly +compared against) the PTH design in Clang yields several attractive +features: + +**Architecture independence** + In contrast to GCC's PCH files (and + those of several other compilers), Clang's PTH files are architecture + independent, requiring only a single PTH file when building a + program for multiple architectures. 
+ + For example, on Mac OS X one may wish to compile a "universal binary" + that runs on PowerPC, 32-bit Intel (i386), and 64-bit Intel + architectures. In contrast, GCC requires a PCH file for each + architecture, as the definitions of types in the AST are + architecture-specific. Since a Clang PTH file essentially represents + a lexical cache of header files, a single PTH file can be safely used + when compiling for multiple architectures. This can also reduce + compile times because only a single PTH file needs to be generated + during a build instead of several. + +**Reduced memory pressure** + Similar to GCC, Clang reads PTH files + via the use of memory mapping (i.e., ``mmap``). Clang, however, + memory maps PTH files as read-only, meaning that multiple invocations + of ``clang -cc1`` can share the same pages in memory from a + memory-mapped PTH file. In comparison, GCC also memory maps its PCH + files but also modifies those pages in memory, incurring the + copy-on-write costs. The read-only nature of PTH can greatly reduce + memory pressure for builds involving multiple cores, thus improving + overall scalability. + +**Fast generation** + PTH files can be generated in a small fraction + of the time needed to generate GCC's PCH files. Since PTH/PCH + generation is a serial operation that typically blocks progress + during a build, faster generation time leads to improved processor + utilization with parallel builds on multicore machines. + +Despite these strengths, PTH's simple design suffers some algorithmic +handicaps compared to other PCH strategies such as those used by GCC. +While PTH can greatly speed up the processing time of a header file, the +amount of work required to process a header file is still roughly linear +in the size of the header file. In contrast, the amount of work done by +GCC to process a precompiled header is (theoretically) constant (the +ASTs for the header are literally memory mapped into the compiler). 
This +means that the pieces of the header file that are referenced by the +source file including the header are the only ones the compiler needs to +process during actual compilation. While GCC's particular implementation +of PCH mitigates some of these algorithmic strengths via the use of +copy-on-write pages, the approach itself can fundamentally dominate at +an algorithmic level, especially when one considers header files of +arbitrary size. + +There are plans to potentially implement a complementary PCH +implementation for Clang based on the lazy deserialization of ASTs. This +approach would theoretically have the same constant-time algorithmic +advantages just mentioned but would also retain some of the strengths of +PTH such as reduced memory pressure (ideal for multi-core builds). + +Internal PTH Optimizations +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +While the main optimization employed by PTH is to reduce lexing time of +header files by caching pre-lexed tokens, PTH also employs several other +optimizations to speed up the processing of header files: + +- ``stat`` caching: PTH files cache information obtained via calls to + ``stat`` that ``clang -cc1`` uses to resolve which files are included + by ``#include`` directives. This greatly reduces the overhead + involved in context-switching to the kernel to resolve included + files. + +- Fast skipping of ``#ifdef``... ``#endif`` chains: PTH files + record the basic structure of nested preprocessor blocks. When the + condition of the preprocessor block is false, all of its tokens are + immediately skipped instead of requiring them to be handled by + Clang's preprocessor. + + diff --git a/docs/RAVFrontendAction.html b/docs/RAVFrontendAction.html deleted file mode 100644 index b30cd573d9..0000000000 --- a/docs/RAVFrontendAction.html +++ /dev/null @@ -1,224 +0,0 @@ - - - -How to write RecursiveASTVisitor based ASTFrontendActions. - - - - - - - -
- -

How to write RecursiveASTVisitor based ASTFrontendActions.

- - -

Introduction

- - -In this tutorial you will learn how to create a FrontendAction that uses -a RecursiveASTVisitor to find CXXRecordDecl AST nodes with a specified name. - - -

Creating a FrontendAction

- - -

When writing a clang based tool like a Clang Plugin or a standalone tool -based on LibTooling, the common entry point is the FrontendAction. -FrontendAction is an interface that allows execution of user specific actions -as part of the compilation. To run tools over the AST clang provides the -convenience interface ASTFrontendAction, which takes care of executing the -action. The only part left is to implement the CreateASTConsumer method that -returns an ASTConsumer per translation unit.

-
-  class FindNamedClassAction : public clang::ASTFrontendAction {
-  public:
-    virtual clang::ASTConsumer *CreateASTConsumer(
-      clang::CompilerInstance &Compiler, llvm::StringRef InFile) {
-      return new FindNamedClassConsumer;
-    }
-  };
-
- - -

Creating an ASTConsumer

- - -

ASTConsumer is an interface used to write generic actions on an AST, -regardless of how the AST was produced. ASTConsumer provides many different -entry points, but for our use case the only one needed is HandleTranslationUnit, -which is called with the ASTContext for the translation unit.

-
-  class FindNamedClassConsumer : public clang::ASTConsumer {
-  public:
-    virtual void HandleTranslationUnit(clang::ASTContext &Context) {
-      // Traversing the translation unit decl via a RecursiveASTVisitor
-      // will visit all nodes in the AST.
-      Visitor.TraverseDecl(Context.getTranslationUnitDecl());
-    }
-  private:
-    // A RecursiveASTVisitor implementation.
-    FindNamedClassVisitor Visitor;
-  };
-
- - -

Using the RecursiveASTVisitor

- - -

Now that everything is hooked up, the next step is to implement a -RecursiveASTVisitor to extract the relevant information from the AST.

-

The RecursiveASTVisitor provides hooks of the form -bool VisitNodeType(NodeType *) for most AST nodes; the exceptions are TypeLoc -nodes, which are passed by-value. We only need to implement the methods for the -relevant node types. -

-

Let's start by writing a RecursiveASTVisitor that visits all CXXRecordDecl's. -

-  class FindNamedClassVisitor
-    : public RecursiveASTVisitor<FindNamedClassVisitor> {
-  public:
-    bool VisitCXXRecordDecl(CXXRecordDecl *Declaration) {
-      // For debugging, dumping the AST nodes will show which nodes are already
-      // being visited.
-      Declaration->dump();
-
-      // The return value indicates whether we want the visitation to proceed.
-      // Return false to stop the traversal of the AST.
-      return true;
-    }
-  };
-
-

-

In the methods of our RecursiveASTVisitor we can now use the full power of -the Clang AST to drill through to the parts that are interesting for us. For -example, to find all class declarations with a certain name, we can check for a -specific qualified name: -

-  bool VisitCXXRecordDecl(CXXRecordDecl *Declaration) {
-    if (Declaration->getQualifiedNameAsString() == "n::m::C")
-      Declaration->dump();
-    return true;
-  }
-
-

- - -

Accessing the SourceManager and ASTContext

- - -

Some of the information about the AST, like source locations and global -identifier information, is not stored in the AST nodes themselves, but in -the ASTContext and its associated source manager. To retrieve it we need to -hand the ASTContext into our RecursiveASTVisitor implementation.

-

The ASTContext is available from the CompilerInstance during the call -to CreateASTConsumer. We can thus extract it there and hand it into our -freshly created FindNamedClassConsumer:

-
-  virtual clang::ASTConsumer *CreateASTConsumer(
-    clang::CompilerInstance &Compiler, llvm::StringRef InFile) {
-    return new FindNamedClassConsumer(&Compiler.getASTContext());
-  }
-
- -

Now that the ASTContext is available in the RecursiveASTVisitor, we can do -more interesting things with AST nodes, like looking up their source -locations:

-
-  bool VisitCXXRecordDecl(CXXRecordDecl *Declaration) {
-    if (Declaration->getQualifiedNameAsString() == "n::m::C") {
-      // getFullLoc uses the ASTContext's SourceManager to resolve the source
-      // location and break it up into its line and column parts.
-      FullSourceLoc FullLocation = Context->getFullLoc(Declaration->getLocStart());
-      if (FullLocation.isValid())
-        llvm::outs() << "Found declaration at "
-                     << FullLocation.getSpellingLineNumber() << ":"
-                     << FullLocation.getSpellingColumnNumber() << "\n";
-    }
-    return true;
-  }
-
- - -

Putting it all together

- - -

Now we can combine all of the above into a small example program:

-
-  #include "clang/AST/ASTConsumer.h"
-  #include "clang/AST/RecursiveASTVisitor.h"
-  #include "clang/Frontend/CompilerInstance.h"
-  #include "clang/Frontend/FrontendAction.h"
-  #include "clang/Tooling/Tooling.h"
-
-  using namespace clang;
-
-  class FindNamedClassVisitor
-    : public RecursiveASTVisitor<FindNamedClassVisitor> {
-  public:
-    explicit FindNamedClassVisitor(ASTContext *Context)
-      : Context(Context) {}
-
-    bool VisitCXXRecordDecl(CXXRecordDecl *Declaration) {
-      if (Declaration->getQualifiedNameAsString() == "n::m::C") {
-        FullSourceLoc FullLocation = Context->getFullLoc(Declaration->getLocStart());
-        if (FullLocation.isValid())
-          llvm::outs() << "Found declaration at "
-                       << FullLocation.getSpellingLineNumber() << ":"
-                       << FullLocation.getSpellingColumnNumber() << "\n";
-      }
-      return true;
-    }
-
-  private:
-    ASTContext *Context;
-  };
-
-  class FindNamedClassConsumer : public clang::ASTConsumer {
-  public:
-    explicit FindNamedClassConsumer(ASTContext *Context)
-      : Visitor(Context) {}
-
-    virtual void HandleTranslationUnit(clang::ASTContext &Context) {
-      Visitor.TraverseDecl(Context.getTranslationUnitDecl());
-    }
-  private:
-    FindNamedClassVisitor Visitor;
-  };
-
-  class FindNamedClassAction : public clang::ASTFrontendAction {
-  public:
-    virtual clang::ASTConsumer *CreateASTConsumer(
-      clang::CompilerInstance &Compiler, llvm::StringRef InFile) {
-      return new FindNamedClassConsumer(&Compiler.getASTContext());
-    }
-  };
-
-  int main(int argc, char **argv) {
-    if (argc > 1) {
-      clang::tooling::runToolOnCode(new FindNamedClassAction, argv[1]);
-    }
-  }
-
- -

We store this into a file called FindClassDecls.cpp and create the following -CMakeLists.txt to link it:

-
-set(LLVM_USED_LIBS clangTooling)
-
-add_clang_executable(find-class-decls FindClassDecls.cpp)
-
- -

When running this tool over a small code snippet it will output all -declarations of a class n::m::C it found:

-
-  $ ./bin/find-class-decls "namespace n { namespace m { class C {}; } }"
-  Found declaration at 1:29
-
- -
- - - diff --git a/docs/RAVFrontendAction.rst b/docs/RAVFrontendAction.rst new file mode 100644 index 0000000000..2f60ce9e82 --- /dev/null +++ b/docs/RAVFrontendAction.rst @@ -0,0 +1,216 @@ +========================================================== +How to write RecursiveASTVisitor based ASTFrontendActions. +========================================================== + +Introduction +============ + +In this tutorial you will learn how to create a FrontendAction that uses +a RecursiveASTVisitor to find CXXRecordDecl AST nodes with a specified +name. + +Creating a FrontendAction +========================= + +When writing a clang based tool like a Clang Plugin or a standalone tool +based on LibTooling, the common entry point is the FrontendAction. +FrontendAction is an interface that allows execution of user specific +actions as part of the compilation. To run tools over the AST clang +provides the convenience interface ASTFrontendAction, which takes care +of executing the action. The only part left is to implement the +CreateASTConsumer method that returns an ASTConsumer per translation +unit. + +:: + + class FindNamedClassAction : public clang::ASTFrontendAction { + public: + virtual clang::ASTConsumer *CreateASTConsumer( + clang::CompilerInstance &Compiler, llvm::StringRef InFile) { + return new FindNamedClassConsumer; + } + }; + +Creating an ASTConsumer +======================= + +ASTConsumer is an interface used to write generic actions on an AST, +regardless of how the AST was produced. ASTConsumer provides many +different entry points, but for our use case the only one needed is +HandleTranslationUnit, which is called with the ASTContext for the +translation unit. + +:: + + class FindNamedClassConsumer : public clang::ASTConsumer { + public: + virtual void HandleTranslationUnit(clang::ASTContext &Context) { + // Traversing the translation unit decl via a RecursiveASTVisitor + // will visit all nodes in the AST. 
+ Visitor.TraverseDecl(Context.getTranslationUnitDecl()); + } + private: + // A RecursiveASTVisitor implementation. + FindNamedClassVisitor Visitor; + }; + +Using the RecursiveASTVisitor +============================= + +Now that everything is hooked up, the next step is to implement a +RecursiveASTVisitor to extract the relevant information from the AST. + +The RecursiveASTVisitor provides hooks of the form bool +VisitNodeType(NodeType \*) for most AST nodes; the exceptions are TypeLoc +nodes, which are passed by-value. We only need to implement the methods +for the relevant node types. + +Let's start by writing a RecursiveASTVisitor that visits all +CXXRecordDecl's. + +:: + + class FindNamedClassVisitor + : public RecursiveASTVisitor<FindNamedClassVisitor> { + public: + bool VisitCXXRecordDecl(CXXRecordDecl *Declaration) { + // For debugging, dumping the AST nodes will show which nodes are already + // being visited. + Declaration->dump(); + + // The return value indicates whether we want the visitation to proceed. + // Return false to stop the traversal of the AST. + return true; + } + }; + +In the methods of our RecursiveASTVisitor we can now use the full power +of the Clang AST to drill through to the parts that are interesting for +us. For example, to find all class declarations with a certain name, we +can check for a specific qualified name: + +:: + + bool VisitCXXRecordDecl(CXXRecordDecl *Declaration) { + if (Declaration->getQualifiedNameAsString() == "n::m::C") + Declaration->dump(); + return true; + } + +Accessing the SourceManager and ASTContext +========================================== + +Some of the information about the AST, like source locations and global +identifier information, is not stored in the AST nodes themselves, but +in the ASTContext and its associated source manager. To retrieve it we +need to hand the ASTContext into our RecursiveASTVisitor implementation. + +The ASTContext is available from the CompilerInstance during the call to +CreateASTConsumer. 
We can thus extract it there and hand it into our +freshly created FindNamedClassConsumer: + +:: + + virtual clang::ASTConsumer *CreateASTConsumer( + clang::CompilerInstance &Compiler, llvm::StringRef InFile) { + return new FindNamedClassConsumer(&Compiler.getASTContext()); + } + +Now that the ASTContext is available in the RecursiveASTVisitor, we can +do more interesting things with AST nodes, like looking up their source +locations: + +:: + + bool VisitCXXRecordDecl(CXXRecordDecl *Declaration) { + if (Declaration->getQualifiedNameAsString() == "n::m::C") { + // getFullLoc uses the ASTContext's SourceManager to resolve the source + // location and break it up into its line and column parts. + FullSourceLoc FullLocation = Context->getFullLoc(Declaration->getLocStart()); + if (FullLocation.isValid()) + llvm::outs() << "Found declaration at " + << FullLocation.getSpellingLineNumber() << ":" + << FullLocation.getSpellingColumnNumber() << "\n"; + } + return true; + } + +Putting it all together +======================= + +Now we can combine all of the above into a small example program: + +:: + + #include "clang/AST/ASTConsumer.h" + #include "clang/AST/RecursiveASTVisitor.h" + #include "clang/Frontend/CompilerInstance.h" + #include "clang/Frontend/FrontendAction.h" + #include "clang/Tooling/Tooling.h" + + using namespace clang; + + class FindNamedClassVisitor + : public RecursiveASTVisitor<FindNamedClassVisitor> { + public: + explicit FindNamedClassVisitor(ASTContext *Context) + : Context(Context) {} + + bool VisitCXXRecordDecl(CXXRecordDecl *Declaration) { + if (Declaration->getQualifiedNameAsString() == "n::m::C") { + FullSourceLoc FullLocation = Context->getFullLoc(Declaration->getLocStart()); + if (FullLocation.isValid()) + llvm::outs() << "Found declaration at " + << FullLocation.getSpellingLineNumber() << ":" + << FullLocation.getSpellingColumnNumber() << "\n"; + } + return true; + } + + private: + ASTContext *Context; + }; + + class FindNamedClassConsumer : public clang::ASTConsumer { 
+ public: + explicit FindNamedClassConsumer(ASTContext *Context) + : Visitor(Context) {} + + virtual void HandleTranslationUnit(clang::ASTContext &Context) { + Visitor.TraverseDecl(Context.getTranslationUnitDecl()); + } + private: + FindNamedClassVisitor Visitor; + }; + + class FindNamedClassAction : public clang::ASTFrontendAction { + public: + virtual clang::ASTConsumer *CreateASTConsumer( + clang::CompilerInstance &Compiler, llvm::StringRef InFile) { + return new FindNamedClassConsumer(&Compiler.getASTContext()); + } + }; + + int main(int argc, char **argv) { + if (argc > 1) { + clang::tooling::runToolOnCode(new FindNamedClassAction, argv[1]); + } + } + +We store this into a file called FindClassDecls.cpp and create the +following CMakeLists.txt to link it: + +:: + + set(LLVM_USED_LIBS clangTooling) + + add_clang_executable(find-class-decls FindClassDecls.cpp) + +When running this tool over a small code snippet it will output all +declarations of a class n::m::C it found: + +:: + + $ ./bin/find-class-decls "namespace n { namespace m { class C {}; } }" + Found declaration at 1:29 + diff --git a/docs/UsersManual.html b/docs/UsersManual.html deleted file mode 100644 index 4adf0b4dad..0000000000 --- a/docs/UsersManual.html +++ /dev/null @@ -1,1338 +0,0 @@ - - - -Clang Compiler User's Manual - - - - - - - - -
- -

Clang Compiler User's Manual

- - - - - -

Introduction

- - -

The Clang Compiler is an open-source compiler for the C family of programming -languages, aiming to be the best in class implementation of these languages. -Clang builds on the LLVM optimizer and code generator, allowing it to provide -high-quality optimization and code generation support for many targets. For -more general information, please see the Clang -Web Site or the LLVM Web Site.

- -

This document describes important notes about using Clang as a compiler for -an end-user, documenting the supported features, command line options, etc. If -you are interested in using Clang to build a tool that processes code, please -see the Clang Internals Manual. If you are -interested in the Clang -Static Analyzer, please see its web page.

- -

Clang is designed to support the C family of programming languages, which -includes C, Objective-C, C++, and Objective-C++ as well as many -dialects of those. For language-specific information, please see the -corresponding language specific section:

- - - -

In addition to these base languages and their dialects, Clang supports a -broad variety of language extensions, which are documented in the corresponding -language section. These extensions are provided to be compatible with the GCC, -Microsoft, and other popular compilers as well as to improve functionality -through Clang-specific features. The Clang driver and language features are -intentionally designed to be as compatible with the GNU GCC compiler as -reasonably possible, easing migration from GCC to Clang. In most cases, code -"just works".

- -

In addition to language specific features, Clang has a variety of features -that depend on what CPU architecture or operating system is being compiled for. -Please see the Target-Specific Features and -Limitations section for more details.

- -

The rest of the introduction introduces some basic compiler terminology that is used throughout this manual -and contains a basic introduction to using Clang -as a command line compiler.

- - -

Terminology

- - -

Front end, parser, backend, preprocessor, undefined behavior, diagnostic, - optimizer

- - -

Basic Usage

- - -

Intro to how to use a C compiler for newbies.

-

-compile + link - -compile then link - -debug info - -enabling optimizations - -picking a language to use, defaults to C99 by default. Autosenses based on -extension. - -using a makefile -

- - - -

Command Line Options

- - -

-This section is generally an index into other sections. It does not go into -depth on the ones that are covered by other sections. However, the first part -introduces the language selection and other high level options like -c, -g, etc. -

- - - -

Options to Control Error and Warning Messages

- - -

-Werror: Turn warnings into errors.

-

-Werror=foo: Turn warning "foo" into an error.

-

-Wno-error=foo: Turn warning "foo" into a warning even if -Werror is - specified.

-

-Wfoo: Enable warning "foo".

-

-Wno-foo: Disable warning "foo".

-

-w: Disable all warnings.

-

-Weverything: Enable all warnings.

-

-pedantic: Warn on language extensions.

-

-pedantic-errors: Error on language extensions.

-

-Wsystem-headers: Enable warnings from system headers.

- -

-ferror-limit=123: Stop emitting diagnostics after 123 errors have - been produced. The default is 20, and the error limit can be disabled with - -ferror-limit=0.

- -

-ftemplate-backtrace-limit=123: Only emit up to 123 template instantiation notes within the template instantiation backtrace for a single warning or error. The default is 10, and the limit can be disabled with -ftemplate-backtrace-limit=0.

- - -

Formatting of Diagnostics

- - -

Clang aims to produce beautiful diagnostics by default, particularly for new -users that first come to Clang. However, different people have different -preferences, and sometimes Clang is driven by another program that wants to -parse simple and consistent output, not a person. For these cases, Clang -provides a wide range of options to control the exact output format of the -diagnostics that it generates.

- -
- - -
-f[no-]show-column: Print column number in -diagnostic.
-
This option, which defaults to on, controls whether or not Clang prints the -column number of a diagnostic. For example, when this is enabled, Clang will -print something like: - -
-  test.c:28:8: warning: extra tokens at end of #endif directive [-Wextra-tokens]
-  #endif bad
-         ^
-         //
-
- -

When this is disabled, Clang will print "test.c:28: warning..." with no -column number.

- -

The printed column numbers count bytes from the beginning of the line; take -care if your source contains multibyte characters.

-
- - -
-f[no-]show-source-location: Print -source file/line/column information in diagnostic.
-
This option, which defaults to on, controls whether or not Clang prints the -filename, line number and column number of a diagnostic. For example, -when this is enabled, Clang will print something like: - -
-  test.c:28:8: warning: extra tokens at end of #endif directive [-Wextra-tokens]
-  #endif bad
-         ^
-         //
-
- -

When this is disabled, Clang will not print the "test.c:28:8: " part.

-
- - -
-f[no-]caret-diagnostics: Print source -line and ranges from source code in diagnostic.
-
This option, which defaults to on, controls whether or not Clang prints the -source line, source ranges, and caret when emitting a diagnostic. For example, -when this is enabled, Clang will print something like: - -
-  test.c:28:8: warning: extra tokens at end of #endif directive [-Wextra-tokens]
-  #endif bad
-         ^
-         //
-
-
- -
-f[no-]color-diagnostics:
-
This option, which defaults to on when a color-capable terminal is - detected, controls whether or not Clang prints diagnostics in color. - When this option is enabled, Clang will use colors to highlight - specific parts of the diagnostic, e.g., -
-  test.c:28:8: warning: extra tokens at end of #endif directive [-Wextra-tokens]
-  #endif bad
-         ^
-         //
-
- -

When this is disabled, Clang will just print:

- -
-  test.c:2:8: warning: extra tokens at end of #endif directive [-Wextra-tokens]
-  #endif bad
-         ^
-         //
-
-
- -
-fdiagnostics-format=clang/msvc/vi: -Changes diagnostic output format to better match IDEs and command line tools.
-
This option controls the output format of the filename, line number, and column printed in diagnostic messages. The options, and their effect on formatting a simple conversion diagnostic, follow: -
-
clang (default)
-
-
t.c:3:11: warning: conversion specifies type 'char *' but the argument has type 'int'
-
- -
msvc
-
-
t.c(3,11) : warning: conversion specifies type 'char *' but the argument has type 'int'
-
- -
vi
-
-
t.c +3:11: warning: conversion specifies type 'char *' but the argument has type 'int'
-
-
-
- - -
-f[no-]diagnostics-show-name: -Enable the display of the diagnostic name.
-
This option, which defaults to off, controls whether or not -Clang prints the associated name.

- -
-f[no-]diagnostics-show-option: -Enable [-Woption] information in diagnostic line.
-
This option, which defaults to on, -controls whether or not Clang prints the associated warning group option name when outputting -a warning diagnostic. For example, in this output: - -
-  test.c:28:8: warning: extra tokens at end of #endif directive [-Wextra-tokens]
-  #endif bad
-         ^
-         //
-
- -

Passing -fno-diagnostics-show-option will prevent Clang from printing -the [-Wextra-tokens] information in the -diagnostic. This information tells you the flag needed to enable or disable the -diagnostic, either from the command line or through #pragma GCC diagnostic.

- - -
-fdiagnostics-show-category=none/id/name: -Enable printing category information in diagnostic line.
-
This option, which defaults to "none", -controls whether or not Clang prints the category associated with a diagnostic -when emitting it. Each diagnostic may or may not have an associated category; -if it has one, it is listed in the diagnostic categorization field of the -diagnostic line (in the []'s). -

For example, a format string warning will produce these three renditions -based on the setting of this option:

- -
-  t.c:3:11: warning: conversion specifies type 'char *' but the argument has type 'int' [-Wformat]
-  t.c:3:11: warning: conversion specifies type 'char *' but the argument has type 'int' [-Wformat,1]
-  t.c:3:11: warning: conversion specifies type 'char *' but the argument has type 'int' [-Wformat,Format String]
-
- -

This category can be used by clients that want to group diagnostics by -category, so it should be a high level category. We want dozens of these, not -hundreds or thousands of them.

-
- - - - -
-f[no-]diagnostics-fixit-info: -Enable "FixIt" information in the diagnostics output.
-
This option, which defaults to on, controls whether or not Clang prints the -information on how to fix a specific diagnostic underneath it when it knows. -For example, in this output: - -
-  test.c:28:8: warning: extra tokens at end of #endif directive [-Wextra-tokens]
-  #endif bad
-         ^
-         //
-
- -

Passing -fno-diagnostics-fixit-info will prevent Clang from printing -the "//" line at the end of the message. This information is useful for users -who may not understand what is wrong, but can be confusing for machine -parsing.

-
- - -
--f[no-]diagnostics-print-source-range-info: -Print machine parsable information about source ranges.
-
This option, which defaults to off, controls whether or not Clang prints -information about source ranges in a machine parsable format after the -file/line/column number information. The information is a simple sequence of -brace enclosed ranges, where each range lists the start and end line/column -locations. For example, in this output: - -
-exprs.c:47:15:{47:8-47:14}{47:17-47:24}: error: invalid operands to binary expression ('int *' and '_Complex float')
-   P = (P-42) + Gamma*4;
-       ~~~~~~ ^ ~~~~~~~
-
- -

The {}'s are generated by -fdiagnostics-print-source-range-info.

- -

The printed column numbers count bytes from the beginning of the line; take -care if your source contains multibyte characters.

-
- - -
--fdiagnostics-parseable-fixits: -Print Fix-Its in a machine parseable form.
-

This option makes Clang print available Fix-Its in a machine parseable format at the end of diagnostics. The following example illustrates the format:

- -
- fix-it:"t.cpp":{7:25-7:29}:"Gamma"
-
- -

The range printed is a half-open range, so in this example the characters at -column 25 up to but not including column 29 on line 7 in t.cpp should be -replaced with the string "Gamma". Either the range or the replacement -string may be empty (representing strict insertions and strict erasures, -respectively). Both the file name and the insertion string escape backslash (as -"\\"), tabs (as "\t"), newlines (as "\n"), double -quotes (as "\"") and non-printable characters (as octal -"\xxx").

- -

The printed column numbers count bytes from the beginning of the line; take -care if your source contains multibyte characters.

-
- -
--fno-elide-type: -Turns off elision in template type printing.
-

The default for template type printing is to elide as many template -arguments as possible, removing those which are the same in both template types, -leaving only the differences. Adding this flag will print all the template -arguments. If supported by the terminal, highlighting will still appear on -differing arguments.

- -Default: -
-t.cc:4:5: note: candidate function not viable: no known conversion from 'vector<map<[...], map<float, [...]>>>' to 'vector<map<[...], map<double, [...]>>>' for 1st argument;
-
--fno-elide-type: -
-t.cc:4:5: note: candidate function not viable: no known conversion from 'vector<map<int, map<float, int>>>' to 'vector<map<int, map<double, int>>>' for 1st argument;
-
-
- -
--fdiagnostics-show-template-tree: -Template type diffing prints a text tree.
-

For diffing large templated types, this option will cause Clang to -display the templates as an indented text tree, one argument per line, with -differences marked inline. This is compatible with -fno-elide-type.

- -Default: -
-t.cc:4:5: note: candidate function not viable: no known conversion from 'vector<map<[...], map<float, [...]>>>' to 'vector<map<[...], map<double, [...]>>>' for 1st argument;
-
--fdiagnostics-show-template-tree -
-t.cc:4:5: note: candidate function not viable: no known conversion for 1st argument;
-  vector<
-    map<
-      [...],
-      map<
-        [float != double],
-        [...]>>>
-
-
- -
- - - - -

Individual Warning Groups

- - -

TODO: Generate this from tblgen. Define one anchor per warning group.

- - -
- - - -
-Wextra-tokens: Warn about excess tokens at - the end of a preprocessor directive.
-
This option, which defaults to on, enables warnings about extra tokens at -the end of preprocessor directives. For example: - -
-  test.c:28:8: warning: extra tokens at end of #endif directive [-Wextra-tokens]
-  #endif bad
-         ^
-
- -

These extra tokens are not strictly conforming, and are usually best handled -by commenting them out.

-
- - -
-Wambiguous-member-template: -Warn about unqualified uses of a member template whose name resolves -to another template at the location of the use.
-
This option, which defaults to on, enables a warning in the -following code: - -
-template<typename T> struct set{};
-template<typename T> struct trait { typedef const T& type; };
-struct Value {
-  template<typename T> void set(typename trait<T>::type value) {}
-};
-void foo() {
-  Value v;
-  v.set<double>(3.2);
-}
-
- -

C++ [basic.lookup.classref] requires this to be an error, but, -because it's hard to work around, Clang downgrades it to a warning as -an extension.

-
- - -
-Wbind-to-temporary-copy: Warn about -an unusable copy constructor when binding a reference to a temporary.
-
This option, which defaults to on, enables warnings about binding a -reference to a temporary when the temporary doesn't have a usable copy -constructor. For example: - -
-  struct NonCopyable {
-    NonCopyable();
-  private:
-    NonCopyable(const NonCopyable&);
-  };
-  void foo(const NonCopyable&);
-  void bar() {
-    foo(NonCopyable());  // Disallowed in C++98; allowed in C++11.
-  }
-
-
-  struct NonCopyable2 {
-    NonCopyable2();
-    NonCopyable2(NonCopyable2&);
-  };
-  void foo(const NonCopyable2&);
-  void bar() {
-    foo(NonCopyable2());  // Disallowed in C++98; allowed in C++11.
-  }
-
- -

Note that if NonCopyable2::NonCopyable2() has a default -argument whose instantiation produces a compile error, that error will -still be a hard error in C++98 mode even if this warning is turned -off.

- -
- -
- - -

Options to Control Clang Crash Diagnostics

- - -

As unbelievable as it may sound, Clang does crash from time to time. -Generally, this only occurs to those living on the -bleeding edge. Clang -goes to great lengths to assist you in filing a bug report. Specifically, Clang -generates preprocessed source file(s) and associated run script(s) upon a -crash. These files should be attached to a bug report to ease reproducibility -of the failure. Below are the command line options to control the crash -diagnostics. -

- -

-fno-crash-diagnostics: Disable auto-generation of preprocessed -source files during a clang crash.

- -

The -fno-crash-diagnostics flag can be helpful for speeding the process of -generating a delta reduced test case.

- - - -

Language and Target-Independent Features

- - - - -

Controlling Errors and Warnings

- - -

Clang provides a number of ways to control which code constructs cause it to -emit errors and warning messages, and how they are displayed to the console.

- -

Controlling How Clang Displays Diagnostics

- -

When Clang emits a diagnostic, it includes rich information in the output, -and gives you fine-grain control over which information is printed. Clang has -the ability to print this information, and these are the options that control -it:

- -
    -
  1. A file/line/column indicator that shows exactly where the diagnostic occurs - in your code [-fshow-column, -fshow-source-location].
  2. -
  3. A categorization of the diagnostic as a note, warning, error, or fatal - error.
  4. -
  5. A text string that describes what the problem is.
  6. -
  7. An option that indicates how to control the diagnostic (for diagnostics that - support it) [-fdiagnostics-show-option].
  8. -
  9. A high-level category for the - diagnostic for clients that want to group diagnostics by class (for - diagnostics that support it) [-fdiagnostics-show-category].
  10. -
  11. The line of source code that the issue occurs on, along with a caret and - ranges that indicate the important locations [-fcaret-diagnostics].
  12. -
  13. "FixIt" information, which is a concise explanation of how to fix the - problem (when Clang is certain it knows) [-fdiagnostics-fixit-info].
  14. -
  15. A machine-parsable representation of the ranges involved (off by - default) [-fdiagnostics-print-source-range-info].
  16. -
- -

For more information please see Formatting of -Diagnostics.

- - -

Diagnostic Mappings

- -

All diagnostics are mapped into one of these 5 classes:

- -
    -
  • Ignored
  • -
  • Note
  • -
  • Warning
  • -
  • Error
  • -
  • Fatal
  • -
- -

Diagnostic Categories

- -

Though not shown by default, diagnostics may each be associated with a - high-level category. This category is intended to make it possible to triage - builds that produce a large number of errors or warnings in a grouped way. -

- -

Categories are not shown by default, but they can be turned on with the --fdiagnostics-show-category option. -When set to "name", the category is printed textually in the diagnostic -output. When it is set to "id", a category number is printed. The -mapping of category names to category id's can be obtained by running 'clang - --print-diagnostic-categories'. -

- -

Controlling Diagnostics via Command Line - Flags

- -

TODO: -W flags, -pedantic, etc

- -

Controlling Diagnostics via Pragmas

- -

Clang can also control what diagnostics are enabled through the use of -pragmas in the source code. This is useful for turning off specific warnings -in a section of source code. Clang supports GCC's pragma for compatibility -with existing source code, as well as several extensions.

- -

The pragma may control any warning that can be used from the command line. -Warnings may be set to ignored, warning, error, or fatal. The following -example code will tell Clang or GCC to ignore the -Wall warnings:

- -
-#pragma GCC diagnostic ignored "-Wall"
-
- -

In addition to all of the functionality provided by GCC's pragma, Clang -also allows you to push and pop the current warning state. This is particularly -useful when writing a header file that will be compiled by other people, because -you don't know what warning flags they build with.

- -

In the below example --Wmultichar is ignored for only a single line of code, after which the -diagnostics return to whatever state had previously existed.

- -
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wmultichar"
-
-char b = 'df'; // no warning.
-
-#pragma clang diagnostic pop
-
- -

The push and pop pragmas will save and restore the full diagnostic state of -the compiler, regardless of how it was set. That means that it is possible to -use push and pop around GCC compatible diagnostics and Clang will push and pop -them appropriately, while GCC will ignore the pushes and pops as unknown -pragmas. It should be noted that while Clang supports the GCC pragma, Clang and -GCC do not support the exact same set of warnings, so even when using GCC -compatible #pragmas there is no guarantee that they will have identical behaviour -on both compilers.

- -

Controlling Diagnostics in System Headers

- -

Warnings are suppressed when they occur in system headers. By default, an -included file is treated as a system header if it is found in an include path -specified by -isystem, but this can be overridden in several ways.

- -

The system_header pragma can be used to mark the current file as -being a system header. No warnings will be produced from the location of the -pragma onwards within the same file.

- -
-char a = 'xy'; // warning
-
-#pragma clang system_header
-
-char b = 'ab'; // no warning
-
- -

The -isystem-prefix and -ino-system-prefix command-line -arguments can be used to override whether subsets of an include path are treated -as system headers. When the name in a #include directive is found -within a header search path and starts with a system prefix, the header is -treated as a system header. The last prefix on the command-line which matches -the specified header name takes precedence. For instance:

- -
-clang -Ifoo -isystem bar -isystem-prefix x/ -ino-system-prefix x/y/
-
- -

Here, #include "x/a.h" is treated as including a system header, even -if the header is found in foo, and #include "x/y/b.h" is -treated as not including a system header, even if the header is found in -bar. -

- -

A #include directive which finds a file relative to the current -directory is treated as including a system header if the including file is -treated as a system header.

- -

Enabling All Warnings

- -

In addition to the traditional -W flags, one can enable all - warnings by passing -Weverything. - This works as expected with -Werror, - and also includes the warnings from -pedantic.

- -

Note that when combined with -w (which disables all warnings), that - flag wins.

- -

Controlling Static Analyzer Diagnostics

- -

While not strictly part of the compiler, the diagnostics from Clang's static analyzer can also be influenced -by the user via changes to the source code. See the available -annotations and -the analyzer's -FAQ page for -more information. - - -

Precompiled Headers

- - -

Precompiled -headers are a general approach employed by many compilers to reduce -compilation time. The underlying motivation of the approach is that it is -common for the same (and often large) header files to be included by -multiple source files. Consequently, compile times can often be greatly improved -by caching some of the (redundant) work done by a compiler to process headers. -Precompiled header files, which represent one of many ways to implement -this optimization, are literally files that represent an on-disk cache that -contains the vital information necessary to reduce some of the work -needed to process a corresponding header file. While details of precompiled -headers vary between compilers, precompiled headers have been shown to be -highly effective at speeding up program compilation on systems with very large -system headers (e.g., Mac OS/X).

- -

Generating a PCH File

- -

To generate a PCH file using Clang, one invokes Clang with -the -x <language>-header option. This mirrors the -interface in GCC for generating PCH files:

- -
-  $ gcc -x c-header test.h -o test.h.gch
-  $ clang -x c-header test.h -o test.h.pch
-
- -

Using a PCH File

- -

A PCH file can then be used as a prefix header when a --include option is passed to clang:

- -
-  $ clang -include test.h test.c -o test
-
- -

The clang driver will first check if a PCH file for test.h -is available; if so, the contents of test.h (and the files it includes) -will be processed from the PCH file. Otherwise, Clang falls back to -directly processing the content of test.h. This mirrors the behavior of -GCC.

- -

NOTE: Clang does not automatically use PCH files -for headers that are directly included within a source file. For example:

- -
-  $ clang -x c-header test.h -o test.h.pch
-  $ cat test.c
-  #include "test.h"
-  $ clang test.c -o test
-
- -

In this example, clang will not automatically use the PCH file for -test.h since test.h was included directly in the source file -and not specified on the command line using -include.

- -

Relocatable PCH Files

-

It is sometimes necessary to build a precompiled header from headers that -are not yet in their final, installed locations. For example, one might build a -precompiled header within the build tree that is then meant to be installed -alongside the headers. Clang permits the creation of "relocatable" precompiled -headers, which are built with a given path (into the build directory) and can -later be used from an installed location.

- -

To build a relocatable precompiled header, place your headers into a -subdirectory whose structure mimics the installed location. For example, if you -want to build a precompiled header for the header mylib.h that -will be installed into /usr/include, create a subdirectory -build/usr/include and place the header mylib.h into -that subdirectory. If mylib.h depends on other headers, then -they can be stored within build/usr/include in a way that mimics -the installed location.

- -

Building a relocatable precompiled header requires two additional arguments. -First, pass the --relocatable-pch flag to indicate that the -resulting PCH file should be relocatable. Second, pass --isysroot /path/to/build, which makes all includes for your -library relative to the build directory. For example:

- -
-  # clang -x c-header --relocatable-pch -isysroot /path/to/build /path/to/build/mylib.h mylib.h.pch
-
- -

When loading the relocatable PCH file, the various headers used in the PCH -file are found from the system header root. For example, mylib.h -can be found in /usr/include/mylib.h. If the headers are installed -in some other system root, the -isysroot option can be used to provide -a different system root from which the headers will be based. For example, --isysroot /Developer/SDKs/MacOSX10.4u.sdk will look for -mylib.h in -/Developer/SDKs/MacOSX10.4u.sdk/usr/include/mylib.h.

- -

Relocatable precompiled headers are intended to be used in a limited number -of cases where the compilation environment is tightly controlled and the -precompiled header cannot be generated after headers have been installed. -Relocatable precompiled headers also have some performance impact, because -the difference in location between the header locations at PCH build time vs. -at the time of PCH use requires one of the PCH optimizations, -stat() caching, to be disabled. However, this change is only -likely to affect PCH files that reference a large number of headers.

- - -

Controlling Code Generation

- - -

Clang provides a number of ways to control code generation. The options are listed below.

- - -
-
-fsanitize=check1,check2: Turn on runtime checks -for various forms of undefined or suspicious behavior.
- -
This option controls whether Clang adds runtime checks for various forms of -undefined or suspicious behavior, and is disabled by default. If a check -fails, a diagnostic message is produced at runtime explaining the problem. The -main checks are: - -
    -
  • -fsanitize=address: - AddressSanitizer, a memory error - detector.
  • -
  • -fsanitize=address-full: - AddressSanitizer with all the experimental features listed below. -
  • -fsanitize=integer: - Enables checks for undefined or suspicious integer behavior.
  • -
  • -fsanitize=thread: - ThreadSanitizer, an experimental - data race detector. Not ready for widespread use.
  • -
  • -fsanitize=undefined: - Fast and compatible undefined behavior checker. Enables the undefined behavior - checks that have small runtime cost and no impact on address space layout - or ABI. This includes all of the checks listed below other than - unsigned-integer-overflow.
  • -
- -The following more fine-grained checks are also available: - -
    -
  • -fsanitize=alignment: - Use of a misaligned pointer or creation of a misaligned reference.
  • -
  • -fsanitize=bounds: - Out of bounds array indexing, in cases where the array bound can be - statically determined.
  • -
  • -fsanitize=float-cast-overflow: - Conversion to, from, or between floating-point types which would overflow - the destination.
  • -
  • -fsanitize=float-divide-by-zero: - Floating point division by zero.
  • -
  • -fsanitize=integer-divide-by-zero: - Integer division by zero.
  • -
  • -fsanitize=null: - Use of a null pointer or creation of a null reference.
  • -
  • -fsanitize=object-size: - An attempt to use bytes which the optimizer can determine are not part of - the object being accessed. - The sizes of objects are determined using __builtin_object_size, and - consequently may be able to detect more problems at higher optimization - levels.
  • -
  • -fsanitize=return: - In C++, reaching the end of a value-returning function without returning a - value.
  • -
  • -fsanitize=shift: - Shift operators where the amount shifted is greater or equal to the - promoted bit-width of the left hand side or less than zero, or where - the left hand side is negative. For a signed left shift, also checks - for signed overflow in C, and for unsigned overflow in C++.
  • -
  • -fsanitize=signed-integer-overflow: - Signed integer overflow, including all the checks added by -ftrapv, - and checking for overflow in signed division (INT_MIN / -1).
  • -
  • -fsanitize=unreachable: - If control flow reaches __builtin_unreachable.
  • -
  • -fsanitize=unsigned-integer-overflow: - Unsigned integer overflows.
  • -
  • -fsanitize=vla-bound: - A variable-length array whose bound does not evaluate to a positive value.
  • -
  • -fsanitize=vptr: - Use of an object whose vptr indicates that it is of the wrong dynamic type, - or that its lifetime has not begun or has ended. Incompatible with - -fno-rtti.
  • -
- -Experimental features of AddressSanitizer (not ready for widespread use, -require explicit -fsanitize=address): - -
    -
  • -fsanitize=init-order: -Check for dynamic initialization order problems.
  • -
  • -fsanitize=use-after-return: -Check for use-after-return errors (accessing local variable after the function -exit). -
  • -fsanitize=use-after-scope: -Check for use-after-scope errors (accessing local variable after it went out of -scope). -
- -The -fsanitize= argument must also be provided when linking, in order -to link to the appropriate runtime library. It is not possible to combine the --fsanitize=address and -fsanitize=thread checkers in the same -program. -
- -
-f[no-]address-sanitizer: -Deprecated synonym for -f[no-]sanitize=address. - -
-f[no-]thread-sanitizer: -Deprecated synonym for -f[no-]sanitize=thread. - -
-fcatch-undefined-behavior: -Deprecated synonym for -fsanitize=undefined. - -
-fno-assume-sane-operator-new: -Don't assume that C++'s new operator is sane.
-
This option tells the compiler not to assume that C++'s global new -operator will always return a pointer that does not -alias any other pointer when the function returns.
- -
-ftrap-function=[name]: Instruct code -generator to emit a function call to the specified function name for -__builtin_trap().
- -
LLVM code generator translates __builtin_trap() to a trap -instruction if it is supported by the target ISA. Otherwise, the builtin is -translated into a call to abort. If this option is set, then the code -generator will always lower the builtin to a call to the specified function -regardless of whether the target ISA has a trap instruction. This option is -useful for environments (e.g. deeply embedded) where a trap cannot be properly -handled, or when some custom behavior is desired.
- -
-ftls-model=[model]: Select which TLS model to -use.
-
Valid values are: global-dynamic, local-dynamic, -initial-exec and local-exec. The default value is -global-dynamic. The compiler may use a different model if the selected -model is not supported by the target, or if a more efficient model can be used. -The TLS model can be overridden per variable using the tls_model -attribute. -
-
- - -

Controlling Size of Debug Information

- - -

Debug info kind generated by Clang can be set by one of the flags listed -below. If multiple flags are present, the last one is used.

- - -
-
-g0: Don't generate any debug info (default). - -
-gline-tables-only: -Generate line number tables only. -
-This kind of debug info allows one to obtain stack traces with function -names, file names and line numbers (by such tools as -gdb or addr2line). It doesn't contain any other data (e.g. -description of local variables or function parameters). -
- -
-g: Generate complete debug info. -
- - -

C Language Features

- - -

The support for standard C in clang is feature-complete except for the C99 -floating-point pragmas.

- - -

Extensions supported by clang

- - -

See clang language extensions.

- - -

Differences between various standard modes

- - -

clang supports the -std option, which changes what language mode clang uses. -The supported modes for C are c89, gnu89, c94, c99, gnu99 and various aliases -for those modes. If no -std option is specified, clang defaults to gnu99 mode. -

- -

Differences between all c* and gnu* modes:

-
    -
  • c* modes define "__STRICT_ANSI__".
  • -
  • Target-specific defines not prefixed by underscores, like "linux", are -defined in gnu* modes.
  • -
  • Trigraphs default to being off in gnu* modes; they can be enabled by the --trigraphs option.
  • -
  • The parser recognizes "asm" and "typeof" as keywords in gnu* modes; the -variants "__asm__" and "__typeof__" are recognized in all modes.
  • -
  • The Apple "blocks" extension is recognized by default in gnu* modes -on some platforms; it can be enabled in any mode with the "-fblocks" -option.
  • -
  • Arrays that are VLA's according to the standard, but which can be constant - folded by the frontend are treated as fixed size arrays. This occurs for - things like "int X[(1, 2)];", which is technically a VLA. c* modes are - strictly compliant and treat these as VLAs.
  • -
- -

Differences between *89 and *99 modes:

-
    -
  • The *99 modes default to implementing "inline" as specified in C99, while -the *89 modes implement the GNU version. This can be overridden for individual -functions with the __gnu_inline__ attribute.
  • -
  • Digraphs are not recognized in c89 mode.
  • -
  • The scope of names defined inside a "for", "if", "switch", "while", or "do" -statement is different. (example: "if ((struct x {int x;}*)0) {}".)
  • -
  • __STDC_VERSION__ is not defined in *89 modes.
  • -
  • "inline" is not recognized as a keyword in c89 mode.
  • -
  • "restrict" is not recognized as a keyword in *89 modes.
  • -
  • Commas are allowed in integer constant expressions in *99 modes.
  • -
  • Arrays which are not lvalues are not implicitly promoted to pointers in -*89 modes.
  • -
  • Some warnings are different.
  • -
- -

c94 mode is identical to c89 mode except that digraphs are enabled in -c94 mode (FIXME: And __STDC_VERSION__ should be defined!).

- - -

GCC extensions not implemented yet

- - -

clang tries to be compatible with gcc as much as possible, but some gcc -extensions are not implemented yet:

- -
    - -
  • clang does not support #pragma weak -(bug 3679). Due to -the uses described in the bug, this is likely to be implemented at some -point, at least partially.
  • - -
  • clang does not support decimal floating point types (_Decimal32 and -friends) or fixed-point types (_Fract and friends); nobody has expressed -interest in these features yet, so it's hard to say when they will be -implemented.
  • - -
  • clang does not support nested functions; this is a complex feature which -is infrequently used, so it is unlikely to be implemented anytime soon. In C++11 -it can be emulated by assigning lambda functions to local variables, e.g: -
    -  auto const local_function = [&](int parameter) {
    -    // Do something
    -  };
    -  ...
    -  local_function(1);
    -
    -
  • - -
  • clang does not support global register variables; this is unlikely -to be implemented soon because it requires additional LLVM backend support. -
  • - -
  • clang does not support static initialization of flexible array -members. This appears to be a rarely used extension, but could be -implemented pending user demand.
  • - -
  • clang does not support __builtin_va_arg_pack/__builtin_va_arg_pack_len. -This is used rarely, but in some potentially interesting places, like the -glibc headers, so it may be implemented pending user demand. Note that -because clang pretends to be like GCC 4.2, and this extension was introduced -in 4.3, the glibc headers will not try to use this extension with clang at -the moment.
  • - -
  • clang does not support the gcc extension for forward-declaring function -parameters; this has not shown up in any real-world code yet, though, so it -might never be implemented.
  • - -
- -

This is not a complete list; if you find an unsupported extension -missing from this list, please send an e-mail to cfe-dev. This list -currently excludes C++; see C++ Language Features. -Also, this list does not include bugs in mostly-implemented features; please -see the -bug tracker for known existing bugs (FIXME: Is there a section for -bug-reporting guidelines somewhere?).

- - -

Intentionally unsupported GCC extensions

- - -
    - -
  • clang does not support the gcc extension that allows variable-length arrays -in structures. This is for a few reasons: one, it is tricky -to implement, two, the extension is completely undocumented, and three, the -extension appears to be rarely used. Note that clang does support -flexible array members (arrays with a zero or unspecified size at the end of -a structure).
  • - -
  • clang does not have an equivalent to gcc's "fold"; this means that -clang doesn't accept some constructs gcc might accept in contexts where a -constant expression is required, like "x-x" where x is a variable.
  • - -
  • clang does not support __builtin_apply and friends; this extension is -extremely obscure and difficult to implement reliably.
  • - -
- - -

Microsoft extensions

- - -

clang has some experimental support for extensions from -Microsoft Visual C++; to enable it, use the -fms-extensions command-line -option. This is the default for Windows targets. Note that the -support is incomplete; enabling Microsoft extensions will silently drop -certain constructs (including __declspec and Microsoft-style asm statements). -

- -

clang has a -fms-compatibility flag that makes clang accept enough -invalid C++ to be able to parse most Microsoft headers. This flag is enabled by -default for Windows targets.

- -

-fdelayed-template-parsing lets clang delay all template instantiation until -the end of a translation unit. This flag is enabled by default for Windows -targets.

- -
    -
  • clang allows setting _MSC_VER with -fmsc-version=. It defaults to 1300 which -is the same as Visual C/C++ 2003. Any number is supported and can greatly affect -what Windows SDK and c++stdlib headers clang can compile. This option will be -removed when clang supports the full set of MS extensions required for these -headers.
  • - -
  • clang does not support the Microsoft extension where anonymous -record members can be declared using user defined typedefs.
  • - -
  • clang supports the Microsoft "#pragma pack" feature for -controlling record layout. GCC also contains support for this feature, -however where MSVC and GCC are incompatible clang follows the MSVC -definition.
  • - -
  • clang defaults to C++11 for Windows targets.
  • -
- - -

C++ Language Features

- - -

clang fully implements all of standard C++98 except for exported templates -(which were removed in C++11), and -many C++11 features are also -implemented.

- - -

Controlling implementation limits

- - -

-fconstexpr-depth=N: Sets the limit for recursive constexpr function -invocations to N. The default is 512.

- -

-ftemplate-depth=N: Sets the limit for recursively nested template -instantiations to N. The default is 1024.

- - -

Target-Specific Features and Limitations

- - - - -

CPU Architectures Features and Limitations

- - - -

X86

- - -

The support for X86 (both 32-bit and 64-bit) is considered stable on Darwin -(Mac OS/X), Linux, FreeBSD, and Dragonfly BSD: it has been tested to correctly -compile many large C, C++, Objective-C, and Objective-C++ codebases.

- -

On x86_64-mingw32, passing i128 (by value) is incompatible with the Microsoft x64 -calling convention. You might need to tweak WinX86_64ABIInfo::classify() -in lib/CodeGen/TargetInfo.cpp.

- - -

ARM

- - -

The support for ARM (specifically ARMv6 and ARMv7) is considered stable on -Darwin (iOS): it has been tested to correctly compile many large C, C++, -Objective-C, and Objective-C++ codebases. Clang only supports a limited number -of ARM architectures. It does not yet fully support ARMv5, for example.

- - -

Other platforms

- -clang currently contains some support for PPC and Sparc; however, significant -pieces of code generation are still missing, and they haven't undergone -significant testing. - -

clang contains limited support for the MSP430 embedded processor, but both -the clang support and the LLVM backend support are highly experimental. - -

Other platforms are completely unsupported at the moment. Adding the -minimal support needed for parsing and semantic analysis on a new platform -is quite easy; see lib/Basic/Targets.cpp in the clang source tree. This level -of support is also sufficient for conversion to LLVM IR for simple programs. -Proper support for conversion to LLVM IR requires adding code to -lib/CodeGen/CGCall.cpp at the moment; this is likely to change soon, though. -Generating assembly requires a suitable LLVM backend. - - -

Operating System Features and Limitations

- - - -

Darwin (Mac OS/X)

- - -

None

- - -

Windows

- - -

Experimental supports are on Cygming.

- -

See also Microsoft Extensions.

- -
Cygwin
- -

Clang works on Cygwin-1.7.

- -
MinGW32
- -

Clang works on some mingw32 distributions. -Clang assumes directories as below;

- -
    -
  • C:/mingw/include
  • -
  • C:/mingw/lib
  • -
  • C:/mingw/lib/gcc/mingw32/4.[3-5].0/include/c++
  • -
- -

On MSYS, a few tests might fail.

- -
MinGW-w64
- -

For 32-bit (i686-w64-mingw32), and 64-bit (x86_64-w64-mingw32), Clang assumes as below;

- -

    -
  • GCC versions 4.5.0 to 4.5.3, 4.6.0 to 4.6.2, or 4.7.0 (for the C++ header search path)
  • -
  • some_directory/bin/gcc.exe
  • -
  • some_directory/bin/clang.exe
  • -
  • some_directory/bin/clang++.exe
  • -
  • some_directory/bin/../include/c++/GCC_version
  • -
  • some_directory/bin/../include/c++/GCC_version/x86_64-w64-mingw32
  • -
  • some_directory/bin/../include/c++/GCC_version/i686-w64-mingw32
  • -
  • some_directory/bin/../include/c++/GCC_version/backward
  • -
  • some_directory/bin/../x86_64-w64-mingw32/include
  • -
  • some_directory/bin/../i686-w64-mingw32/include
  • -
  • some_directory/bin/../include
  • -
- -

This directory layout is standard for any toolchain you will find on the official MinGW-w64 website. - -

Clang expects the GCC executable "gcc.exe" compiled for i686-w64-mingw32 (or x86_64-w64-mingw32) to be present on PATH.

- -

Some tests might fail -on x86_64-w64-mingw32.

- -
- - diff --git a/docs/UsersManual.rst b/docs/UsersManual.rst new file mode 100644 index 0000000000..6284255f87 --- /dev/null +++ b/docs/UsersManual.rst @@ -0,0 +1,1238 @@ +============================ +Clang Compiler User's Manual +============================ + +.. contents:: + :local: + +Introduction +============ + +The Clang Compiler is an open-source compiler for the C family of +programming languages, aiming to be the best in class implementation of +these languages. Clang builds on the LLVM optimizer and code generator, +allowing it to provide high-quality optimization and code generation +support for many targets. For more general information, please see the +`Clang Web Site `_ or the `LLVM Web +Site `_. + +This document describes important notes about using Clang as a compiler +for an end-user, documenting the supported features, command line +options, etc. If you are interested in using Clang to build a tool that +processes code, please see `the Clang Internals +Manual `_. If you are interested in the `Clang +Static Analyzer `_, please see its web +page. + +Clang is designed to support the C family of programming languages, +which includes :ref:`C `, :ref:`Objective-C `, :ref:`C++ `, and +:ref:`Objective-C++ ` as well as many dialects of those. For +language-specific information, please see the corresponding language +specific section: + +- :ref:`C Language `: K&R C, ANSI C89, ISO C90, ISO C94 (C89+AMD1), ISO + C99 (+TC1, TC2, TC3). +- :ref:`Objective-C Language `: ObjC 1, ObjC 2, ObjC 2.1, plus + variants depending on base language. +- :ref:`C++ Language ` +- :ref:`Objective C++ Language ` + +In addition to these base languages and their dialects, Clang supports a +broad variety of language extensions, which are documented in the +corresponding language section. These extensions are provided to be +compatible with the GCC, Microsoft, and other popular compilers as well +as to improve functionality through Clang-specific features. 
The Clang +driver and language features are intentionally designed to be as +compatible with the GNU GCC compiler as reasonably possible, easing +migration from GCC to Clang. In most cases, code "just works". + +In addition to language specific features, Clang has a variety of +features that depend on what CPU architecture or operating system is +being compiled for. Please see the :ref:`Target-Specific Features and +Limitations ` section for more details. + +The rest of the introduction introduces some basic :ref:`compiler +terminology ` that is used throughout this manual and +contains a basic :ref:`introduction to using Clang ` as a +command line compiler. + +.. _terminology: + +Terminology +----------- + +Front end, parser, backend, preprocessor, undefined behavior, +diagnostic, optimizer + +.. _basicusage: + +Basic Usage +----------- + +Intro to how to use a C compiler for newbies. + +compile + link compile then link debug info enabling optimizations +picking a language to use, defaults to C99 by default. Autosenses based +on extension. using a makefile + +Command Line Options +==================== + +This section is generally an index into other sections. It does not go +into depth on the ones that are covered by other sections. However, the +first part introduces the language selection and other high level +options like -c, -g, etc. + +Options to Control Error and Warning Messages +--------------------------------------------- + +**-Werror**: Turn warnings into errors. + +**-Werror=foo**: Turn warning "foo" into an error. + +**-Wno-error=foo**: Turn warning "foo" into a warning even if -Werror +is specified. + +**-Wfoo**: Enable warning "foo". + +**-Wno-foo**: Disable warning "foo". + +**-w**: Disable all warnings. + +**-Weverything**: :ref:`Enable **all** +warnings. ` + +**-pedantic**: Warn on language extensions. + +**-pedantic-errors**: Error on language extensions. + +**-Wsystem-headers**: Enable warnings from system headers. 
+ +**-ferror-limit=123**: Stop emitting diagnostics after 123 errors have +been produced. The default is 20, and the error limit can be disabled +with -ferror-limit=0. + +**-ftemplate-backtrace-limit=123**: Only emit up to 123 template +instantiation notes within the template instantiation backtrace for a +single warning or error. The default is 10, and the limit can be +disabled with -ftemplate-backtrace-limit=0. + +.. _cl_diag_formatting: + +Formatting of Diagnostics +^^^^^^^^^^^^^^^^^^^^^^^^^ + +Clang aims to produce beautiful diagnostics by default, particularly for +new users that first come to Clang. However, different people have +different preferences, and sometimes Clang is driven by another program +that wants to parse simple and consistent output, not a person. For +these cases, Clang provides a wide range of options to control the exact +output format of the diagnostics that it generates. + +.. _opt_fshow-column: + +**-f[no-]show-column** + Print column number in diagnostic. + + This option, which defaults to on, controls whether or not Clang + prints the column number of a diagnostic. For example, when this is + enabled, Clang will print something like: + + :: + + test.c:28:8: warning: extra tokens at end of #endif directive [-Wextra-tokens] + #endif bad + ^ + // + + When this is disabled, Clang will print "test.c:28: warning..." with + no column number. + + The printed column numbers count bytes from the beginning of the + line; take care if your source contains multibyte characters. + +.. _opt_fshow-source-location: + +**-f[no-]show-source-location** + Print source file/line/column information in diagnostic. + + This option, which defaults to on, controls whether or not Clang + prints the filename, line number and column number of a diagnostic. 
+ For example, when this is enabled, Clang will print something like: + + :: + + test.c:28:8: warning: extra tokens at end of #endif directive [-Wextra-tokens] + #endif bad + ^ + // + + When this is disabled, Clang will not print the "test.c:28:8: " + part. + +.. _opt_fcaret-diagnostics: + +**-f[no-]caret-diagnostics** + Print source line and ranges from source code in diagnostic. + This option, which defaults to on, controls whether or not Clang + prints the source line, source ranges, and caret when emitting a + diagnostic. For example, when this is enabled, Clang will print + something like: + + :: + + test.c:28:8: warning: extra tokens at end of #endif directive [-Wextra-tokens] + #endif bad + ^ + // + +**-f[no-]color-diagnostics** + This option, which defaults to on when a color-capable terminal is + detected, controls whether or not Clang prints diagnostics in color. + + When this option is enabled, Clang will use colors to highlight + specific parts of the diagnostic, e.g., + + .. nasty hack to not lose our dignity + + .. raw:: html + +
+         test.c:28:8: warning: extra tokens at end of #endif directive [-Wextra-tokens]
+         #endif bad
+                ^
+                //
+       
+ + When this is disabled, Clang will just print: + + :: + + test.c:2:8: warning: extra tokens at end of #endif directive [-Wextra-tokens] + #endif bad + ^ + // + +**-fdiagnostics-format=clang/msvc/vi** + Changes diagnostic output format to better match IDEs and command line tools. + + This option controls the output format of the filename, line number, + and column printed in diagnostic messages. The options, and their + effect on formatting a simple conversion diagnostic, follow: + + **clang** (default) + :: + + t.c:3:11: warning: conversion specifies type 'char *' but the argument has type 'int' + + **msvc** + :: + + t.c(3,11) : warning: conversion specifies type 'char *' but the argument has type 'int' + + **vi** + :: + + t.c +3:11: warning: conversion specifies type 'char *' but the argument has type 'int' + +**-f[no-]diagnostics-show-name** + Enable the display of the diagnostic name. + This option, which defaults to off, controls whether or not Clang + prints the associated name. + +.. _opt_fdiagnostics-show-option: + +**-f[no-]diagnostics-show-option** + Enable ``[-Woption]`` information in diagnostic line. + + This option, which defaults to on, controls whether or not Clang + prints the associated :ref:`warning group ` + option name when outputting a warning diagnostic. For example, in + this output: + + :: + + test.c:28:8: warning: extra tokens at end of #endif directive [-Wextra-tokens] + #endif bad + ^ + // + + Passing **-fno-diagnostics-show-option** will prevent Clang from + printing the [:ref:`-Wextra-tokens `] information in + the diagnostic. This information tells you the flag needed to enable + or disable the diagnostic, either from the command line or through + :ref:`#pragma GCC diagnostic `. + +.. _opt_fdiagnostics-show-category: + +**-fdiagnostics-show-category=none/id/name** + Enable printing category information in diagnostic line. 
+ + This option, which defaults to "none", controls whether or not Clang + prints the category associated with a diagnostic when emitting it. + Each diagnostic may or may not have an associated category; if it + has one, it is listed in the diagnostic categorization field of the + diagnostic line (in the []'s). + + For example, a format string warning will produce these three + renditions based on the setting of this option: + + :: + + t.c:3:11: warning: conversion specifies type 'char *' but the argument has type 'int' [-Wformat] + t.c:3:11: warning: conversion specifies type 'char *' but the argument has type 'int' [-Wformat,1] + t.c:3:11: warning: conversion specifies type 'char *' but the argument has type 'int' [-Wformat,Format String] + + This category can be used by clients that want to group diagnostics + by category, so it should be a high level category. We want dozens + of these, not hundreds or thousands of them. + +.. _opt_fdiagnostics-fixit-info: + +**-f[no-]diagnostics-fixit-info** + Enable "FixIt" information in the diagnostics output. + + This option, which defaults to on, controls whether or not Clang + prints the information on how to fix a specific diagnostic + underneath it when it knows. For example, in this output: + + :: + + test.c:28:8: warning: extra tokens at end of #endif directive [-Wextra-tokens] + #endif bad + ^ + // + + Passing **-fno-diagnostics-fixit-info** will prevent Clang from + printing the "//" line at the end of the message. This information + is useful for users who may not understand what is wrong, but can be + confusing for machine parsing. + +.. _opt_fdiagnostics-print-source-range-info: + +**-f[no-]diagnostics-print-source-range-info** + Print machine parsable information about source ranges. + This option, which defaults to off, controls whether or not Clang + prints information about source ranges in a machine parsable format + after the file/line/column number information. 
The information is a + simple sequence of brace enclosed ranges, where each range lists the + start and end line/column locations. For example, in this output: + + :: + + exprs.c:47:15:{47:8-47:14}{47:17-47:24}: error: invalid operands to binary expression ('int *' and '_Complex float') + P = (P-42) + Gamma*4; + ~~~~~~ ^ ~~~~~~~ + + The {}'s are generated by -fdiagnostics-print-source-range-info. + + The printed column numbers count bytes from the beginning of the + line; take care if your source contains multibyte characters. + +**-fdiagnostics-parseable-fixits** + Print Fix-Its in a machine parseable form. + + This option makes Clang print available Fix-Its in a machine + parseable format at the end of diagnostics. The following example + illustrates the format: + + :: + + fix-it:"t.cpp":{7:25-7:29}:"Gamma" + + The range printed is a half-open range, so in this example the + characters at column 25 up to but not including column 29 on line 7 + in t.cpp should be replaced with the string "Gamma". Either the + range or the replacement string may be empty (representing strict + insertions and strict erasures, respectively). Both the file name + and the insertion string escape backslash (as "\\\\"), tabs (as + "\\t"), newlines (as "\\n"), double quotes(as "\\"") and + non-printable characters (as octal "\\xxx"). + + The printed column numbers count bytes from the beginning of the + line; take care if your source contains multibyte characters. + +**-fno-elide-type** + Turns off elision in template type printing. + + The default for template type printing is to elide as many template + arguments as possible, removing those which are the same in both + template types, leaving only the differences. Adding this flag will + print all the template arguments. If supported by the terminal, + highlighting will still appear on differing arguments. 
+ + Default: + + :: + + t.cc:4:5: note: candidate function not viable: no known conversion from 'vector>>' to 'vector>>' for 1st argument; + + -fno-elide-type: + + :: + + t.cc:4:5: note: candidate function not viable: no known conversion from 'vector>>' to 'vector>>' for 1st argument; + +**-fdiagnostics-show-template-tree** + Template type diffing prints a text tree. + + For diffing large templated types, this option will cause Clang to + display the templates as an indented text tree, one argument per + line, with differences marked inline. This is compatible with + -fno-elide-type. + + Default: + + :: + + t.cc:4:5: note: candidate function not viable: no known conversion from 'vector>>' to 'vector>>' for 1st argument; + + -fdiagnostics-show-template-tree + + :: + + t.cc:4:5: note: candidate function not viable: no known conversion for 1st argument; + vector< + map< + [...], + map< + [float != float], + [...]>>> + +.. _cl_diag_warning_groups: + +Individual Warning Groups +^^^^^^^^^^^^^^^^^^^^^^^^^ + +TODO: Generate this from tblgen. Define one anchor per warning group. + +.. _opt_wextra-tokens: + +**-Wextra-tokens** + Warn about excess tokens at the end of a preprocessor directive. + + This option, which defaults to on, enables warnings about extra + tokens at the end of preprocessor directives. For example: + + :: + + test.c:28:8: warning: extra tokens at end of #endif directive [-Wextra-tokens] + #endif bad + ^ + + These extra tokens are not strictly conforming, and are usually best + handled by commenting them out. + +**-Wambiguous-member-template** + Warn about unqualified uses of a member template whose name resolves to + another template at the location of the use. 
+ + This option, which defaults to on, enables a warning in the + following code: + + :: + + template struct set{}; + template struct trait { typedef const T& type; }; + struct Value { + template void set(typename trait::type value) {} + }; + void foo() { + Value v; + v.set(3.2); + } + + C++ [basic.lookup.classref] requires this to be an error, but, + because it's hard to work around, Clang downgrades it to a warning + as an extension. + +**-Wbind-to-temporary-copy** + Warn about an unusable copy constructor when binding a reference to a + temporary. + + This option, which defaults to on, enables warnings about binding a + reference to a temporary when the temporary doesn't have a usable + copy constructor. For example: + + :: + + struct NonCopyable { + NonCopyable(); + private: + NonCopyable(const NonCopyable&); + }; + void foo(const NonCopyable&); + void bar() { + foo(NonCopyable()); // Disallowed in C++98; allowed in C++11. + } + + :: + + struct NonCopyable2 { + NonCopyable2(); + NonCopyable2(NonCopyable2&); + }; + void foo(const NonCopyable2&); + void bar() { + foo(NonCopyable2()); // Disallowed in C++98; allowed in C++11. + } + + Note that if ``NonCopyable2::NonCopyable2()`` has a default argument + whose instantiation produces a compile error, that error will still + be a hard error in C++98 mode even if this warning is turned off. + +Options to Control Clang Crash Diagnostics +------------------------------------------ + +As unbelievable as it may sound, Clang does crash from time to time. +Generally, this only occurs to those living on the `bleeding +edge `_. Clang goes to great +lengths to assist you in filing a bug report. Specifically, Clang +generates preprocessed source file(s) and associated run script(s) upon +a crash. These files should be attached to a bug report to ease +reproducibility of the failure. Below are the command line options to +control the crash diagnostics. 
+ +**-fno-crash-diagnostics**: Disable auto-generation of preprocessed +source files during a clang crash. + +The -fno-crash-diagnostics flag can be helpful for speeding the process +of generating a delta reduced test case. + +Language and Target-Independent Features +======================================== + +Controlling Errors and Warnings +------------------------------- + +Clang provides a number of ways to control which code constructs cause +it to emit errors and warning messages, and how they are displayed to +the console. + +Controlling How Clang Displays Diagnostics +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +When Clang emits a diagnostic, it includes rich information in the +output, and gives you fine-grain control over which information is +printed. Clang has the ability to print this information, and these are +the options that control it: + +#. A file/line/column indicator that shows exactly where the diagnostic + occurs in your code [:ref:`-fshow-column `, + :ref:`-fshow-source-location `]. +#. A categorization of the diagnostic as a note, warning, error, or + fatal error. +#. A text string that describes what the problem is. +#. An option that indicates how to control the diagnostic (for + diagnostics that support it) + [:ref:`-fdiagnostics-show-option `]. +#. A :ref:`high-level category ` for the diagnostic + for clients that want to group diagnostics by class (for diagnostics + that support it) + [:ref:`-fdiagnostics-show-category `]. +#. The line of source code that the issue occurs on, along with a caret + and ranges that indicate the important locations + [:ref:`-fcaret-diagnostics `]. +#. "FixIt" information, which is a concise explanation of how to fix the + problem (when Clang is certain it knows) + [:ref:`-fdiagnostics-fixit-info `]. +#. A machine-parsable representation of the ranges involved (off by + default) + [:ref:`-fdiagnostics-print-source-range-info `]. + +For more information please see :ref:`Formatting of +Diagnostics `. 
+ +Diagnostic Mappings +^^^^^^^^^^^^^^^^^^^ + +All diagnostics are mapped into one of these 5 classes: + +- Ignored +- Note +- Warning +- Error +- Fatal + +.. _diagnostics_categories: + +Diagnostic Categories +^^^^^^^^^^^^^^^^^^^^^ + +Though not shown by default, diagnostics may each be associated with a +high-level category. This category is intended to make it possible to +triage builds that produce a large number of errors or warnings in a +grouped way. + +Categories are not shown by default, but they can be turned on with the +:ref:`-fdiagnostics-show-category ` option. +When set to "``name``", the category is printed textually in the +diagnostic output. When it is set to "``id``", a category number is +printed. The mapping of category names to category id's can be obtained +by running '``clang --print-diagnostic-categories``'. + +Controlling Diagnostics via Command Line Flags +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +TODO: -W flags, -pedantic, etc + +.. _pragma_gcc_diagnostic: + +Controlling Diagnostics via Pragmas +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Clang can also control what diagnostics are enabled through the use of +pragmas in the source code. This is useful for turning off specific +warnings in a section of source code. Clang supports GCC's pragma for +compatibility with existing source code, as well as several extensions. + +The pragma may control any warning that can be used from the command +line. Warnings may be set to ignored, warning, error, or fatal. The +following example code will tell Clang or GCC to ignore the -Wall +warnings: + +:: + + #pragma GCC diagnostic ignored "-Wall" + +In addition to all of the functionality provided by GCC's pragma, Clang +also allows you to push and pop the current warning state. This is +particularly useful when writing a header file that will be compiled by +other people, because you don't know what warning flags they build with. 
+ +In the below example -Wmultichar is ignored for only a single line of +code, after which the diagnostics return to whatever state had +previously existed. + +:: + + #pragma clang diagnostic push + #pragma clang diagnostic ignored "-Wmultichar" + + char b = 'df'; // no warning. + + #pragma clang diagnostic pop + +The push and pop pragmas will save and restore the full diagnostic state +of the compiler, regardless of how it was set. That means that it is +possible to use push and pop around GCC compatible diagnostics and Clang +will push and pop them appropriately, while GCC will ignore the pushes +and pops as unknown pragmas. It should be noted that while Clang +supports the GCC pragma, Clang and GCC do not support the exact same set +of warnings, so even when using GCC compatible #pragmas there is no +guarantee that they will have identical behaviour on both compilers. + +Controlling Diagnostics in System Headers +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Warnings are suppressed when they occur in system headers. By default, +an included file is treated as a system header if it is found in an +include path specified by ``-isystem``, but this can be overridden in +several ways. + +The ``system_header`` pragma can be used to mark the current file as +being a system header. No warnings will be produced from the location of +the pragma onwards within the same file. + +:: + + char a = 'xy'; // warning + + #pragma clang system_header + + char b = 'ab'; // no warning + +The ``-isystem-prefix`` and ``-ino-system-prefix`` command-line +arguments can be used to override whether subsets of an include path are +treated as system headers. When the name in a ``#include`` directive is +found within a header search path and starts with a system prefix, the +header is treated as a system header. The last prefix on the +command-line which matches the specified header name takes precedence. 
+For instance: + +:: + + clang -Ifoo -isystem bar -isystem-prefix x/ -ino-system-prefix x/y/ + +Here, ``#include "x/a.h"`` is treated as including a system header, even +if the header is found in ``foo``, and ``#include "x/y/b.h"`` is treated +as not including a system header, even if the header is found in +``bar``. + +A ``#include`` directive which finds a file relative to the current +directory is treated as including a system header if the including file +is treated as a system header. + +.. _diagnostics_enable_everything: + +Enabling All Warnings +^^^^^^^^^^^^^^^^^^^^^ + +In addition to the traditional ``-W`` flags, one can enable **all** +warnings by passing ``-Weverything``. This works as expected with +``-Werror``, and also includes the warnings from ``-pedantic``. + +Note that when combined with ``-w`` (which disables all warnings), that +flag wins. + +Controlling Static Analyzer Diagnostics +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +While not strictly part of the compiler, the diagnostics from Clang's +`static analyzer `_ can also be +influenced by the user via changes to the source code. See the available +`annotations `_ and the +analyzer's `FAQ +page `_ for more +information. + +Precompiled Headers +------------------- + +`Precompiled headers `__ +are a general approach employed by many compilers to reduce compilation +time. The underlying motivation of the approach is that it is common for +the same (and often large) header files to be included by multiple +source files. Consequently, compile times can often be greatly improved +by caching some of the (redundant) work done by a compiler to process +headers. Precompiled header files, which represent one of many ways to +implement this optimization, are literally files that represent an +on-disk cache that contains the vital information necessary to reduce +some of the work needed to process a corresponding header file. 
While +details of precompiled headers vary between compilers, precompiled +headers have been shown to be highly effective at speeding up program +compilation on systems with very large system headers (e.g., Mac OS/X). + +Generating a PCH File +^^^^^^^^^^^^^^^^^^^^^ + +To generate a PCH file using Clang, one invokes Clang with the +**``-x <language>-header``** option. This mirrors the interface in GCC +for generating PCH files: + +:: + + $ gcc -x c-header test.h -o test.h.gch + $ clang -x c-header test.h -o test.h.pch + +Using a PCH File +^^^^^^^^^^^^^^^^ + +A PCH file can then be used as a prefix header when a **``-include``** +option is passed to ``clang``: + +:: + + $ clang -include test.h test.c -o test + +The ``clang`` driver will first check if a PCH file for ``test.h`` is +available; if so, the contents of ``test.h`` (and the files it includes) +will be processed from the PCH file. Otherwise, Clang falls back to +directly processing the content of ``test.h``. This mirrors the behavior +of GCC. + +.. note:: + Clang does *not* automatically use PCH files for headers that are + directly included within a source file. For example: + +:: + + $ clang -x c-header test.h -o test.h.pch + $ cat test.c + #include "test.h" + $ clang test.c -o test + +In this example, ``clang`` will not automatically use the PCH file for +``test.h`` since ``test.h`` was included directly in the source file and +not specified on the command line using ``-include``. + +Relocatable PCH Files +^^^^^^^^^^^^^^^^^^^^^ + +It is sometimes necessary to build a precompiled header from headers +that are not yet in their final, installed locations. For example, one +might build a precompiled header within the build tree that is then +meant to be installed alongside the headers. Clang permits the creation +of "relocatable" precompiled headers, which are built with a given path +(into the build directory) and can later be used from an installed +location. 
+ +To build a relocatable precompiled header, place your headers into a +subdirectory whose structure mimics the installed location. For example, +if you want to build a precompiled header for the header ``mylib.h`` +that will be installed into ``/usr/include``, create a subdirectory +``build/usr/include`` and place the header ``mylib.h`` into that +subdirectory. If ``mylib.h`` depends on other headers, then they can be +stored within ``build/usr/include`` in a way that mimics the installed +location. + +Building a relocatable precompiled header requires two additional +arguments. First, pass the ``--relocatable-pch`` flag to indicate that +the resulting PCH file should be relocatable. Second, pass +``-isysroot /path/to/build``, which makes all includes for your library +relative to the build directory. For example: + +:: + + # clang -x c-header --relocatable-pch -isysroot /path/to/build /path/to/build/mylib.h mylib.h.pch + +When loading the relocatable PCH file, the various headers used in the +PCH file are found from the system header root. For example, ``mylib.h`` +can be found in ``/usr/include/mylib.h``. If the headers are installed +in some other system root, the ``-isysroot`` option can be used to provide +a different system root from which the headers will be based. For +example, ``-isysroot /Developer/SDKs/MacOSX10.4u.sdk`` will look for +``mylib.h`` in ``/Developer/SDKs/MacOSX10.4u.sdk/usr/include/mylib.h``. + +Relocatable precompiled headers are intended to be used in a limited +number of cases where the compilation environment is tightly controlled +and the precompiled header cannot be generated after headers have been +installed. Relocatable precompiled headers also have some performance +impact, because the difference in location between the header locations +at PCH build time vs. at the time of PCH use requires one of the PCH +optimizations, ``stat()`` caching, to be disabled. 
However, this change +is only likely to affect PCH files that reference a large number of +headers. + +Controlling Code Generation +--------------------------- + +Clang provides a number of ways to control code generation. The options +are listed below. + +**-fsanitize=check1,check2** + Turn on runtime checks for various forms of undefined or suspicious + behavior. + + This option controls whether Clang adds runtime checks for various + forms of undefined or suspicious behavior, and is disabled by + default. If a check fails, a diagnostic message is produced at + runtime explaining the problem. The main checks are: + + .. _opt_fsanitize_address: + + - ``-fsanitize=address``: + :doc:`AddressSanitizer`, a memory error + detector. + - ``-fsanitize=address-full``: AddressSanitizer with all the + experimental features listed below. + - ``-fsanitize=integer``: Enables checks for undefined or + suspicious integer behavior. + - ``-fsanitize=thread``: :doc:`ThreadSanitizer`, + an *experimental* data race detector. Not ready for widespread + use. + + .. _opt_fsanitize_undefined: + + - ``-fsanitize=undefined``: Fast and compatible undefined behavior + checker. Enables the undefined behavior checks that have small + runtime cost and no impact on address space layout or ABI. This + includes all of the checks listed below other than + ``unsigned-integer-overflow``. + + The following more fine-grained checks are also available: + + - ``-fsanitize=alignment``: Use of a misaligned pointer or creation + of a misaligned reference. + - ``-fsanitize=bounds``: Out of bounds array indexing, in cases + where the array bound can be statically determined. + - ``-fsanitize=float-cast-overflow``: Conversion to, from, or + between floating-point types which would overflow the + destination. + - ``-fsanitize=float-divide-by-zero``: Floating point division by + zero. + - ``-fsanitize=integer-divide-by-zero``: Integer division by zero. 
+ - ``-fsanitize=null``: Use of a null pointer or creation of a null + reference. + - ``-fsanitize=object-size``: An attempt to use bytes which the + optimizer can determine are not part of the object being + accessed. The sizes of objects are determined using + ``__builtin_object_size``, and consequently may be able to detect + more problems at higher optimization levels. + - ``-fsanitize=return``: In C++, reaching the end of a + value-returning function without returning a value. + - ``-fsanitize=shift``: Shift operators where the amount shifted is + greater or equal to the promoted bit-width of the left hand side + or less than zero, or where the left hand side is negative. For a + signed left shift, also checks for signed overflow in C, and for + unsigned overflow in C++. + - ``-fsanitize=signed-integer-overflow``: Signed integer overflow, + including all the checks added by ``-ftrapv``, and checking for + overflow in signed division (``INT_MIN / -1``). + - ``-fsanitize=unreachable``: If control flow reaches + ``__builtin_unreachable``. + - ``-fsanitize=unsigned-integer-overflow``: Unsigned integer + overflows. + - ``-fsanitize=vla-bound``: A variable-length array whose bound + does not evaluate to a positive value. + - ``-fsanitize=vptr``: Use of an object whose vptr indicates that + it is of the wrong dynamic type, or that its lifetime has not + begun or has ended. Incompatible with ``-fno-rtti``. + + Experimental features of AddressSanitizer (not ready for widespread + use, require explicit ``-fsanitize=address``): + + - ``-fsanitize=init-order``: Check for dynamic initialization order + problems. + - ``-fsanitize=use-after-return``: Check for use-after-return + errors (accessing local variable after the function exit). + - ``-fsanitize=use-after-scope``: Check for use-after-scope errors + (accessing local variable after it went out of scope). 
+ + The ``-fsanitize=`` argument must also be provided when linking, in + order to link to the appropriate runtime library. It is not possible + to combine the ``-fsanitize=address`` and ``-fsanitize=thread`` + checkers in the same program. +**-f[no-]address-sanitizer** + Deprecated synonym for :ref:`-f[no-]sanitize=address + `. +**-f[no-]thread-sanitizer** + Deprecated synonym for :ref:`-f[no-]sanitize=thread + `. +**-fcatch-undefined-behavior** + Deprecated synonym for :ref:`-fsanitize=undefined + `. +**-fno-assume-sane-operator-new** + Don't assume that C++'s new operator is sane. + + This option tells the compiler not to assume that C++'s global + new operator will always return a pointer that does not alias any + other pointer when the function returns. + +**-ftrap-function=[name]** + Instruct code generator to emit a function call to the specified + function name for ``__builtin_trap()``. + + LLVM code generator translates ``__builtin_trap()`` to a trap + instruction if it is supported by the target ISA. Otherwise, the + builtin is translated into a call to ``abort``. If this option is + set, then the code generator will always lower the builtin to a call + to the specified function regardless of whether the target ISA has a + trap instruction. This option is useful for environments (e.g. + deeply embedded) where a trap cannot be properly handled, or when + some custom behavior is desired. + +**-ftls-model=[model]** + Select which TLS model to use. + + Valid values are: ``global-dynamic``, ``local-dynamic``, + ``initial-exec`` and ``local-exec``. The default value is + ``global-dynamic``. The compiler may use a different model if the + selected model is not supported by the target, or if a more + efficient model can be used. The TLS model can be overridden per + variable using the ``tls_model`` attribute. 
+ +Controlling Size of Debug Information +------------------------------------- + +Debug info kind generated by Clang can be set by one of the flags listed +below. If multiple flags are present, the last one is used. + +**-g0**: Don't generate any debug info (default). + +**-gline-tables-only**: Generate line number tables only. + +This kind of debug info allows to obtain stack traces with function +names, file names and line numbers (by such tools as gdb or addr2line). +It doesn't contain any other data (e.g. description of local variables +or function parameters). + +**-g**: Generate complete debug info. + +.. _c: + +C Language Features +=================== + +The support for standard C in clang is feature-complete except for the +C99 floating-point pragmas. + +Extensions supported by clang +----------------------------- + +See `clang language extensions `_. + +Differences between various standard modes +------------------------------------------ + +clang supports the -std option, which changes what language mode clang +uses. The supported modes for C are c89, gnu89, c94, c99, gnu99 and +various aliases for those modes. If no -std option is specified, clang +defaults to gnu99 mode. + +Differences between all ``c*`` and ``gnu*`` modes: + +- ``c*`` modes define "``__STRICT_ANSI__``". +- Target-specific defines not prefixed by underscores, like "linux", + are defined in ``gnu*`` modes. +- Trigraphs default to being off in ``gnu*`` modes; they can be enabled by + the -trigraphs option. +- The parser recognizes "asm" and "typeof" as keywords in ``gnu*`` modes; + the variants "``__asm__``" and "``__typeof__``" are recognized in all + modes. +- The Apple "blocks" extension is recognized by default in ``gnu*`` modes + on some platforms; it can be enabled in any mode with the "-fblocks" + option. +- Arrays that are VLA's according to the standard, but which can be + constant folded by the frontend are treated as fixed size arrays. 
+ This occurs for things like "int X[(1, 2)];", which is technically a + VLA. ``c*`` modes are strictly compliant and treat these as VLAs. + +Differences between ``*89`` and ``*99`` modes: + +- The ``*99`` modes default to implementing "inline" as specified in C99, + while the ``*89`` modes implement the GNU version. This can be + overridden for individual functions with the ``__gnu_inline__`` + attribute. +- Digraphs are not recognized in c89 mode. +- The scope of names defined inside a "for", "if", "switch", "while", + or "do" statement is different. (example: "``if ((struct x {int + x;}*)0) {}``".) +- ``__STDC_VERSION__`` is not defined in ``*89`` modes. +- "inline" is not recognized as a keyword in c89 mode. +- "restrict" is not recognized as a keyword in ``*89`` modes. +- Commas are allowed in integer constant expressions in ``*99`` modes. +- Arrays which are not lvalues are not implicitly promoted to pointers + in ``*89`` modes. +- Some warnings are different. + +c94 mode is identical to c89 mode except that digraphs are enabled in +c94 mode (FIXME: And ``__STDC_VERSION__`` should be defined!). + +GCC extensions not implemented yet +---------------------------------- + +clang tries to be compatible with gcc as much as possible, but some gcc +extensions are not implemented yet: + +- clang does not support #pragma weak (`bug + 3679 `_). Due to the uses + described in the bug, this is likely to be implemented at some point, + at least partially. +- clang does not support decimal floating point types (``_Decimal32`` and + friends) or fixed-point types (``_Fract`` and friends); nobody has + expressed interest in these features yet, so it's hard to say when + they will be implemented. +- clang does not support nested functions; this is a complex feature + which is infrequently used, so it is unlikely to be implemented + anytime soon. 
In C++11 it can be emulated by assigning lambda + functions to local variables, e.g.: + + :: + + auto const local_function = [&](int parameter) { + // Do something + }; + ... + local_function(1); + +- clang does not support global register variables; this is unlikely to + be implemented soon because it requires additional LLVM backend + support. +- clang does not support static initialization of flexible array + members. This appears to be a rarely used extension, but could be + implemented pending user demand. +- clang does not support + ``__builtin_va_arg_pack``/``__builtin_va_arg_pack_len``. This is + used rarely, but in some potentially interesting places, like the + glibc headers, so it may be implemented pending user demand. Note + that because clang pretends to be like GCC 4.2, and this extension + was introduced in 4.3, the glibc headers will not try to use this + extension with clang at the moment. +- clang does not support the gcc extension for forward-declaring + function parameters; this has not shown up in any real-world code + yet, though, so it might never be implemented. + +This is not a complete list; if you find an unsupported extension +missing from this list, please send an e-mail to cfe-dev. This list +currently excludes C++; see :ref:`C++ Language Features <cxx>`. Also, this +list does not include bugs in mostly-implemented features; please see +the `bug +tracker `_ +for known existing bugs (FIXME: Is there a section for bug-reporting +guidelines somewhere?). + +Intentionally unsupported GCC extensions +---------------------------------------- + +- clang does not support the gcc extension that allows variable-length + arrays in structures. This is for a few reasons: one, it is tricky to + implement, two, the extension is completely undocumented, and three, + the extension appears to be rarely used. Note that clang *does* + support flexible array members (arrays with a zero or unspecified + size at the end of a structure).
+- clang does not have an equivalent to gcc's "fold"; this means that + clang doesn't accept some constructs gcc might accept in contexts + where a constant expression is required, like "x-x" where x is a + variable. +- clang does not support ``__builtin_apply`` and friends; this extension + is extremely obscure and difficult to implement reliably. + +.. _c_ms: + +Microsoft extensions +-------------------- + +clang has some experimental support for extensions from Microsoft Visual +C++; to enable it, use the -fms-extensions command-line option. This is +the default for Windows targets. Note that the support is incomplete; +enabling Microsoft extensions will silently drop certain constructs +(including ``__declspec`` and Microsoft-style asm statements). + +clang has a -fms-compatibility flag that makes clang accept enough +invalid C++ to be able to parse most Microsoft headers. This flag is +enabled by default for Windows targets. + +-fdelayed-template-parsing lets clang delay all template instantiation +until the end of a translation unit. This flag is enabled by default for +Windows targets. + +- clang allows setting ``_MSC_VER`` with ``-fmsc-version=``. It defaults to + 1300 which is the same as Visual C/C++ 2003. Any number is supported + and can greatly affect what Windows SDK and c++stdlib headers clang + can compile. This option will be removed when clang supports the full + set of MS extensions required for these headers. +- clang does not support the Microsoft extension where anonymous record + members can be declared using user defined typedefs. +- clang supports the Microsoft "#pragma pack" feature for controlling + record layout. GCC also contains support for this feature, however + where MSVC and GCC are incompatible clang follows the MSVC + definition. +- clang defaults to C++11 for Windows targets. + +.. 
_cxx: + +C++ Language Features +===================== + +clang fully implements all of standard C++98 except for exported +templates (which were removed in C++11), and `many C++11 +features `_ are also implemented. + +Controlling implementation limits +--------------------------------- + +**-fconstexpr-depth=N**: Sets the limit for recursive constexpr function +invocations to N. The default is 512. + +**-ftemplate-depth=N**: Sets the limit for recursively nested template +instantiations to N. The default is 1024. + +.. _objc: + +Objective-C Language Features +============================= + +.. _objcxx: + +Objective-C++ Language Features +=============================== + + +.. _target_features: + +Target-Specific Features and Limitations +======================================== + +CPU Architectures Features and Limitations +------------------------------------------ + +X86 +^^^ + +The support for X86 (both 32-bit and 64-bit) is considered stable on +Darwin (Mac OS/X), Linux, FreeBSD, and Dragonfly BSD: it has been tested +to correctly compile many large C, C++, Objective-C, and Objective-C++ +codebases. + +On ``x86_64-mingw32``, passing i128 (by value) is incompatible with the Microsoft +x64 calling convention. You might need to tweak +``WinX86_64ABIInfo::classify()`` in lib/CodeGen/TargetInfo.cpp. + +ARM +^^^ + +The support for ARM (specifically ARMv6 and ARMv7) is considered stable +on Darwin (iOS): it has been tested to correctly compile many large C, +C++, Objective-C, and Objective-C++ codebases. Clang only supports a +limited number of ARM architectures. It does not yet fully support +ARMv5, for example. + +Other platforms +^^^^^^^^^^^^^^^ + +clang currently contains some support for PPC and Sparc; however, +significant pieces of code generation are still missing, and they +haven't undergone significant testing. + +clang contains limited support for the MSP430 embedded processor, but +both the clang support and the LLVM backend support are highly +experimental.
+ +Other platforms are completely unsupported at the moment. Adding the +minimal support needed for parsing and semantic analysis on a new +platform is quite easy; see lib/Basic/Targets.cpp in the clang source +tree. This level of support is also sufficient for conversion to LLVM IR +for simple programs. Proper support for conversion to LLVM IR requires +adding code to lib/CodeGen/CGCall.cpp at the moment; this is likely to +change soon, though. Generating assembly requires a suitable LLVM +backend. + +Operating System Features and Limitations +----------------------------------------- + +Darwin (Mac OS/X) +^^^^^^^^^^^^^^^^^ + +None + +Windows +^^^^^^^ + +Experimental support is available for Cygwin and MinGW ("Cygming") targets. + +See also :ref:`Microsoft Extensions <c_ms>`. + +Cygwin +"""""" + +Clang works on Cygwin-1.7. + +MinGW32 +""""""" + +Clang works on some mingw32 distributions. Clang assumes the following +directories: + +- ``C:/mingw/include`` +- ``C:/mingw/lib`` +- ``C:/mingw/lib/gcc/mingw32/4.[3-5].0/include/c++`` + +On MSYS, a few tests might fail. + +MinGW-w64 +""""""""" + +For 32-bit (i686-w64-mingw32), and 64-bit (x86\_64-w64-mingw32), Clang +assumes the following: + +- ``GCC versions 4.5.0 to 4.5.3, 4.6.0 to 4.6.2, or 4.7.0 (for the C++ header search path)`` +- ``some_directory/bin/gcc.exe`` +- ``some_directory/bin/clang.exe`` +- ``some_directory/bin/clang++.exe`` +- ``some_directory/bin/../include/c++/GCC_version`` +- ``some_directory/bin/../include/c++/GCC_version/x86_64-w64-mingw32`` +- ``some_directory/bin/../include/c++/GCC_version/i686-w64-mingw32`` +- ``some_directory/bin/../include/c++/GCC_version/backward`` +- ``some_directory/bin/../x86_64-w64-mingw32/include`` +- ``some_directory/bin/../i686-w64-mingw32/include`` +- ``some_directory/bin/../include`` + +This directory layout is standard for any toolchain you will find on the +official `MinGW-w64 website `_. + +Clang expects the GCC executable "gcc.exe" compiled for +``i686-w64-mingw32`` (or ``x86_64-w64-mingw32``) to be present on PATH.
+ +`Some tests might fail `_ on +``x86_64-w64-mingw32``. diff --git a/docs/index.rst b/docs/index.rst index fab47916b4..05947feebb 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -18,6 +18,17 @@ progress. This page will get filled out with docs soon... PCHInternals ThreadSanitizer Tooling + AddressSanitizer + AnalyzerRegions + ClangPlugins + ClangTools + HowToSetupToolingForLLVM + IntroductionToTheClangAST + JSONCompilationDatabase + LibASTMatchersTutorial + PTHInternals + RAVFrontendAction + UsersManual Indices and tables -- 2.50.1