Start splitting c.c, improve static example

This commit is contained in:
Ciro Santilli 2015-05-19 09:26:58 +02:00
parent bfdb65bffe
commit b075d2d54c
40 changed files with 1224 additions and 787 deletions

View File

@ -6,7 +6,7 @@ C and C++ information, cheatsheets and mini-projects.
Relies on [C++ boilerplate](https://github.com/cirosantilli/cpp-boilerplate) to factor code out. See [its documentation](https://github.com/cirosantilli/cpp-boilerplate/blob/master/README.md) for information on how to use this project.
[Assembly Cheat](https://github.com/cirosantilli/assembly-cheat) contains lower level issues, like assembly and the ELF format.
[Assembly Cheat](https://github.com/cirosantilli/assembly-cheat) contains lower level issues, like assembly, ELF and Binutils.
1. [Getting started](getting-started.md)
1. Featured
@ -18,12 +18,11 @@ Relies on [C++ boilerplate](https://github.com/cirosantilli/cpp-boilerplate) to
1. [OpenGL](opengl/)
1. [KDE](kde/)
1. Introduction
1. [C](c.md)
1. [C](c/)
1. [C++](cpp.md)
1. [C vs C++](c-vs-cpp.md)
1. [Implementations](implementations.md)
1. [Style guides](style-guides.md)
1. [Library](library.md)
1. [Undefined behaviour](undefined-behaviour.md)
1. [C from C++](c-from-cpp/)
1. [Fortran from C](c-from-cpp/)
@ -32,7 +31,6 @@ Relies on [C++ boilerplate](https://github.com/cirosantilli/cpp-boilerplate) to
1. [CMake](cmake.md)
1. [Flex and Bison](flex-bison/)
1. [glibc](glibc/)
1. [hello_world.c](hello_world.c)
1. [hello_world_cpp.cpp](hello_world_cpp.cpp)
1. GUI
1. [GTK](gtk/)

480
c.c
View File

@ -2,8 +2,11 @@
ANSI C cheat.
Small comments on comparing ANSI C with extensions are acceptable.
This cheatsheet is being split up into smaller parts to c/
*/
#define UNDEFINED_BEHAVIOUR
/*
# include
@ -181,29 +184,39 @@ int setjmp_func(int jmp, jmp_buf env_buf) {
Declaration vs definition
*/
void decl_def();
void decl_def();
void decl_def() {}
/* ERROR redefine */
/*void decl_def() {}*/
void decl_def();
void decl_def();
void decl_def() {}
/* ERROR redefine */
/*void decl_def() {}*/
void decl_def_no_arg_name(int i, float f, char d) {}
/* ERROR */
/*void def_no_argname(int){}*/
void decl_def_no_arg_name(int i, float f, char d) {}
/* ERROR */
/*void def_no_argname(int){}*/
int factorial2funcs1(int);
int factorial2funcs0(int n){
if (n != 1) {
return n*factorial2funcs1(n - 1);
}
return 1;
int factorial2funcs1(int);
int factorial2funcs0(int n){
if (n != 1) {
return n*factorial2funcs1(n - 1);
}
int factorial2funcs1(int n){
if (n != 1) {
return n*factorial2funcs0(n - 1);
}
return 1;
return 1;
}
int factorial2funcs1(int n){
if (n != 1) {
return n*factorial2funcs0(n - 1);
}
return 1;
}
/*
Implicit int: the return type is deliberately left out of this definition.
It is only compiled when `__STDC_VERSION__ <= 199901L`.
NOTE(review): `<=` also enables this for C99 itself (199901L) - confirm that is intended.
*/
#if __STDC_VERSION__ <= 199901L
default_return_type() {
return 1;
}
#endif
/*
Defined with an empty parameter list, i.e. without `void`.
Used further down to show that a later prototype taking parameters conflicts with this definition.
*/
int proto_empty_definition() {
return 1;
}
/* Two decls on the same line, with same return type: */
@ -329,22 +342,6 @@ int setjmp_func(int jmp, jmp_buf env_buf) {
return (*function_ptr)(m, n);
}
/*
# void argument vs no argument
`void f()` vs `void f(void)`
In C++, same.
In C, possibly different to support archaic behavior, which you should never rely on.
So always use `f(void)` instead of `f()` on declarations and definitions.
<http://stackoverflow.com/questions/693788/c-void-arguments>
TODO example.
*/
/*
function struct args
@ -888,20 +885,30 @@ void abort_func() {
# main signature
- http://stackoverflow.com/questions/4207134/what-is-the-proper-declaration-of-main
- http://stackoverflow.com/questions/204476/what-should-main-return-in-c-and-c
- http://stackoverflow.com/questions/4207134/what-is-the-proper-declaration-of-main
Valid signatures:
Valid signatures: either:
int main()
and
or
int main(int argc, char* argv[])
which is the same as:
Or equivalent ones to the above:
TODO name of equivalent:
int main(int argc, char** argv)
Default return type `int` (C89 only):
main()
Explicit `void` prototype:
int main(void)
*/
int main(int argc, char **argv) {
/*
@ -1266,6 +1273,8 @@ int main(int argc, char **argv) {
/*
# Integer types
# Integer literals
Types that represent integer numbers are called integer types.
This classification is explicitly used on the C specification,
@ -1325,6 +1334,7 @@ int main(int argc, char **argv) {
{ short si = 1; }
{ short si = (short int)1; }
{ int i = 1; }
/* Lower case possible but bad, since l looks more like 1 than `L`.*/
{ long li = (long)1l; }
{ long li = (long)1L; }
}
@ -1962,9 +1972,6 @@ int main(int argc, char **argv) {
/*
# const qualifier
Can be overridden by pointer typecasts + relying on undefined behaviour,
so it does not generate compile time constant expressions (C99 6.6).
*/
{
int i = 0;
@ -1990,21 +1997,34 @@ int main(int argc, char **argv) {
}
/*
Casting a const to a non const through a pointer is legal.
Modifying the const with the pointer is undefined behavior (C99 6.7.3.5).
For this reason it does not generate compile time constant expressions (C99 6.6):
the undefined behavior could be to change the value of the const.
# Modify const through pointer cast
In particular, existing implementations may or may not put `const` in read only memory,
so that the undefined behavior may be a page fault.
Casting a const to a non-const through a pointer is legal.
Many compilers raise warnings or prevent compilation of such constructs.
Modifying the const with the pointer is undefined behavior (C99 6.7.3.5).
In C++, discarding const is illegal, and generates compile time constants.
For this reason it does not generate compile time constant expressions (C99 6.6):
the undefined behavior could be to change the value of the const.
In particular, existing implementations may or may not put `const` in read only memory,
so that the undefined behavior may be a page fault.
`gcc` for example puts global constants on the `.rodata` section of the ELF output.
In practice it might however work for local function variables,
which are just on the stack or registers.
Many compilers raise warnings or prevent compilation of such constructs.
In C++, discarding const is illegal, and generates compile time constants.
*/
{
const int ic = 0;
/* WARN: initialization discards const qualifier from pointer type. */
/*
WARN: initialization discards const qualifier from pointer type.
Likely to work since local variable.
*/
/*
int* ip = &ic;
*ip = 1;
@ -2555,113 +2575,6 @@ int main(int argc, char **argv) {
#endif
#endif
/* # enum */
{
/* Basics. */
{
enum E
{
E1,
E2,
E3,
E4,
};
enum E e = E1;
/* ERROR */
/* redeclartion of a */
/*int E1 = 1;*/
/* ERROR */
/* can't change value of enums */
/* this is why you can use them for array sizes */
/*E1 = 2;*/
/* by default, values start from 0 and increase */
assert(E1 == 0);
assert(E2 == 1);
assert(E3 == 2);
assert(E4 == 3);
}
/* Typedef combo. No need to type enum everywhere. */
{
/* Multi line */
enum E { E1, E2};
typedef enum E E;
E e;
/* Single line */
typedef enum F {g1, g2} F;
F f;
}
/* You can choose the values explicitly */
{
enum E
{
E1 = 1,
E2 = 2,
E3,
E4 = 2, /* equal values compile */
};
assert(E1 == 1);
assert(E2 == 2);
assert(E4 == 2);
/* if you don't give a value */
/* it gets a value different from all others */
assert(E3 != E1);
assert(E3 != E2);
assert(E3 != E4);
printf("enum E3 = %d\n", E3);
}
/* ERROR: only const expressions allowed */
/* This is why enum values are a good choice for array sizes. */
{
const int i = 0;
/*enum Constexpr { N = i };*/
/*int is[N];*/
}
/*
It seems that it is not possible to control the size of an enum
without extensions.
Compilers could make them smaller than int if there are less than INT_MAX
values in the enum, but gcc 4.8 -O0 does not do that.
<http://stackoverflow.com/questions/4879286/specifying-size-of-enum-type-in-c>
*/
{
{
enum E {E1, E2,};
printf("sizeof(enum E) = %zu\n", sizeof(enum E));
}
/* The largest value that can be portably stored is INT_MAX. */
/* <http://stackoverflow.com/questions/366017/what-is-the-size-of-an-enum-in-c> */
{
enum E {E1 = INT_MAX};
/*enum E_BAD { E1 = INT_MAX + 1};*/
}
}
/* Count elements of an ENUM. */
/* Does not seem possible: http://stackoverflow.com/questions/2102582/how-can-i-count-the-items-in-an-enum */
/* Possible workaround: add an extra element and rely on the increasing order. */
/* Obvious downside: remote name conflict possibility. */
{
enum E {E1, E2, E_SIZE};
assert(E_SIZE == 2);
}
}
/*
# struct
@ -4968,6 +4881,7 @@ int main(int argc, char **argv) {
array functions to manipulate it, and pass string lengths around.
*/
{
/* Basic example. */
{
char cs[] = "abc";
@ -5006,37 +4920,48 @@ int main(int argc, char **argv) {
}
}
/*
# Text segment
C allows you to point directly to the text segment.
In short, the text segment is the part of RAM memory reserved to a process
that contains the instructions of the process, and not, say, regular variables.
Processes are not allowed to modify those instructions at runtime,
and therefore you cannot modify strings that point to the text segment.
Using text segment pointers has the upside of being memory efficient as you
don't copy the text from.
Note however that you cannot modify that string.
*/
/* Initialize strings */
{
/* To create a pointer to text segment, initialize it as: */
/*
# Text segment
C allows you to point directly to the text segment.
In short, the text segment is the part of RAM memory reserved to a process
that contains the instructions of the process, and not, say, regular variables.
Processes are not allowed to modify those instructions at runtime,
and therefore you cannot modify strings that point to the text segment.
Using text segment pointers has the upside of being memory efficient as you
don't copy the text from.
Note however that you cannot modify that string.
*/
{
char* cs = "abc";
assert(cs[0] == 'a');
/* To create a pointer to text segment, initialize it as: */
{
char* cs = "abc";
assert(cs[0] == 'a');
}
/* Segmentation fault: text segment cannot be modified */
{
/*cs[0] = '0';*/
}
/* TODO why can't you do the same thing with integers? ex: */
{
/*int * is = { 1, 3, 2 };*/
}
}
/* Segmentation fault: text segment cannot be modified */
/*
Parenthesis. Legal but ugly. GCC 4.8 gives an error with `-pedantic`.
*/
{
/*cs[0] = '0';*/
}
/* TODO why can't you do the same thing with integers? ex: */
{
/*int * is = { 1, 3, 2 };*/
/*char s[] = ("abc");*/
}
}
@ -5833,6 +5758,16 @@ int main(int argc, char **argv) {
assert(factorial2funcs0(4) == 24);
assert(factorial2funcs1(4) == 24);
}
/*
In C89, some functions can be used without any declaration as long as they are defined in another file.
They are called implicit functions.
They are not allowed in C99.
But you can use functions which have a declaration that is not a prototype (i.e. without argument checking).
*/
}
/*
@ -5870,16 +5805,87 @@ int main(int argc, char **argv) {
}
/*
Can redeclare functions with different signatures arguments.
# Identifier list
# Parameter list
TODO
- http://stackoverflow.com/questions/18820751/identifier-list-vs-parameter-type-list-in-c
*/
{
void f();
/*
TODO what happens?
# Prototype vs declaration
http://stackoverflow.com/questions/22076718/why-is-it-possible-redefine-functions-with-different-numbers-of-arguments-than-i
http://stackoverflow.com/questions/5481579/whats-the-difference-between-function-prototype-and-declaration
- Prototype is a declaration that specifies the arguments.
Only a single prototype can exist.
- a declaration can not be a prototype if it does not have any arguments.
The arguments are left unspecified.
- to specify a prototype that takes no arguments, use `f(void)`
In C++ the insanity is reduced, and every declaration is a prototype,
so `f()` is the same as `f(void)`.
Save yourself some headache, and never write declarations that are not prototypes.
TODO why would someone want to use a declaration that is not a prototype?
*/
void f(int);
{
/* Declaration that is not a prototype. */
void proto_decl();
/* Prototype. */
void proto_decl(int);
/* OK, same prototype as above. */
void proto_decl(int i);
/* ERROR: conflicting type for */
/*void proto_decl(float);*/
/* A definition without arguments however already implies `(void)`. */
/* ERROR */
/*int proto_empty_definition(int);*/
assert(proto_empty_definition() == 1);
/*
# float on a prototype after a declaration
You can't use `float`, `char`, etc.: only `int`, `double`
on prototypes that follow declarations!
http://stackoverflow.com/questions/5481579/whats-the-difference-between-function-prototype-and-declaration
*/
{
void proto_decl_float();
/* ERROR: An argument that has default promotion can't match*/
/*void proto_decl_float(float);*/
void proto_decl_double();
void proto_decl_double(double);
}
/*
# void argument vs no argument
http://stackoverflow.com/questions/693788/c-void-arguments
*/
{
/* Prototype that takes no arguments. */
void void_arg(void);
/* ERROR: void must be the only parameter */
/*void void_int_arg(int, void);*/
/* WARN: parameter has void type */
/*void void_arg2(void v);*/
}
}
/* But not with different return types. */
/* ERROR conflicting types for `f` */
@ -5887,6 +5893,26 @@ int main(int argc, char **argv) {
}
}
/*
# Implicit int
# Default return type
http://stackoverflow.com/questions/12373538/warning-return-type-defaults-to-int-wreturn-type
In C89, if not specified, the return type defaulted to `int`.
Appears to have been made illegal in C99.
`gnu99` allows it by default but generates warnings, `-Wno-return-type` to turn off.
*/
{
#if __STDC_VERSION__ <= 199901L
static s;
assert(default_return_type() == 1);
#endif
}
/*
# K&R function declaration
@ -6534,12 +6560,31 @@ int main(int argc, char **argv) {
*/
/*
# Predefined preprocessor macros
# Standard preprocessor defines
Some preprocessor vars are automatically defined by certain compilers
although they are not c standards. Those are not discussed here.
List of standard defines: http://gcc.gnu.org/onlinedocs/cpp/Standard-Predefined-Macros.html
List all them on GCC:
gcc -dM -E - < /dev/null | sort
Sample output:
#define _LP64 1
#define _STDC_PREDEF_H 1
#define __ATOMIC_ACQUIRE 2
#define __ATOMIC_ACQ_REL 4
#define __ATOMIC_CONSUME 1
#define __ATOMIC_HLE_ACQUIRE 65536
#define __ATOMIC_HLE_RELEASE 131072
#define __ATOMIC_RELAXED 0
#define __ATOMIC_RELEASE 3
#define __ATOMIC_SEQ_CST 5
*/
{
/*
@ -6553,6 +6598,10 @@ int main(int argc, char **argv) {
- C11: 201112L
- C99: 199901L
http://sourceforge.net/p/predef/wiki/Standards/
Appears undefined in C89
*/
{
printf("__STDC_VERSION__ = %li\n", __STDC_VERSION__);
@ -6988,6 +7037,61 @@ int main(int argc, char **argv) {
/*int r = system("echo a | grep b");*/
/*assert(r == 1);*/
}
/*
# atoi
# atol
# atoll
Convert string to integer.
`strtol` is better as it allows error checking, so use that instead.
C99 explicitly says that errno does not need to be set.
*/
{
assert(atoi("123") == 123);
enum N { N = 256 };
char s[N];
snprintf(s, N, "%d", INT_MAX);
assert(atoi(s) == INT_MAX);
snprintf(s, N, "%d", INT_MIN);
assert(atoi(s) == INT_MIN);
#ifdef UNDEFINED_BEHAVIOUR
snprintf(s, N, "%ld", INT_MAX + 1L);
printf("INT_MAX + 1 = %s\n", s);
printf("atoi(INT_MAX + 1) = %d\n", atoi(s));
printf("atoi(123abc) = %d\n", atoi("123abc"));
#endif
/* No hex. use strtol */
/*assert(atoi("0xA") == 10);*/
}
/*
# itoa
# ltoa
Neither POSIX nor glibc?
http://stackoverflow.com/questions/190229/where-is-the-itoa-function-in-linux
The closest one gets is an internal `_itoa` in glibc.
`sprintf` is the way.
*/
/*
# strtol
*/
{
}
}
/*

1
c/Makefile Symbolic link
View File

@ -0,0 +1 @@
../Makefile

View File

@ -1,21 +1,13 @@
# C
## Motivation
1. [Pros and cons](pros-and-cons.md)
1. [hello_world.c](hello_world.c)
1. [min.c](min.c)
1. [enum.c](enum.c)
C is amazingly important as it is used to implement:
## History
- Linux kernel
- Windows kernel
- Python and Ruby reference implementations
- OpenGL
And the list goes on and on...
The reason for this is that C:
- is quite low level, so you can do low level things other languages can't
- is low level, so it can be very fast (if you program it correctly)
- is robust and easy to understand.
C is called C because it came after B!
## Standards
@ -45,52 +37,59 @@ ANSI only specifies language and the library interfaces: what functions, variabl
Some features are even left for the implementors to decide such as the behavior.
New features are often based on extension of major implementations such as gnu's or Microsoft's.
New features are often based on extension of major implementations such as GNU's or Microsoft's.
## C89
## Versions
Not all versions were backwards source compatible.
### C89
ANSI ratified the standard in 89, and ISO in 90 only with formatting changes.
## C90
### C90
Synonym for C89, because ISO adopted it in 90.
Formal name: ISO/IEC 9899:1990
## C94
### C94
## C95
### C95
## Normative Addendum 1
### Normative Addendum 1
Informal names for C89/C90 plus Normative Addendum (aka Amendment) 1, whose primary addition was support for international character sets.
C99 extends this.
## C99
### C99
<http://en.wikipedia.org/wiki/C99>
- large support, but not 100% by many compilers
Highly, but not fully backwards compatible: <http://stackoverflow.com/a/30208133/895245>
Major new features:
- support for `//` comments
- `long long`, `bool`, complex numbers
- gcc flag: add `-std=c99`
- `gcc` flag: add `-std=c99`
- Microsoft stated that they will not update their compilers to C99 and further.
WG14/N1256 is the best free draft available: <http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1256.pdf>
They use C as an inner language, and think it would be too delicate/costly to change it.
#### Microsoft C99 support
They have decided to keep only C++ and C# up to date for developers to interface with Windows.
Microsoft stated that they will not update their compilers to C99 and further.
Therefore you will not get those working on MS compiler anytime soon.
They use C as an inner language, and think it would be too delicate/costly to change it.
Every C99 citation here will come from WG14/N1256, the best free draft available:
http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1256.pdf
They have decided to keep only C++ and C# up to date for developers to interface with Windows.
## C11
Therefore you will not get those working on MS compiler anytime soon.
### C11
<http://en.wikipedia.org/wiki/C11_%28C_standard_revision%29>

138
c/enum.c Normal file
View File

@ -0,0 +1,138 @@
#include <assert.h> /* assert */
#include <limits.h> /* INT_MAX */
#include <stdlib.h> /* EXIT_SUCCESS */
/*
Enum cheat: every nested block below is an independent demonstration,
checked with `assert`. Returns EXIT_SUCCESS when all assertions hold.
*/
int main() {
/* # Values */
{
/*
You can choose the values explicitly.
If you leave one out, it is the previous plus one.
If the first is left out, it is `0`.
- http://stackoverflow.com/questions/6434105/are-default-enum-values-in-c-the-same-for-all-compilers
- http://stackoverflow.com/questions/24946699/enums-in-c-what-assumptions-can-be-made-about-the-value-of-the-underlying-varia?lq=1
*/
{
enum E {
E0,
E1,
E2 = 3,
E3,
E4 = INT_MAX,
/* ERROR: Overflow in enumeration values: E5 would be INT_MAX + 1. */
/*E5*/
};
/* If unspecified, the first is 0. */
assert(E0 == 0);
assert(E1 == 1);
assert(E2 == 3);
/* Continue from the last one. */
assert(E3 == 4);
assert(E4 == INT_MAX);
}
/*
# Value size
# Value type
Enum values are `int`.
It does not seem possible to change that:
- http://stackoverflow.com/questions/366017/what-is-the-size-of-an-enum-in-c
- http://stackoverflow.com/questions/4879286/specifying-size-of-enum-type-in-c
- http://stackoverflow.com/questions/18090541/how-to-set-the-value-of-an-enumeration-constant-outside-the-range-of-int
*/
{
/* sizeof */
{
enum E {E1, E2};
/*
NOTE(review): compilers may be allowed to pick a type smaller than `int`
for the enum type itself, so this first assertion may not be portable - confirm.
*/
assert(sizeof(enum E) == sizeof(int));
/* Size of an enumerator, as opposed to the size of the enum type above. */
assert(sizeof(E1) == sizeof(int));
}
/* The largest value that can be portably stored is INT_MAX. */
/* http://stackoverflow.com/questions/366017/what-is-the-size-of-an-enum-in-c */
{
enum E {E1 = INT_MAX};
/*enum E_BAD { E1 = INT_MAX + 1};*/
}
}
}
/* Declare enum variable. */
{
{
enum E { E0, E1 };
/* Like struct, you need to write the `enum` keyword. */
enum E e = E1;
}
/* Typedef combo. No need to type enum everywhere. */
{
/* Multi line */
enum E { E1, E2};
typedef enum E E;
E e;
/* Single line */
typedef enum F {g1, g2} F;
F f;
}
}
/* Nomenclature. */
{
enum E { E0 };
/*
# Enumerator
# Member of an enum.
Synonyms.
Enumerator and "Member of an enum" are synonyms defined by the standard.
*/
}
/*
ERROR: only constant expressions are allowed for initialization.
When used, the values are constant expressions.
*/
{
const int i = 0;
/*enum Constexpr { N = i };*/
/*int is[N];*/
/* This is why enum values don't generate variable size arrays. */
{
enum N { N =2 };
int is[N];
assert(sizeof(is) == 2 * sizeof(int));
}
}
/* Enum constants are not lvalues. No const removal cast. */
/*int *pe = &E1;*/
/*
# Count elements of an ENUM.
Does not seem possible: http://stackoverflow.com/questions/2102582/how-can-i-count-the-items-in-an-enum
Possible workaround: add an extra element and rely on the increasing order.
Obvious downside: remote name conflict possibility.
*/
{
enum E {E1, E2, E_SIZE};
assert(E_SIZE == 2);
}
return EXIT_SUCCESS;
}

View File

32
c/pros-and-cons.md Normal file
View File

@ -0,0 +1,32 @@
# Pros and cons
## Pros
C is amazingly important as it is used to implement:
- Linux kernel
- Windows kernel
- Python and Ruby reference implementations
- OpenGL
- GCC and Binutils (now migrating to C++)
And the list goes on and on...
The reason for this is that C:
- is quite low level, so you can do low level things other languages can't
- is low level, so it can be very fast (if you program it correctly)
- is robust and easy to understand.
## Cons
- memory leaks
- obscure features from the 70's for backwards compatibility, many of which trigger compiler warnings even though they are part of the standard:
- K&R function declarations
- trigraphs
- declarations that are not prototypes
Some were dropped in C99.

40
cpp.cpp
View File

@ -4564,11 +4564,12 @@ int main(int argc, char **argv) {
{
std::string msg = "custom message";
std::ios_base::failure e(msg);
assert(e.what() == msg);
// TODO worked in GCC 4.8, failed in GCC 5.1.
//assert(e.what() == msg);
}
/*
# uncaught exceptions.
# uncaught exceptions
Uncaught exceptions explode at top level and terminate the program.
@ -5159,19 +5160,22 @@ int main(int argc, char **argv) {
}
/*
auto rule: brace initializer can be bound to auto
# auto and initializer lists
This means that for loop work
auto rule: brace initializer can be bound to auto
http://en.cppreference.com/w/cpp/utility/initializer_list
http://en.cppreference.com/w/cpp/utility/initializer_list
*/
{
{
auto l{0, 1, 2};
// TODO GCC 5.1 does not allow this, which conflicts with
// http://en.cppreference.com/w/cpp/utility/initializer_list
// Who is right?
//auto l{0, 1, 2};
// SAME:
//initializer_list<int> l{0, 1, 2};
assert(l.size() == 3);
assert(*l.begin() == 0);
//assert(l.size() == 3);
//assert(*l.begin() == 0);
}
// The rule for auto makes this ranged for work.
@ -5358,12 +5362,12 @@ int main(int argc, char **argv) {
- assignment is made
- the object created by `NoBaseNoMember();` goes out of scope and is destroyed
Therefore the following may be more effective due to copy ellision:
Therefore the following may be more effective due to copy elision:
NoBaseNoMember c = NoBaseNoMember();
in which case only a single constructor is called.
Copy ellision in this case is widely implemented.
Copy elision in this case is widely implemented.
*/
{
callStack.clear();
@ -5697,14 +5701,14 @@ int main(int argc, char **argv) {
- All input and output operations occur in the same order and with the same content
as if the program was executed as written.
The only exception to the rule is copy ellision.
The only exception to the rule is copy elision.
*/
/*
# copy elision
<http://en.cppreference.com/w/cpp/language/copy_elision>
<http://stackoverflow.com/questions/12953127/what-are-copy-elision-and-return-value-optimization>
- http://en.cppreference.com/w/cpp/language/copy_elision
- http://stackoverflow.com/questions/12953127/what-are-copy-elision-and-return-value-optimization
Exceptions to the as-if rules, which specifies cases in which compilers
may reduce the number of copy operations made, which is detectable in C++'
@ -5713,17 +5717,17 @@ int main(int argc, char **argv) {
*/
{
/*
# temporary copy ellision
# temporary copy elision
If no copy elision is done:
1) temporary object constructor
2) copy temporary to c
3) temporary object destructor
1. temporary object constructor
2. copy temporary to c
3. temporary object destructor
If copy elision is done:
1) c is constructed directly.
1. c is constructed directly.
Therefore both results are possible and the result is unpredictable:

16
cpp.md
View File

@ -52,7 +52,9 @@ In `gcc` used to be enabled via `-std=c++0x` flag, now `-std=c++11`. Still marke
Will come after C++11. Known as C++1Y as many have doubts it will come out in 2014.
## STL vs stdlib
## stdlib
### STL vs stdlib
The term `STL` is *not* mentioned in the C++ ISO standard.
@ -64,6 +66,18 @@ Therefore: **never** use that word, unless you are really talking about the non
Write `stdlib` and say "Standard Library" instead.
### stdlib implementations
#### GCC libstdc++
GCC comes with an implementation of libstdc++.
#### Apache C++ Standard Library
Dead.
<https://en.wikipedia.org/wiki/Apache_C%2B%2B_Standard_Library>
## Interesting libraries
C++ has many major interesting non standard libs.

View File

@ -1,63 +0,0 @@
CFLGS := -Wall
.PHONY: all clean run
all: maina.out mainso.out mainso_fullpath.out libab.so
run: maina.out mainso.out mainso_fullpath.out
./maina.out
# This works only after install target and is the standard production method:
#./mainso.out
# This only a test method:
env LD_LIBRARY_PATH=.:$LD_LIBRARY_PATH ./mainso.out
# Full path is stored in the out.
# Since it starts with './', linker knows it is an absolute path:
./mainso_fullpath.out
install:
#sudo mv libabso /some/where/in/ld/path/
#load path can be found with:
#ldconfig -v 2>/dev/null | grep -v $'^\t'
#sudo ldconfig
# Main to link to .so
# readelf -d shows that the output stores the relative path
mainso.out: main.o libab.so
gcc $(CFLGS) -L"." main.o -o mainso.out -lab
#will look for lib with basename *exactly* `libab.so`,
#`libab.so.1` will not do!
#gcc $(CFLGS) -L"." main.o -o mainso.out -l:libab.so
#with ':' uses full basename
#APPLICATION
#select an specific version such as `libab.so.1`
#env LIBRARY_PATH=$LIBRARY_PATH:. gcc $(CFLGS) main.c -o mainso.out -lab
# This is not recommended
# Better use linker path as in mainso.out
# readelf -d shows that the output stores the full path.
mainso_fullpath.out: main.o libab.so
gcc $(CFLGS) main.o "$(shell realpath libab.so)" -o mainso_fullpath.out
#gcc $(CFLGS) main.o -o mainso_fullpath.out -l"$(shell realpath libab.so)"
#does not work
#main with .a
maina.out: main.o ab.a
gcc $(CFLGS) main.o ab.a -o maina.out
#make .so
libab.so: a.o b.o
gcc $(CFLGS) -shared a.o b.o -o libab.so
#gcc $(CFLGS) -shared -Wl,-soname,libab.so a.o b.o -o libab.so
#make .a
ab.a: a.o b.o
ar rcs ab.a a.o b.o
#first compile the object files
#this way, if a c file does not change,
#the corresponding .o file does not get recompiled!
%.o: %.c
gcc $(CFLGS) -fPIC -c "$<" -o "$@"
clean:
rm -rf *.o *.a *.so *.out

View File

@ -1,3 +0,0 @@
# Dynamic library
Dynamic and statically linked libraries in Linux. `.a` and `.so` creation and usage.

View File

@ -1,7 +0,0 @@
#include <stdio.h>
#include "a.h"
void a() {
puts("a");
}

View File

@ -1,10 +0,0 @@
#ifndef A_H
#define A_H
void a();
#ifdef DEF
int def;
#endif
#endif

View File

@ -1,5 +0,0 @@
#include <stdio.h>
#include "b.h"
void b() { puts("b"); }

View File

@ -1,6 +0,0 @@
#ifndef B_H
#define B_H
void b();
#endif

View File

@ -1,16 +0,0 @@
#include <stdio.h>
/* MUST come before the include. */
/* Preprocessor does things in the exact same order it sees them. */
/*#define DEF*/
#define DEF
#include "a.h"
#include "b.h"
/*
Entry point: call one function from each of the two library sources,
then touch `def`.

Returns 0 explicitly: before C99, falling off the end of `main` leaves
the exit status undefined.
*/
int main(int argc, char** argv) {
    a();
    b();
    /* `def` comes from a.h, which only declares it when DEF is defined before the include. */
    def++;
    return 0;
}

24
extern/Makefile vendored Normal file
View File

@ -0,0 +1,24 @@
# Compile every *$(IN_EXT) file of this directory into an object file,
# and link all of them into the single executable $(RUN)$(OUT_EXT).
# NOTE(review): make predefines CC (default `cc`), so this `?=` likely never
# applies and the flags below would be silently ignored - confirm.
CC ?= gcc -pedantic-errors -std=c89 -Wall
IN_EXT ?= .c
OUT_EXT ?= .out
RUN ?= main
TMP_EXT ?= .o
# All sources, and the object file that corresponds to each of them.
INS := $(wildcard *$(IN_EXT))
OUTS_NOEXT := $(basename $(INS))
OUTS := $(addsuffix $(TMP_EXT), $(OUTS_NOEXT))
RUN_BASENAME := $(RUN)$(OUT_EXT)
.PHONY: clean run
# Default target: link all object files into the executable.
$(RUN_BASENAME): $(OUTS)
$(CC) $+ -o '$@'
# Compile a single source into its object file.
%$(TMP_EXT): %$(IN_EXT)
$(CC) -c '$<' -o '$@'
# Remove the object files and the executable.
clean:
rm -f *'$(TMP_EXT)' '$(RUN_BASENAME)'
# Build if needed, then execute.
run: $(RUN_BASENAME)
./'$(RUN_BASENAME)'

5
extern/README.md vendored Normal file
View File

@ -0,0 +1,5 @@
# static
Multi file semantics of `static`.
Also contains `extern`. TODO split.

39
extern/a.c vendored Normal file
View File

@ -0,0 +1,39 @@
#include <stdio.h>
#include "a.h"
/* ERROR already defined in main. */
/*int i = 0;*/
/* OK: only declared. */
int i;
/* OK: only visible to this file. */
static int staticInt = 0;
/* Define externInt. Will store this initial value on the executable. */
int externInt = 0;
/* WARN: extern initialized. */
/* It does not make much sense to add extern to a definition: only to a declaration. */
/*extern int externIntInt = 1;*/
/* ERROR redefinition: */
/*void func(){ puts("mainFunc"); }*/
/*
Dump the counters visible from this file, followed by an empty line.
`static`: this function is private to this file.
*/
static void staticFunc() {
    printf(
        "a#staticFunc:\n"
        " staticInt = %d\n"
        " aHStaticInt = %d\n"
        " externInt = %d\n"
        "\n",
        staticInt,
        aHStaticInt,
        externInt
    );
}
/* Print the current state of the counters, then increment each of them once. */
void a() {
    staticFunc();
    staticInt += 1;
    aHStaticInt += 1;
    externInt += 1;
}
struct s {};

View File

26
extern/main.c vendored Normal file
View File

@ -0,0 +1,26 @@
#include <stdio.h>
#include "a.h"
int i = 0;
static int staticInt = 0;
/*
Dump the counters visible from this file, followed by an empty line.
`static`: this function is private to this file.
*/
static void staticFunc() {
    printf(
        "main#staticFunc:\n"
        " staticInt = %d\n"
        " aHStaticInt = %d\n"
        " externInt = %d\n"
        "\n",
        staticInt,
        aHStaticInt,
        externInt
    );
}
struct s {
int i;
};
/*
Run two identical rounds: print the counters as seen from this file,
then let `a()` print and increment the ones it sees.
*/
int main(int argc, char** argv) {
    int round;

    for (round = 0; round < 2; ++round) {
        staticFunc();
        a();
    }
    return 0;
}

View File

@ -2,11 +2,15 @@
Cheat on the GNU Compiler Collection (GCC) language extensions and command line utilities.
1. [main.c](main.c): main `gcc` cheat
1. [Introduction](introduction.md)
1. [Compilation steps](compilation-steps.md)
1. [gcc utility](gcc-utility.md)
1. [cpp](cpp.md)
1. [main.c](main.c): main `gcc` cheat
1. [Introduction](introduction.md)
1. [Compilation steps](compilation-steps.md)
1. [gcc utility](gcc-utility.md)
1. [Spec files](spec-files.md)
1. [cpp](cpp.md)
1. Internals
1. [Build and install](build-and-install.md)
1. [Source tree](source-tree.md)
## Scope
@ -25,25 +29,3 @@ This can be done with MinGW (TODO how).
Error messages that are difficult to interpret for newbs:
- `struct has incomplete type = struct undefined`. You forgot to include some header.
## Generated assembly
This contains information that help to understand the assembly code generated by GCC, for example via the `-S` flag.
The generated assembly code is in the `gas` format. `gas` specific features shall not be explained here.
### Label name conventions
- `.L*`: local labels to current file
- `.LFB*`: function begin
- `.LFE*`: function end
- `.LC*`: function end
- `.LCFI`:
- `.LBB`:
- `.LBE`:
## Bibliography
- <http://www.ibm.com/developerworks/library/l-gcc-hacks/>
Good selection of useful features.

15
gcc/bibliography.md Normal file
View File

@ -0,0 +1,15 @@
# Bibliography
Extensions:
- <http://www.ibm.com/developerworks/library/l-gcc-hacks/>
Good selection of useful features.
Internals:
- <https://gcc.gnu.org/onlinedocs/gccint/index.html>
Generated from `gcc/doc/gccint.texi`, so very official, yay!
- <https://en.wikibooks.org/wiki/GNU_C_Compiler_Internals/GNU_C_Compiler_Architecture>

165
gcc/build-and-install.md Normal file
View File

@ -0,0 +1,165 @@
# Build and install
Tested with: version 5.1.0 on Ubuntu 14.04 in a 2013 computer.
Summary:
apt-get install flex bison
git clone git://gcc.gnu.org/git/gcc.git
cd gcc
# No annotated tags... so no describe.
git checkout gcc-5_1_0-release
./contrib/download_prerequisites
cd ..
mkdir gcc-build
cd gcc-build
../gcc/configure --enable-languages=c,c++
make
sudo make install
## Configure
### download_prerequisites
In Ubuntu 14.04, missing dependencies GMP and others did not go away for me even though they were installed with `apt-get build-dep gcc` and `dpkg` says they are present. I needed `download_prerequisites`
<http://stackoverflow.com/questions/9253695/building-gcc-requires-gmp-4-2-mpfr-2-3-1-and-mpc-0-8-0>
Also `configure` does not detect a missing `flex`, but it seems required or else you get a missing `yylex` error: <http://stackoverflow.com/questions/4262531/trouble-building-gcc-4-6>
### Binutils and GCC
Binutils is a requirement of GCC. For instance, Linux From Scratch first installs Binutils, then GCC, then recompiles both to bootstrap.
It appears that you can build both GCC and Binutils at the same time: <http://stackoverflow.com/questions/1726042/recipe-for-compiling-binutils-gcc-together>
A compatibility matrix between GCC and Binutils can be found at: <http://wiki.osdev.org/Cross-Compiler_Successful_Builds>
### glibc and GCC
GCC depends on glibc. TODO does it depend on `libstdc++`?
Linux From Scratch compiles it twice to bootstrap.
### Three systems
When you build GCC, you have to configure 3 systems:
- build: where GCC will be built
- host: the system that will run GCC
- target: the system that will run the code generated by the compiled GCC
<https://gcc.gnu.org/onlinedocs/gcc-5.1.0/gccint/Configure-Terms.html#Configure-Terms>
## Build
Making a separate build directory is mandatory.
Took me 2 hours and 4GB of disk space.
To build only certain parts of GCC <http://stackoverflow.com/questions/14728652/how-to-make-a-light-build-of-gcc-with-language-supports-etc-pruned>:
../gcc/configure --enable-languages=c,c++
With `make -j5`, this took 1 hour.
See all configuration options with:
./configure --help
### bootstrap
### Build stages
By default, the build happens in 3 stages:
1. compile the compiler with the compiler of the `build` computer
2. use the compiled compiler to compile itself
3. repeat 2. and compare 2 and 3. Should be the same.
Configure the build to disable bootstrap and compile only once:
../gcc/configure --disable-bootstrap
TODO does it work without? Is bootstrap really necessary? How can stage 3 ever be different from stage 2?
## Tests
sudo apt-get install autogen runtest
Run all tests:
make -k check
Run only certain tests:
make -k check-gcc-c
make -k check-gcc
## Install
Generated files will be put under:
- `/usr/local/bin` for front-end executables like `gcc`, `ld`
- `/usr/local/lib64` for libraries like `libstdc++`
- `/usr/local/libexec/gcc/x86_64-unknown-linux-gnu/5.1.0` for backend executables like `cc1` and `collect2`
## Run what you've built
TODO what is the required glibc for each GCC?
C programs seemed to run directly:
gcc hello_world.c
./hello_world
On Ubuntu 14.04, GCC 5.1, C++ programs needed `$LD_LIBRARY_PATH` to find the standard library `libstdc++.so` as it is not on the path by default:
LD_LIBRARY_PATH="/usr/local/lib64:$LD_LIBRARY_PATH" ./cpp
## Modify, rebuild and rerun
Modify a single line in the `gcc` entry point `gcc/gcc-main.c`:
#include <stdio.h>
fputs("hacked\n", stderr);
You *cannot* modify the *stdout* output, or else the build will fail! This probably happens because `dumpspecs` is used on some part of the bootstrap process.
In general, even writing to stderr could make tests fail however: the safest thing possible would be to write any outputs to a file, or use a debugger.
Then on the build directory:
make -j5
sudo make install
And now any invocation of `gcc` should output `hacked` before anything else:
gcc -v
## Uninstall
cd gcc
sudo make uninstall
## Documentation
make
builds `man` and `info` documentation by default:
cd gcc/doc
man ./gcc.1
Those are generated from the `.texi` inputs.
For the HTML docs, use:
make html
firefox/gcc/HTML/gcc-4.8.2/gcc/index.html
The website is not included in the source code, but on a separate repo:
cvs -q -d :pserver:cvs@gcc.gnu.org:/cvs/gcc checkout -P wwwdocs

View File

@ -2,6 +2,25 @@
Useful GCC command line options.
## What the gcc utility does
Nothing. Almost.
`gcc` is a driver: it orchestrates the build process, by calling other programs with the right options, notably:
- `cpp`: preprocessor
- `cc1`: compiler. Generates `.s` file. Not in `PATH` by default.
- `as`: assembler. Generates `.o` files.
- `collect2`: linker. Generates executable files. Not in `PATH`, but almost identical to `ld`.
Options you pass to GCC determine how it will call the other programs.
Therefore, to understand GCC you should first understand the other programs separately, and then just see exactly how GCC is calling them.
The exact way in which GCC calls the other programs is determined by spec files.
GCC does not contain Binutils: they are maintained in separate git repositories. Binutils has no knowledge about programming languages, only assembly, and can be used separately from GCC. Binutils is a dependency of GCC.
## Recommended compilation flags
Good discussion: <http://stackoverflow.com/questions/154630/recommended-gcc-warning-options-for-c>
@ -282,6 +301,24 @@ Example:
## f
Flags.
These are regular options, most of which have value either true or false.
For true:
gcc -ffoo
and false:
gcc -fno-foo
Does not have any other meaning.
Not all `-f` flags are booleans however:
gcc -frandom-seed=1234
### inline
Allow the compiler to inline functions. Implied by `-O3`.
@ -320,4 +357,73 @@ TODO Link statically only to glibc: <http://stackoverflow.com/questions/13187499
### static-libgcc
Link statically to libgcc. This is *not* `libc`, but an internal GCC library.
Link statically to `libgcc`. This is *not* `libc`, but an internal GCC library.
## Informational commands
### v
Get build information.
For version use `--version`.
Sample output:
Using built-in specs.
COLLECT_GCC=gcc
COLLECT_LTO_WRAPPER=/usr/local/libexec/gcc/x86_64-unknown-linux-gnu/5.1.0/lto-wrapper
Target: x86_64-unknown-linux-gnu
Configured with: ../gcc/configure --enable-languages=c,c++
Thread model: posix
gcc version 5.1.0 (GCC)
Non obvious lines:
- `Using built-in specs.`. Means that the default specs files are being used, that is, `-specs` was not passed as an argument.
- `Configured with`: how `configure` was called before building
### print-search-dirs
gcc -print-search-dirs | tr ':' '\n'
Sample output:
install
/usr/local/lib/gcc/x86_64-unknown-linux-gnu/5.1.0/
programs
=/usr/local/libexec/gcc/x86_64-unknown-linux-gnu/5.1.0/
/usr/local/libexec/gcc/x86_64-unknown-linux-gnu/5.1.0/
/usr/local/libexec/gcc/x86_64-unknown-linux-gnu/
[...]
libraries
=/usr/local/lib/gcc/x86_64-unknown-linux-gnu/5.1.0/
/usr/local/lib/gcc/x86_64-unknown-linux-gnu/5.1.0/../../../../x86_64-unknown-linux-gnu/lib/x86_64-unknown-linux-gnu/5.1.0/
/usr/local/lib/gcc/x86_64-unknown-linux-gnu/5.1.0/../../../../x86_64-unknown-linux-gnu/lib/x86_64-linux-gnu/
[...]
The search paths are:
- `install` TODO
- `programs`: where GCC will look for its internal programs like `cc1` and `collect2`. GCC also uses the `PATH` for external programs, which are called even though they are not there.
### print-file-name
Find in the `libraries` section of `-print-search-dirs`.
This horribly named option searches in the library path for a given library:
gcc -print-file-name=libc.so
If the library is found it returns the full path:
/usr/lib/x86_64-linux-gnu/libc.so
otherwise, it returns the input string:
libc.so
which indicates that the program only checks if directories exist, not the actual file searched for.
### print-prog-name
Same as `-print-file-name`, but for the internally called programs instead.

View File

@ -2,6 +2,8 @@
GCC is arguably the most popular C and C++ compiler.
GCC stands for GNU Compiler Collection: *not* C compiler, and currently compiles: C, C++, Objective-C, Fortran, Java, Ada, and Go.
The Linux kernel uses GCC extensions so you need it to build it.
`gcc` is the C compiler. It is a large frontend for other tools such as `as`, `cpp`.
@ -16,8 +18,6 @@ Obviously, it is always better if you avoid using extensions, but you may encoun
GNU extensions have a large chance of being implemented in future ANSI C versions (but sometimes in a modified form) because of the large influence of GCC.
GCC stands for GNU Compiler Collection: *not* C compiler, and currently compiles: C, C++, Objective-C, Fortran, Java, Ada, and Go.
## g++ vs gcc
`g++`: <http://stackoverflow.com/questions/172587/what-is-the-difference-between-g-and-gcc>
@ -36,3 +36,22 @@ Major ones:
- PE (Windows)
but there are others.
## What languages it compiles
GCC has front-ends and back-ends:
- front-ends: input languages
- back-ends: output machine codes
Infrastructure is reused across multiple languages.
Front-ends for which GCC is the major Linux implementation: C, C++, Fortran, Ada.
Other front-ends:
- Java (deprecated), Oracle's `javac`
- Go, Google's `gc`
- Objective-C: TODO. What does Apple use? There is also a LLVM
GCC also offers standard libraries for most languages it supports, except `libc`, which is in the separate project `glibc`, which is highly optimized, and has per-processor implementations.

162
gcc/source-tree.md Normal file
View File

@ -0,0 +1,162 @@
# Source tree
Tested on version 5.1.
## What GCC is coded with
Was C, then in 2012 at 4.8 started allowing C++.
There are however no `.cpp` or `.hpp` files: `.c` and `.h` extensions were kept.
## Programs
The single source tree contains:
- C, C++, etc. compilers
- Binutils: `as`, `ld`, `nm`
## Overview
The top 20 directories by size are:
1.6G .git
360M gcc
165M libjava
78M libstdc++-v3
15M libgo
15M libgcc
14M gmp-4.3.2
8.0M libgfortran
7.4M mpfr-2.4.2
3.5M zlib
3.5M libcpp
3.1M libgomp
3.1M boehm-gc
3.0M libffi
2.9M mpc-0.8.1
2.5M libiberty
1.9M libsanitizer
1.7M libquadmath
1.5M libitm
1.5M libdecnumber
The main source is under `gcc`:
175M testsuite
49M ada
32M config
31M po
7.8M cp
6.4M fortran
5.1M doc
2.0M go
1.9M java
1.8M ChangeLog-2004
1.5M ChangeLog-2011
1.5M ChangeLog-2010
1.4M ChangeLog-2005
1.4M ChangeLog-2003
1.3M ChangeLog-2012
1.3M ChangeLog-2009
1.3M ChangeLog-2007
1.3M ChangeLog-2002
1.2M ChangeLog-2001
1.2M ChangeLog-2000
1.1M c-family
1.1M c
1.1M ChangeLog-2008
876K ChangeLog-2006
872K objc
836K configure
784K ChangeLog-1999
744K ChangeLog.tree-ssa
716K dwarf2out.c
640K ChangeLog-1998
552K FSFChangeLog.11
536K fold-const.c
## Entry points
Under `gcc/`:
- `gcc-main.c`: `gcc`. Immediately forwards to `gcc.c driver::main`.
- `main.c`: `cc1`. Immediately forwards to `toplev.c toplev::main`.
- `collect2.c`
## Files
### collect2
TODO vs `ld`?
<https://gcc.gnu.org/onlinedocs/gcc-4.3.5/gccint/Collect2.html>
### cp
C++ specifics.
### lto
Link Time Optimization.
## C family
## c-family
C-family includes: C, C++, Objective C.
## gcc/doc/gccint.texi
Internals documentation, yay! <https://gcc.gnu.org/onlinedocs/gccint/index.html>
## Parser location
<http://stackoverflow.com/questions/2721071/c-grammar-in-gcc-source-code>
In the past long ago C used an explicit grammar and Bison.
4.8 just parses manually with:
gcc/c/c-parser.c
and neighbours. A 10k LOC file.
## Built files
## Generated build files
### gcc
### xgcc
By default, `xgcc` is generated, and installed as `gcc`.
The `x` stands for Cross compiler.
### gcc-ar
### gcc-nm
### gcc-ranlib
Wrappers for LTO support.
<https://gcc.gnu.org/ml/gcc-help/2012-03/msg00100.html>
### libstdc++ location
Where did it go? Installation did install it however.
## libstdc++
TODO is this the most popular C++ stdlib implementation?
Unlike `glibc`, it has no per-architecture optimizations. Likely glibc provides the hardcore layer, and C++ the boilerplate layer.
## Register allocation
One of the hardest compiler problems, as it comes down to an NP complete graph coloring optimization.
Used to be done by the reload system, which was present from day 1, <https://gcc.gnu.org/wiki/reload>, but got replaced in 2012 by LRA by Red Hat: <ftp://gcc.gnu.org/pub/gcc/summit/2003/Graph%20Coloring%20Register%20Allocation.pdf> because reload was very unmaintainable.
On 5.1, `lra.c` is the main entry point, and has a nice block diagram documentation. The graph coloring algorithm is Chaitin-Briggs <http://stackoverflow.com/questions/14399608/chaitin-briggs-algorithm-explanation>

14
gcc/spec-files.md Normal file
View File

@ -0,0 +1,14 @@
# Spec files TODO
Determine exactly how GCC calls other programs.
- <http://stackoverflow.com/questions/7493620/inhibit-default-library-paths-with-gcc>
- <http://unix.stackexchange.com/questions/77779/relationship-between-cc1-and-gcc>
View the current spec file:
gcc -dumpspecs
Use given spec file:
gcc -specs=<specs-file>

View File

@ -87,6 +87,7 @@ The following are useful:
- `disassemble`
- `stepi`
- `nexti`
- `layout asm`
- <http://stackoverflow.com/questions/1902901/show-current-instruction-in-gdb>
- <http://stackoverflow.com/questions/589653/switching-to-assembly-in-gdb>
@ -446,6 +447,12 @@ Once you change the frame, it is as if you have moved to that point:
and so on.
#### down
#### up
Move a stack frame up or down and print it.
#### i
#### info

View File

@ -1,31 +1,4 @@
# glibc
1. [Introduction](introduction.md)
1. [Source tree](source-tree.md)
## Introduction
glibc is GNU's implementation of:
- libc. By far the most popular implementation on Linux.
- POSIX C API
- glibc extensions such as interfaces to system calls
Project home: <http://www.gnu.org/software/libc/>
Documentation: <http://www.gnu.org/software/libc/manual/html_mono/libc.html>
This shall only document GNU extensions to libc in its implementation called glibc.
Extensions which are very thin interfaces to system calls will be documented together with system calls and not here.
POSIX C API shall not be discussed, only points which are specific to glibc shall be discussed here.
glibc does not necessarily come with GCC, the compiler: you could in theory choose between different implementations.
In Ubuntu 14.04:
- headers for glibc are on `/usr/include`, e.g. `/usr/include/stdio.h`. Try: `locate /stdio.h`
- lib for glibc are on `/usr/lib/i386-linux-gnu`. Try: `locate libc.so`. I have two `libc.so`:
- `/lib/x86_64-linux-gnu/libc.so.6` from `libc6` package, which is used by other packages. The packages does not contain headers.
- `/usr/lib/x86_64-linux-gnu/libc.so` from the `libc6-dev` package, which is used by things you compile yourself. The package also contains headers.
- the Ubuntu package that contains it is called `libc6-dev`. See it with: `apt-file search `

51
glibc/introduction.md Normal file
View File

@ -0,0 +1,51 @@
# Introduction
glibc is GNU's implementation of:
- libc. By far the most popular implementation on Linux.
- POSIX C API
- glibc extensions such as interfaces to system calls
Project home: <http://www.gnu.org/software/libc/>
Documentation: <http://www.gnu.org/software/libc/manual/html_mono/libc.html>
This shall only document GNU extensions to libc in its implementation called glibc.
Extensions which are very thin interfaces to system calls will be documented together with system calls and not here.
POSIX C API shall not be discussed, only points which are specific to glibc shall be discussed here.
glibc does not necessarily come with GCC, the compiler: you could in theory choose between different implementations.
In Ubuntu 14.04:
- headers for glibc are on `/usr/include`, e.g. `/usr/include/stdio.h`. Try: `locate /stdio.h`
- lib for glibc are on `/usr/lib/i386-linux-gnu`. Try: `locate libc.so`. I have two `libc.so`:
- `/lib/x86_64-linux-gnu/libc.so.6` from `libc6` package, which is used by other packages. The package does not contain headers.
- `/usr/lib/x86_64-linux-gnu/libc.so` from the `libc6-dev` package, which is used by things you compile yourself. The package also contains headers.
- the Ubuntu package that contains it is called `libc6-dev`. See it with: `apt-file search `
## Find glibc version
`libc.so.6` is executable!
/lib/x86_64-linux-gnu/libc.so.6
Sample output:
GNU C Library (Ubuntu EGLIBC 2.19-0ubuntu6.6) stable release version 2.19, by Roland McGrath et al.
Copyright (C) 2014 Free Software Foundation, Inc.
This is free software; see the source for copying conditions.
There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE.
Compiled by GNU CC version 4.8.2.
Compiled on a Linux 3.13.11 system on 2015-02-25.
Available extensions:
crypt add-on version 2.1 by Michael Glad and others
GNU Libidn by Simon Josefsson
Native POSIX Threads Library by Ulrich Drepper et al
BIND-8.2.3-T5B
libc ABIs: UNIQUE IFUNC
For bug reporting instructions, please see:
<https://bugs.launchpad.net/ubuntu/+source/eglibc/+bugs>.

View File

@ -8,7 +8,13 @@ Most of those compilers work for multiple related languages such as C, C++, etc.
### clang
LLVM.
LLVM based.
Made by Apple in 2007 when GCC did not meet its technical and licensing requirements, later merged into LLVM. It then had contributions by Google, Apple, Intel, etc.
FreeBSD moved to it in 2012: <http://unix.stackexchange.com/questions/49906/why-is-freebsd-deprecating-gcc-in-favor-of-clang-llvm>
Sony PS4 (2013 Q4, FreeBSD based) moved to it while PS3 used GCC.
### icc
@ -28,6 +34,14 @@ INRIA formally verified compiler to a very large subset of C99.
Written and verified in Coq.
### Portable C Compiler
<https://en.wikipedia.org/wiki/Portable_C_Compiler>
Very small.
Popular in the 1980s, until GCC killed it.
## libc
- glibc: GNU implementation, major on Linux

View File

@ -1,328 +0,0 @@
# Library
To use a compiled library you need:
- the compiled `.so` or `.a`
- for C and C++, the header `.h` file(s).
This allows your compiler to check at compile time that you are calling functions correctly (supposing of course that the `.h` files you include correspond to the actual library files).
This is not needed in languages such as Fortran. Down point for Fortran.
Either those must be in your compiler's search path (different for headers and compiled files) or you must explicitly add them to the path.
## Dynamic vs static
Dynamic libraries are compiled libraries kept outside of the executable and are used at run time.
They have `.so` extension on Linux and `.dll` on windows
Dynamic libraries are different from static libraries (`.o` and `.a`) static libraries are put inside the executable at compile time.
Advantages and disadvantages of dynamic libraries over static libraries are the usual trade-offs of share vs embed design choices.
Advantages:
- memory saving by keeping only a single version of the library
Static libraries mean one version per executable
This makes it absolutely essential to use dynamic libraries for very large libraries.
- if the library inner working get updated to be (faster, use less memory),
But not the interface ( inputs, outputs, side effects)
There is no need to recompile the executable to get the updates.
Disadvantages:
- more complicated to use
- usage is OS dependant
- slight load overhead
Since the disadvantages are so minor, it is almost always better to use dynamic linking.
### Dynamic linking vs Dynamic loading
<http://www.ibm.com/developerworks/library/l-dynamic-libraries/>
## ld.so
man ld.so
TODO.
## Search path
Find where GCC search path for both `.a` and `.so`:
gcc -print-search-dirs | grep '^libraries' | tr ':' $'\n'
### -L option
Append to search path of executable:
gcc a.c -o a.out -L/full/path/to/ -lm
gcc a.c -o a.out -L./rel/path/to/ -lm
### LIBRARY_PATH
Colon separated list of paths that does the same as `-L`.
## Static
Gets included inside the generated executable, making it larger.
You don't have to worry about dependencies.
gcc -c a.c
gcc -c b.c
ar rcs a.a a.o b.o
gcc a.a c.c
## Dynamic
### loading vs linking
There are two methods of using dynamic libraries in Linux: linking and loading.
#### linking
Link to lib for entire program.
Simpler.
#### loading
Explicitly load needed functions during program execution.
### create so
*Must* compile like this:
gcc -c -fPIC a.c
gcc -c -fPIC b.c
gcc -shared a.o b.o -o libab.so
using `-fPIC` and `-shared`.
### Version numbering
Standard: up to 3 numbers.
Yes, they come after the `.so`, otherwise there would be ambiguity: is `liba.1.so` version 1 of `liba`, or simply a library called `liba.1`?
To link to a given version use full basename linking with version number.
Linking takes care of version defaults:
- `liba.so.1.1.1`
Necessarily itself.
- `liba.so.1.1`
- Itself
- or a link to `1.1.1`
- or a link to `1.1.2`
- or a link to ...
- `liba.so.1`
- Itself,
- or a link to `1.1`
- or a link to `1.2`
- or a link to `1.1.2`
- or a link to `1.2.1`
- or a link to ...
- `liba.so`
- Itself,
- or a link to `1`
- or a link to `2`
- or a link to `1.1`
- or a link to `1.2`
- or a link to ...
Rationale: if you underspecify the library you get by default the most recent.
Convention: change in first number means possible interface break.
TODO confirm: GCC does not resolve the conventional versioning names automatically: for library `a` to be found, there must be a file named exactly `liba.so` in the path. `liba.so.1` and others will not work.
### Compile executable that depends on an so
You must tell `gcc` which libs to use with the `-l` flag.
The linker will check that the library is there and that it contains the necessary definitions.
Also, the path information will be kept inside the executable.
How this information is represented is a part of the `.elf` format definition.
*Remember*: when the program will run, it must be able to find that `.so` again on the load path!
#### What can be passed to -l
The name given to `-l` must be either:
- stripped from `lib` and `.so` part
Ex: `m`, for `libm.so`. *will not work for `libm.so.1` !!!!!
- colon + `:`full basename. Ex: `-l:libm.so.1`
You need to compile like this so GCC can tell if all your functions are defined.
#### Relative vs absolute
The path to the so gets stored inside the elf so that it can be found when the program will load.
Link to library libm.so:
gcc a.c -o a.out -lm
gcc a.c -o a.out -l:libm.so
Relative paths to the load path get stored in the elf file.
`readelf -d` shows that:
readelf -d a.out
Store the full path in the elf file:
gcc a.c -o a.out -l:/full/path/to/libm.so
readelf -d a.out
It must be in the load path.
#### Append path to so search path
##### LD_LIBRARY_PATH
env LIBRARY_PATH=$LIBRARY_PATH:/path/to/ gcc a.c -o a.out -llib
`LIBRARY_PATH` is different from `LD_LIBRARY_PATH`! `LIBRARY_PATH` is only used at compile time while `LD_LIBRARY_PATH` is only used at load time.
### Use so at runtime
After an executable has been compiled to use an so, the so must be found at runtime.
This is done by a program called the interpreter.
The interpreter will use the library path stored inside the elf file that is being executed and will also search inside a search path called load path.
There is no need to use the load path if an absolute path was stored in the executable, but this is not recommended since it would not be portable.
#### Best production method
sudo mv liba.so /some/where/in/link/path
sudo ldconfig
./a.elf
This supposes that when you compiled you used: `-lliba.so`.
#### Environment variable
Good:
env LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/absolute/path/to/lib ./a.out
./a.elf
Bad:
env LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./rel/path/to/lib/from/cd ./a.out
./a.out
This only works if you are in the right dir, since the relative path is taken relative to the current dir.
`LD_LIBRARY_PATH` has nothing to do with `LIBRARY_PATH` path variable which is used during compilation by gcc! `LD_LIBRARY_PATH` is used during execution by the linker!
#### Load path
View library load path:
cat /etc/ld.so.conf
Remember: after modifying this file, you must update the load file cache or your changes will not be taken into effect.
May also include other files by adding a line to that file:
include /etc/ld.so.conf.d/*.conf
This is done by default on Ubuntu.
To take includes into consideration and print the actual search path, use `ldconfig`.
So you also need to look inside included files for the libraries:
cat /etc/ld.so.conf.d/*.conf
The following paths are hard coded in `ldconfig`:
- `/lib/`
- `/usr/lib/`
#### View load path
Print actual search path after resolving directives like `include`:
ldconfig -v 2>/dev/null | grep -v $'^\t'
Show directories that are scanned and libraries that are found in each dir:
ldconfig -v
Print cache stored in `/etc/ld.so.cache` and `.d` includes. does not show in which directory libraries are stored in, only where they link to:
ldconfig -p
##### hwcap
When using commands like `ldconfig -v`, you may see outputs like:
/usr/lib/i386-linux-gnu/sse2: (hwcap: 0x0000000004000000)
`hwcap` stands for `hardware capabilities`
If present, means that those libraries can only be used if your hardware has the given capabilities.
Here for example, as shown in the directory name, this path is for libraries which depend on the sse2 extensions (a set of cpu instructions, not present in older CPUs).
What the flags mean is defined by x86 and somewhat standardized across vendors:
<http://en.wikipedia.org/wiki/CPUID#EAX.3D1:_Processor_Info_and_Feature_Bits>
TODO where `ldconfig` finds this info:
#### Cache
It would be very slow to search the path every time.
Therefore the linker uses a cache at:
cat /etc/ld.so.cache
It first looks for libs there, and only then searches the path.
You can generate `/etc/ld.so.cache` automatically once you have your `ld.so.conf` with `ldconfig`.
Even if the linker finds the lib in the path, it does not automatically add it to the cache so you still have to run `ldconfig`.
Running `ldconfig` is a part of every package install/uninstall if it contains a library.
### Override symbols in libraries
Symbols in `a.o` will override symbols in linked libs.
echo "/path/to/my/a.o" | sudo tee -a /etc/ld.so.preload
Useful mainly for emergency or tests.
Can also be achieved via:
export LD_PRELOAD=

View File

@ -1442,6 +1442,8 @@ int main(int argc, char** argv) {
/*
# access
http://pubs.opengroup.org/onlinepubs/9699919799/functions/access.html
Check if file or directory exists and or has a given permission (rwx):
- `R_OK`
@ -1450,6 +1452,8 @@ int main(int argc, char** argv) {
- `F_OK`: file exists
If the access is not permitted, errno is still set even if this call did not give an error.
TODO vs stat?
*/
{
char *exist = realpath(".", NULL);

View File

@ -1,4 +1,4 @@
CC ?= gcc -pedantic-errors -std=c89 -Wall
CCC ?= gcc -pedantic-errors -std=c89 -Wall
IN_EXT ?= .c
OUT_EXT ?= .out
RUN ?= main
@ -12,10 +12,10 @@ RUN_BASENAME := $(RUN)$(OUT_EXT)
.PHONY: clean run
$(RUN_BASENAME): $(OUTS)
$(CC) $+ -o '$@'
$(CCC) $+ -o '$@'
%$(TMP_EXT): %$(IN_EXT)
$(CC) -c '$<' -o '$@'
$(CCC) -c '$<' -o '$@'
clean:
rm -f *'$(TMP_EXT)' '$(RUN_BASENAME)'

View File

@ -1,5 +1,3 @@
# static
Multi file semantics of `static`.
Also contains `extern`. TODO split.
Multi file examples of `static`.

View File

@ -1,39 +1,19 @@
#include <stdio.h>
#include "a.h"
/* ERROR already defined in main. */
/* Link time error: already defined in main. */
/*int i = 0;*/
/* OK: only declared. */
/* OK: only declared, not defined. Will use the one in main. */
int i;
/* OK: only visible to this file. */
static int staticInt = 0;
/* Define externInt. Will store this initial value on the executable. */
int externInt = 0;
/* WARN: extern initialized. */
/* It does not make much sense to add extern to a definition: only to a declaration. */
/*extern int externIntInt = 1;*/
/* ERROR redefinition: */
/*void func(){ puts("mainFunc"); }*/
static void staticFunc() {
printf("a#staticFunc:\n");
printf(" staticInt = %d\n", staticInt);
printf(" aHStaticInt = %d\n", aHStaticInt);
printf(" externInt = %d\n", externInt);
puts("");
}
static int si = 0;
void a() {
staticFunc();
staticInt++;
aHStaticInt++;
externInt++;
i++;
si++;
puts("a()");
printf("i = %d\n", i);
printf("si = %d\n", si);
puts("");
}
struct s {};

View File

@ -1,26 +1,23 @@
#include <stdio.h>
#include "a.h"
void a();
int i = 0;
static int staticInt = 0;
static int si = 0;
static void staticFunc() {
printf( "main#staticFunc:\n" );
printf( " staticInt = %d\n", staticInt );
printf( " aHStaticInt = %d\n", aHStaticInt );
printf( " externInt = %d\n", externInt );
void m() {
i++;
si++;
puts("m()");
printf("i = %d\n", i);
printf("si = %d\n", si);
puts("");
}
struct s {
int i;
};
int main(int argc, char** argv) {
staticFunc();
int main() {
m();
m();
a();
staticFunc();
a();
return 0;
}

View File

@ -6,6 +6,10 @@ Anything can happen while still complying to the standard: the program continues
Bad stuff.
Examples:
- modify `const` variable with pointer cast that removes `const`
## Implementation-defined behaviour
Each implementation must document what it does but setting a fixed parameter.