Ciro Santilli 5e2bd82d1c bak
2017-04-09 10:57:31 +01:00

1032 lines
28 KiB
C

/*
# stdio.h
stream Input and Output
# stream
A stream is an abstraction over different input/output methods
such as regular files, stdin/stdout/stderr (pipes in linux), etc.
so that all of them can be treated on a uniform basis once you have opened the stream.
Most output functions have a form which outputs only to stdout,
and most input functions have a form which reads only from stdin,
coupled with a general form that works on any stream.
Unfortunately, sometimes there are subtle differences between those two
forms, so beware!
# FILE
FILE is an object type (not a macro) that represents a stream object.
Its name is FILE of course because files are one of the main types of streams.
However, streams can represent other resources in the filesystem in general
such as Linux FIFOs or sockets.
# stream vs file descriptors
A file descriptor is a POSIX concept and thus shall not be discussed here.
*/
#include "common.h"
/*
Build the path of a temporary file as a single string literal:
the name of this source file (`__FILE__`), then "__", then `x`, then ".tmp".
`x` must itself be a string literal, because the pieces are joined by
adjacent string literal concatenation.
*/
#define TMPFILE(x) __FILE__ "__" x ".tmp"
/*
Standard action to take in case of an IO error.

Prints the name of the failing function, the current value of `errno`
and the path that was being processed to stderr, and then terminates
the program with `EXIT_FAILURE`. This function never returns.

- function: name of the call that failed, e.g. "fopen"
- path: path of the file that was being accessed
*/
void io_error(const char *function, const char *path) {
    /* Snapshot errno before doing any other library call. */
    const int saved_errno = errno;

    fprintf(stderr, "error: %s errno = %d, path = %s\n", function, saved_errno, path);
    exit(EXIT_FAILURE);
}
/*
Returns the size in bytes of the given open `FILE*`.

The size is found by seeking to the end of the stream and asking for the
position there. The position the stream had on entry is restored before
returning, including when the size query itself fails.

If `fp` is NULL or any of the positioning calls fails, returns `-1L`.

Does not work for pipes.
*/
long fget_file_size(FILE *fp) {
    long oldpos;
    long size;

    if (fp == NULL) {
        return -1L;
    }
    oldpos = ftell(fp);
    if (oldpos == -1L) {
        return -1L;
    }
    if (fseek(fp, 0L, SEEK_END) != 0) {
        return -1L;
    }
    /* May be -1L on failure: still put the stream back where it was. */
    size = ftell(fp);
    /* Restore old position. */
    if (fseek(fp, oldpos, SEEK_SET) != 0) {
        return -1L;
    }
    return size;
}
/*
Same as `fget_file_size`, but takes the path instead of a `FILE*`.

The file is opened in binary mode so that the reported size is the number
of bytes stored in the file: in text mode the value returned by `ftell`
is not required to be a byte count.

Returns the size in bytes, or `-1L` if the file cannot be opened,
measured or closed.
*/
long file_size(const char *path) {
    FILE *fp;
    long size;

    fp = fopen(path, "rb");
    if (fp == NULL) {
        return -1L;
    }
    size = fget_file_size(fp);
    /* Close unconditionally so that the stream never leaks. */
    if (fclose(fp) == EOF) {
        return -1L;
    }
    return size;
}
/*
Read the entire file to a char[] dynamically allocated inside this function.

Returns a pointer to the start of that array. The array holds the bytes of
the file followed by one extra '\0', so that files without embedded NUL
bytes can be used directly as C strings.

The caller owns the returned array and must `free` it.

In case of any error, returns NULL. Nothing is leaked in that case:
the stream is closed and the array is released.

The entire file must fit into the memory available to the program.

http://stackoverflow.com/questions/174531/easiest-way-to-get-files-contents-in-c
*/
char *file_read(const char *path) {
    FILE *fp;
    char *buffer = NULL;
    long fsize;
    size_t nbytes;

    fp = fopen(path, "rb");
    if (fp == NULL) {
        return NULL;
    }
    fsize = fget_file_size(fp);
    if (fsize < 0) {
        fprintf(stderr, "could not determine length of:\n%s\n", path);
        goto fail;
    }
    /* Leave room for the terminator without overflowing size_t. */
    if ((unsigned long)fsize >= (size_t)-1) {
        goto fail;
    }
    nbytes = (size_t)fsize;
    /* The + 1 also guarantees a non-zero request for empty files. */
    buffer = malloc(nbytes + 1);
    if (buffer == NULL) {
        goto fail;
    }
    if (fread(buffer, 1, nbytes, fp) != nbytes) {
        goto fail;
    }
    buffer[nbytes] = '\0';
    if (fclose(fp) == EOF) {
        free(buffer);
        return NULL;
    }
    return buffer;

fail:
    free(buffer);
    fclose(fp);
    return NULL;
}
/*
Write null terminated string to file.

The file is created if it does not exist and truncated if it does.
The terminating '\0' is not written.

Returns `-1` on failure, `0` on success.
The stream is closed on every path, including when the write fails.
*/
int file_write(const char *path, const char *write_string) {
    FILE *fp;
    size_t len;
    int status = 0;

    fp = fopen(path, "wb");
    if (fp == NULL) {
        return -1;
    }
    len = strlen(write_string);
    /* Copy the string into the file: */
    if (fwrite(write_string, 1, len, fp) != len) {
        status = -1;
    }
    /* fclose flushes, so its failure is a write failure too. */
    if (fclose(fp) == EOF) {
        status = -1;
    }
    return status;
}
/*
Writes an array of ints to a file.

Ints are written in decimal, space separated, with a trailing space.
The file is created if it does not exist and truncated if it does.

- path: file to write to
- arr: the first of `len` ints to write
- len: number of elements; nothing is written if it is not positive

On error returns -1, on success 0.
The stream is closed on every path, including when a write fails.
*/
int write_int_arr_file(const char *path, const int *arr, int len) {
    FILE *fp;
    int i;
    int status = 0;

    fp = fopen(path, "w");
    if (fp == NULL) {
        return -1;
    }
    for (i = 0; i < len; i++) {
        if (fprintf(fp, "%d ", arr[i]) < 0) {
            status = -1;
            break;
        }
    }
    if (fclose(fp) == EOF) {
        status = -1;
    }
    return status;
}
/*
Same as `write_int_arr_file`, but for floats saved in exponent notation.

Values are space separated, with a trailing space: without a separator
consecutive values would run into each other and could not be parsed back.

- path: file to write to
- arr: the first of `len` floats to write
- len: number of elements; nothing is written if it is not positive
- precision: number of digits printed after the decimal point

On error returns -1, on success 0.
The stream is closed on every path, including when a write fails.
*/
int write_float_arr_file(const char *path, const float *arr, int len, int precision) {
    FILE *fp;
    int i;
    int status = 0;

    fp = fopen(path, "w");
    if (fp == NULL) {
        return -1;
    }
    for (i = 0; i < len; i++) {
        if (fprintf(fp, "%.*e ", precision, arr[i]) < 0) {
            status = -1;
            break;
        }
    }
    if (fclose(fp) == EOF) {
        status = -1;
    }
    return status;
}
/*
Walks through the main facilities of stdio.h, one braced block per topic.

Interactive examples that read from stdin are wrapped in `if (0)` so that
the program runs unattended. Examples that create files use paths built
with `TMPFILE`. Returns `EXIT_SUCCESS` if every example ran.
*/
int main(void) {
    /*
    # BUFSIZ

    Size of the buffer used by `setbuf`.

    Guaranteed to be at least 256.
    */
    {
        printf("BUFSIZ = %ju\n", (uintmax_t)BUFSIZ);
        assert(BUFSIZ >= 256);
    }

    /*
    # EOF

    EOF is a C concept.

    EOF works because there are only 256 byte values you can get from a stream,
    so EOF is just some int outside of the possible 0-255 range, typically -1.

    In Linux for example, EOF does not exist.

    The only way to know if a file is over is to make a `sys_read` call
    and check if you get 0 bytes.

    In case more data could become available in the future, for example on a pipe,
    `sys_read` does not return immediately, and the reader sleeps until that data becomes available.
    */

    /*
    # stderr

    The `stderr` macro is a `FILE*` that represents the standard error.

    It is always open when the program starts.

    The output to stderr may not be synchronized with that of stdout,
    so this message could appear anywhere relative to other things that were
    printed to stdout.
    */
    {
        fputs("stderr\n", stderr);
    }

    /*
    # stdout

    Same as stderr but for stdout.

    Less useful than `stderr` since most IO functions have a convenience form that writes to stdout.
    */
    {
        fputs("stdout\n", stdout);
    }

    /*
    # stdin

    Be careful!! stdin won't return EOF automatically.

    For a tty you can tell the user to input a EOF (ctrl d in linux, ctrl z in windows)
    but as you see this is system dependent. For pipes I am yet to find how to do this,
    might be automatic when process closes only.

    The best way to know that a stdin ended is recognizing some specific
    pattern of the input, such as a newline with fgets, or the end of a
    number with scanf.

    Before this comes, the program just stops waiting for the stdin to
    produce this, either from user keyboard input, or from the program
    behind the pipe.
    */

    /* # Stream output */
    {
        /*
        # putchar

        Write single char to stdout.

        Basically useless subset of putc which writes to any stream,
        and very slow since it may mean several stream IO operations.
        */
        {
            putchar('p');
            putchar('u');
            putchar('t');
            putchar('c');
            putchar('h');
            putchar('a');
            putchar('r');
            putchar('\n');
        }

        /*
        # putc

        putchar to any stream.

        Why is it not called fputc?
        */
        {
            putc('p', stdout);
            putc('u', stdout);
            putc('t', stdout);
            putc('c', stdout);
            putc('\n', stdout);
        }

        /*
        # puts

        Write to stdout.

        Newline appended at end.
        */
        {
            puts("puts");
        }

        /*
        # fputs

        Write to any stream.

        Unlike puts, *no* newline is automatically appended at end!

        Very confusing.
        */
        {
            fputs("fputs\n", stdout);
        }

        /*
        # sprintf

        Same as printf, but stores result in a given string.

        Make sure that the string is large enough to contain the output.

        If this is a hard and important task, consider `snprintf` + malloc.
        */
        {
            char cs[] = "123";
            char cs2[sizeof(cs)];
            sprintf(cs2, "%s", cs);
            assert(strcmp(cs, cs2) == 0);
        }

#if __STDC_VERSION__ >= 199901L
        /*
        # snprintf

        Like `sprintf`, but writes at most n bytes, so it is safer,
        because it may not be possible or easy to calculate the resulting
        size of a formatted string.

        The size given *includes* the null terminator.
        */
        {
            char cs[] = "123";
            char cs2[3];
            snprintf(cs2, 3, "%s", cs);
            assert(strcmp(cs2, "12") == 0);
        }
#endif

        /*
        # fprintf

        Same as printf, but to an arbitrary stream.

        Returns the number of chars written.
        */
        {
            /* Keep the call out of the assert: it must still print under NDEBUG. */
            int nchars = fprintf(stdout, "fprintf = %d\n", 1);
            assert(nchars == 12);
        }

        /*
        Large strings to stdout

        stdout is line buffered:
        if you fill up the buffer without any newlines it will just print.

        Buffer size cannot be accessed programmatically.

        TODO what is the bin buffer size? In practice, 1024 works just fine.
        It may be much larger than BUFSIZ.
        */
        {
            enum N { N = 100000 };
            char buf[N];
            memset(buf, 'z', N);
            /* The last valid index is N - 1. */
            buf[N - 1] = '\0';
            buf[N/2] = '\n';
            /* large amount of 'z's everywhere! */
            /*printf("%s\n", buf);*/
        }
    }

    /* # stream input */
    {
        /*
        # getchar

        getchar == getc(stdin)

        # getc

        Get single char from given stream (should be called fgetc...)

        It blocks until any char is made available.

        Whatever char entered including on a tty is made available immediately.
        */
        if (0) {
            /*
            echo a | c.out
            a

            sleep 3 | c.out
            EOF after 3 secs
            */
            /* Must be an int: EOF is outside of the range of valid chars. */
            int c;
            fputs("enter a char (on linux, ctrl+d EOF): ", stderr);
            /* BAD does not work. */
            /*fputc('a', stdin);*/
            c = getchar();
            if (c != EOF) {
                fprintf(stderr, "you entered:\n%c|<<<\n", c);
            } else {
                fprintf(stderr, "EOF\n");
            }
        }

#if __STDC_VERSION__ < 201112L
        /*
        # gets

        Deprecated, removed in C11.

        Dangerous: no size checking possible
        if too much input, just seg faults.
        */
        if (0) {
            /*printf("enter a string terminated by newline: (max %d chars, newline will be included in the string)\n", sn);*/
            /*gets(s);*/
            /*printf("you entered:\n%s\n\n",s);*/
        }
#endif

        /*
        # fgets

        Reads up to whichever comes first:

        - a newline char
        - buff_size - 1 chars have been read
        - the end of file was reached

        If the input comes from stdin on a tty
        and if user inputs more than the buffer size, this will wait until the user enters
        <enter>, and only at that point will those bytes be made available for `fgets`,
        the exceeding chars remaining in the buffer if you try to read again.

        Saves result in buff, '\0' terminated.

        This is the safest io method to get a line at a time,
        since it allows the programmer to deal with very long lines.

        The trailing newline is included in the input.
        */
        if (0) {
            FILE *fp = stdin;
            enum { buff_size = 4 };
            char buff[buff_size];
            fprintf(stderr, "enter a string and press enter (max %d bytes):\n", buff_size - 1);
            if (fgets(buff, buff_size, fp) == NULL) {
                /* buff is not usable here, so do not print it. */
                if (feof(fp)) {
                    fprintf(stderr, "fgets was already at the end of the stream and read nothing");
                } else if (ferror(fp)) {
                    fprintf(stderr, "fgets error reading from stream");
                }
            } else {
                /*
                `feof` cannot tell that the line was too long: the end of file
                indicator is only set once a read hits the end of the stream.
                What does tell it is that no newline was stored. This also
                triggers if the input ends without a final newline.
                */
                if (strchr(buff, '\n') == NULL && !feof(fp)) {
                    /* Some bytes are left in the stream, may want to read again. */
                    fprintf(stderr, "you entered more than the maximum number of bytes\n");
                }
                fprintf(stderr, "you entered:\n%s", buff);
            }
        }

        /*
        # scanf

        Complicated behaviour.

        Input is space separated regardless of scanf string.

        Hard to do error checking.

        Stops reading at newline.

        Use only if error checking is not a priority.

        To do proper error checking, try `fgets` and the `strtol` family.

        Returns the number of items that were assigned:
        do not store that into the variable that receives the input.
        */
        if (0) {
            int i, j;
            unsigned int ui;
            float f;

            printf("enter an integer in decimal and <enter> (max 32 bits signed):\n");
            if (scanf("%d", &i) == 1) {
                printf("you entered: %d\n", i);
            }

            /* stuff is space separated */
            /* try 123 456 789 at once. 456 789 stay in the buffer, and are eaten by the second scanf */
            printf("enter an integer, a space, an integer and a <enter> (max 32 bits signed):\n");
            if (scanf("%d %d", &i, &j) == 2) {
                printf("you entered: %d %d\n", i, j);
            }

            printf("enter a float and a newline:\n");
            if (scanf("%f", &f) == 1) {
                printf("you entered: %.2f\n", f);
            }

            printf("enter an integer in hexadecimal and a <enter>: (max 32 bits signed)\n");
            if (scanf("%x", &ui) == 1) {
                printf("you entered (in decimal): %u\n", ui);
            }
        }

        /*
        # fscanf

        Complicated like scanf.
        */
        if (0) {
            FILE *fp = stdin;
            int i;
            float f;
            puts("enter a int a space and a float (scientific notation) and then EOF (ctrl-d in linux):");
            if (fscanf(fp, "%d %e\n", &i, &f) != 2) {
                if (feof(fp)) {
                    fprintf(stderr, "fscanf reached the of file and read nothing\n");
                } else if (ferror(fp)) {
                    fprintf(stderr, "fscanf error reading from stream\n");
                }
            } else {
                /* i and f are only set if both conversions succeeded. */
                fprintf(stderr, "you entered: %d %.2e\n", i, f);
            }
        }
    }

    /*
    # File streams

    # File IO

    To get streams that deal with files, use `fopen`.

    To close those streams after usage, use `fclose`.

    # fopen

    Open file for read/write.

    Don't forget to fclose after using! Open streams are a process resource.

    Modes:

    - `r`: read. The file must already exist.
    - `w`: write. destroy if exists, create if not.
    - `a`: append. write to the end. creates if does not exist.
    - `+`: can do both input and output. must use fflush or fseek between a write and a read
    - `x`: don't destroy if exist (c11, not c++!)
    - `b`: binary.

        Means nothing in POSIX systems.

        On DOS/Windows, text mode automatically converts between \n and \r\n
        (classic Mac OS used \r).
        http://stackoverflow.com/questions/229924/difference-between-files-writen-in-binary-and-text-mode

        Windows also does trailing ctrl-z magic for ultra backwards compatibility.

        Therefore for portability, always use this when you are going to do IO
        with binary IO functions such as fwrite.

    In case of error:

    - return `NULL` and set `errno`.

    # Text IO vs Binary IO

    # Text vs binary for numerical types

    Example: an int 123 can be written to a file in two ways:

    - text: three bytes containing the ascii values of `1`, `2` and then `3`
    - binary: as the internal int representation of the c value, that is 4 bytes, with `123` in binary and zeroes at the front.

    Advantages of text:

    - it is human readable since it contains only ASCII or UTF values
    - for small values it may be more efficient (123 is 3 bytes in ascii instead of 4 in binary)
    - it is portable across multiple systems, while binary varies, especially byte ordering.

    Advantages of binary:

    - it is much shorter for large integers
    - inevitable for data that cannot be interpreted as text (images, executables)

    # Newline vs carriage return newline

    Newline carriage return related TODO confirm

    For portability, use it consistently.

    In linux the difference between text methods and binary methods is only conceptual:
    some methods output human readable text (`fprintf`) and can be classified as text,
    while others output binary, no difference is made at file opening time.

    # fclose

    Don't forget to close because:

    - open `FILE*` are a program resource
    - close also flushes

    In case of error:

    - return `EOF`
    - set `errno`
    */
    {
        int elems_write[] = {1, 2, 3};
        /* Anonymous enum as a compile time constant: `constexpr` is a keyword in C23. */
        enum {nelems = sizeof(elems_write) / sizeof(elems_write[0])};
        int elems_read[nelems];
        FILE *fp;
        char path[] = TMPFILE("fwrite");

        /*
        # fwrite

        Returns number of elements written.

        If less elements are written than required an error occurred.

        Why take both bytes per item and items instead of just total bytes:
        http://stackoverflow.com/questions/295994/what-is-the-rationale-for-fread-fwrite-taking-size-and-count-as-arguments

        It seems that no less than size per item can be written, so we are guaranteed
        that some object will not be half written.

        The byte order is implementation defined.
        This is therefore not a valid way to serialize data across machines.
        */
        {
            fp = fopen(path, "wb");
            if (fp == NULL) {
                io_error("fopen", path);
            } else {
                if (fwrite(elems_write, sizeof(elems_write[0]), nelems, fp) < nelems) {
                    io_error("fwrite", path);
                }
                if (fclose(fp) == EOF) {
                    io_error("fclose", path);
                }
            }
        }

        /*
        # fread

        Returns number of *elements* read, not bytes.

        If less elements are returned than required then either:

        - an error occurred
        - the end of file was reached.

        It is only possible to distinguish between those cases by using
        the `feof` and `ferror` functions.

        # ferror

        # feof
        */
        {
            fp = fopen(path, "rb");
            if (fp == NULL) {
                io_error("fopen", path);
            } else {
                if (fread(elems_read, sizeof(elems_read[0]), nelems, fp) < nelems) {
                    /*
                    Either `ferror(fp)` or `feof(fp)` is set. Both are fatal here:
                    we have just written nelems elements to this file.
                    */
                    io_error("fread", path);
                }
                if (fclose(fp) == EOF) {
                    io_error("fclose", path);
                }
            }
        }
        /* memcmp takes a size in bytes, not in elements. */
        assert(memcmp(elems_read, elems_write, sizeof(elems_write)) == 0);

        /*
        # Endianness

        # Byte order

        The C standard does not specify how bytes are ordered in memory.

        http://www.ibm.com/developerworks/aix/library/au-endianc/
        */
        {
            /*
            # Fix endianness

            You need this when you want to export data to some format.

            - http://stackoverflow.com/questions/105252/how-do-i-convert-between-big-endian-and-little-endian-values-in-c
            - http://stackoverflow.com/questions/13994674/how-to-write-endian-agnostic-c-c-code
            - http://stackoverflow.com/questions/2182002/convert-big-endian-to-little-endian-in-c-without-using-provided-func
            - http://stackoverflow.com/questions/19275955/convert-little-endian-to-big-endian

            Methods:

            - binary operations just work, use them
            - POSIX has the htons family, but I could not find a POSIX quote
                that says network order is big endian (which seems the default.)
            */

            /*
            Check endianness.

            Works because `short int` is guaranteed to be at least of size 2.

            We must work with pointers, because doing `(char)i` directly is specified to be 1.
            The compiler produces the assembly code required to do so taking endianness into consideration.
            */
            {
                const short int i = 1;
                if ((*(char*)&i) == 0) {
                    printf("Endianess = big\n");
                } else {
                    printf("Endianess = small\n");
                }
            }
        }
    }

    /*
    # freopen

    Open a given `FILE*` again but as a different file.
    */
    {
        /* This will discard stdin on Linux. */
        /*freopen("/dev/null", "r", stdin);*/
    }

    /* # Reposition read write */
    {
        /*
        For new code, always use `fgetpos` and `fsetpos` unless you absolutely
        need `SEEK_END` because ftell and fseek
        must return `long` which may limit the maximum file to be read,
        while `fgetpos` uses a typedef `fpos_t`

        # ftell

        Get current position of `FILE*`.

        # fseek

        Set current position in `FILE*` relative to:

        - SEEK_SET: relative to beginning of file
        - SEEK_CUR: relative to current position
        - SEEK_END: relative to end of file

        It seems that seeking after the eof is undefined behaviour in ANSI C:
        http://bytes.com/topic/c/answers/219508-fseek-past-eof

        This contrasts with POSIX lseek + write, in which the unwritten gap is 0.
        */
        {
            /*
            long int curpos = ftell(pf);
            if (curpos == -1L){
                ERROR
            }
            */

            /*
            FILE* fp;
            if (fseek (fp, 0 , SEEK_SET) != 0) {
                ERROR;
            }
            */
        }

        /*
        # rewind

        Same as the following, therefore useless:

            fseek(stream, 0L, SEEK_SET)
        */

        /*
        Like ftell/fseek except that:

        - the return is a typedef `fpos_t`, so it may represent larger files.
        - there is a single possible reference position equivalent to `SEEK_SET`.
            This makes sense since that argument was only useful for convenience.

        Always use it instead of ftell/fseek.

        # fgetpos

        Get a position in stream that is usable with a later call to `fsetpos`.

        # fsetpos

        Set position to a point retrieved via fgetpos.
        */
        {
        }
    }

    /*
    # fflush(fp)

    For output streams only.

    Makes sure all the data is put on the stream.

    May be necessary as the data may be in a buffer.
    */
    {
        /*
        if (fflush(fp) == EOF) {
            ERROR
        }
        */

        /* Debugging application: your program segfaults.

        To find where, you put printf everywhere.

        However nothing shows on screen.

        Solution: flush immediately after the printf and add a newline at the end of the printed string.
        This should ensure that your string gets printed.
        */
    }

    /* # Applications */
    {
        {
            char path[] = TMPFILE("str_file");
            char input[] = "asdf\nqwer";

            /* Write entire string to file at once. */
            {
                if (file_write(path, input) == -1) {
                    io_error("file_write", path);
                }
            }

            /* Read entire file at once to a string. */
            {
                char *output = file_read(path);
                if (output == NULL) {
                    io_error("file_read", path);
                }
                /*
                Compare exactly the bytes that were written, so that the check
                does not depend on what follows them in the returned array.
                */
                assert(memcmp(input, output, strlen(input)) == 0);
                free(output);
            }

            /* Get file size: */
            {
                long size = file_size(path);
                if (size == -1) {
                    io_error("file_size", path);
                }
                assert((size_t)size == strlen(input));
            }
        }

        /*
        # Linewise file processing

        Process a file linewise.

        Allows one to read files larger than RAM, supposing that each line is smaller than RAM.

        glibc and C++ stdlib offer the `getline` function which does it.

        There does not seem to be such a function in C! http://stackoverflow.com/questions/3501338/c-read-file-line-by-line

        The portable alternative used here is `fgets` with a bounded buffer.
        */
        {
            FILE *fp;
            /* Maximum accepted line length is buffer_size - 1, including the newline. */
            enum {buffer_size = 5, nlines = 3};
            char buffer[buffer_size];
            char path[] = TMPFILE("cat");
            char file_data[] = "abc\nde\nfgh";
            size_t file_data_size = strlen(file_data);
            /* The last line of the file has no trailing newline. */
            const char *lines[nlines] = {"abc\n", "de\n", "fgh"};
            size_t current_line = 0;

            /* Prepare test. */
            fp = fopen(path, "wb");
            if (fp == NULL) {
                io_error("fopen", path);
            } else {
                if (fwrite(file_data, 1, file_data_size, fp) < file_data_size) {
                    io_error("fwrite", path);
                }
                if (fclose(fp) == EOF) {
                    io_error("fclose", path);
                }
            }

            /* The actual cat: one `fgets` call per line. */
            fp = fopen(path, "rb");
            if (fp == NULL) {
                io_error("fopen", path);
            } else {
                while (fgets(buffer, buffer_size, fp) != NULL) {
                    assert(current_line < (size_t)nlines);
                    assert(strcmp(buffer, lines[current_line]) == 0);
                    current_line++;
                }
                /* NULL means either end of file or error: tell them apart. */
                if (ferror(fp)) {
                    io_error("fgets", path);
                }
                assert(current_line == (size_t)nlines);
                if (fclose(fp) == EOF) {
                    io_error("fclose", path);
                }
            }
        }

        /* Simple write arrays to file */
        {
            /* Arrays sized by their initializer: no fixed size buffer to overflow. */
            char path_int[] = TMPFILE("arri");
            char path_float[] = TMPFILE("arrf");
            int arri[] = { 0, 1, -1, 12873453 };
            float arrf[] = { 1.1f, 1.001f, -1.1f, 1.23456e2f };

            if (write_int_arr_file(path_int, arri, (int)(sizeof(arri) / sizeof(arri[0]))) == -1) {
                io_error("write_int_arr_file", path_int);
            }
            if (write_float_arr_file(path_float, arrf, (int)(sizeof(arrf) / sizeof(arrf[0])), 2) == -1) {
                io_error("write_float_arr_file", path_float);
            }
        }
    }

    /*
    # file operations

    A few file operations are available in ANSI C.

    They are present in <stdio.h> mainly to support file IO.

    # remove

    # delete file

    Remove a file.

        int remove(const char *filename);

    ANSI C does not say what happens if it does not exist.

    If the file is open, the behaviour is implementation-defined.

    # rename

    Rename a file.

        int rename(const char *old, const char *new);

    If the new file exists, the behaviour is implementation-defined.

    # directory operations #path

    There seems to be no directory or path operations with system independent separator,
    only with POSIX or Boost.
    */

    /*
    # perror

    Print description of errno to stderr with given prefix prepended, `NULL` for no prefix.

    Basic way to print error messages after error on a posix function.
    */
    {
        errno = EDOM;
        perror("perror test EDOM");
    }

    return EXIT_SUCCESS;
}