// PAGE 1 #include #include #include #include void child(int p2c_read, int c2p_write); void parent(int c2p_read, int p2c_write); // Scenario: One process (the parent) sends writes text to another (the child). // The child echoes this text back to the parent. // The parent compares its output stream to the reply stream. // To set this up, we need two pipes, one for p2c messages and one for c2p. // // +-----------+ +-----------+ // | p2c_write|=========================|p2c_read | // | | >> text to child >> | | // | Parent | | Child | // | | << echoed replies << | | // | c2p_read|=========================|c2p_write | // +-----------+ +-----------+ ssize_t safe_read(int fd, void *data, size_t size); int main(int argc, char *argv[]) { int p2c_pipe[2], c2p_pipe[2]; pid_t p; pipe(p2c_pipe); pipe(c2p_pipe); if ((p = fork()) == 0) { // close write end of p2c_pipe, and read end of c2p_pipe close(p2c_pipe[1]); close(c2p_pipe[0]); child(p2c_pipe[0], c2p_pipe[1]); } else if (p > 0) { // close write end of c2p_pipe, and read end of p2c_pipe close(c2p_pipe[1]); close(p2c_pipe[0]); parent(c2p_pipe[0], p2c_pipe[1]); } else abort(); } #define MESSAGESIZ 4 void parent(int c2p_read, int p2c_write) { char message_out[MESSAGESIZ]; char message_in[MESSAGESIZ]; ssize_t r; memset(message_out, ' ', MESSAGESIZ); message_out[0] = '!'; while (1) { // Write message to child r = write(p2c_write, message_out, MESSAGESIZ); assert(r == MESSAGESIZ); // Read reply r = safe_read(c2p_read, message_in, MESSAGESIZ); // If reply doesn't equal original message, complain if (r != MESSAGESIZ || memcmp(message_out, message_in, MESSAGESIZ) != 0) abort(); else printf("Message OK\n"); } } // The child just echoes every character it reads. void child(int p2c_read, int c2p_write) { char c; ssize_t r; FILE *read_f = fdopen(p2c_read, "r"); FILE *write_f = fdopen(c2p_write, "w"); // Note: don't handle all error conditions! 
while ((r = read(p2c_read, &c, 1)) == 1) write(c2p_write, &c, 1); } // The normal read() function can return less data than requested // because the system call was interrupted. // (For example, maybe the reading process received a signal.) // This function calls read() repeatedly until all the requested data is read. ssize_t safe_read(int fd, void *data, size_t size) { ssize_t n = 0, r; while (n < size) { r = read(fd, (char *) data + n, size - n); if (r > 0) n += r; else if (r == 0 || (r < 0 && errno != EINTR)) break; } return n; } /* * When MESSAGESIZ is 4, this program prints a pleasant stream of "Message OK" * messages. When MESSAGESIZ is 16385, it prints a slower, but still * pleasant, stream of "Message OK" messages. But when MESSAGESIZ is 16386, * the program deadlocks! Both processes block forever. What's going on? * * This is a deadlock on _buffer space_. The kernel allocates some buffer * space for each pipe file descriptor. When a process calls write() on a * pipe, the kernel copies the data into the buffer; when a process calls * read() to get data out of the pipe, the kernel copies the data out of the * buffer, which frees up the buffer for more written data. A common buffer * length is 8192 bytes (8 KB). * * Why do this? Like any buffering strategy, this improves _throughput_. A * process can write a lot of data into the buffer before filling it. This * means that the writing process can continue to run for a while before the * kernel must context-switch to the reading process to drain the buffer. A * smaller buffer would mean more frequent context switches, and therefore * lower throughput (because of time wasted in the context switches). * * How much buffering does the system provide to absorb the parent's message? * There's one 8192-byte buffer for parent-to-child data, another 8192-byte * buffer for child-to-parent data, and one more byte of buffering _within the * child application_: the "c" variable in the child() function above. 
 *
 *   +-----------+       8192 BYTES        +-----------+
 *   |  p2c_write|=========================|p2c_read...|
 *   |           |   >> text to child >>   |           |\
 *   |  Parent   |     KERNEL BUFFERS      |   Child   | = 1 BYTE ("c")
 *   |           |  << echoed replies <<   |           |/
 *   |   c2p_read|=========================|c2p_write..|
 *   +-----------+       8192 BYTES        +-----------+
 *
 * That's a total of 16385 bytes of buffering.
 *
 * So if the parent tries to write a message that's longer than 16385 bytes,
 * the system will deadlock!! The parent will fill all buffer space, then
 * wait forever for more. The parent "locks" the C2P buffer (by refusing to
 * read from it) and blocks while trying to "acquire" the P2C buffer. The
 * child "locks" the P2C buffer and blocks while trying to "acquire" the C2P
 * buffer! A classical circular-wait.
 *
 * In a bit more detail:
 *
 *                                                          Buffer Sizes
 *   Action                                                 P2C     C2P
 *   Initial state                                          0       0
 *   Parent: write(16386 bytes)
 *     -- parent blocks ON WRITE because buffer full, after
 *        writing 8192 bytes!                               8192    0
 *   Child: read(1 byte)                                    8191    0
 *   Child: write(1 byte)                                   8191    1
 *     ... and so forth ...
 *   Child: read(1 byte)                                    0       8191
 *   Child: write(1 byte)                                   0       8192
 *   Child: read(1 byte)                                    0       8192
 *     -- child blocks on read!
 *   Parent: write(16386 bytes)
 *        unblocks with 8194 bytes left
 *     -- parent blocks ON WRITE because buffer full, after
 *        writing 8192 bytes!                               8192    8192
 *   Child: read(1 byte) unblocks                           8191    8192
 *   Child: write(1 byte)                                   8191    8192
 *     -- child blocks ON WRITE because buffer full!
 *   Parent: write(16386 bytes)
 *        unblocks with 2 bytes left
 *     -- parent blocks ON WRITE because buffer full, after
 *        writing 1 byte!                                   8192    8192
 *                     ****** DEADLOCK ******
 *
 * How do we fix this problem?? Well, we can limit the maximum message size;
 * but in more complicated situations -- where the child might send _longer_
 * messages than the parent, for example -- this breaks down. The one way to
 * solve it for good is to use _non-blocking I/O_. In terms of the four
 * necessary conditions for deadlock -- that is,
 *
 *   1.
 *      Mutual Exclusion (here, only one process reads from each pipe)
 *   2. Hold-and-Wait (process blocks on write)
 *   3. No Preemption (no one can unblock the process)
 *   4. Circular Wait (C waits for P waits for C)
 *
 * -- non-blocking I/O removes the second condition.
 *
 * The problem here is that both processes can block forever on a write, which
 * causes them to stop _reading_. At least one of the processes needs to
 * drain its read buffer _whether or not its write buffer is full_!
 *
 * Remember that a non-blocking I/O operation _never blocks_. Instead, it
 * returns an error (EAGAIN) indicating that it would have blocked. This lets
 * us do exactly what we want! If a write buffer fills up, we won't block;
 * instead, we'll carry on with a different task -- namely, reading from the
 * read buffer.
 *
 * The next example shows how it works. The parent has been changed to use
 * non-blocking I/O. When it can read from the read buffer, it does so; and
 * when it can write to the write buffer, it does so. Notice that the control
 * flow has gotten more confusing. (Question: When can we check the input
 * message against the output message?) This is a typical, and unfortunate,
 * consequence of using non-blocking I/O (or "event-driven programming").
 * Libraries and programming language support can make this a lot easier.
*/

// PAGE 2
// Avoid deadlock by making parent asynchronous
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/select.h>
#include <unistd.h>

void child(int p2c_read, int c2p_write);
void parent(int c2p_read, int p2c_write);

// Create both pipes, fork, and hand each process the two pipe ends it uses.
// The child runs child(); the original process runs parent().
// Aborts if a pipe cannot be created or fork() fails.
int main(int argc, char *argv[])
{
    int p2c_pipe[2], c2p_pipe[2];
    pid_t p;

    (void) argc;
    (void) argv;

    // Both pipes are required; using an uninitialized descriptor pair
    // after a failed pipe() would be meaningless.
    if (pipe(p2c_pipe) < 0 || pipe(c2p_pipe) < 0)
        abort();

    if ((p = fork()) == 0) {
        // close write end of p2c_pipe, and read end of c2p_pipe
        close(p2c_pipe[1]);
        close(c2p_pipe[0]);
        child(p2c_pipe[0], c2p_pipe[1]);
    } else if (p > 0) {
        // close write end of c2p_pipe, and read end of p2c_pipe
        close(c2p_pipe[1]);
        close(p2c_pipe[0]);
        parent(c2p_pipe[0], p2c_pipe[1]);
    } else
        abort();

    return 0;
}

#define MESSAGESIZ 16386

// Switch `fd` to non-blocking mode.  O_NONBLOCK is OR-ed into the flags
// reported by F_GETFL so that any other status flags already set on the
// descriptor are kept.  Aborts if either fcntl() call fails.
static void set_nonblocking(int fd)
{
    int flags = fcntl(fd, F_GETFL);

    if (flags < 0 || fcntl(fd, F_SETFL, flags | O_NONBLOCK) < 0)
        abort();
}

// Parent side, non-blocking version: forever send a MESSAGESIZ-byte message
// and collect its echo, making progress on whichever descriptor select()
// reports as ready.
//   c2p_read  - descriptor the echoed reply is read from
//   p2c_write - descriptor the message is written to
// Prints "Message OK" once a complete reply has arrived and matches the
// message; aborts on a mismatch, on an unexpected I/O error, or if the reply
// stream reaches end of file.  Never returns.
void parent(int c2p_read, int p2c_write)
{
    char message_out[MESSAGESIZ];
    char message_in[MESSAGESIZ];
    ssize_t r;
    fd_set read_fds, write_fds;
    size_t read_pos = 0;    // bytes of the current reply received so far
    size_t write_pos = 0;   // bytes of the current message sent so far
    int nfds = (c2p_read > p2c_write ? c2p_read : p2c_write) + 1;

    memset(message_out, ' ', MESSAGESIZ);
    message_out[0] = '!';

    // make file descriptors nonblocking
    set_nonblocking(c2p_read);
    set_nonblocking(p2c_write);

    while (1) {
        // wait until something happens
        FD_ZERO(&read_fds);
        FD_SET(c2p_read, &read_fds);
        FD_ZERO(&write_fds);
        // Only ask about writability while part of the message is unsent.
        if (write_pos < MESSAGESIZ)
            FD_SET(p2c_write, &write_fds);
        if (select(nfds, &read_fds, &write_fds, NULL, NULL) < 0) {
            // The sets cannot be trusted after a failed select().
            if (errno == EINTR)
                continue;
            abort();
        }

        // Are we partially through writing a message?
        if (FD_ISSET(p2c_write, &write_fds)) {
            r = write(p2c_write, message_out + write_pos,
                      MESSAGESIZ - write_pos);
            if (r >= 0)
                write_pos += (size_t) r;
            else if (errno != EAGAIN && errno != EINTR)
                abort();
        }

        // Are we ready to read part of an echo response?
        if (FD_ISSET(c2p_read, &read_fds)) {
            r = read(c2p_read, message_in + read_pos,
                     MESSAGESIZ - read_pos);
            if (r > 0)
                read_pos += (size_t) r;
            else if (r == 0)
                // End of file: no more echo can ever arrive.  Treating this
                // as progress would make the loop spin forever.
                abort();
            else if (errno != EAGAIN && errno != EINTR)
                abort();
        }

        // Check if we have completed reading a response.  Only now is the
        // whole reply available to compare against the message.
        if (read_pos == MESSAGESIZ) {
            if (memcmp(message_out, message_in, MESSAGESIZ) != 0)
                abort();
            else {
                printf("Message OK\n");
                // Move on to next message
                read_pos = write_pos = 0;
            }
        }
    }
}

// The child just echoes every character it reads, using blocking I/O.
//   p2c_read  - descriptor to read incoming bytes from
//   c2p_write - descriptor to echo each byte to
// Returns when a read does not deliver exactly one byte (end of file or
// error), or when a byte cannot be echoed.
void child(int p2c_read, int c2p_write)
{
    char c;

    // Note: don't handle all error conditions!
    while (read(p2c_read, &c, 1) == 1) {
        // Stop echoing once the reply pipe no longer accepts data.
        if (write(c2p_write, &c, 1) != 1)
            break;
    }
}