All of lore.kernel.org
 help / color / mirror / Atom feed
* Cleaning up elkscmd and adding help text - Questions
@ 2015-03-27  8:19 Nils Stec
  2015-03-27 13:19 ` Jody Bruchon
  2015-03-27 13:54 ` Alan Cox
  0 siblings, 2 replies; 13+ messages in thread
From: Nils Stec @ 2015-03-27  8:19 UTC (permalink / raw)
  To: ELKS

[-- Attachment #1: Type: text/plain, Size: 1122 bytes --]

Hi,

i started to write some patches, at the moment 2. Now it's the time to 
ask you guys some questions about that.

I wrote a patch for cat which shows the user some usage-information.
Jody Bruchon used write() for this purpose, other tools are using 
fprintf(stderr, ....).
I don't know which i should prefer. In my opinion i would rather use 
fprintf.
STDERR should be the output for operations, right?

I wrote a second patch, which adds some more functionality to cp.
It adds three command-line-options:
     -v    be verbose
     -i     be interactive, ask before overwriting
     -n    don't overwrite files

i want to add 3 more options:
     -u    update-only (only overwrite if source is newer)
     -b    make a backup if destination file already exists
     -f     force, if destfile cannot be opened, remove it, try again.


Is this the right way to write code for this project? It's the first 
time I'm doing this in here and i don't wan't to do things wrong...

I think these two patches are still just drafts - i send them because i 
want to show, i know theres more work to do on this two patches.

Nils

[-- Attachment #2: patch-usage-cat.diff --]
[-- Type: text/x-patch, Size: 829 bytes --]

diff --git a/elkscmd/file_utils/cat.c b/elkscmd/file_utils/cat.c
index e8b9576..d4616c7 100644
--- a/elkscmd/file_utils/cat.c
+++ b/elkscmd/file_utils/cat.c
@@ -35,6 +35,8 @@ int main(int argc, char **argv)
 		if (dumpfile(STDIN_FILENO)) goto error_read;
 	} else {
 		for (i = 1; i < argc; i++) {
+			if(!strcmp(argv[1], "-h")) goto usage;
+
 			errno = 0;
 			fd = open(argv[i], O_RDONLY);
 			if (fd == -1) {
@@ -52,4 +54,11 @@ error_read:
 		argv[0], argv[i], strerror(errno));
 	close(fd);
 	exit(1);
+
+usage:
+	write(STDERR_FILENO, "concatentate file(s) or standard input to standard output.\n", 60);
+	write(STDERR_FILENO, "usage: cat [OPTIONS] [FILE]\n", 29);
+	write(STDERR_FILENO, "options: -h  help\n", 18);
+	write(STDERR_FILENO, "when no file is given or file is '-' STDIN is used for input.\n", 62);
+	exit(0);
 }

[-- Attachment #3: patch-cp-add_options_001.diff --]
[-- Type: text/x-patch, Size: 4523 bytes --]

diff --git a/elkscmd/file_utils/cp.c b/elkscmd/file_utils/cp.c
index 6e23864..ed1e66c 100644
--- a/elkscmd/file_utils/cp.c
+++ b/elkscmd/file_utils/cp.c
@@ -22,9 +22,16 @@
 #include <errno.h>
 
 #define BUF_SIZE 4096
+#define NOT_OVERWRITING		0xABCD
 
 static char *buf;
 
+static unsigned int copy_count;
+
+/* we need some flags for command-line-options */
+static int option_no_overwrite;
+static int option_verbose;
+static int option_interactive;
 
 /*
  * Build a path name from the specified directory name and file name.
@@ -79,14 +86,26 @@ int copyfile(char *srcname, char *destname, int setmodes)
 	struct	stat	statbuf2;
 	struct	utimbuf	times;
 
-	if (stat(srcname, &statbuf1) < 0) {
+	if (stat(srcname, &statbuf1) < 0) {	// src nonexistent
 		perror(srcname);
 		return 0;
 	}
 
-	if (stat(destname, &statbuf2) < 0) {
+	if (stat(destname, &statbuf2) < 0) {	// dst nonexistent
 		statbuf2.st_ino = -1;
 		statbuf2.st_dev = -1;
+	} else {	// dst existent
+		if(option_no_overwrite) {
+			fprintf(stderr, "dest '%s' is existent, not overwriting\n", destname);
+			return NOT_OVERWRITING;
+		}
+		if(option_interactive) {
+			/* TODO fix this, getchar wants return and seems not to work properly */
+			//printf("really overwrite '%s'? (y/n) \n", destname);
+			//if(getchar() != 'y') {
+			//	return NOT_OVERWRITING;
+			//}
+		}
 	}
 
 	if ((statbuf1.st_dev == statbuf2.st_dev) &&
@@ -143,6 +162,12 @@ int copyfile(char *srcname, char *destname, int setmodes)
 		(void) utime(destname, &times);
 	}
 
+	if(option_verbose) {
+		printf("copied '%s' -> '%s'\n", srcname, destname);
+	}
+
+	copy_count++;
+
 	return 1;
 
 
@@ -157,37 +182,78 @@ error_exit:
 int main(int argc, char **argv)
 {
 	int	dirflag;
+	int     arg_counter;
+	int	options_counter;
+	int	retval;
+	unsigned int file_count;
 	char	*srcname;
 	char	*destname;
 	char	*lastarg;
 
 	if (argc < 3) goto usage;
 
-	lastarg = argv[argc - 1];
+	lastarg = argv[argc - 1];	// lastarg is copy destination
 
-	dirflag = isadir(lastarg);
+	dirflag = isadir(lastarg);	// is destination a directory?
+
+	// set defaults for options //
+	option_no_overwrite = 0;
+	option_verbose = 0;
+	option_interactive = 0;
+
+	// walk throug all args, see if there are options for copy-operation
+	// count options for later addition to arg-counter
+	arg_counter = 1;	// start at argv[1]
+	options_counter = 0;
+	while(arg_counter < argc) {
+		if(!strcmp(argv[arg_counter], "-n")) {
+			options_counter++;
+			option_no_overwrite = 1;
+		}
+		if(!strcmp(argv[arg_counter], "-v")) {
+			options_counter++;
+			option_verbose = 1;
+		}
+		if(!strcmp(argv[arg_counter], "-i")) {
+			options_counter++;
+			option_interactive = 1;
+		}
+		if(!strcmp(argv[arg_counter], "-h")) goto usage;
+		arg_counter++;
+	}
 
-	if ((argc > 3) && !dirflag) {
-		fprintf(stderr, "%s: not a directory\n", lastarg);
+	if((argc > (3+options_counter)) && !dirflag) { 	// can't copy more than one src-files into one dst-file
+		fprintf(stderr, "%s is not a directory\n", lastarg);
 		exit(1);
 	}
 
 	buf = malloc(BUF_SIZE);
-	while (argc-- > 2) {
-		srcname = argv[1];
+	copy_count = 0;
+	file_count = 0;
+	while (argc-- > (2+options_counter)) {
+		srcname = argv[options_counter+1+file_count];
 		destname = lastarg;
-		if (dirflag) destname = buildname(destname, srcname);
+		if(dirflag) destname = buildname(destname, srcname);
 
-		if (!copyfile(*++argv, destname, 0)) goto error_copy;
+		retval = copyfile(srcname, destname, 0);
+		if((!retval) && (!(retval == NOT_OVERWRITING))) goto error_copy;
+		file_count++;
 	}
 	free(buf);
+	if(option_verbose) printf("copy-count = %d\n", copy_count);
 	exit(0);
 
 error_copy:
 	fprintf(stderr, "Failed to copy %s -> %s\n", srcname, destname);
 	exit(1);
 usage:
-	fprintf(stderr, "usage: %s source_file dest_file\n", argv[0]);
-	fprintf(stderr, "       %s file1 file2 ... dest_directory\n", argv[0]);
+	fprintf(stderr, "usage: %s [options] source_file dest_file\n", argv[0]);
+	fprintf(stderr, "       %s [options] file1 file2 ... dest_directory\n", argv[0]);
+	fprintf(stderr, "options:  -b   make a backup of each exisiting destination file\n");
+	fprintf(stderr, "          -f   if a existing destination file cannot be opened, remove it and try again\n");
+	fprintf(stderr, "          -n   no overwriting\n");
+	fprintf(stderr, "          -v   be verbose\n");
+	fprintf(stderr, "          -i   interactive, prompt before overwrite\n");
+	fprintf(stderr, "          -u   copy only if source file is newer than destination file\n");
 	exit(1);
 }

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: Cleaning up elkscmd and adding help text - Questions
  2015-03-27  8:19 Cleaning up elkscmd and adding help text - Questions Nils Stec
@ 2015-03-27 13:19 ` Jody Bruchon
  2015-03-27 14:10   ` Alan Cox
  2015-03-27 17:59   ` Nils Stec
  2015-03-27 13:54 ` Alan Cox
  1 sibling, 2 replies; 13+ messages in thread
From: Jody Bruchon @ 2015-03-27 13:19 UTC (permalink / raw)
  To: ELKS

On 3/27/2015 4:19 AM, Nils Stec wrote:
> I wrote a patch for cat which shows the user some usage-information.
> Jody Bruchon used write() for this purpose, other tools are using
> fprintf(stderr, ....).
> I don't know which i should prefer. In my opinion i would rather use
> fprintf.
> STDERR should be the output for operations, right?

Some of the programs already used write() and write() avoids dragging 
the code in for the printf family of functions (all ELKS programs are 
statically linked), so if there is NO usage of the printf family of 
functions in the program, it may make sense to use write() to minimize 
total program size. That being said, even a trivial usage message should 
mention the program being run in the usage, and it's not a good idea to 
assume that i.e. 'ls' is ALWAYS run as 'ls' since someone could symlink 
or alias it in the future. Because of this,

fprintf(stderr, "usage: %s args\n", argv[0]);

is a thousand times better than:

write(STDERR_FILENO, "usage: ", 7);
write(STDERR_FILENO, argv[0], strlen(argv[0]));
write(STDERR_FILENO, "\n", 1);

This results in three calls to write() and a call to strlen() when we 
could have just used one fprintf() to do all the work. In the future, I 
think many calls to write() will be eliminated, since even one call to a 
printf family function brings in the core code for all printf functions 
anyway.

> Is this the right way to write code for this project? It's the first
> time I'm doing this in here and i don't wan't to do things wrong...

The cat patch should use fprintf(stderr, ...) since I've already brought 
in the printf function family with my error_read: code and write() is 
bad for the reasons outlined above.

A few other comments:

+#define NOT_OVERWRITING		0xABCD
...snip...
+			return NOT_OVERWRITING;

If the user asks you to not overwrite (clobber), you should silently not 
clobber files and not consider it an error. If any program succeeds in 
doing what the user asked for, it should return success (0) with no 
messages. If you haven't read Eric S. Raymond's rules of the Unix 
philosophy, take a look at them; I find them to be very helpful 
guidelines when making program design decisions.

+	if (stat(srcname, &statbuf1) < 0) {	// src nonexistent

You can't write comments using // unless you're using a C99 compiler. 
bcc is a K&R C compiler that is not even guaranteed to be C89 compliant, 
so no // comments. Use /* comment */ instead. Source comments are nice 
to have as long as they're not excessive or explaining something that is 
very obvious.

+static int option_no_overwrite;
+static int option_verbose;
+static int option_interactive;

Depending on how many options you add, you may want to use one global 
static int called 'flags' or similar, and use bit flags instead of a 
bunch of ints. From the program 'fdupes' in one of my GitHub repos, 
snipped up for brevity:

#define ISFLAG(a,b) ((a & b) == b)
#define SETFLAG(a,b) (a |= b)
#define F_RECURSE 0x0001
#define F_HIDEPROGRESS 0x0002
etc. etc.
...
switch (opt) {
case 'q':
   SETFLAG(flags, F_HIDEPROGRESS);
   break;
...
if (ISFLAG(flags, F_RECURSEAFTER)) <code here>...
if (!ISFLAG(flags, F_HIDEPROGRESS)) <code here>...

I don't necessarily recommend using this code, it's just an example and 
it's written to be usable with more than one variable that holds a set 
of flags. I'd rewrite it like this for ELKS programs to make it cleaner:

static int flags;
#define ISFLAG(a) ((flags & a) == a)
#define SETFLAG(a) (flags |= a)
#define F_QUIET 0x0001
etc.
...
if (strcmp(argv[i], "-q")) SETFLAG(F_QUIET);
if (!ISFLAG(F_QUIET)) fprintf(stderr, "copied %d files\n", count);

---
+	if(option_verbose) {
+		printf("copied '%s' -> '%s'\n", srcname, destname);
+	}

Firstly, the output from GNU 'cp' looks like this:

$ cp -v foo/* bar/
'foo/1' -> 'bar/1'
'foo/2' -> 'bar/2'
'foo/3' -> 'bar/3'

I'd also prefer one-liners such as this with no multi-line 'else' clause 
to be collapsed together (it makes the code easier to read and avoids 
'brace hell'). I also prefer that if, for, and while statements have a 
space between the word and the (condition) part for the sake of code 
readability. Write it like this instead:

if (option_verbose) printf("'%s' -> '%s'\n", srcname, destname);

Next:

+	// set defaults for options //
+	option_no_overwrite = 0;
+	option_verbose = 0;
+	option_interactive = 0;

The comment is unnecessary since it's obvious that "options are being 
initialized to zero" by the code itself. This is self-commenting code. 
I'd also prefer to see the variable names shortened a bit ('o_' or 
'opt_' instead of 'option_' for example) and as mentioned previously, 
it's probably better to use the bit flags method mentioned previously if 
you're only using the variables as a yes/no flag.

+	arg_counter = 1;	// start at argv[1]
+	options_counter = 0;
+	while(arg_counter < argc) {
+		if(!strcmp(argv[arg_counter], "-n")) {
+			options_counter++;
+			option_no_overwrite = 1;
+		}
+		if(!strcmp(argv[arg_counter], "-v")) {
+			options_counter++;
+			option_verbose = 1;
+		}
+		if(!strcmp(argv[arg_counter], "-i")) {
+			options_counter++;
+			option_interactive = 1;
+		}
+		if(!strcmp(argv[arg_counter], "-h")) goto usage;
+		arg_counter++;
+	}

-	if ((argc > 3) && !dirflag) {
-		fprintf(stderr, "%s: not a directory\n", lastarg);
+	if((argc > (3+options_counter)) && !dirflag) { 	// can't copy more 
than one src-files into one dst-file

Ouch. Rather than try to explain the problems in detail, let me rewrite it.

	arg_cnt = 1;	// start at argv[1]
	opt_cnt = 0;
	while(arg_cnt < argc) {
		if (!strcmp(argv[arg_cnt], "-n")) {
			opt_noclobber = 1;
		} else if (!strcmp(argv[arg_cnt], "-v")) {
			opt_verbose = 1;
		} else if (!strcmp(argv[arg_cnt], "-i")) {
			opt_interactive = 1;
		} else if (!strcmp(argv[arg_cnt], "-h")) goto usage;
		else break;
		opt_cnt++;
		arg_cnt++;
	}

	/* If multiple files specified, last one must be a directory */
	if ((argc > (opt_cnt + 3)) && !dirflag) {

This rewrite combines all the option count increments into one instance 
which reduces code size and stops parsing options at the first 
non-option string. The comparison on the last line is more readable and 
it is clearer that it is "comparing total argument count to the number 
of options specified plus 3." The 'if-elseif-else' tree stops processing 
once a valid option is hit and "falls through" once a non-option is hit. 
It could (and probably should) be improved by handling the combined 
option case such as 'cp -iv src dest' but this is sufficient for now as 
long as the usage text implies that option flags must be specified 
separately.

+	copy_count = 0;
+	file_count = 0;
+	while (argc-- > (2+options_counter)) {
+		srcname = argv[options_counter+1+file_count];

I've spent a lot of time cleaning these compressed mathematical 
expressions out of the elkscmd code; let's not introduce more of them 
(also shorten variable names while retaining their meaning, this is a 
good example for why to use shorter names):

+	copy_cnt = 0;
+	file_cnt = 0;
+	while (argc-- > (opt_cnt + 2)) {
+		srcname = argv[opt_cnt + 1 + file_cnt];

Next,

-		if (dirflag) destname = buildname(destname, srcname);
+		if(dirflag) destname = buildname(destname, srcname);

Don't do this, if no other reason than the fact that 'if' is not a 
function call. I understand that it comes down to individual coding 
style and the compiler doesn't care if you write

if((something+(q/2))==argv[x+2+y*z[aaa]])usage();

but it is hard to read and readability is very important. I believe that 
'if', 'while', and 'for' shouldn't look like function calls in code 
since they are actually C control statements. Using a space makes those 
control statements immediately obvious, whereas if() and while() and 
for() look like function calls when skimming the code.

-		if (!copyfile(*++argv, destname, 0)) goto error_copy;
+		retval = copyfile(srcname, destname, 0);
+		if((!retval) && (!(retval == NOT_OVERWRITING))) goto error_copy;

Remember the rule about silent success mentioned earlier? This change 
can be eliminated because of it. Even if it wasn't, though, this code 
could be simplified and made more readable in one shot:

+		retval = copyfile(srcname, destname, 0);
+		if (retval != 1) goto error_copy;

I'd actually recommend reversing the return values in copyfile(), using 
nonzero as an error return instead (most C library calls seem to use 
this convention and consistency with that is rarely a bad thing) so you 
could just go:

+		if (retval) goto error_copy;

Next,

+	if(option_verbose) printf("copy-count = %d\n", copy_count);

This shouldn't be done. Here's GNU coreutils 'cp' in action:

file_utils $ cp -v foo/* bar/
'foo/COPYING' -> 'bar/COPYING'
'foo/COPYING.sash' -> 'bar/COPYING.sash'
'foo/Makefile' -> 'bar/Makefile'
'foo/README' -> 'bar/README'
'foo/WARRANTY' -> 'bar/WARRANTY'
...snip...

If someone is interested in knowing a "copy-count" for some reason, 
they'll almost certainly do this instead:

file_utils $ cp -v foo/* bar/ | wc -l
46

Your helpful message will throw this off and make it not work as 
expected. A shell script author will have to introduce an incompatible 
kludge to their script to make up for 'cp' misbehaving, i.e.

CNT=$(cp -v $A/* $B/ | wc -l)
CNT=$((CNT - 1))  # This shouldn't be necessary...

Another problem is that you've declared a static global 'copy_count' 
variable and you're introducing a local 'copy_count' variable as well. 
It needs to be called something else for the sake of avoiding programmer 
confusion. However, assuming you eliminate the "copy-count" message I 
just talked about along with its local variable counterpart, the global 
'static unsigned int copy_count' is set (copy_count++) but not used 
throughout the entire program and can be discarded.

If you ever want an "official" reference for how a particular elkscmd 
program "should" behave, take a look at the POSIX standards:

http://pubs.opengroup.org/onlinepubs/9699919799/

Whew, that took me a long time to write. I hope it's helpful to you and 
everyone else on the list. Thanks for your impressive work so far! I'm 
looking forward to your finalized [PATCH] submissions. :-)

-Jody

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: Cleaning up elkscmd and adding help text - Questions
  2015-03-27  8:19 Cleaning up elkscmd and adding help text - Questions Nils Stec
  2015-03-27 13:19 ` Jody Bruchon
@ 2015-03-27 13:54 ` Alan Cox
  1 sibling, 0 replies; 13+ messages in thread
From: Alan Cox @ 2015-03-27 13:54 UTC (permalink / raw)
  To: Nils Stec; +Cc: ELKS

On Fri, 27 Mar 2015 09:19:56 +0100
Nils Stec <nils.stec@gmail.com> wrote:

> Hi,
> 
> i started to write some patches, at the moment 2. Now it's the time to 
> ask you guys some questions about that.
> 
> I wrote a patch for cat which shows the user some usage-information.
> Jody Bruchon used write() for this purpose, other tools are using 
> fprintf(stderr, ....).
> I don't know which i should prefer. In my opinion i would rather use 
> fprintf.
> STDERR should be the output for operations, right?

A lot of small programs try and avoid using stdio as it makes them a lot
smaller in memory and on disk if they do so.

> 
> I wrote a second patch, which adds some more functionality to cp.
> It adds three command-line-options:
>      -v    be verbose
>      -i     be interactive, ask before overwriting
>      -n    don't overwrite files
> 
> i want to add 3 more options:
>      -u    update-only (only overwrite if source is newer)
>      -b    make a backup if destination file already exists
>      -f     force, if destfile cannot be opened, remove it, try again.
> 
> 
> Is this the right way to write code for this project? It's the first 
> time I'm doing this in here and i don't wan't to do things wrong...
> 
> I think these two patches are still just drafts - i send them because i 
> want to show, i know theres more work to do on this two patches.

They look ok to me, in fact the bug I see in patch #2 isn't yours - the
existing code is not checking if the malloc of buf fails.... (and given
its always needed it looks like it could just be declared as a fixed
static buffer so that malloc isn't needed at all)

Alan

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: Cleaning up elkscmd and adding help text - Questions
  2015-03-27 13:19 ` Jody Bruchon
@ 2015-03-27 14:10   ` Alan Cox
  2015-03-27 16:17     ` Jody Bruchon
  2015-03-27 17:59   ` Nils Stec
  1 sibling, 1 reply; 13+ messages in thread
From: Alan Cox @ 2015-03-27 14:10 UTC (permalink / raw)
  To: Jody Bruchon; +Cc: ELKS

> This results in three calls to write() and a call to strlen() when we 
> could have just used one fprintf() to do all the work. In the future, I 
> think many calls to write() will be eliminated, since even one call to a 
> printf family function brings in the core code for all printf functions 
> anyway

And pretty soon all your apps won't fit on the floppy disc image.

If you are going to go that way then probably it needs "fake" shared
libraries. Fake in the sense that while you load a library off disk into
the app executable space it's not shared in memory but is shared on disc.

It's an item I have (and for similar reasons) on the FUZIX TODO list as
well.

Alan

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: Cleaning up elkscmd and adding help text - Questions
  2015-03-27 14:10   ` Alan Cox
@ 2015-03-27 16:17     ` Jody Bruchon
  2015-03-27 18:19       ` MFLD
  0 siblings, 1 reply; 13+ messages in thread
From: Jody Bruchon @ 2015-03-27 16:17 UTC (permalink / raw)
  To: ELKS

On March 27, 2015 10:10:10 AM EDT, Alan Cox <alan@lxorguk.ukuu.org.uk> wrote:
>And pretty soon all your apps won't fit on the floppy disc image.
>If you are going to go that way then probably it needs "fake" shared
>libraries.

This was actually one of the reasons I was experimenting with "BusyELKS" since it would minimize the number of copies of statically linked code on disk. The downside is that the size of the programs overall would increase, but it wouldn't require new logic to support sort-of-shared libraries.

There are a lot of programs that were directly ripped out of the sash code base and can easily share code between them since they used to be one big code unit originally. I see a lot of potential for combining the smaller tools and sharing code between them. Perhaps I should revisit that concept...?

-Jody
-- 
Sent from my Android phone with K-9 Mail. Please excuse my brevity.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: Cleaning up elkscmd and adding help text - Questions
  2015-03-27 13:19 ` Jody Bruchon
  2015-03-27 14:10   ` Alan Cox
@ 2015-03-27 17:59   ` Nils Stec
  2015-03-27 18:31     ` Alan Cox
  1 sibling, 1 reply; 13+ messages in thread
From: Nils Stec @ 2015-03-27 17:59 UTC (permalink / raw)
  To: ELKS

Wow i really belive that writing this took some time... Thank you, this 
gave me a lot of hints, do's and don'ts.

I read The Art of Unix Programming, but you showed me that i forgot a lot ;)

I think I'm going to rewrite my patches with all that things in mind, 
will send them soon.

I hope my submissions will be ready very soon.

Nils





On 03/27/2015 02:19 PM, Jody Bruchon wrote:
> On 3/27/2015 4:19 AM, Nils Stec wrote:
>> I wrote a patch for cat which shows the user some usage-information.
>> Jody Bruchon used write() for this purpose, other tools are using
>> fprintf(stderr, ....).
>> I don't know which i should prefer. In my opinion i would rather use
>> fprintf.
>> STDERR should be the output for operations, right?
>
> Some of the programs already used write() and write() avoids dragging 
> the code in for the printf family of functions (all ELKS programs are 
> statically linked), so if there is NO usage of the printf family of 
> functions in the program, it may make sense to use write() to minimize 
> total program size. That being said, even a trivial usage message 
> should mention the program being run in the usage, and it's not a good 
> idea to assume that i.e. 'ls' is ALWAYS run as 'ls' since someone 
> could symlink or alias it in the future. Because of this,
>
> fprintf(stderr, "usage: %s args\n", argv[0]);
>
> is a thousand times better than:
>
> write(STDERR_FILENO, "usage: ", 7);
> write(STDERR_FILENO, argv[0], strlen(argv[0]));
> write(STDERR_FILENO, "\n", 1);
>
> This results in three calls to write() and a call to strlen() when we 
> could have just used one fprintf() to do all the work. In the future, 
> I think many calls to write() will be eliminated, since even one call 
> to a printf family function brings in the core code for all printf 
> functions anyway.
>
>> Is this the right way to write code for this project? It's the first
>> time I'm doing this in here and i don't wan't to do things wrong...
>
> The cat patch should use fprintf(stderr, ...) since I've already 
> brought in the printf function family with my error_read: code and 
> write() is bad for the reasons outlined above.
>
> A few other comments:
>
> +#define NOT_OVERWRITING        0xABCD
> ...snip...
> +            return NOT_OVERWRITING;
>
> If the user asks you to not overwrite (clobber), you should silently 
> not clobber files and not consider it an error. If any program 
> succeeds in doing what the user asked for, it should return success 
> (0) with no messages. If you haven't read Eric S. Raymond's rules of 
> the Unix philosophy, take a look at them; I find them to be very 
> helpful guidelines when making program design decisions.
>
> +    if (stat(srcname, &statbuf1) < 0) {    // src nonexistent
>
> You can't write comments using // unless you're using a C99 compiler. 
> bcc is a K&R C compiler that is not even guaranteed to be C89 
> compliant, so no // comments. Use /* comment */ instead. Source 
> comments are nice to have as long as they're not excessive or 
> explaining something that is very obvious.
>
> +static int option_no_overwrite;
> +static int option_verbose;
> +static int option_interactive;
>
> Depending on how many options you add, you may want to use one global 
> static int called 'flags' or similar, and use bit flags instead of a 
> bunch of ints. From the program 'fdupes' in one of my GitHub repos, 
> snipped up for brevity:
>
> #define ISFLAG(a,b) ((a & b) == b)
> #define SETFLAG(a,b) (a |= b)
> #define F_RECURSE 0x0001
> #define F_HIDEPROGRESS 0x0002
> etc. etc.
> ...
> switch (opt) {
> case 'q':
>   SETFLAG(flags, F_HIDEPROGRESS);
>   break;
> ...
> if (ISFLAG(flags, F_RECURSEAFTER)) <code here>...
> if (!ISFLAG(flags, F_HIDEPROGRESS)) <code here>...
>
> I don't necessarily recommend using this code, it's just an example 
> and it's written to be usable with more than one variable that holds a 
> set of flags. I'd rewrite it like this for ELKS programs to make it 
> cleaner:
>
> static int flags;
> #define ISFLAG(a) ((flags & a) == a)
> #define SETFLAG(a) (flags |= a)
> #define F_QUIET 0x0001
> etc.
> ...
> if (strcmp(argv[i], "-q")) SETFLAG(F_QUIET);
> if (!ISFLAG(F_QUIET)) fprintf(stderr, "copied %d files\n", count);
>
> ---
> +    if(option_verbose) {
> +        printf("copied '%s' -> '%s'\n", srcname, destname);
> +    }
>
> Firstly, the output from GNU 'cp' looks like this:
>
> $ cp -v foo/* bar/
> 'foo/1' -> 'bar/1'
> 'foo/2' -> 'bar/2'
> 'foo/3' -> 'bar/3'
>
> I'd also prefer one-liners such as this with no multi-line 'else' 
> clause to be collapsed together (it makes the code easier to read and 
> avoids 'brace hell'). I also prefer that if, for, and while statements 
> have a space between the word and the (condition) part for the sake of 
> code readability. Write it like this instead:
>
> if (option_verbose) printf("'%s' -> '%s'\n", srcname, destname);
>
> Next:
>
> +    // set defaults for options //
> +    option_no_overwrite = 0;
> +    option_verbose = 0;
> +    option_interactive = 0;
>
> The comment is unnecessary since it's obvious that "options are being 
> initialized to zero" by the code itself. This is self-commenting code. 
> I'd also prefer to see the variable names shortened a bit ('o_' or 
> 'opt_' instead of 'option_' for example) and as mentioned previously, 
> it's probably better to use the bit flags method mentioned previously 
> if you're only using the variables as a yes/no flag.
>
> +    arg_counter = 1;    // start at argv[1]
> +    options_counter = 0;
> +    while(arg_counter < argc) {
> +        if(!strcmp(argv[arg_counter], "-n")) {
> +            options_counter++;
> +            option_no_overwrite = 1;
> +        }
> +        if(!strcmp(argv[arg_counter], "-v")) {
> +            options_counter++;
> +            option_verbose = 1;
> +        }
> +        if(!strcmp(argv[arg_counter], "-i")) {
> +            options_counter++;
> +            option_interactive = 1;
> +        }
> +        if(!strcmp(argv[arg_counter], "-h")) goto usage;
> +        arg_counter++;
> +    }
>
> -    if ((argc > 3) && !dirflag) {
> -        fprintf(stderr, "%s: not a directory\n", lastarg);
> +    if((argc > (3+options_counter)) && !dirflag) { // can't copy more 
> than one src-files into one dst-file
>
> Ouch. Rather than try to explain the problems in detail, let me 
> rewrite it.
>
>     arg_cnt = 1;    // start at argv[1]
>     opt_cnt = 0;
>     while(arg_cnt < argc) {
>         if (!strcmp(argv[arg_cnt], "-n")) {
>             opt_noclobber = 1;
>         } else if (!strcmp(argv[arg_cnt], "-v")) {
>             opt_verbose = 1;
>         } else if (!strcmp(argv[arg_cnt], "-i")) {
>             opt_interactive = 1;
>         } else if (!strcmp(argv[arg_cnt], "-h")) goto usage;
>         else break;
>         opt_cnt++;
>         arg_cnt++;
>     }
>
>     /* If multiple files specified, last one must be a directory */
>     if ((argc > (opt_cnt + 3)) && !dirflag) {
>
> This rewrite combines all the option count increments into one 
> instance which reduces code size and stops parsing options at the 
> first non-option string. The comparison on the last line is more 
> readable and it is clearer that it is "comparing total argument count 
> to the number of options specified plus 3." The 'if-elseif-else' tree 
> stops processing once a valid option is hit and "falls through" once a 
> non-option is hit. It could (and probably should) be improved by 
> handling the combined option case such as 'cp -iv src dest' but this 
> is sufficient for now as long as the usage text implies that option 
> flags must be specified separately.
>
> +    copy_count = 0;
> +    file_count = 0;
> +    while (argc-- > (2+options_counter)) {
> +        srcname = argv[options_counter+1+file_count];
>
> I've spent a lot of time cleaning these compressed mathematical 
> expressions out of the elkscmd code; let's not introduce more of them 
> (also shorten variable names while retaining their meaning, this is a 
> good example for why to use shorter names):
>
> +    copy_cnt = 0;
> +    file_cnt = 0;
> +    while (argc-- > (opt_cnt + 2)) {
> +        srcname = argv[opt_cnt + 1 + file_cnt];
>
> Next,
>
> -        if (dirflag) destname = buildname(destname, srcname);
> +        if(dirflag) destname = buildname(destname, srcname);
>
> Don't do this, if no other reason than the fact that 'if' is not a 
> function call. I understand that it comes down to individual coding 
> style and the compiler doesn't care if you write
>
> if((something+(q/2))==argv[x+2+y*z[aaa]])usage();
>
> but it is hard to read and readability is very important. I believe 
> that 'if', 'while', and 'for' shouldn't look like function calls in 
> code since they are actually C control statements. Using a space makes 
> those control statements immediately obvious, whereas if() and while() 
> and for() look like function calls when skimming the code.
>
> -        if (!copyfile(*++argv, destname, 0)) goto error_copy;
> +        retval = copyfile(srcname, destname, 0);
> +        if((!retval) && (!(retval == NOT_OVERWRITING))) goto error_copy;
>
> Remember the rule about silent success mentioned earlier? This change 
> can be eliminated because of it. Even if it wasn't, though, this code 
> could be simplified and made more readable in one shot:
>
> +        retval = copyfile(srcname, destname, 0);
> +        if (retval != 1) goto error_copy;
>
> I'd actually recommend reversing the return values in copyfile(), 
> using nonzero as an error return instead (most C library calls seem to 
> use this convention and consistency with that is rarely a bad thing) 
> so you could just go:
>
> +        if (retval) goto error_copy;
>
> Next,
>
> +    if(option_verbose) printf("copy-count = %d\n", copy_count);
>
> This shouldn't be done. Here's GNU coreutils 'cp' in action:
>
> file_utils $ cp -v foo/* bar/
> 'foo/COPYING' -> 'bar/COPYING'
> 'foo/COPYING.sash' -> 'bar/COPYING.sash'
> 'foo/Makefile' -> 'bar/Makefile'
> 'foo/README' -> 'bar/README'
> 'foo/WARRANTY' -> 'bar/WARRANTY'
> ...snip...
>
> If someone is interested in knowing a "copy-count" for some reason, 
> they'll almost certainly do this instead:
>
> file_utils $ cp -v foo/* bar/ | wc -l
> 46
>
> Your helpful message will throw this off and make it not work as 
> expected. A shell script author will have to introduce an incompatible 
> kludge to their script to make up for 'cp' misbehaving, i.e.
>
> CNT=$(cp -v $A/* $B/ | wc -l)
> CNT=$((CNT - 1))  # This shouldn't be necessary...
>
> Another problem is that you've declared a static global 'copy_count' 
> variable and you're introducing a local 'copy_count' variable as well. 
> It needs to be called something else for the sake of avoiding 
> programmer confusion. However, assuming you eliminate the "copy-count" 
> message I just talked about along with its local variable counterpart, 
> the global 'static unsigned int copy_count' is set (copy_count++) but 
> not used throughout the entire program and can be discarded.
>
> If you ever want an "official" reference for how a particular elkscmd 
> program "should" behave, take a look at the POSIX standards:
>
> http://pubs.opengroup.org/onlinepubs/9699919799/
>
> Whew, that took me a long time to write. I hope it's helpful to you 
> and everyone else on the list. Thanks for your impressive work so far! 
> I'm looking forward to your finalized [PATCH] submissions. :-)
>
> -Jody
> -- 
> To unsubscribe from this list: send the line "unsubscribe linux-8086" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: Cleaning up elkscmd and adding help text - Questions
  2015-03-27 16:17     ` Jody Bruchon
@ 2015-03-27 18:19       ` MFLD
  2015-03-27 18:26         ` Jody Bruchon
  2015-03-27 18:41         ` Alan Cox
  0 siblings, 2 replies; 13+ messages in thread
From: MFLD @ 2015-03-27 18:19 UTC (permalink / raw)
  To: ELKS

Hello,

I really wonder why you spend time on the ELKS userland... if I have a 
look to the sizes of uclibc and busybox on a 32 bits embedded system, 
built with BuildRoot, and with many features (threads, unicode, large 
files, etc) that have no sense on a 8086 architecture:

-rwxr-xr-x 1 mfld mfld 296704 19 mars  09:20 libuClibc-0.9.33.2.so
-rwxr-xr-x 1 mfld mfld 633548 19 mars  09:20 busybox

Yes, it is still too large for a 1 MB address space, but don't you think 
it would be worth giving a try to reduce the uclibc and Busybox 
footprint, rather than doing the same job as the our friends ?

If I take the example of my 80188 system where I am playing with ELKS, I 
have 512 KB of Flash, so not so far from the current size of uclibc and 
busybox on my Cortex A8 system...

Regards,

MFLD



Le 27/03/2015 17:17, Jody Bruchon a écrit :
> On March 27, 2015 10:10:10 AM EDT, Alan Cox <alan@lxorguk.ukuu.org.uk> wrote:
>> And pretty soon all your apps won't fit on the floppy disc image.
>> If you are going to go that way then probably it needs "fake" shared
>> libraries.
> This was actually one of the reasons I was experimenting with "BusyELKS" since it would minimize the number of copies of statically linked code on disk. The downside is that the size of the programs overall would increase, but it wouldn't require new logic to support sort-of-shared libraries.
>
> There are a lot of programs that were directly ripped out of the sash code base and can easily share code between them since they used to be one big code unit originally. I see a lot of potential for combining the smaller tools and sharing code between them. Perhaps I should revisit that concept...?
>
> -Jody

--
To unsubscribe from this list: send the line "unsubscribe linux-8086" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: Cleaning up elkscmd and adding help text - Questions
  2015-03-27 18:19       ` MFLD
@ 2015-03-27 18:26         ` Jody Bruchon
  2015-03-27 18:52           ` MFLD
  2015-03-27 18:41         ` Alan Cox
  1 sibling, 1 reply; 13+ messages in thread
From: Jody Bruchon @ 2015-03-27 18:26 UTC (permalink / raw)
  To: MFLD, ELKS

On March 27, 2015 2:19:54 PM EDT, MFLD <mfld.fr@gmail.com> wrote:
>I really wonder why you spend time on the ELKS userland... if I have a 
>look to the sizes of uclibc and busybox on a 32 bits embedded system, 
...snip...
>If I take the example of my 80188 system

BusyBox and uClibc don't work on 16-bit segmented architectures with 64K code and data size limits. Linux doesn't either. If you want to run a Linux-like system on an 80x86 where x < 3, you can't use any of the software you just mentioned at all.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: Cleaning up elkscmd and adding help text - Questions
  2015-03-27 17:59   ` Nils Stec
@ 2015-03-27 18:31     ` Alan Cox
  0 siblings, 0 replies; 13+ messages in thread
From: Alan Cox @ 2015-03-27 18:31 UTC (permalink / raw)
  To: Nils Stec; +Cc: ELKS

On Fri, 27 Mar 2015 18:59:28 +0100
Nils Stec <nils.stec@gmail.com> wrote:

> Wow i really belive that writing this took some time... Thank you, this 
> gave me a lot of hints, do's and don'ts.
> 
> I read The Art of Unix Programming, but you showed me that i forgot a lot ;)
> 
> I think I'm going to rewrite my patches with all that things in mind, 
> will send them soon.

One thing that is not obvious

stderr is unbuffered, and this is required by the C language standard

That means that

   /* A function so we don't have two copies of the same string, and
      we don't go mad counting bytes */
   static int writes(int fd, const char *s) {
	return write(fd, s, strlen(s));
   }

   write(2, argv[0], strlen(argv[0]));
   writes(2, " broke.\n");

is actually better than fprintf. Not that performance matters here , if
you are optimising the performance of error reporting then you've almost
certainly got the wrong priorities.


In the FUZIX case we've shared several Kbytes of some key binaries by
carefully avoiding the use of malloc, stdio and floating point. The same
ought to be the case for ELKS although some of the other changes we do
like avoiding floating point and if possible division of long types is
much much less important as the 8086 has multiply/divide instructions
even if only 16bit ones.

Several of the elks tools do

     blah = malloc(somefixedsize)
     do stuff
     free(blah)

which for a utility which always needs the buffer except in error cases
is actually massively less efficient than simply doing

      static uint8_t blah[somefixedsize];



The v7 utilities are well worth reading for their elegance and efficiency
of memory usage. It's quite old style C so can be harder to read than
modern ANSI C, but the techniques used are frequently very clever indeed,
and some like the v7 implementation of "dd" are masterpieces.

Alan

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: Cleaning up elkscmd and adding help text - Questions
  2015-03-27 18:19       ` MFLD
  2015-03-27 18:26         ` Jody Bruchon
@ 2015-03-27 18:41         ` Alan Cox
  1 sibling, 0 replies; 13+ messages in thread
From: Alan Cox @ 2015-03-27 18:41 UTC (permalink / raw)
  To: MFLD; +Cc: ELKS

> Yes, it is still too large for a 1 MB address space, but don't you think 
> it would be worth giving a try to reduce the uclibc and Busybox 
> footprint, rather than doing the same job as the our friends ?

Massively dieting a large C library (or indeed most large apps) is
actually usually harder than just doing the job properly in the first
place. There are trade-offs you make that differ when you've got 64K of
space to play with and there are whole areas of implementation that you
do differently.

The same with kernels. One of the big problems ELKS has even after all
this time is stuff like some of the waitqueue and disk logic inherited
from Linux 1.x era. It's way way less compact than the implementation
that would have been done from scratch because the Linux model was
designed to scale to hundreds of processes and assumes that kernel data
space is cheap. Neither is true. As a result of that design ELKS can't
swap out the kernel stacks of idle processes either to other memory or to
disk, which leaves it permanently starved of buffer space.

Alan

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: Cleaning up elkscmd and adding help text - Questions
  2015-03-27 18:26         ` Jody Bruchon
@ 2015-03-27 18:52           ` MFLD
  2015-03-27 22:19             ` Alan Cox
  0 siblings, 1 reply; 13+ messages in thread
From: MFLD @ 2015-03-27 18:52 UTC (permalink / raw)
  To: ELKS

I am not suggesting to build Linux on 8086, I do know it won't fit 
(protected mode...) and this is why I came to ELKS!

This is also why I cannot reuse coreboot for my project and I started to 
write a kind of boot86 / mon86.

I only suggested to try to build Busybox with dev86, and using the libc 
from dev86 for the glue between Busybox code and ELKS.

Was it tried before ?

MFLD


Le 27/03/2015 19:26, Jody Bruchon a écrit :
> On March 27, 2015 2:19:54 PM EDT, MFLD <mfld.fr@gmail.com> wrote:
>> I really wonder why you spend time on the ELKS userland... if I have a
>> look to the sizes of uclibc and busybox on a 32 bits embedded system,
> ...snip...
>> If I take the example of my 80188 system
> BusyBox and uClibc don't work on 16-bit segmented architectures with 64K code and data size limits. Linux doesn't either. If you want to run a Linux-like system on an 80x86 where x < 3, you can't use any of the software you just mentioned at all.
>

--
To unsubscribe from this list: send the line "unsubscribe linux-8086" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: Cleaning up elkscmd and adding help text - Questions
  2015-03-27 18:52           ` MFLD
@ 2015-03-27 22:19             ` Alan Cox
  2015-03-28 15:35               ` LM
  0 siblings, 1 reply; 13+ messages in thread
From: Alan Cox @ 2015-03-27 22:19 UTC (permalink / raw)
  To: MFLD; +Cc: ELKS

On Fri, 27 Mar 2015 19:52:31 +0100
MFLD <mfld.fr@gmail.com> wrote:

> I am not suggesting to build Linux on 8086, I do know it won't fit 
> (protected mode...) and this is why I came to ELKS!
> 
> This is also why I cannot reuse coreboot for my project and I started to 
> write a kind of boot86 / mon86.
> 
> I only suggested to try to build Busybox with dev86, and using the libc 
> from dev86 for the glue between Busybox code and ELKS.
> 
> Was it tried before ?

Busybox is in relative terms huge - even some of the busybox utilities
split standalone are oversized compared with other implementations.

Alan

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: Cleaning up elkscmd and adding help text - Questions
  2015-03-27 22:19             ` Alan Cox
@ 2015-03-28 15:35               ` LM
  0 siblings, 0 replies; 13+ messages in thread
From: LM @ 2015-03-28 15:35 UTC (permalink / raw)
  To: ELKS

On 3/27/15, Alan Cox <alan@lxorguk.ukuu.org.uk> wrote:
> Busybox is in relative terms huge - even some of the busybox utilities
> split standalone are oversized compared with other implementations.

Have you looked at other utility collections like Heirloom and Toybox?
 I read that Toybox is being developed by a former developer of
Busybox and that he is very interested in efficiency.  Would be very
curious how his idea of efficiency translates to a system like Elks.
I do know portability is not one of his areas of interest.

Also, it may be too basic, but sbase has an interesting take on some
utilities.  There's also pdclib as another alternative to uclibc.
It's not as complete, but there are some interesting optimizations
like with strstr using the railgun algorithm.

I found it interesting comparing the Fusix version of ls with the
Minix version.  Liked the removal of the large array to track
user/group.

Sincerely,
Laura
http://www.distasis.com/cpp

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2015-03-28 15:35 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-03-27  8:19 Cleaning up elkscmd and adding help text - Questions Nils Stec
2015-03-27 13:19 ` Jody Bruchon
2015-03-27 14:10   ` Alan Cox
2015-03-27 16:17     ` Jody Bruchon
2015-03-27 18:19       ` MFLD
2015-03-27 18:26         ` Jody Bruchon
2015-03-27 18:52           ` MFLD
2015-03-27 22:19             ` Alan Cox
2015-03-28 15:35               ` LM
2015-03-27 18:41         ` Alan Cox
2015-03-27 17:59   ` Nils Stec
2015-03-27 18:31     ` Alan Cox
2015-03-27 13:54 ` Alan Cox

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.