All of lore.kernel.org
 help / color / mirror / Atom feed
From: Amit <amitchoudhary0523@gmail.com>
To: linux-kernel@vger.kernel.org
Subject: String functions in C language that are not present in standard C library.
Date: Mon, 25 Apr 2022 13:05:11 +0530	[thread overview]
Message-ID: <CAFf+5zjS8E57mvj5gDhW-Qfj+2hB292wzU1Y87jxNYSLwBTaeg@mail.gmail.com> (raw)

Hi,

I have written a few functions for manipulating strings that are not
present in the standard C library.

I am sending them to the Linux kernel mailing list, in case the Linux
kernel needs them.

The main functions are:

    ## char *get_input_from_stdin_and_discard_extra_characters(char *str, long size);
    ## char **str_split(const char *str, const char *delim, long max_splits);
    ## char *str_join(unsigned int skip_null_and_empty_input_strings, const char *delim, long num_args, ...);
    ## char *substr(const char *str, long start_index, long end_index);

--------------------
string_library.c
--------------------


/*
 * Author: Amit Choudhary
 *
 * Email: amitchoudhary0523 at gmail dot com
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>

#include "string_library.h"

/* The description of functions in this file is in the header file
"string_library.h". */

static void free_all_allocated_memory(char **strings_array, long n);
static char **transform_str_to_string_array(const char *str);

/*
 * Reads one line from stdin into 'str', storing at most 'size - 1' characters
 * followed by a terminating null byte. Reading stops at the first newline or
 * EOF; the newline itself is consumed but not stored. Characters of the line
 * that do not fit in 'str' are read and thrown away, so the next read starts
 * on the following line.
 *
 * Special cases:
 *   - 'size' == 0: the rest of the current line is discarded and NULL is
 *     returned ('str' is not looked at).
 *   - 'str' == NULL (and 'size' != 0): NULL is returned, nothing is read.
 *   - 'size' < 0: NULL is returned, nothing is read.
 *
 * Returns 'str' on success, NULL in the special cases listed above.
 */
char *get_input_from_stdin_and_discard_extra_characters(char *str, long size)
{

    int c = 0;
    long i = 0;

    // If 'size' is 0 then this function will discard all input and return NULL.
    // No need to check 'str' if 'size' is 0.
    if (size == 0) {
        // Discard up to and including the newline (or until EOF). The loop
        // must not stop on a NUL byte: a '\0' in the input is just another
        // character to be thrown away.
        do {
            c = getchar();
        } while ((c != '\n') && (c != EOF));
        return NULL;
    }

    if (!str)
        return str;

    if (size < 0)
        return NULL;

    for (i = 0; i < (size - 1); i = i + 1) {

        c = getchar();

        // End of line / end of input: terminate what has been read so far.
        if ((c == '\n') || (c == EOF)) {
            str[i] = 0;
            return str;
        }

        str[i] = (char)(c);

    } // end of for loop

    // Buffer is full ('size - 1' characters stored): terminate it.
    str[i] = 0;

    // Discard the rest of the line, including any embedded NUL bytes, so that
    // the leftover characters do not leak into the next read.
    do {
        c = getchar();
    } while ((c != '\n') && (c != EOF));

    return str;

} // end of get_input_from_stdin_and_discard_extra_characters

/*
 * Splits 'str' at every occurrence of 'delim' and returns a NULL-terminated,
 * heap-allocated array of heap-allocated token copies.
 *
 * - NULL is returned when 'str' is NULL or empty, or when an allocation fails.
 * - When 'delim' is NULL or empty, 'max_splits' is 0, or 'delim' does not occur
 *   in 'str', the result holds a single copy of 'str'.
 * - A positive 'max_splits' smaller than the natural token count limits the
 *   result to 'max_splits + 1' tokens; the last token then holds the unsplit
 *   remainder. A negative 'max_splits' means "no limit".
 * - Empty tokens (adjacent, leading or trailing delimiters) are kept.
 */
char **str_split(const char *str, const char *delim, long max_splits)
{
    /* Nothing to split. */
    if (str == NULL || *str == '\0')
        return NULL;

    /* Every case in which the whole of 'str' is the one and only token. */
    if (delim == NULL || *delim == '\0' || max_splits == 0
        || strstr(str, delim) == NULL)
        return transform_str_to_string_array(str);

    const size_t step = strlen(delim);

    /* Pass 1: token count is the number of delimiter occurrences plus one. */
    long total = 1;
    for (const char *hit = strstr(str, delim); hit != NULL;
         hit = strstr(hit + step, delim))
        total++;

    /* Honour the caller's cap on the number of splits. */
    if (max_splits > 0 && max_splits < total)
        total = max_splits + 1;

    /* One extra slot for the terminating NULL pointer. */
    char **tokens = calloc((size_t)total + 1, sizeof *tokens);
    if (tokens == NULL)
        return NULL;

    /* Pass 2: copy out each token. */
    const char *start = str;
    long count = 0;

    for (count = 0; count < total; count++) {
        const int is_last = (count == total - 1);

        /* The final token runs to the end of 'str'; every other token runs
         * up to the next delimiter, which pass 1 guarantees to exist. */
        const char *stop = is_last ? str + strlen(str) : strstr(start, delim);
        const size_t span = (size_t)(stop - start);

        char *piece = malloc(span + 1); /* +1 for the null terminator */
        if (piece == NULL) {
            /* Releases tokens[0..count-1] and the array itself. */
            free_all_allocated_memory(tokens, count);
            return NULL;
        }

        memcpy(piece, start, span);
        piece[span] = '\0';
        tokens[count] = piece;

        if (!is_last)
            start = stop + step;
    }

    tokens[count] = NULL;

    return tokens;

} // end of str_split

/*
 * static char **transform_str_to_string_array(const char *str):
 *
 * Wraps a copy of 'str' in a freshly allocated, NULL-terminated array of
 * strings. The returned array has exactly two slots: slot 0 points to a heap
 * copy of 'str' and slot 1 is the terminating NULL pointer.
 *
 * 'str' must not be NULL (it is passed straight to strlen()).
 *
 * Returns NULL if either allocation fails; nothing is leaked in that case.
 *
 * This is a static function and must not be called from outside this file.
 */
static char **transform_str_to_string_array(const char *str)
{
    /* Number of bytes to duplicate, terminator included. */
    const size_t nbytes = strlen(str) + 1;

    /* Two pointers: the single token and the NULL sentinel. */
    char **result = calloc(2, sizeof *result);
    if (result == NULL)
        return NULL;

    char *copy = malloc(nbytes);
    if (copy == NULL) {
        free(result);
        return NULL;
    }

    memcpy(copy, str, nbytes);

    result[0] = copy;
    result[1] = NULL;

    return result;

} // end of transform_str_to_string_array

/*
 * static void free_all_allocated_memory(char **strings_array, long n):
 *
 * Releases the first 'n' elements of 'strings_array' and then the array
 * itself. A NULL 'strings_array' is ignored. If 'n' is zero or negative, only
 * the array itself is freed.
 *
 * This is a static function and must not be called from outside this file.
 */
static void free_all_allocated_memory(char **strings_array, long n)
{
    if (strings_array == NULL)
        return;

    long idx = 0;
    while (idx < n) {
        free(strings_array[idx]);
        idx++;
    }

    free(strings_array);

} // end of free_all_allocated_memory

/*
 * Prints every string of the NULL-terminated 'strings_array' to stdout, one
 * per line and wrapped in single quotes, between a start and an end banner.
 * A NULL 'strings_array' prints only the banners.
 */
void print_strings_array(char **strings_array)
{
    printf("Tokens are printed below (within single quotes):\n\n");
    printf("---- Start of Tokens ----\n");

    if (strings_array != NULL) {
        /* Walk the array until the NULL sentinel. */
        for (char **cursor = strings_array; *cursor != NULL; cursor++)
            printf("'%s'\n", *cursor);
    }

    printf("---- End of Tokens ----\n\n");

} // end of print_strings_array

/*
 * Frees every string of the NULL-terminated 'strings_array' and then the
 * array itself. A NULL 'strings_array' is ignored.
 */
void free_strings_array(char **strings_array)
{
    if (strings_array == NULL)
        return;

    /* Release each element up to (not including) the NULL sentinel. */
    for (char **cursor = strings_array; *cursor != NULL; cursor++)
        free(*cursor);

    free(strings_array);

} // end of free_strings_array

/*
 * Returns the number of strings stored in 'strings_array' before its
 * terminating NULL pointer. A NULL 'strings_array' counts as 0 strings.
 */
long get_number_of_strings_in_strings_array(char **strings_array)
{
    long count = 0;

    if (strings_array != NULL) {
        for (char **cursor = strings_array; *cursor != NULL; cursor++)
            count++;
    }

    return count;

} // end of get_number_of_strings_in_strings_array

/*
 * Concatenates 'num_args' strings (passed as variadic "char *" arguments) into
 * one newly allocated string, inserting 'delim' between consecutive joined
 * strings.
 *
 * Parameters:
 *   skip_null_and_empty_input_strings:
 *       non-zero - NULL/empty arguments are ignored and get no delimiter;
 *       zero     - NULL/empty arguments are joined as empty strings, so they
 *                  still get a delimiter of their own.
 *   delim:    separator string; NULL or "" means "no separator".
 *   num_args: number of variadic "char *" arguments that follow.
 *
 * Returns a malloc()ed string that the caller must free(), or NULL when
 * 'num_args' <= 0, when the resulting string would be empty, or when memory
 * allocation fails.
 */
char *str_join(unsigned int skip_null_and_empty_input_strings,
               const char *delim, long num_args, ...)
{

    va_list valist;
    long i = 0;
    size_t iica = 0; // iica - index into character array
    size_t len = 0;
    size_t delim_len = 0;
    size_t total_len = 0;
    long num_delim_to_concat = -1;
    char *new_char_array = NULL;
    const char *temp = NULL;

    if (num_args <= 0)
        return NULL;

    if (delim) {
        delim_len = strlen(delim);
    }

    // Pass 1: compute the length of the result and how many delimiters it has.
    va_start(valist, num_args);
    for (i = 0; i < num_args; i++) {

        temp = va_arg(valist, char *);

        // A NULL argument is treated exactly like an empty string.
        len = temp ? strlen(temp) : 0;

        if (skip_null_and_empty_input_strings && (len == 0))
            continue;

        total_len = total_len + len;

        // Every joined string after the first one is preceded by a delimiter.
        num_delim_to_concat = num_delim_to_concat + 1;
        if (num_delim_to_concat > 0)
            total_len = total_len + delim_len;

    }
    va_end(valist);

    if (total_len == 0)
        return NULL;

    total_len = total_len + 1; // 1 extra for terminating null byte

    new_char_array = malloc(total_len);
    if (!new_char_array)
        return NULL;

    // Pass 2: copy the strings and delimiters into the result.
    va_start(valist, num_args);
    for (i = 0; i < num_args; i++) {

        temp = va_arg(valist, char *);

        len = temp ? strlen(temp) : 0;

        if (skip_null_and_empty_input_strings && (len == 0))
            continue;

        // Never hand a NULL pointer to memcpy(): that is undefined behaviour
        // even when the length is 0, so zero-length copies are skipped.
        if (len > 0) {
            memcpy(&(new_char_array[iica]), temp, len);
            iica = iica + len;
        }

        if (num_delim_to_concat > 0) {
            // 'delim' may be NULL here (delim_len is 0 in that case).
            if (delim_len > 0) {
                memcpy(&(new_char_array[iica]), delim, delim_len);
                iica = iica + delim_len;
            }
            num_delim_to_concat = num_delim_to_concat - 1;
        }

    }
    va_end(valist);

    new_char_array[iica] = 0;

    return new_char_array;

} // end of str_join

/*
 * Returns a malloc()ed copy of the characters of 'str' from 'start_index' to
 * 'end_index', both inclusive, terminated by a null byte.
 *
 * NULL is returned when 'str' is NULL or empty, when either index is negative,
 * when 'end_index' is smaller than 'start_index', when either index lies past
 * the last character of 'str', or when memory allocation fails.
 *
 * The caller owns the returned buffer and must free() it.
 */
char *substr(const char *str, long start_index, long end_index)
{
    if (str == NULL || str[0] == '\0')
        return NULL;

    /* With 0 <= start_index <= end_index, end_index cannot be negative. */
    if (start_index < 0 || end_index < start_index)
        return NULL;

    /* start_index <= end_index, so bounding end_index bounds both indices. */
    const long total = (long)strlen(str);
    if (end_index >= total)
        return NULL;

    const size_t count = (size_t)(end_index - start_index + 1);

    char *result = malloc(count + 1); /* +1 for the null byte */
    if (result == NULL)
        return NULL;

    memcpy(result, str + start_index, count);
    result[count] = '\0';

    return result;

} // end of substr

-------------------
string_library.h
-------------------


/*
 * Author: Amit Choudhary
 *
 * Email: amitchoudhary0523 at gmail dot com
 *
 */

#ifndef _STRING_LIBRARY_H_
#define _STRING_LIBRARY_H_

/*
 * get_input_from_stdin_and_discard_extra_characters(char *str, long size):
 *
 * Function get_input_from_stdin_and_discard_extra_characters() reads at most
 * 'size - 1' characters into 'str' from stdin and then appends the null
 * character ('\0'). If 'size' is 0 then this function will discard all input
 * and return NULL. So, to discard all input, this function can be called with
 * 'str' having value NULL and 'size' having value 0.
 * In all cases, reading input stops after encountering a newline ('\n') or EOF
 * even if 'size - 1' characters have not been read. If a newline ('\n') or EOF
 * is read then it is replaced by null character ('\0'). If there are extra
 * characters in input, they are read and discarded.
 * In all cases, 'str' or NULL is returned.
 */
char *get_input_from_stdin_and_discard_extra_characters(char *str, long size);

/*
 * char **str_split(const char *str, const char *delim, long max_splits):
 *
 * Function str_split() splits a string ('str') into tokens. It uses the 'delim'
 * string to split 'str' into tokens. If a 'delim' is found at position "i",
 * then the token ends at position "i - 1".
 *
 * If there are "n" 'delim' in 'str' then "n + 1" tokens are generated/returned.
 * However, some or all of these tokens may be empty strings. For example, if
 * 'str' contains only a single 'delim' then two empty tokens are generated.
 *
 * The reason that empty tokens are returned is that some users may want empty
 * tokens. One use case is that, if they are splitting records from a file to
 * insert in a database, then when an empty token is found, then they can insert
 * NULL value or 0 or empty string, etc. in that column.
 *
 * Users who don't want empty tokens can skip them by testing which token is
 * empty and which is not.
 *
 * The return value of this function is a pointer to pointer to character (means
 * a pointer to an array of strings/elements). This array of strings is
 * terminated by a NULL pointer/string/element which means that the last element
 * in this array of strings is a NULL pointer/string. So, you can loop through
 * this array of strings until you get a NULL pointer/string.
 *
 * The code of looping through this array of strings is:
 *
 *          long i = 0;
 *          while (strings_array[i]) {
 *              ..do stuff here..
 *              i = i + 1;
 *          }
 *
 * The above can be achieved using a for loop also:
 *
 *          long i = 0;
 *          for (i = 0; strings_array[i]; i = i + 1) {
 *              ..do stuff here..
 *          }
 *
 * If you want to skip the empty tokens then the following would be the code for
 * looping through this array of strings:
 *
 *          long i = 0;
 *          while (strings_array[i]) {
 *              if (!*(strings_array[i])) {
 *                  i = i + 1;
 *                  continue;
 *              }
 *              ..do stuff here..
 *              i = i + 1;
 *          }
 *
 * The above can be achieved using a for loop also:
 *
 *          long i = 0;
 *          for (i = 0; strings_array[i]; i = i + 1) {
 *              if (!*(strings_array[i]))
 *                  continue;
 *              ..do stuff here..
 *          }
 *
 *
 * If 'str' is NULL or empty then NULL is returned. NULL is also returned if
 * memory was not available. To find out what exactly happened, the user can
 * check whether 'str' is NULL or empty. In case 'str' is neither NULL nor
 * empty, it means that memory was not available.
 *
 * The 'max_splits' argument is used to control how many times 'str' should be
 * split. If 'max_splits' is less than the number of tokens that would ideally
 * be generated then the number of tokens is reduced to "max_splits + 1". If
 * 'max_splits' is negative then it means that all tokens should be returned.
 *
 * If 'max_splits' is 0 or 'delim' is NULL or empty string or 'delim' is not
 * found in 'str' then an array of strings is returned which will have two
 * elements - the first element will be a pointer to a copy of 'str' and the
 * second element will be a NULL pointer/string/element.
 *
 * The return value of this function is a pointer to pointer to character (means
 * a pointer to an array of strings/elements) and it had been allocated using
 * malloc, so it is user's responsibility to free this memory. The user can
 * use the function free_strings_array() to free the strings_array returned by
 * this function.
 */
char **str_split(const char *str, const char *delim, long max_splits);

/*
 * void print_strings_array(char **strings_array):
 *
 * Function print_strings_array() prints all the string elements of
'strings_array'.
 */
void print_strings_array(char **strings_array);

/*
 * void free_strings_array(char **strings_array):
 *
 * Function free_strings_array() frees all the string elements of
'strings_array'.
 * It also frees 'strings_array'.
 */
void free_strings_array(char **strings_array);

/*
 * long get_number_of_strings_in_strings_array(char **strings_array):
 *
 * Function get_number_of_strings_in_strings_array() returns the count of number
 * of elements in 'strings_array'. It is assumed that this array of strings is
 * terminated by a NULL pointer/string/element.
 *
 */
long get_number_of_strings_in_strings_array(char **strings_array);

/*
 * char *str_join(unsigned int skip_null_and_empty_input_strings,
const char *delim, long num_args, ...):
 *
 * Parameters:
 *
 *      num_args: number of variable arguments that are passed to this function
 *                excluding the 'delim' string.
 *      ...: Variable number of "char *" pointers.
 *
 * Description:
 *
 *      Function str_join() concatenates all the strings/character arrays passed
 *      to it. If 'delim' is neither NULL nor empty then the 'delim' string is
 *      concatenated between every two strings.
 *
 *      If skip_null_and_empty_input_strings is zero then this means that
 *      NULL/empty strings should be considered valid strings for the purpose of
 *      concatenating 'delim' string - this means that if there is a
 *      NON-NULL/non-empty string in the variable arguments list which is then
 *      followed or preceded by a NULL/empty string then one 'delim' string will
 *      be concatenated between NON-NULL/non-empty string and NULL/empty string.
 *      This can be useful in case columns of a database are concatenated to
 *      form a record which will then be written in a file - so here, a column
 *      containing NULL/empty value will be represented as empty by having two
 *      consecutive 'delim' strings.
 *
 *      If skip_null_and_empty_input_strings is non-zero then this means that
 *      NULL/empty strings should be skipped and no 'delim' string should be
 *      concatenated for them.
 *
 *      Function str_join() allocates a new character array whose size is equal
 *      to the sum of the lengths of all the strings that are concatenated, plus
 *      the lengths of all the 'delim' strings placed between them, plus 1
 *      (extra 1 for terminating null byte). It then concatenates all the
 *      strings passed to it (these strings are separated by 'delim' string but
 *      please see above for NULL/empty strings) into the newly allocated
 *      character array and then returns the pointer to the newly allocated
 *      character array. If memory allocation fails then NULL is returned. NULL
 *      is also returned if 'num_args' is less than or equal to 0 or if the
 *      resulting string would be empty.
 *
 *      It is the responsibility of the caller to free the allocated
memory (that
 *      is, to free the returned pointer from this function).
 */
char *str_join(unsigned int skip_null_and_empty_input_strings, const
char *delim, long num_args, ...);

/*
 * char *substr(const char *str, long start_index, long end_index):
 *
 * Function substr() allocates memory and returns a pointer to a string /
 * character array which is a substring of 'str' starting from index
 * 'start_index' till 'end_index' (inclusive). This substring is terminated by
 * null byte at the end.
 * If 'str' is NULL or 'str' is empty or 'start_index' is less than 0 or
 * 'end_index' is less than 0 or 'end_index' is less than 'start_index' or
 * 'start_index' is greater than length of 'str' - 1 or 'end_index' is greater
 * than length of 'str' - 1 then NULL is returned.
 *
 * The returned pointer points to a memory region containing the
substring and this
 * memory region was allocated using malloc. So, it is the user's
responsibility to
 * free the allocated memory.
 *
 */
char *substr(const char *str, long start_index, long end_index);

#endif

Regards,
Amit

                 reply	other threads:[~2022-04-25  7:35 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CAFf+5zjS8E57mvj5gDhW-Qfj+2hB292wzU1Y87jxNYSLwBTaeg@mail.gmail.com \
    --to=amitchoudhary0523@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.