/*
# cgetline.c
# $Id: cgetline.c 3132 2024-09-11 12:54:07Z darcy $
# Written by D'Arcy J.M. Cain
# darcy@druid.net
# Copyright 1991 - 2024

# This code may be used on any computer system for any purpose by anyone

NAME
	cgetline, xgetline, sgetline

SYNOPSIS
	char *cgetline(FILE *fp, char *buf);
	char *xgetline(FILE *fp, char *buf, size_t *linenum);
	char *sgetline(FILE *fp, char *buf);

	xcat [file ...] # shell function

DESCRIPTION
	Reads a line from the stream given by fp and returns a pointer to the
	string.  There is no length restriction on the returned string.  Space
	is dynamically allocated for the string as needed.  The pointer given
	is realloced so the calling function should not expect the pointer to be
	valid after the call.  It should reset the pointer to the return value
	of this function.  At end of file the space will be freed.  The way
	to use this function would be something like this:

		char *buf = NULL;
		FILE *fp;
		... open file ...
		while ((buf = cgetline(fp, buf)) != NULL)
			... do stuff with buf ...

	Because xgetline may read an unknown number of raw lines, the linenum
	argument allows the caller to track the real line number.  If
	it is non-NULL then it is taken as a pointer to a variable that gets
	incremented each time the function reads another raw line in.

	In the xgetline version anything from '#' till the end of line is
	ignored and a trailing '\' character is treated as a continuation
	character.  After this processing the resulting line is ignored if
	it is empty.  The '#' and '\' characters can be included by preceding
	them with a '\'.  Note that the leading backslash is left in the input
	string and must be dealt with by the caller.  This can be somewhat of a
	problem at the end of a line but in general should be workable.

	If the caller wants to keep the memory it should call with the buf
	argument set to NULL every time.

	Defining XCAT causes a main function to be included allowing the
	xgetline function to be available in shell programs.

	Note there used to be a curses version but it was so seldom used
	that I removed it for simplicity.  You generally don't want to
	have unbounded input in a curses window.

	The sgetline version is for a special case in one of my packages
	and you shouldn't worry about it.

RETURNS
	A pointer to the string without the terminating newline is returned
	if successful or NULL if there was an error or end of file.  Use
	feof(3) and ferror(3) to find out if it was a file error, memory
	allocation problem or EOF condition.

AUTHOR
	D'Arcy J.M. Cain (darcy@druid.net)
*/

#include	"cain.h"

#include	<ctype.h>
#include	<errno.h>
#include	<stdlib.h>

/* I originally was going to use 80 here as the most common case but */
/* decided that a few extra bytes to save a malloc from time to time */
/* would be a better choice.  Comments welcome.  */
#define		CHUNK	128

/* XCAT implies XGETLINE */
#ifdef	XCAT
#ifndef	XGETLINE_VERSION
#define	XGETLINE_VERSION
#endif
#endif

/* INPUT_STREAM is the type of the input handle taken by the getline */
/* functions: an int when STREAM_VERSION is defined (stream.h is then */
/* included), otherwise a stdio FILE pointer. */
#ifdef	STREAM_VERSION
#	include	"stream.h"
#	define		INPUT_STREAM		int
#else
#	define		INPUT_STREAM		FILE *
#endif	/* STREAM_VERSION */

#ifdef	XGETLINE_VERSION
// Add special use characters here
// These are used to protect backslashed characters
// We can add more as they become useful
// When xgetc sees a backslash followed by one of these characters it
// returns 0xe0 and then, on the next call, the character with bit 0x80 set.
static const char ECHAR[] = "&@!,)$#\n(%|~\"'";
// Value returned by xgetc for a backslash immediately followed by a newline
#define ECHAR_NL 0xe00d

// Set by xgetc once getc has returned EOF; cleared at the start of xgetline
static int eof;
// One pending character: xgetc returns it on its next call, and xputc
// writes it in place of its argument; zero means nothing is pending
static long stash = 0;
/*
 * Read one character from fp, translating backslash sequences.
 *
 * Returns, in order of precedence:
 *   - the stashed character left by the previous call, if any;
 *   - EOF once the stream has reported end of file;
 *   - the raw character if it is not a backslash;
 *   - ECHAR_NL for backslash followed by newline;
 *   - 0xe0 for backslash followed by a character listed in ECHAR, with
 *     that character (bit 0x80 set) stashed for the next call;
 *   - '\\' for any other backslash pair, with the second character
 *     stashed unchanged (this includes EOF, which is then returned by
 *     the next call).
 *
 * If linenum is non-NULL it is incremented for every newline read,
 * escaped or not.
 */
static long
xgetc(INPUT_STREAM fp, size_t *linenum)
{
    long ch;
    const char *p;

    /* a character held over from the previous call goes out first */
    if (stash != 0)
    {
        ch = stash;
        stash = 0;
        return ch;
    }

    if (eof)
        return EOF;

    ch = getc(fp);
    if (ch == EOF)
    {
        eof = 1;
        return EOF;
    }

    /* ordinary character: count the line if needed and hand it back */
    if (ch != '\\')
    {
        if (ch == '\n' && linenum)
            ++*linenum;
        return ch;
    }

    /* backslash: the meaning depends on the character that follows */
    ch = getc(fp);
    if (ch == '\n')
    {
        /* escaped newline still counts as a raw line */
        if (linenum)
            ++*linenum;
        return ECHAR_NL;
    }

    /* manual scan so that a NUL byte never matches the terminator */
    for (p = ECHAR; *p != '\0'; p++)
    {
        if (ch == *p)
        {
            stash = ch | 0x80;
            return 0xe0;
        }
    }

    /* not a protected character: pass the pair through untouched */
    stash = ch;
    return '\\';
}

/*
 * Write one character to fp, undoing the 0xe0 encoding.
 *
 * If the file-level stash holds a character, it is written in place of c
 * and the stash is cleared.  A 0xe0 character is not written; instead the
 * following character has bit 0x80 (and anything above it) masked off
 * before being written.
 *
 * Returns 0 when the character was the 0xe0 marker, otherwise whatever
 * putc returns.
 */
static int
xputc(int c, FILE *fp)
{
    static int escaped = 0;     /* previous character was the 0xe0 marker */

    if (stash != 0)
    {
        c = stash;
        stash = 0;
    }

    if (escaped)
    {
        /* masked value can never be 0xe0, so it is always written */
        escaped = 0;
        return putc(c & 0x7f, fp);
    }

    if (c != 0xe0)
        return putc(c, fp);

    escaped = 1;
    return 0;
}

/*
 * Write the string s to fp through xputc, followed by a newline.
 *
 * The two-character sequence backslash + 'n' is written as a newline;
 * any other backslash pair is written as is.  A backslash that is the
 * last character of the string is written on its own.
 *
 * Returns the value of ferror(fp) after writing.
 */
int
xputline(FILE *fp, const char *s)
{
    int c;

    /* go through unsigned char so that bytes >= 0x80 (notably the 0xe0 */
    /* marker that xputc looks for) are not sign extended */
    while ((c = (unsigned char)*s++) != 0)
    {
        if (c != '\\')
        {
            xputc(c, fp);
            continue;
        }

        c = (unsigned char)*s;
        if (c == 0)
        {
            /* trailing backslash: do not step over the terminator */
            xputc('\\', fp);
            break;
        }
        s++;

        switch (c)
        {
            case 'n':
                xputc('\n', fp);
                break;

            default:
                xputc('\\', fp);
                xputc(c, fp);
                break;
        }
    }

    xputc('\n', fp);
    return ferror(fp);
}

/*
 * xgetline / cgetline - read one line of any length from fp.
 *
 * buf must be NULL or a pointer obtained from malloc/realloc (normally
 * the value returned by the previous call); it is resized as needed and
 * the caller must use the returned pointer from then on.
 *
 * cgetline returns the next line without its newline; an empty line is
 * returned as an empty string.  A backspace character removes the
 * previously stored character.
 *
 * xgetline additionally joins lines ending in a backslash, drops
 * everything from '#' to the end of the line, strips leading and
 * trailing white space and skips lines that end up empty.  If linenum
 * is non-NULL, xgetc increments it for every raw line consumed.
 *
 * Returns NULL at end of file or if memory cannot be allocated; in both
 * cases buf has been freed.  If fp is null, buf is returned untouched.
 */
char *
xgetline(INPUT_STREAM fp, char *buf, size_t *linenum)
#else
char *
cgetline(INPUT_STREAM fp, char *buf)
#endif
{
	size_t	sz = 0;			// characters the buffer has been grown to hold
	size_t	i = 0;			// index into string tracking current position
	int		c;				// to store getc return
	char	*tmp;			// realloc result, so buf survives a failure
#ifdef	XGETLINE_VERSION
	int		in_comment = 0;	// if we are in a comment
	int		in_quote = 0;	// quote character if inside a quote; tracked
							// only, it does not affect what is stored
#endif

	/* no real good idea for handling this */
	if (!fp)
		return(buf);

	/* get characters from stream until EOF */
#ifdef	XGETLINE_VERSION
	eof = stash = 0;

	while ((c = xgetc(fp, linenum)) != EOF)
	{
		if (c == ECHAR_NL)
		{
			/* continuation: the line break becomes a single space */
			c = ' ';
			in_comment = in_quote = 0;		/* nl ends comment and quote */
		}
		else if (c == '\n')
			in_comment = in_quote = 0;		/* nl ends comment and quote */

		/* skip leading space at start of line, which also skips */
		/* empty and comment-only lines */
		if (!i && isspace(c))
			continue;
#else
	while ((c = getc(fp)) != EOF && c != '\n')
	{
#endif
		/* grow the buffer before it can overflow */
		if ((sz - i) < 2)
		{
			if ((tmp = realloc(buf, (sz += CHUNK) + 2)) == NULL)
			{
				/* caller only gets NULL back, so release the old block */
				free(buf);
				return(NULL);
			}

			buf = tmp;
		}

#ifdef	XGETLINE_VERSION
		if (c == '#')
		{
			in_comment = 1;
			continue;
		}
		else if (c == '\n')
		{
			/* lose trailing spaces */
			while (i && isspace((unsigned char)buf[i - 1]))
				i--;

			if (!i)
				continue;

			buf[i] = 0;
			return(buf);
		}
		else if (in_comment)
			continue;
		else if (in_quote)
		{
			if (c == in_quote)
				in_quote = 0;
		}
		else if (c == '\'' || c == '"')
		{
			in_quote = c;
		}

		buf[i++] = c;
#else
		/* the following needed in case we are in cbreak or raw mode */
		if (c != '\b')
			buf[i++] = c;
		else if (i)
			i--;
#endif
	}

#ifdef	XGETLINE_VERSION
	/* EOF: a last line without a newline is trimmed like any other */
	while (i && isspace((unsigned char)buf[i - 1]))
		i--;

	/* is there anything to return? */
	if (i)
	{
		buf[i] = 0;
		return(buf);
	}

	// otherwise we are at end of file
	free(buf);
	return(NULL);
#else
	/* nothing stored and no newline seen: end of file or read error */
	if (c == EOF && !i)
	{
		free(buf);
		return(NULL);
	}

	/* an empty line never went through the growth check in the loop */
	if (!sz)
	{
		if ((tmp = realloc(buf, CHUNK + 2)) == NULL)
		{
			free(buf);
			return(NULL);
		}

		buf = tmp;
	}

	buf[i] = 0;
	return(buf);
#endif
}

#ifdef	TEST_MODULE
/*
 * Test driver: copy standard input to standard output one line at a
 * time using cgetline.  The returned buffer is handed back on each call
 * so that it is reused and finally released by cgetline itself.
 */
int		main(void)
{
	char	*p = NULL;

	while ((p = cgetline(stdin, p)) != NULL)
		printf("%s\n", p);

	return(0);
}
#endif

#ifdef	XCAT
#include	<string.h>

/*
 * Print every line that xgetline produces from fp to standard output,
 * one per line.  Line numbers are not tracked.
 */
static void
xcat(FILE *fp)
{
	char	*line = NULL;

	for (;;)
	{
		/* hand the buffer back so xgetline can reuse and finally free it */
		line = xgetline(fp, line, NULL);
		if (line == NULL)
			break;

		printf("%s\n", line);
	}
}

/*
 * xcat [file ...]
 *
 * Run xcat over each named file in turn, or over standard input when no
 * files are given.  A file that cannot be opened is reported on stderr
 * and skipped; the remaining files are still processed.
 *
 * Returns 0 if every file could be opened, EXIT_FAILURE otherwise.
 */
int
main(int argc, char **argv)
{
	FILE	*fp;
	int		k;
	int		status = 0;

	if (argc < 2)
	{
		xcat(stdin);
		return(0);
	}

	for (k = 1; k < argc; k++)
	{
		if ((fp = fopen(argv[k], "r")) == NULL)
		{
			fprintf(stderr, "xcat: Can't open file %s - %s\n",
								argv[k], strerror(errno));
			status = EXIT_FAILURE;	/* remember it but keep going */
			continue;
		}

		xcat(fp);
		fclose(fp);
	}

	return(status);
}
#endif	/* XCAT */

