*******************************************************************************

    Memory management

    The data type \code{mat_csr_t} stores $m \times n$ matrices in a variant 
    of the compressed row format.

    On a technical note, the \code{mat_csr_t} type is an array of length 
    one of \code{mat_csr} structures.

    A \code{mat_csr} structure for an $m \times n$ matrix comprises the 
    following data:
    \begin{itemize}
    \item Two positive \code{long} integers \code{m} and \code{n}, holding 
          the matrix dimensions.  These are set at creation time and are 
          not allowed to be changed during the lifetime of this structure.
    \item A non-negative \code{long} integer \code{alloc} which morally 
          denotes the length of the arrays \code{x} and \code{j}.  If 
          \code{alloc} is zero, these two arrays might be \code{NULL}.
    \item An array \code{x}, which is either \code{NULL} or of length 
          \code{alloc} times \code{ctx->size}, where \code{ctx} is the 
          context at the time of creation.  That is, \code{x} has space 
          for \code{alloc} elements of the type of the base ring.
    \item An array \code{j}, which is either \code{NULL} or of length 
          \code{alloc}.
    \item An array \code{p} of length~\code{m} such that \code{p[i]} 
          gives the least index into \code{x} and \code{j} referring to 
          an element in row~\code{i}.
    \item An array \code{lenr} of length~\code{m} such that \code{lenr[i]} 
          gives the number of non-zero elements in row~\code{i}.
    \end{itemize}

    We assume that the non-zero entries of any one row of the matrix 
    are stored in contiguous blocks in the arrays \code{x} and \code{j}. 
    However, unless specifically stated otherwise, we do not enforce 
    any particular ordering of the entries within each such block, and 
    we do not enforce any particular ordering of the rows.  Thus, the 
    common way to access the data is:
    \begin{lstlisting}[language=c]
    mat_ctx_t ctx;
    mat_csr_t A;
    long i, q;

    for (i = 0; i < A->m; i++)
    {
        for (q = A->p[i]; q < A->p[i] + A->lenr[i]; q++)
        {
            long j  = A->j[q];
            void *x = A->x + (q * ctx->size);

            /* Non-zero entry at (i, j) with value *x */
        }
    }
    \end{lstlisting}

*******************************************************************************

void mat_csr_init(mat_csr_t A, long m, long n, const mat_ctx_t ctx)

void mat_csr_init2(mat_csr_t A, long m, long n, long alloc, const mat_ctx_t ctx)

void mat_csr_realloc(mat_csr_t A, long alloc, const mat_ctx_t ctx)

void mat_csr_clear(mat_csr_t A, const mat_ctx_t ctx)

void mat_csr_fit_length(mat_csr_t A, long len, const mat_ctx_t ctx)

void mat_csr_sort_rows(mat_csr_t A, const mat_ctx_t ctx)

    Ensures that the column indices in array \code{A->j} appear in sorted 
    order, by re-ordering the two arrays \code{A->j} and \code{A->x} 
    accordingly.

*******************************************************************************

    Assignment

*******************************************************************************

void mat_csr_set_mat(mat_csr_t A, const mat_t mat, const mat_ctx_t ctx)

    Sets the sparse matrix \code{A} to the dense matrix \code{mat}.

void mat_csr_set_array3(mat_csr_t A, char *mem, long len, int copy, 
                        const mat_ctx_t ctx)

    Sets $A$ to the sparse matrix given by the array \code{mem} in 
    coordinate list format.

    The array \code{mem} contains \code{len} elements, each in the form 
    of row and column indices as \code{long}s followed by the entry, 
    requiring memory of size \code{2 * sizeof(long) + ctx->size}.  The 
    entries are allowed to be zero, and no particular ordering of the 
    list is assumed.

    If \code{copy} is non-zero, the entries are copied into the 
    matrix $A$ and the caller is responsible for clearing the list 
    \code{mem} appropriately, which is important for managed entry 
    types.  Otherwise, it suffices that the caller frees \code{mem}.

void mat_csr_zero(mat_csr_t A, const mat_ctx_t ctx)

    Sets the matrix $A$ to zero.

*******************************************************************************

    Randomisation

*******************************************************************************

void mat_csr_randtest(mat_csr_t A, 
                      flint_rand_t state, double d, const mat_ctx_t ctx)

    Sets $A$ to a random matrix of density $d$.

*******************************************************************************

    Comparison

*******************************************************************************

int mat_csr_is_zero(const mat_csr_t A, const mat_ctx_t ctx)

    Returns whether the matrix $A$ is zero.

*******************************************************************************

    Permutations

*******************************************************************************

void _mat_csr_permute_cols(long m, long n, long *j, long *p, long *lenr, 
                           const long *pi)

void mat_csr_permute_cols(mat_csr_t A, const long *pi, const mat_ctx_t ctx)

    Permutes the columns of $A$ according to the permutation $\pi$.  This 
    means that if $Q$ is the permutation matrix corresponding to $\pi$, 
    this function sets $A$ to $A Q^t$.

    See \code{mat_csr_permute_rows()}.

void _mat_csr_permute_rows(long m, long *p, long *lenr, const long *pi)

void mat_csr_permute_rows(mat_csr_t A, const long *pi, const mat_ctx_t ctx)

    Permutes the rows of $A$ according to the permutation $\pi$.  This 
    means that the $i$th row of the matrix on exit is the same as the 
    $\pi_i$th row of the matrix on entry to this function.  In terms of 
    permutation matrices, if $P$ is defined by 
    \begin{equation*}
    P_{ij} = \begin{cases} 1 & \text{if $j = \pi_i$} \\
                           0 & \text{otherwise} \end{cases}
    \end{equation*}
    then this function sets $A$ to $P A$.

*******************************************************************************

    Multiplication

*******************************************************************************


void mat_csr_mul_vec(char *y, const mat_csr_t A, const char *x, 
                     const mat_ctx_t ctx)

    Sets $y$ to $A x$.  In terms of the entries this means that, for all 
    $0 \leq i < m$, 
    \begin{equation*}
    y_i = \sum_{0 \leq j < n} A_{ij} x_j.
    \end{equation*}

*******************************************************************************

    Linear systems

    This group contains functionality for solving linear systems 
    $A x = b$ with an emphasis on repeatedly solving systems with 
    constant matrix $A$ and varying constraints $b$.

    There is a structure \code{__mat_csr_solve_struct} that encapsulates 
    the information obtained in the preprocessing stage:

    Suppose we are given a non-singular matrix $A$.  First find a permutation 
    $P$ such that $P A$ has only non-zero entries on the diagonal, which is 
    facilitated by \code{zfdiagonal()}.  Next, find a permutation $Q$ such 
    that $Q P A Q^t$ is block triangular.  We set \code{nb} to the number 
    of blocks, and the array \code{B} is such that \code{B[k]} gives the 
    index of the first row belonging to the $k$th block.  We also arrange 
    for \code{B[nb] == m}.

*******************************************************************************

long _mat_csr_block_triangularise(long *arp, long *b, long n, const long *j, 
                                  const long *p, const long *lenr, long *w)

long mat_csr_block_triangularise(long *pi, long *b, const mat_csr_t A, 
                                                    const mat_ctx_t ctx)

    Given a square $n \times n$ matrix $A$, computes a permutation $\pi$ 
    represented by a permutation matrix $Q$ such that $Q A Q^t$ is lower 
    block triangular.

    Returns an integer $\beta$ with $1 \leq \beta \leq n$ that denotes 
    the number of blocks.  The array $b$ is expected to be of length $n$. 
    On exit, for $0 \leq k < \beta$, \code{b[k]} is the lowest row index in 
    the $k$th block.

    All the above mentioned \emph{blocks} are square.

long _mat_csr_zfdiagonal(long *pi, long n, const long *j, const long *p, 
                         const long *lenr, long *w)

    Expects a temporary array $w$ of length $5 n$.

long mat_csr_zfdiagonal(long *pi, const mat_csr_t A)

    Computes a permutation $\pi$ of rows, which we denote by $P$, such that 
    the matrix $P A$ has the greatest possible number of non-zero elements on 
    the diagonal.  This number is the return value.  For a structurally 
    non-singular matrix $A$, a permutation matrix $P$ can be found such that 
    the diagonal is zero-free.

    The permutation matrix $P$ is derived from the array $\pi$ of length $n$ 
    as follows:
    \begin{equation*}
    P_{ij} = \begin{cases} 1 & \text{if $j = \pi_i$} \\
                           0 & \text{otherwise} \end{cases}
    \end{equation*}

    Assumes that $m = n \geq 1$.

    Note that this function only works with the structural data of the 
    sparse matrix $A$.  That is to say, the array \code{A->x} is never 
    read.

void mat_csr_lupdecompose(mat_csr_t L, mat_csr_t U, long *pi, 
                          const mat_csr_t A, const mat_ctx_t ctx)

    Asserts that the dimensions of the matrices $L$ and $U$ are the same 
    as those of the square $n \times n$ matrix $A$.  Assumes that 
    $\pi$ is an array of length $n$.  Assumes that $A$ is non-singular.

void mat_csr_solve_init(mat_csr_solve_t s, const mat_csr_t mat, 
                        const mat_ctx_t ctx)

    Initialises the solve structure for the matrix \code{mat}.

void mat_csr_solve_clear(mat_csr_solve_t s, const mat_ctx_t ctx)

    Clears the memory occupied internally by the solve structure.

void mat_csr_solve(char *x, const mat_csr_solve_t s, const char *b, 
                   const mat_ctx_t ctx)

    Suppose the solve structure \code{s} was created for an 
    $m \times n$ matrix $A$.  Given a vector $b$ of length $m$, this 
    function solves the system of linear equations $A x = b$ 
    for a vector $x$ of length $n$.

*******************************************************************************

    Input and output

*******************************************************************************

int mat_csr_debug(const mat_csr_t A, const mat_ctx_t ctx)

    Prints some debugging information about $A$ to \code{stdout}.

int mat_csr_print_dense(const mat_csr_t A, const mat_ctx_t ctx)

    Prints a dense representation of $A$ to \code{stdout}.

