eccLib 1.3.0
Python library for bioinformatics written in C
Loading...
Searching...
No Matches
Macros | Functions | Variables
gtf.c File Reference

Implementations for the GTF module. More...

#include "gtf.h"
#include <ctype.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <Python.h>
#include "../classes/GtfDict/GtfDict.h"
#include "../common.h"
Include dependency graph for gtf.c:

Macros

#define GTF_NONE_VAL   '.'
 
#define MAX_2   0x10000
 
#define MAX_4   0x110000
 
#define IS_CONTINUATION(c)   ((c & 0xC0) == 0x80)
 
#define IS_1LEAD(c)   (c >= 0)
 
#define IS_2LEAD(c)   ((c & 0xE0) == 0xC0)
 
#define IS_4LEAD(c)   ((c & 0xF8) == 0xF0)
 
#define IS_URL_ENCODED(str, i, len)
 
#define REALLOC_EXPR(buf, alloc, min_expr, alloc_expr)
 Helper macro to reallocate a buffer and update the allocation size.
 

Functions

bool validGTFLineToParse (const char *line, size_t len)
 Determines if the provided line is a valid GTF line that can be parsed safely.
 
static uint8_t hex_to_byte (char c)
 Converts a hex character to half a byte.
 
static PyObject * PyUnicode_FromPercentEncoded (const char *str, size_t len)
 Converts a percent encoded string to a Python unicode string.
 
static PyObject * lookup_str (hashmap_t *map, const char *key, size_t len)
 Looks up a string in the hashmap, and if it doesn't exist, creates it.
 
static int add_key_value (hashmap_t *restrict attributes, hashmap_t *restrict attr_keys, hashmap_t *restrict attr_vals, const char *restrict key, size_t keyLen, const char *restrict value, size_t valLen, PyObject *atrr_tp)
 Adds a key-value pair to the GTF dictionary.
 
static int handleGTFAttributes (GtfDict *dict, const occurrence_t *lastoccurrence, PyObject *attr_tp, hashmap_t *restrict attr_keys, hashmap_t *restrict attr_vals)
 Handles the attributes of a GTF line and adds them to the provided dict.
 
static PyObject * process_token_str (const occurrence_t *token, hashmap_t *attr_vals)
 Processes a token as a string.
 
static PyObject * process_token_int (const occurrence_t *token)
 Processes a token as an integer.
 
GtfDict * createGTFdict (const occurrence_t *token, PyObject *attr_tp, hashmap_t *restrict attr_keys, hashmap_t *restrict attr_vals)
 
static char * gtf_percent_encode (char *restrict buf, size_t *len, size_t *alloc, const char *restrict in, size_t inLen)
 Percent encodes restricted GTF characters.
 
static char * write_unicode_encoded (PyObject *unicode, char *buf, size_t *size, size_t *alloc)
 Returns the string representation of a Python string, with restricted characters percent encoded.
 
char * write_owned (char *restrict buf, size_t *restrict len, size_t *alloc, const char *restrict in, size_t inLen)
 Writes a string to the output buffer, reallocing if necessary.
 
static int iterate_to_str (void *const context, struct hashmap_element_s *const e)
 
char * GtfDict_dump (GtfDict *restrict const self, char *restrict in, size_t *restrict len, size_t *restrict alloc)
 Dumps the contents of a GtfDict to a string.
 
int GtfDict_as_annotation (PyObject *restrict object, long *restrict start, long *restrict end)
 Converts a GtfDict or mapping object to an annotation.
 

Variables

static const char missing = '.'
 
static const char break_char = '\t'
 
static const char attr_break [] = " \""
 
static const char attr_finish [] = "\";"
 
const char * keywords [CORE_FIELD_COUNT]
 Array containing the keywords of the GTF fields.
 
const uint8_t keyword_sizes [CORE_FIELD_COUNT] = {7, 6, 7, 5, 3, 5, 7, 5}
 Array containing the sizes of the keywords.
 
const PyTypeObject * keyword_types [CORE_FIELD_COUNT]
 Array containing the types of the keywords.
 

Detailed Description

Implementations for the GTF module.

Macro Definition Documentation

◆ GTF_NONE_VAL

#define GTF_NONE_VAL   '.'

◆ IS_1LEAD

#define IS_1LEAD (   c)    (c >= 0)

◆ IS_2LEAD

#define IS_2LEAD (   c)    ((c & 0xE0) == 0xC0)

◆ IS_4LEAD

#define IS_4LEAD (   c)    ((c & 0xF8) == 0xF0)

◆ IS_CONTINUATION

#define IS_CONTINUATION (   c)    ((c & 0xC0) == 0x80)

◆ IS_URL_ENCODED

#define IS_URL_ENCODED (   str,
  i,
  len 
)
Value:
(str[i] == '%' && i + 2 < len && isxdigit(str[i + 1]) && \
isxdigit(str[i + 2]))

◆ MAX_2

#define MAX_2   0x10000

◆ MAX_4

#define MAX_4   0x110000

◆ REALLOC_EXPR

#define REALLOC_EXPR (   buf,
  alloc,
  min_expr,
  alloc_expr 
)
Value:
while (buf == NULL || *alloc < min_expr) { \
*alloc = alloc_expr; \
buf = realloc(buf, *alloc); \
if (buf == NULL) { \
return NULL; \
} \
}

Helper macro to reallocate a buffer and update the allocation size.

Parameters
buf: the buffer to reallocate
alloc: pointer to the allocation size to update
min_expr: the minimum allocation size
alloc_expr: the expression to calculate the new allocation size
Note
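The macro uses a while loop, so it repeats the reallocation until buf is non-NULL and *alloc is at least min_expr; it is also fine to use where a single if check would suffice. On allocation failure it executes return NULL in the enclosing function (the previous buffer is not freed in that case).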

Function Documentation

◆ add_key_value()

static int add_key_value ( hashmap_t *restrict  attributes,
hashmap_t *restrict  attr_keys,
hashmap_t *restrict  attr_vals,
const char *restrict  key,
size_t  keyLen,
const char *restrict  value,
size_t  valLen,
PyObject *  atrr_tp 
)
inline static

Adds a key-value pair to the GTF dictionary.

Parameters
attributes: the dictionary to add the key-value pair to
attr_keys: the cache of previously seen attribute keys
attr_vals: the cache of previously seen attribute values
key: the key to add
keyLen: the length of the key
value: the value to add
valLen: the length of the value
atrr_tp: a Python dict or None; if a dict, it maps each attribute key to the callable used to convert that attribute's value
Returns
-1 on error

This is a convenience function to improve readability of the handleGTFAttributes function

Here is the call graph for this function:
Here is the caller graph for this function:

◆ createGTFdict()

GtfDict * createGTFdict ( const occurrence_t *  token,
PyObject *  attr_tp,
hashmap_t *restrict  attr_keys,
hashmap_t *restrict  attr_vals 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ gtf_percent_encode()

static char * gtf_percent_encode ( char *restrict  buf,
size_t *  len,
size_t *  alloc,
const char *restrict  in,
size_t  inLen 
)
static

Percent encodes restricted GTF characters.

Parameters
buf: the buffer to store the encoded string
len: the length of the encoded string. Must be a valid pointer
alloc: the allocated size of the buffer
in: the string to percent encode
inLen: the length of the string to percent encode
Returns
a newly allocated string with percent encoded characters
Note
buf may be NULL, if there is no pre-allocated buffer
Warning
Regardless of whether buf is NULL, len and alloc must be valid pointers; the values they point to should be set to 0 if buf is NULL.
Here is the call graph for this function:
Here is the caller graph for this function:

◆ GtfDict_as_annotation()

int GtfDict_as_annotation ( PyObject *restrict  object,
long *restrict  start,
long *restrict  end 
)

Converts a GtfDict or mapping object to an annotation.

Parameters
object: the object to convert
start: a pointer to the start of the annotation
end: a pointer to the end of the annotation
Returns
0 on success, -1 on failure
Here is the caller graph for this function:

◆ GtfDict_dump()

char * GtfDict_dump ( GtfDict *restrict const  self,
char *restrict  in,
size_t *restrict  len,
size_t *restrict  alloc 
)

Dumps the contents of a GtfDict to a string.

Parameters
self: the GtfDict to dump
in: the input buffer to write to
len: a pointer to the length of the output string
alloc: a pointer to the size of the allocated output string
Returns
the output buffer
Note
if no allocation is done, in is returned unchanged
Warning
this function takes ownership of in and will free it if necessary
Here is the call graph for this function:
Here is the caller graph for this function:

◆ handleGTFAttributes()

static int handleGTFAttributes ( GtfDict *  dict,
const occurrence_t *  lastoccurrence,
PyObject *  attr_tp,
hashmap_t *restrict  attr_keys,
hashmap_t *restrict  attr_vals 
)
inline static

Handles the attributes of a GTF line and adds them to the provided dict.

Parameters
dict: the dict to which the attributes should be added
lastoccurrence: the last occurrence of the attributes in the GTF line
attr_tp: a mapping containing the callable to use to convert the attribute values to the correct type, or None
attr_keys: set of previously seen attribute keys
attr_vals: set of previously seen attribute values
Returns
-1 on error

This function parses the key-value section of the GTF line. It utilizes a hashmap cache to store the keys of the attributes, and utilizes a unique encoding processing function

Here is the call graph for this function:
Here is the caller graph for this function:

◆ hex_to_byte()

static uint8_t hex_to_byte ( char  c)
inlinestatic

Converts a hex character to half a byte.

Parameters
c: the character to convert
Returns
the half-byte (nibble) value represented by the character
Here is the caller graph for this function:

◆ iterate_to_str()

static int iterate_to_str ( void *const  context,
struct hashmap_element_s *const  e 
)
static
Here is the call graph for this function:
Here is the caller graph for this function:

◆ lookup_str()

static PyObject * lookup_str ( hashmap_t *  map,
const char *  key,
size_t  len 
)
inline static

Looks up a string in the hashmap, and if it doesn't exist, creates it.

Parameters
map: the hashmap to look up the string in
key: the key to look up
len: the length of the key
Returns
the found or created object as a new reference, or NULL on error
Here is the call graph for this function:
Here is the caller graph for this function:

◆ process_token_int()

static PyObject * process_token_int ( const occurrence_t *  token)
inline static

Processes a token as an integer.

Parameters
token: the token to process
Returns
the processed token
Here is the caller graph for this function:

◆ process_token_str()

static PyObject * process_token_str ( const occurrence_t *  token,
hashmap_t *  attr_vals 
)
inline static

Processes a token as a string.

Parameters
token: the token to process
attr_vals: the cache of previously seen attribute values
Returns
the processed token

This function processes a token as a string, meant to be used during core field parsing

Here is the call graph for this function:
Here is the caller graph for this function:

◆ PyUnicode_FromPercentEncoded()

static PyObject * PyUnicode_FromPercentEncoded ( const char *  str,
size_t  len 
)
static

Converts a percent encoded string to a Python unicode string.

Parameters
str: the token to convert
len: the length of the token
Returns
a Python unicode string or NULL on error
Here is the call graph for this function:
Here is the caller graph for this function:

◆ write_owned()

char * write_owned ( char *restrict  buf,
size_t *restrict  len,
size_t *  alloc,
const char *restrict  in,
size_t  inLen 
)

Writes a string to the output buffer, reallocing if necessary.

Parameters
buf: The output buffer
len: The length of the output buffer
alloc: a pointer to the size of the allocated output buffer
in: The string to write
inLen: The length of the string to write
Returns
The updated output buffer
Here is the caller graph for this function:

◆ write_unicode_encoded()

static char * write_unicode_encoded ( PyObject *  unicode,
char *  buf,
size_t *  size,
size_t *  alloc 
)
static

Returns the string representation of a Python string, with restricted characters percent encoded.

Parameters
unicode: the string to encode
buf: the buffer to write to
size: the size of the buffer
alloc: the size of the allocated buffer
Returns
a newly allocated string with percent encoded characters
Here is the call graph for this function:
Here is the caller graph for this function:

Variable Documentation

◆ attr_break

const char attr_break[] = " \""
static

◆ attr_finish

const char attr_finish[] = "\";"
static

◆ break_char

const char break_char = '\t'
static

◆ missing

const char missing = '.'
static