eccLib 1.1.0
Python library for bioinformatics written in C
Loading...
Searching...
No Matches
Macros | Functions | Variables
gtf.c File Reference

Implementations for the GTF module. More...

#include "gtf.h"
#include <ctype.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <Python.h>
#include "../classes/GtfDict.h"
#include "../common.h"
Include dependency graph for gtf.c:

Macros

#define GTF_NONE_VAL   '.'
 
#define MAX_2   0x10000
 
#define MAX_4   0x110000
 
#define IS_CONTINUATION(c)   ((c & 0xC0) == 0x80)
 
#define IS_1LEAD(c)   (c >= 0)
 
#define IS_2LEAD(c)   ((c & 0xE0) == 0xC0)
 
#define IS_4LEAD(c)   ((c & 0xF8) == 0xF0)
 
#define IS_URL_ENCODED(str, i, len)
 

Functions

bool validGTFLineToParse (const char *line, size_t len)
 Determines if the provided line is a valid GTF line that can be parsed safely.
 
static uint8_t hex_to_byte (char c)
 Converts a hex character to half a byte.
 
static PyObject * PyUnicode_FromPercentEncoded (const char *str, size_t len)
 Converts a percent encoded string to a Python unicode string.
 
static PyObject * lookup_str (hashmap_t *map, const char *key, size_t len)
 Looks up a string in the hashmap, and if it doesn't exist, creates it.
 
static int add_key_value (hashmap_t *restrict attributes, hashmap_t *restrict attr_keys, hashmap_t *restrict attr_vals, const char *restrict key, size_t keyLen, const char *restrict value, size_t valLen, PyObject *atrr_tp)
 Adds a key-value pair to the GTF dictionary.
 
static int handleGTFAttributes (GtfDict *dict, const occurrence_t *lastoccurrence, PyObject *attr_tp, hashmap_t *restrict attr_keys, hashmap_t *restrict attr_vals)
 Handles the attributes of a GTF line and adds them to the provided dict.
 
static PyObject * process_token_str (const occurrence_t *token, hashmap_t *attr_vals)
 Processes a token as a string.
 
static PyObject * process_token_int (const occurrence_t *token)
 Processes a token as an integer.
 
GtfDict * createGTFdict (const occurrence_t *token, PyObject *attr_tp, hashmap_t *restrict attr_keys, hashmap_t *restrict attr_vals)
 
char * gtf_percent_encode (const char *restrict str, size_t len, size_t *restrict outLen)
 Percent encodes restricted GTF characters.
 

Variables

const char * keywords []
 Array containing the keywords of the GTF fields.
 
const uint8_t keyword_sizes [CORE_FIELD_COUNT] = {7, 6, 7, 5, 3, 5, 7, 5}
 Array containing the sizes of the keywords.
 

Detailed Description

Implementations for the GTF module.

Macro Definition Documentation

◆ GTF_NONE_VAL

#define GTF_NONE_VAL   '.'

◆ IS_1LEAD

#define IS_1LEAD (   c)    (c >= 0)

◆ IS_2LEAD

#define IS_2LEAD (   c)    ((c & 0xE0) == 0xC0)

◆ IS_4LEAD

#define IS_4LEAD (   c)    ((c & 0xF8) == 0xF0)

◆ IS_CONTINUATION

#define IS_CONTINUATION (   c)    ((c & 0xC0) == 0x80)

◆ IS_URL_ENCODED

#define IS_URL_ENCODED (   str,
  i,
  len 
)
Value:
(str[i] == '%' && i + 2 < len && isxdigit(str[i + 1]) && \
isxdigit(str[i + 2]))

◆ MAX_2

#define MAX_2   0x10000

◆ MAX_4

#define MAX_4   0x110000

Function Documentation

◆ add_key_value()

static int add_key_value ( hashmap_t *restrict  attributes,
hashmap_t *restrict  attr_keys,
hashmap_t *restrict  attr_vals,
const char *restrict  key,
size_t  keyLen,
const char *restrict  value,
size_t  valLen,
PyObject *  atrr_tp 
)
inline static

Adds a key-value pair to the GTF dictionary.

Parameters
attributes: the dictionary to add the key-value pair to
attr_keys: the cache of previously seen attribute keys
attr_vals: the cache of previously seen attribute values
key: the key to add
keyLen: the length of the key
value: the value to add
valLen: the length of the value
atrr_tp: a Python dict mapping attribute keys to callables, or None
Returns
-1 on error

This is a convenience function that improves the readability of the handleGTFAttributes function.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ createGTFdict()

GtfDict * createGTFdict ( const occurrence_t * token,
PyObject *  attr_tp,
hashmap_t *restrict  attr_keys,
hashmap_t *restrict  attr_vals 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ gtf_percent_encode()

char * gtf_percent_encode ( const char *restrict  str,
size_t  len,
size_t *restrict  outLen 
)

Percent encodes restricted GTF characters.

Parameters
str: the string to percent encode
len: the length of the string
outLen: the length of the output string
Returns
a newly allocated string with percent encoded characters
Here is the call graph for this function:
Here is the caller graph for this function:

◆ handleGTFAttributes()

static int handleGTFAttributes ( GtfDict * dict,
const occurrence_t * lastoccurrence,
PyObject *  attr_tp,
hashmap_t *restrict  attr_keys,
hashmap_t *restrict  attr_vals 
)
inline static

Handles the attributes of a GTF line and adds them to the provided dict.

Parameters
dict: the dict to which the attributes should be added
lastoccurrence: the last occurrence of the attributes in the GTF line
attr_tp: a mapping containing the callable to use to convert the attribute values to the correct type, or None
attr_keys: set of previously seen attribute keys
attr_vals: set of previously seen attribute values
Returns
-1 on error

This function parses the key-value section of the GTF line. It uses a hashmap cache to store the keys of the attributes, and a dedicated function to process the encoding of the values.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ hex_to_byte()

static uint8_t hex_to_byte ( char  c)
inline static

Converts a hex character to half a byte.

Parameters
c: the character to convert
Returns
the half a byte represented by the character
Here is the caller graph for this function:

◆ lookup_str()

static PyObject * lookup_str ( hashmap_t *  map,
const char *  key,
size_t  len 
)
inline static

Looks up a string in the hashmap, and if it doesn't exist, creates it.

Parameters
map: the hashmap to look up the string in
key: the key to look up
len: the length of the key
Returns
the found or created object as a new reference, or NULL on error
Here is the call graph for this function:
Here is the caller graph for this function:

◆ process_token_int()

static PyObject * process_token_int ( const occurrence_t * token)
inline static

Processes a token as an integer.

Parameters
token: the token to process
Returns
the processed token
Here is the caller graph for this function:

◆ process_token_str()

static PyObject * process_token_str ( const occurrence_t * token,
hashmap_t *  attr_vals 
)
inline static

Processes a token as a string.

Parameters
token: the token to process
attr_vals: the cache of previously seen attribute values
Returns
the processed token

This function processes a token as a string; it is meant to be used during core field parsing.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ PyUnicode_FromPercentEncoded()

static PyObject * PyUnicode_FromPercentEncoded ( const char *  str,
size_t  len 
)
static

Converts a percent encoded string to a Python unicode string.

Parameters
str: the token to convert
len: the length of the token
Returns
a Python unicode string or NULL on error
Here is the call graph for this function:
Here is the caller graph for this function: