eccLib 1.1.0
Python library for bioinformatics written in C
|
Implementations for the GTF module. More...
#include "gtf.h"
#include <ctype.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <Python.h>
#include "../classes/GtfDict.h"
#include "../common.h"
Macros | |
#define | GTF_NONE_VAL '.' |
#define | MAX_2 0x10000 |
#define | MAX_4 0x110000 |
#define | IS_CONTINUATION(c) ((c & 0xC0) == 0x80) |
#define | IS_1LEAD(c) (c >= 0) |
#define | IS_2LEAD(c) ((c & 0xE0) == 0xC0) |
#define | IS_4LEAD(c) ((c & 0xF8) == 0xF0) |
#define | IS_URL_ENCODED(str, i, len) |
Functions | |
bool | validGTFLineToParse (const char *line, size_t len) |
Determines if the provided line is a valid GTF line that can be parsed safely. | |
static uint8_t | hex_to_byte (char c) |
Converts a hex character to half a byte. | |
static PyObject * | PyUnicode_FromPercentEncoded (const char *str, size_t len) |
Converts a percent encoded string to a Python unicode string. | |
static PyObject * | lookup_str (hashmap_t *map, const char *key, size_t len) |
Looks up a string in the hashmap, and if it doesn't exist, creates it. | |
static int | add_key_value (hashmap_t *restrict attributes, hashmap_t *restrict attr_keys, hashmap_t *restrict attr_vals, const char *restrict key, size_t keyLen, const char *restrict value, size_t valLen, PyObject *atrr_tp) |
Adds a key-value pair to the GTF dictionary. | |
static int | handleGTFAttributes (GtfDict *dict, const occurrence_t *lastoccurrence, PyObject *attr_tp, hashmap_t *restrict attr_keys, hashmap_t *restrict attr_vals) |
Handles the attributes of a GTF line and adds them to the provided dict. | |
static PyObject * | process_token_str (const occurrence_t *token, hashmap_t *attr_vals) |
Processes a token as a string. | |
static PyObject * | process_token_int (const occurrence_t *token) |
Processes a token as an integer. | |
GtfDict * | createGTFdict (const occurrence_t *token, PyObject *attr_tp, hashmap_t *restrict attr_keys, hashmap_t *restrict attr_vals) |
char * | gtf_percent_encode (const char *restrict str, size_t len, size_t *restrict outLen) |
Percent encodes restricted GTF characters. | |
Variables | |
const char * | keywords [] |
Array containing the keywords of the GTF fields. | |
const uint8_t | keyword_sizes [CORE_FIELD_COUNT] = {7, 6, 7, 5, 3, 5, 7, 5} |
Array containing the sizes of the keywords. | |
Implementations for the GTF module.
#define GTF_NONE_VAL '.' |
#define IS_1LEAD | ( | c | ) | (c >= 0) |
#define IS_2LEAD | ( | c | ) | ((c & 0xE0) == 0xC0) |
#define IS_4LEAD | ( | c | ) | ((c & 0xF8) == 0xF0) |
#define IS_CONTINUATION | ( | c | ) | ((c & 0xC0) == 0x80) |
#define IS_URL_ENCODED | ( | str, | |
i, | |||
len | |||
) |
#define MAX_2 0x10000 |
#define MAX_4 0x110000 |
|
inline static |
Adds a key-value pair to the GTF dictionary.
attributes | the dictionary to add the key-value pair to |
attr_keys | the cache of previously seen attribute keys |
attr_vals | the cache of previously seen attribute values |
key | the key to add |
keyLen | the length of the key |
value | the value to add |
valLen | the length of the value |
atrr_tp | Python dict or None; if it is a dict, it maps each key to a callable |
This is a convenience function to improve the readability of the handleGTFAttributes function.
GtfDict * createGTFdict | ( | const occurrence_t * | token, |
PyObject * | attr_tp, | ||
hashmap_t *restrict | attr_keys, | ||
hashmap_t *restrict | attr_vals | ||
) |
char * gtf_percent_encode | ( | const char *restrict | str, |
size_t | len, | ||
size_t *restrict | outLen | ||
) |
Percent encodes restricted GTF characters.
str | the string to percent encode |
len | the length of the string |
outLen | the length of the output string |
|
inline static |
Handles the attributes of a GTF line and adds them to the provided dict.
dict | the dict to which the attributes should be added |
lastoccurrence | the last occurrence of the attributes in the GTF line |
attr_tp | a mapping containing the callable to use to convert the attribute values to the correct type, or None |
attr_keys | set of previously seen attribute keys |
attr_vals | set of previously seen attribute values |
This function parses the key-value section of the GTF line. It uses a hashmap cache to store the attribute keys and a dedicated encoding-processing function.
|
inline static |
Converts a hex character to half a byte.
c | the character to convert |
|
inline static |
Looks up a string in the hashmap, and if it doesn't exist, creates it.
map | the hashmap to look up the string in |
key | the key to look up |
len | the length of the key |
|
inline static |
Processes a token as an integer.
token | the token to process |
|
inline static |
Processes a token as a string.
token | the token to process |
attr_vals | the cache of previously seen attribute values |
This function processes a token as a string; it is meant to be used during core field parsing.
|
static |
Converts a percent encoded string to a Python unicode string.
str | the token to convert |
len | the length of the token |