|
eccLib 1.3.0
Python library for bioinformatics written in C
|
Implementations for the GTF module. More...
#include "gtf.h"
#include <ctype.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <Python.h>
#include "../classes/GtfDict/GtfDict.h"
#include "../common.h"
Macros | |
| #define | GTF_NONE_VAL '.' |
| #define | MAX_2 0x10000 |
| #define | MAX_4 0x110000 |
| #define | IS_CONTINUATION(c) ((c & 0xC0) == 0x80) |
| #define | IS_1LEAD(c) (c >= 0) |
| #define | IS_2LEAD(c) ((c & 0xE0) == 0xC0) |
| #define | IS_4LEAD(c) ((c & 0xF8) == 0xF0) |
| #define | IS_URL_ENCODED(str, i, len) |
| #define | REALLOC_EXPR(buf, alloc, min_expr, alloc_expr) |
| Helper macro to reallocate a buffer and update the allocation size. | |
Functions | |
| bool | validGTFLineToParse (const char *line, size_t len) |
| Determines if the provided line is a valid GTF line that can be parsed safely. | |
| static uint8_t | hex_to_byte (char c) |
| Converts a hex character to half a byte. | |
| static PyObject * | PyUnicode_FromPercentEncoded (const char *str, size_t len) |
| Converts a percent encoded string to a Python unicode string. | |
| static PyObject * | lookup_str (hashmap_t *map, const char *key, size_t len) |
| Looks up a string in the hashmap, and if it doesn't exist, creates it. | |
| static int | add_key_value (hashmap_t *restrict attributes, hashmap_t *restrict attr_keys, hashmap_t *restrict attr_vals, const char *restrict key, size_t keyLen, const char *restrict value, size_t valLen, PyObject *atrr_tp) |
| Adds a key-value pair to the GTF dictionary. | |
| static int | handleGTFAttributes (GtfDict *dict, const occurrence_t *lastoccurrence, PyObject *attr_tp, hashmap_t *restrict attr_keys, hashmap_t *restrict attr_vals) |
| Handles the attributes of a GTF line and adds them to the provided dict. | |
| static PyObject * | process_token_str (const occurrence_t *token, hashmap_t *attr_vals) |
| Processes a token as a string. | |
| static PyObject * | process_token_int (const occurrence_t *token) |
| Processes a token as an integer. | |
| GtfDict * | createGTFdict (const occurrence_t *token, PyObject *attr_tp, hashmap_t *restrict attr_keys, hashmap_t *restrict attr_vals) |
| static char * | gtf_percent_encode (char *restrict buf, size_t *len, size_t *alloc, const char *restrict in, size_t inLen) |
| Percent encodes restricted GTF characters. | |
| static char * | write_unicode_encoded (PyObject *unicode, char *buf, size_t *size, size_t *alloc) |
| Returns the string representation of a Python string, with restricted characters percent encoded. | |
| char * | write_owned (char *restrict buf, size_t *restrict len, size_t *alloc, const char *restrict in, size_t inLen) |
| Writes a string to the output buffer, reallocing if necessary. | |
| static int | iterate_to_str (void *const context, struct hashmap_element_s *const e) |
| char * | GtfDict_dump (GtfDict *restrict const self, char *restrict in, size_t *restrict len, size_t *restrict alloc) |
| Dumps the contents of a GtfDict to a string. | |
| int | GtfDict_as_annotation (PyObject *restrict object, long *restrict start, long *restrict end) |
| Converts a GtfDict or mapping object to an annotation. | |
Variables | |
| static const char | missing = '.' |
| static const char | break_char = '\t' |
| static const char | attr_break [] = " \"" |
| static const char | attr_finish [] = "\";" |
| const char * | keywords [CORE_FIELD_COUNT] |
| Array containing the keywords of the GTF fields. | |
| const uint8_t | keyword_sizes [CORE_FIELD_COUNT] = {7, 6, 7, 5, 3, 5, 7, 5} |
| Array containing the sizes of the keywords. | |
| const PyTypeObject * | keyword_types [CORE_FIELD_COUNT] |
| Array containing the types of the keywords. | |
Implementations for the GTF module.
| #define GTF_NONE_VAL '.' |
| #define IS_1LEAD | ( | c | ) | (c >= 0) |
| #define IS_2LEAD | ( | c | ) | ((c & 0xE0) == 0xC0) |
| #define IS_4LEAD | ( | c | ) | ((c & 0xF8) == 0xF0) |
| #define IS_CONTINUATION | ( | c | ) | ((c & 0xC0) == 0x80) |
| #define IS_URL_ENCODED | ( | str, | |
| i, | |||
| len | |||
| ) |
| #define MAX_2 0x10000 |
| #define MAX_4 0x110000 |
| #define REALLOC_EXPR | ( | buf, | |
| alloc, | |||
| min_expr, | |||
| alloc_expr | |||
| ) |
Helper macro to reallocate a buffer and update the allocation size.
| buf | the buffer to reallocate |
| alloc | the allocation size to update |
| min_expr | the minimum allocation size |
| alloc_expr | the expression to calculate the new allocation size |
|
inline static |
Adds a key-value pair to the GTF dictionary.
| attributes | the dictionary to add the key-value pair to |
| attr_keys | the cache of previously seen attribute keys |
| attr_vals | the cache of previously seen attribute values |
| key | the key to add |
| keyLen | the length of the key |
| value | the value to add |
| valLen | the length of the value |
| atrr_tp | a Python dict mapping attribute keys to callables, or None |
This is a convenience function that improves the readability of the handleGTFAttributes function.


| GtfDict * createGTFdict | ( | const occurrence_t * | token, |
| PyObject * | attr_tp, | ||
| hashmap_t *restrict | attr_keys, | ||
| hashmap_t *restrict | attr_vals | ||
| ) |


|
static |
Percent encodes restricted GTF characters.
| buf | the buffer to store the encoded string |
| len | the length of the encoded string. Must be a valid pointer |
| alloc | the allocated size of the buffer |
| in | the string to percent encode |
| inLen | the length of the string to percent encode |


| int GtfDict_as_annotation | ( | PyObject *restrict | object, |
| long *restrict | start, | ||
| long *restrict | end | ||
| ) |
Converts a GtfDict or mapping object to an annotation.
| object | the object to convert |
| start | a pointer to the start of the annotation |
| end | a pointer to the end of the annotation |

| char * GtfDict_dump | ( | GtfDict *restrict const | self, |
| char *restrict | in, | ||
| size_t *restrict | len, | ||
| size_t *restrict | alloc | ||
| ) |
Dumps the contents of a GtfDict to a string.
| self | the GtfDict to dump |
| in | the input buffer to write to |
| len | a pointer to the length of the output string |
| alloc | a pointer to the size of the allocated output string |


|
inline static |
Handles the attributes of a GTF line and adds them to the provided dict.
| dict | the dict to which the attributes should be added |
| lastoccurrence | the last occurrence of the attributes in the GTF line |
| attr_tp | a mapping containing the callable to use to convert the attribute values to the correct type, or None |
| attr_keys | set of previously seen attribute keys |
| attr_vals | set of previously seen attribute values |
This function parses the key-value section of the GTF line. It uses a hashmap cache to store the attribute keys, and a dedicated encoding-processing function.


|
inline static |
Converts a hex character to half a byte.
| c | the character to convert |

|
static |


|
inline static |
Looks up a string in the hashmap, and if it doesn't exist, creates it.
| map | the hashmap to look up the string in |
| key | the key to look up |
| len | the length of the key |


|
inline static |
Processes a token as an integer.
| token | the token to process |

|
inline static |
Processes a token as a string.
| token | the token to process |
| attr_vals | the cache of previously seen attribute values |
This function processes a token as a string; it is meant to be used during core field parsing.


|
static |
Converts a percent encoded string to a Python unicode string.
| str | the token to convert |
| len | the length of the token |


| char * write_owned | ( | char *restrict | buf, |
| size_t *restrict | len, | ||
| size_t * | alloc, | ||
| const char *restrict | in, | ||
| size_t | inLen | ||
| ) |
Writes a string to the output buffer, reallocing if necessary.
| buf | The output buffer |
| len | The length of the output buffer |
| alloc | a pointer to the size of the allocated output buffer |
| in | The string to write |
| inLen | The length of the string to write |

|
static |
Returns the string representation of a Python string, with restricted characters percent encoded.
| unicode | the string to encode |
| buf | the buffer to write to |
| size | the size of the buffer |
| alloc | the size of the allocated buffer |


|
static |
|
static |
|
static |
|
static |