libdom
Loading...
Searching...
No Matches
string.c File Reference
#include <assert.h>
#include <ctype.h>
#include <inttypes.h>
#include <stdlib.h>
#include <string.h>
#include <parserutils/charset/utf8.h>
#include "core/string.h"
#include "core/document.h"
#include "utils/utils.h"

Classes

struct  dom_string_internal
 

Typedefs

typedef struct dom_string_internal dom_string_internal
 

Enumerations

enum  dom_string_type { DOM_STRING_CDATA = 0 , DOM_STRING_INTERNED = 1 }
 

Functions

void dom_string_destroy (dom_string *str)
 
dom_exception dom_string_create (const uint8_t *ptr, size_t len, dom_string **str)
 
dom_exception dom_string_create_interned (const uint8_t *ptr, size_t len, dom_string **str)
 
dom_exception dom_string_intern (dom_string *str, struct lwc_string_s **lwcstr)
 
bool dom_string_isequal (const dom_string *s1, const dom_string *s2)
 
bool dom_string_caseless_isequal (const dom_string *s1, const dom_string *s2)
 
bool dom_string_lwc_isequal (const dom_string *s1, lwc_string *s2)
 
bool dom_string_caseless_lwc_isequal (const dom_string *s1, lwc_string *s2)
 
uint32_t dom_string_index (dom_string *str, uint32_t chr)
 
uint32_t dom_string_rindex (dom_string *str, uint32_t chr)
 
uint32_t dom_string_length (dom_string *str)
 
dom_exception dom_string_at (dom_string *str, uint32_t index, uint32_t *ch)
 
dom_exception dom_string_concat (dom_string *s1, dom_string *s2, dom_string **result)
 
dom_exception dom_string_substr (dom_string *str, uint32_t i1, uint32_t i2, dom_string **result)
 
dom_exception dom_string_insert (dom_string *target, dom_string *source, uint32_t offset, dom_string **result)
 
dom_exception dom_string_replace (dom_string *target, dom_string *source, uint32_t i1, uint32_t i2, dom_string **result)
 
uint32_t dom_string_hash (dom_string *str)
 
dom_exception _dom_exception_from_lwc_error (lwc_error err)
 
const char * dom_string_data (const dom_string *str)
 
size_t dom_string_byte_length (const dom_string *str)
 
dom_exception dom_string_toupper (dom_string *source, bool ascii_only, dom_string **upper)
 
dom_exception dom_string_tolower (dom_string *source, bool ascii_only, dom_string **lower)
 
dom_exception dom_string_whitespace_op (dom_string *s, enum dom_whitespace_op op, dom_string **ret)
 

Typedef Documentation

◆ dom_string_internal

typedef struct dom_string_internal dom_string_internal

A DOM string

Strings are reference counted so destruction is performed correctly.

Enumeration Type Documentation

◆ dom_string_type

Type of a DOM string

Enumerator
DOM_STRING_CDATA 
DOM_STRING_INTERNED 

Function Documentation

◆ _dom_exception_from_lwc_error()

dom_exception _dom_exception_from_lwc_error ( lwc_error err)

Convert a lwc_error to a dom_exception

Parameters
errThe input lwc_error
Returns
the dom_exception

◆ dom_string_at()

dom_exception dom_string_at ( dom_string * str,
uint32_t index,
uint32_t * ch )

Get the UCS4 character at position index

Parameters
indexThe position of the character
chThe UCS4 character
Returns
DOM_NO_ERR on success, appropriate dom_exception on failure.

◆ dom_string_byte_length()

size_t dom_string_byte_length ( const dom_string * str)

Get the byte length of this dom_string

Parameters
strThe dom_string object

◆ dom_string_caseless_isequal()

bool dom_string_caseless_isequal ( const dom_string * s1,
const dom_string * s2 )

Case insensitively compare two DOM strings

Parameters
s1The first string to compare
s2The second string to compare
Returns
true if strings match, false otherwise

◆ dom_string_caseless_lwc_isequal()

bool dom_string_caseless_lwc_isequal ( const dom_string * s1,
lwc_string * s2 )

Case insensitively compare DOM string with lwc_string

Parameters
s1The first string to compare
s2The second string to compare
Returns
true if strings match, false otherwise

Returns false if either is NULL.

◆ dom_string_concat()

dom_exception dom_string_concat ( dom_string * s1,
dom_string * s2,
dom_string ** result )

Concatenate two dom strings

Parameters
s1The first string
s2The second string
resultPointer to location to receive result
Returns
DOM_NO_ERR on success, DOM_NO_MEM_ERR on memory exhaustion

The returned string will be referenced. The client should dereference it once it has finished with it.

◆ dom_string_create()

dom_exception dom_string_create ( const uint8_t * ptr,
size_t len,
dom_string ** str )

Create a DOM string from a string of characters

Parameters
ptrPointer to string of characters
lenLength, in bytes, of string of characters
strPointer to location to receive result
Returns
DOM_NO_ERR on success, DOM_NO_MEM_ERR on memory exhaustion

The returned string will already be referenced, so there is no need to explicitly reference it.

The string of characters passed in will be copied for use by the returned DOM string.

◆ dom_string_create_interned()

dom_exception dom_string_create_interned ( const uint8_t * ptr,
size_t len,
dom_string ** str )

Create an interned DOM string from a string of characters

Parameters
ptrPointer to string of characters
lenLength, in bytes, of string of characters
strPointer to location to receive result
Returns
DOM_NO_ERR on success, DOM_NO_MEM_ERR on memory exhaustion

The returned string will already be referenced, so there is no need to explicitly reference it.

The string of characters passed in will be copied for use by the returned DOM string.

◆ dom_string_data()

const char * dom_string_data ( const dom_string * str)

Get the raw character data of the dom_string.

Parameters
strThe dom_string object
Returns
The C string pointer
Note
This function is provided only for convenient access to the raw C string data; the returned string must not be modified.

◆ dom_string_destroy()

void dom_string_destroy ( dom_string * str)

◆ dom_string_hash()

uint32_t dom_string_hash ( dom_string * str)

Calculate a hash value from a dom string

Parameters
strThe string to calculate a hash of
Returns
The hash value associated with the string

◆ dom_string_index()

uint32_t dom_string_index ( dom_string * str,
uint32_t chr )

Get the index of the first occurrence of a character in a dom string

Parameters
strThe string to search in
chrUCS4 value to look for
Returns
Character index of found character, or -1 (i.e. (uint32_t) -1, since the return type is unsigned) if none found

◆ dom_string_insert()

dom_exception dom_string_insert ( dom_string * target,
dom_string * source,
uint32_t offset,
dom_string ** result )

Insert data into a dom string at the given location

Parameters
targetPointer to string to insert into
sourcePointer to string to insert
offsetCharacter offset of location to insert at
resultPointer to location to receive result
Returns
DOM_NO_ERR on success, DOM_NO_MEM_ERR on memory exhaustion, DOM_INDEX_SIZE_ERR if offset > len(target).

The returned string will have its reference count increased. The client should dereference it once it has finished with it.

◆ dom_string_intern()

dom_exception dom_string_intern ( dom_string * str,
struct lwc_string_s ** lwcstr )

Intern the given dom_string

Parameters
strThe dom_string to be interned
lwcstrThe result lwc_string
Returns
DOM_NO_ERR on success, appropriate dom_exception on failure.

◆ dom_string_isequal()

bool dom_string_isequal ( const dom_string * s1,
const dom_string * s2 )

Case sensitively compare two DOM strings

Parameters
s1The first string to compare
s2The second string to compare
Returns
true if strings match, false otherwise

◆ dom_string_length()

uint32_t dom_string_length ( dom_string * str)

Get the length, in characters, of a dom string

Parameters
strThe string to measure the length of
Returns
The length of the string, in characters

◆ dom_string_lwc_isequal()

bool dom_string_lwc_isequal ( const dom_string * s1,
lwc_string * s2 )

Case sensitively compare DOM string with lwc_string

Parameters
s1The first string to compare
s2The second string to compare
Returns
true if strings match, false otherwise

Returns false if either is NULL.

◆ dom_string_replace()

dom_exception dom_string_replace ( dom_string * target,
dom_string * source,
uint32_t i1,
uint32_t i2,
dom_string ** result )

Replace a section of a dom string

Parameters
targetPointer to string of which to replace a section
sourcePointer to replacement string
i1Character index of start of region to replace
i2Character index of end of region to replace
resultPointer to location to receive result
Returns
DOM_NO_ERR on success, DOM_NO_MEM_ERR on memory exhaustion.

The returned string will have its reference count increased. The client should dereference it once it has finished with it.

◆ dom_string_rindex()

uint32_t dom_string_rindex ( dom_string * str,
uint32_t chr )

Get the index of the last occurrence of a character in a dom string

Parameters
strThe string to search in
chrUCS4 value to look for
Returns
Character index of found character, or -1 (i.e. (uint32_t) -1, since the return type is unsigned) if none found

◆ dom_string_substr()

dom_exception dom_string_substr ( dom_string * str,
uint32_t i1,
uint32_t i2,
dom_string ** result )

Extract a substring from a dom string

Parameters
strThe string to extract from
i1The character index of the start of the substring
i2The character index of the end of the substring
resultPointer to location to receive result
Returns
DOM_NO_ERR on success, DOM_NO_MEM_ERR on memory exhaustion

The returned string will have its reference count increased. The client should dereference it once it has finished with it.

◆ dom_string_tolower()

dom_exception dom_string_tolower ( dom_string * source,
bool ascii_only,
dom_string ** lower )

Convert the given string to lowercase

Parameters
sourceThe string to convert to lowercase
ascii_onlyWhether to only convert [A-Z] to [a-z]
lowerResult pointer for lowercase string. Caller owns ref
Returns
DOM_NO_ERR on success.
Note
Right now, will return DOM_NOT_SUPPORTED_ERR if ascii_only is false.

◆ dom_string_toupper()

dom_exception dom_string_toupper ( dom_string * source,
bool ascii_only,
dom_string ** upper )

Convert the given string to uppercase

Parameters
sourceThe string to convert to uppercase
ascii_onlyWhether to only convert [a-z] to [A-Z]
upperResult pointer for uppercase string. Caller owns ref
Returns
DOM_NO_ERR on success.
Note
Right now, will return DOM_NOT_SUPPORTED_ERR if ascii_only is false.

◆ dom_string_whitespace_op()

dom_exception dom_string_whitespace_op ( dom_string * s,
enum dom_whitespace_op op,
dom_string ** ret )

Perform whitespace operations on given string

Parameters
sGiven string
opWhitespace operation(s) to perform
retNew string with whitespace ops performed. Caller owns ref
Returns
DOM_NO_ERR on success.
Note
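Right now, will return DOM_NOT_SUPPORTED_ERR if ascii_only is false. (NOTE(review): dom_string_whitespace_op() takes no ascii_only parameter; this note appears to have been copied from dom_string_toupper()/dom_string_tolower() and should be confirmed against the implementation.)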