TI_FLAG_BYTE_CWP
/*
 * Evaluates to the byte at index TI_FLAG_BYTE_CWP of
 * __cur_thread_flag_byte_ptr. Takes no arguments.
 * NOTE(review): presumably this byte holds the current thread's saved
 * CWP value -- confirm against the definitions of
 * __cur_thread_flag_byte_ptr and TI_FLAG_BYTE_CWP.
 */
#define get_thread_cwp() (__cur_thread_flag_byte_ptr[TI_FLAG_BYTE_CWP])
/*
 * Stores val into the byte at index TI_FLAG_BYTE_CWP of
 * __cur_thread_flag_byte_ptr. val is parenthesized, so any expression
 * may be passed; the macro expands to an assignment expression and
 * therefore yields the value that was stored.
 * NOTE(review): the element type of __cur_thread_flag_byte_ptr is not
 * visible here -- if it is a byte type, wider values of val are
 * truncated on store; confirm against its definition.
 */
#define set_thread_cwp(val) (__cur_thread_flag_byte_ptr[TI_FLAG_BYTE_CWP] = (val))
/*
 * Sum of TI_FLAGS and TI_FLAG_BYTE_CWP.
 * NOTE(review): presumably the byte offset of the CWP byte from the
 * start of the thread-info structure (offset of the flags field plus
 * the byte index within it) -- confirm against the TI_FLAGS definition.
 */
#define TI_CWP (TI_FLAGS + TI_FLAG_BYTE_CWP)
cwp = __thread_flag_byte_ptr(t)[TI_FLAG_BYTE_CWP];
__thread_flag_byte_ptr(t)[TI_FLAG_BYTE_CWP] =
__thread_flag_byte_ptr(t)[TI_FLAG_BYTE_CWP] =