diff options
Diffstat (limited to 'erts/emulator/sys')
26 files changed, 3106 insertions, 7262 deletions
diff --git a/erts/emulator/sys/common/erl_check_io.c b/erts/emulator/sys/common/erl_check_io.c index 105b129065..f87196d724 100644 --- a/erts/emulator/sys/common/erl_check_io.c +++ b/erts/emulator/sys/common/erl_check_io.c @@ -1337,11 +1337,7 @@ print_select_op(erts_dsprintf_buf_t *dsbufp, { Port *pp = erts_drvport2port(ix); erts_dsprintf(dsbufp, -#ifdef __OSE__ - "driver_select(%p, %d,%s%s%s%s | %d, %d) " -#else "driver_select(%p, %d,%s%s%s%s, %d) " -#endif "by ", ix, (int) GET_FD(fd), @@ -1861,25 +1857,6 @@ stale_drv_select(Eterm id, ErtsDrvEventState *state, int mode) #ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS -#ifdef __OSE__ -static SafeHashValue drv_ev_state_hash(void *des) -{ - ErtsSysFdType fd = ((ErtsDrvEventState *) des)->fd; - /* We use hash on signo ^ id in order for steal to happen when the - same signo + fd is selected on by two different ports */ - SafeHashValue val = (SafeHashValue)(fd->signo ^ fd->id); - return val ^ (val >> 8); -} - -static int drv_ev_state_cmp(void *des1, void *des2) -{ - ErtsSysFdType fd1 = ((ErtsDrvEventState *) des1)->fd; - ErtsSysFdType fd2 = ((ErtsDrvEventState *) des2)->fd; - if (fd1->signo == fd2->signo && fd1->id == fd2->id) - return 0; - return 1; -} -#else /* !__OSE__ && !ERTS_SYS_CONTINOUS_FD_NUMBERS i.e. probably windows */ static SafeHashValue drv_ev_state_hash(void *des) { SafeHashValue val = (SafeHashValue) ((ErtsDrvEventState *) des)->fd; @@ -1891,7 +1868,6 @@ static int drv_ev_state_cmp(void *des1, void *des2) return ( ((ErtsDrvEventState *) des1)->fd == ((ErtsDrvEventState *) des2)->fd ? 
0 : 1); } -#endif static void *drv_ev_state_alloc(void *des_tmpl) { diff --git a/erts/emulator/sys/common/erl_mmap.c b/erts/emulator/sys/common/erl_mmap.c index 754047829f..03ca080c14 100644 --- a/erts/emulator/sys/common/erl_mmap.c +++ b/erts/emulator/sys/common/erl_mmap.c @@ -51,23 +51,22 @@ #endif /* - * `mmap_state.sa.bot` and `mmap_state.sua.top` are read only after + * `mm->sa.bot` and `mm->sua.top` are read only after * initialization, but the other pointers are not; i.e., only * ERTS_MMAP_IN_SUPERCARRIER() is allowed without the mutex held. */ #define ERTS_MMAP_IN_SUPERCARRIER(PTR) \ - (((UWord) (PTR)) - ((UWord) mmap_state.sa.bot) \ - < ((UWord) mmap_state.sua.top) - ((UWord) mmap_state.sa.bot)) + (((UWord) (PTR)) - ((UWord) mm->sa.bot) \ + < ((UWord) mm->sua.top) - ((UWord) mm->sa.bot)) #define ERTS_MMAP_IN_SUPERALIGNED_AREA(PTR) \ - (ERTS_SMP_LC_ASSERT(erts_lc_mtx_is_locked(&mmap_state.mtx)), \ - (((UWord) (PTR)) - ((UWord) mmap_state.sa.bot) \ - < ((UWord) mmap_state.sa.top) - ((UWord) mmap_state.sa.bot))) + (ERTS_SMP_LC_ASSERT(erts_lc_mtx_is_locked(&mm->mtx)), \ + (((UWord) (PTR)) - ((UWord) mm->sa.bot) \ + < ((UWord) mm->sa.top) - ((UWord) mm->sa.bot))) #define ERTS_MMAP_IN_SUPERUNALIGNED_AREA(PTR) \ - (ERTS_SMP_LC_ASSERT(erts_lc_mtx_is_locked(&mmap_state.mtx)), \ - (((UWord) (PTR)) - ((UWord) mmap_state.sua.bot) \ - < ((UWord) mmap_state.sua.top) - ((UWord) mmap_state.sua.bot))) + (ERTS_SMP_LC_ASSERT(erts_lc_mtx_is_locked(&mm->mtx)), \ + (((UWord) (PTR)) - ((UWord) mm->sua.bot) \ + < ((UWord) mm->sua.top) - ((UWord) mm->sua.bot))) -int erts_have_erts_mmap; UWord erts_page_inv_mask; #if defined(DEBUG) || defined(ERTS_MMAP_DEBUG) @@ -197,10 +196,10 @@ static ErtsMMapOp mmap_ops[ERTS_MMAP_OP_RINGBUF_SZ]; #define ERTS_MMAP_OP_LCK(RES, IN_SZ, OUT_SZ) \ do { \ - erts_smp_mtx_lock(&mmap_state.mtx); \ + erts_smp_mtx_lock(&mm->mtx); \ ERTS_MMAP_OP_START((IN_SZ)); \ ERTS_MMAP_OP_END((RES), (OUT_SZ)); \ - erts_smp_mtx_unlock(&mmap_state.mtx); \ + 
erts_smp_mtx_unlock(&mm->mtx); \ } while (0) #define ERTS_MUNMAP_OP(PTR, SZ) \ @@ -219,9 +218,9 @@ static ErtsMMapOp mmap_ops[ERTS_MMAP_OP_RINGBUF_SZ]; #define ERTS_MUNMAP_OP_LCK(PTR, SZ) \ do { \ - erts_smp_mtx_lock(&mmap_state.mtx); \ + erts_smp_mtx_lock(&mm->mtx); \ ERTS_MUNMAP_OP((PTR), (SZ)); \ - erts_smp_mtx_unlock(&mmap_state.mtx); \ + erts_smp_mtx_unlock(&mm->mtx); \ } while (0) #define ERTS_MREMAP_OP_START(OLD_PTR, OLD_SZ, IN_SZ) \ @@ -247,10 +246,10 @@ static ErtsMMapOp mmap_ops[ERTS_MMAP_OP_RINGBUF_SZ]; #define ERTS_MREMAP_OP_LCK(RES, OLD_PTR, OLD_SZ, IN_SZ, OUT_SZ) \ do { \ - erts_smp_mtx_lock(&mmap_state.mtx); \ + erts_smp_mtx_lock(&mm->mtx); \ ERTS_MREMAP_OP_START((OLD_PTR), (OLD_SZ), (IN_SZ)); \ ERTS_MREMAP_OP_END((RES), (OUT_SZ)); \ - erts_smp_mtx_unlock(&mmap_state.mtx); \ + erts_smp_mtx_unlock(&mm->mtx); \ } while (0) #define ERTS_MMAP_OP_ABORT() \ @@ -294,7 +293,7 @@ typedef struct { Uint nseg; }ErtsFreeSegMap; -static struct { +struct ErtsMemMapper_ { int (*reserve_physical)(char *, UWord); void (*unreserve_physical)(char *, UWord); int supercarrier; @@ -346,54 +345,62 @@ static struct { UWord used; } os; } size; -} mmap_state; +}; + +ErtsMemMapper erts_dflt_mmapper; + +#if defined(ARCH_64) && defined(ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION) +ErtsMemMapper erts_literal_mmapper; +char* erts_literals_start; +UWord erts_literals_size; +#endif #define ERTS_MMAP_SIZE_SC_SA_INC(SZ) \ do { \ - mmap_state.size.supercarrier.used.total += (SZ); \ - mmap_state.size.supercarrier.used.sa += (SZ); \ - ERTS_MMAP_ASSERT(mmap_state.size.supercarrier.used.total \ - <= mmap_state.size.supercarrier.total); \ - ERTS_MMAP_ASSERT(mmap_state.size.supercarrier.used.sa \ - <= mmap_state.size.supercarrier.used.total); \ + mm->size.supercarrier.used.total += (SZ); \ + mm->size.supercarrier.used.sa += (SZ); \ + ERTS_MMAP_ASSERT(mm->size.supercarrier.used.total \ + <= mm->size.supercarrier.total); \ + ERTS_MMAP_ASSERT(mm->size.supercarrier.used.sa \ + <= 
mm->size.supercarrier.used.total); \ } while (0) #define ERTS_MMAP_SIZE_SC_SA_DEC(SZ) \ do { \ - ERTS_MMAP_ASSERT(mmap_state.size.supercarrier.used.total >= (SZ)); \ - mmap_state.size.supercarrier.used.total -= (SZ); \ - ERTS_MMAP_ASSERT(mmap_state.size.supercarrier.used.sa >= (SZ)); \ - mmap_state.size.supercarrier.used.sa -= (SZ); \ + ERTS_MMAP_ASSERT(mm->size.supercarrier.used.total >= (SZ)); \ + mm->size.supercarrier.used.total -= (SZ); \ + ERTS_MMAP_ASSERT(mm->size.supercarrier.used.sa >= (SZ)); \ + mm->size.supercarrier.used.sa -= (SZ); \ } while (0) #define ERTS_MMAP_SIZE_SC_SUA_INC(SZ) \ do { \ - mmap_state.size.supercarrier.used.total += (SZ); \ - mmap_state.size.supercarrier.used.sua += (SZ); \ - ERTS_MMAP_ASSERT(mmap_state.size.supercarrier.used.total \ - <= mmap_state.size.supercarrier.total); \ - ERTS_MMAP_ASSERT(mmap_state.size.supercarrier.used.sua \ - <= mmap_state.size.supercarrier.used.total); \ + mm->size.supercarrier.used.total += (SZ); \ + mm->size.supercarrier.used.sua += (SZ); \ + ERTS_MMAP_ASSERT(mm->size.supercarrier.used.total \ + <= mm->size.supercarrier.total); \ + ERTS_MMAP_ASSERT(mm->size.supercarrier.used.sua \ + <= mm->size.supercarrier.used.total); \ } while (0) #define ERTS_MMAP_SIZE_SC_SUA_DEC(SZ) \ do { \ - ERTS_MMAP_ASSERT(mmap_state.size.supercarrier.used.total >= (SZ)); \ - mmap_state.size.supercarrier.used.total -= (SZ); \ - ERTS_MMAP_ASSERT(mmap_state.size.supercarrier.used.sua >= (SZ)); \ - mmap_state.size.supercarrier.used.sua -= (SZ); \ + ERTS_MMAP_ASSERT(mm->size.supercarrier.used.total >= (SZ)); \ + mm->size.supercarrier.used.total -= (SZ); \ + ERTS_MMAP_ASSERT(mm->size.supercarrier.used.sua >= (SZ)); \ + mm->size.supercarrier.used.sua -= (SZ); \ } while (0) #define ERTS_MMAP_SIZE_OS_INC(SZ) \ do { \ - ERTS_MMAP_ASSERT(mmap_state.size.os.used + (SZ) >= (SZ)); \ - mmap_state.size.os.used += (SZ); \ + ERTS_MMAP_ASSERT(mm->size.os.used + (SZ) >= (SZ)); \ + mm->size.os.used += (SZ); \ } while (0) #define 
ERTS_MMAP_SIZE_OS_DEC(SZ) \ do { \ - ERTS_MMAP_ASSERT(mmap_state.size.os.used >= (SZ)); \ - mmap_state.size.os.used -= (SZ); \ + ERTS_MMAP_ASSERT(mm->size.os.used >= (SZ)); \ + mm->size.os.used -= (SZ); \ } while (0) static void -add_free_desc_area(char *start, char *end) +add_free_desc_area(ErtsMemMapper* mm, char *start, char *end) { ERTS_MMAP_ASSERT(end == (void *) 0 || end > start); if (sizeof(ErtsFreeSegDesc) <= ((UWord) end) - ((UWord) start)) { @@ -403,7 +410,7 @@ add_free_desc_area(char *start, char *end) no = 1; prev_desc = (ErtsFreeSegDesc *) start; - prev_desc->start = mmap_state.desc.free_list; + prev_desc->start = mm->desc.free_list; desc = (ErtsFreeSegDesc *) (start + sizeof(ErtsFreeSegDesc)); desc_end = start + 2*sizeof(ErtsFreeSegDesc); @@ -414,59 +421,59 @@ add_free_desc_area(char *start, char *end) desc_end += sizeof(ErtsFreeSegDesc); no++; } - mmap_state.desc.free_list = (char *) prev_desc; - mmap_state.no.free_seg_descs += no; + mm->desc.free_list = (char *) prev_desc; + mm->no.free_seg_descs += no; } } static ErtsFreeSegDesc * -add_unused_free_desc_area(void) +add_unused_free_desc_area(ErtsMemMapper* mm) { char *ptr; - if (!mmap_state.desc.unused_start) + if (!mm->desc.unused_start) return NULL; - ERTS_MMAP_ASSERT(mmap_state.desc.unused_end); + ERTS_MMAP_ASSERT(mm->desc.unused_end); ERTS_MMAP_ASSERT(ERTS_PAGEALIGNED_SIZE - <= mmap_state.desc.unused_end - mmap_state.desc.unused_start); + <= mm->desc.unused_end - mm->desc.unused_start); - ptr = mmap_state.desc.unused_start + ERTS_PAGEALIGNED_SIZE; - add_free_desc_area(mmap_state.desc.unused_start, ptr); + ptr = mm->desc.unused_start + ERTS_PAGEALIGNED_SIZE; + add_free_desc_area(mm, mm->desc.unused_start, ptr); - if ((mmap_state.desc.unused_end - ptr) >= ERTS_PAGEALIGNED_SIZE) - mmap_state.desc.unused_start = ptr; + if ((mm->desc.unused_end - ptr) >= ERTS_PAGEALIGNED_SIZE) + mm->desc.unused_start = ptr; else - mmap_state.desc.unused_end = mmap_state.desc.unused_start = NULL; + mm->desc.unused_end 
= mm->desc.unused_start = NULL; - ERTS_MMAP_ASSERT(mmap_state.desc.free_list); - return (ErtsFreeSegDesc *) mmap_state.desc.free_list; + ERTS_MMAP_ASSERT(mm->desc.free_list); + return (ErtsFreeSegDesc *) mm->desc.free_list; } static ERTS_INLINE ErtsFreeSegDesc * -alloc_desc(void) +alloc_desc(ErtsMemMapper* mm) { ErtsFreeSegDesc *res; - res = (ErtsFreeSegDesc *) mmap_state.desc.free_list; + res = (ErtsFreeSegDesc *) mm->desc.free_list; if (!res) { - res = add_unused_free_desc_area(); + res = add_unused_free_desc_area(mm); if (!res) return NULL; } - mmap_state.desc.free_list = res->start; - ASSERT(mmap_state.no.free_segs.curr < mmap_state.no.free_seg_descs); - mmap_state.no.free_segs.curr++; - if (mmap_state.no.free_segs.max < mmap_state.no.free_segs.curr) - mmap_state.no.free_segs.max = mmap_state.no.free_segs.curr; + mm->desc.free_list = res->start; + ASSERT(mm->no.free_segs.curr < mm->no.free_seg_descs); + mm->no.free_segs.curr++; + if (mm->no.free_segs.max < mm->no.free_segs.curr) + mm->no.free_segs.max = mm->no.free_segs.curr; return res; } static ERTS_INLINE void -free_desc(ErtsFreeSegDesc *desc) +free_desc(ErtsMemMapper* mm, ErtsFreeSegDesc *desc) { - desc->start = mmap_state.desc.free_list; - mmap_state.desc.free_list = (char *) desc; - ERTS_MMAP_ASSERT(mmap_state.no.free_segs.curr > 0); - mmap_state.no.free_segs.curr--; + desc->start = mm->desc.free_list; + mm->desc.free_list = (char *) desc; + ERTS_MMAP_ASSERT(mm->no.free_segs.curr > 0); + mm->no.free_segs.curr--; } static ERTS_INLINE ErtsFreeSegDesc* anode_to_desc(RBTNode* anode) @@ -1233,7 +1240,7 @@ Eterm build_free_seg_list(Process* p, ErtsFreeSegMap* map) # define ERTS_MMAP_FD (-1) # else # define ERTS_MMAP_FLAGS (MAP_PRIVATE) -# define ERTS_MMAP_FD mmap_state.mmap_fd +# define ERTS_MMAP_FD mm->mmap_fd # endif #endif @@ -1378,11 +1385,12 @@ static void unreserve_noop(char *ptr, UWord size) } static UWord -alloc_desc_insert_free_seg(ErtsFreeSegMap *map, char* start, char* end) 
+alloc_desc_insert_free_seg(ErtsMemMapper* mm, + ErtsFreeSegMap *map, char* start, char* end) { char *ptr; ErtsFreeSegMap *da_map; - ErtsFreeSegDesc *desc = alloc_desc(); + ErtsFreeSegDesc *desc = alloc_desc(mm); if (desc) { insert_free_seg(map, desc, start, end); return 0; @@ -1395,13 +1403,13 @@ alloc_desc_insert_free_seg(ErtsFreeSegMap *map, char* start, char* end) */ #if ERTS_HAVE_OS_MMAP - if (!mmap_state.no_os_mmap) { - ptr = os_mmap(mmap_state.desc.new_area_hint, ERTS_PAGEALIGNED_SIZE, 0); + if (!mm->no_os_mmap) { + ptr = os_mmap(mm->desc.new_area_hint, ERTS_PAGEALIGNED_SIZE, 0); if (ptr) { - mmap_state.desc.new_area_hint = ptr+ERTS_PAGEALIGNED_SIZE; + mm->desc.new_area_hint = ptr+ERTS_PAGEALIGNED_SIZE; ERTS_MMAP_SIZE_OS_INC(ERTS_PAGEALIGNED_SIZE); - add_free_desc_area(ptr, ptr+ERTS_PAGEALIGNED_SIZE); - desc = alloc_desc(); + add_free_desc_area(mm, ptr, ptr+ERTS_PAGEALIGNED_SIZE); + desc = alloc_desc(mm); ERTS_MMAP_ASSERT(desc); insert_free_seg(map, desc, start, end); return 0; @@ -1412,20 +1420,20 @@ alloc_desc_insert_free_seg(ErtsFreeSegMap *map, char* start, char* end) /* * ...then try to find a good place in the supercarrier... 
*/ - da_map = &mmap_state.sua.map; + da_map = &mm->sua.map; desc = lookup_free_seg(da_map, ERTS_PAGEALIGNED_SIZE); if (desc) { - if (mmap_state.reserve_physical(desc->start, ERTS_PAGEALIGNED_SIZE)) + if (mm->reserve_physical(desc->start, ERTS_PAGEALIGNED_SIZE)) ERTS_MMAP_SIZE_SC_SUA_INC(ERTS_PAGEALIGNED_SIZE); else desc = NULL; } else { - da_map = &mmap_state.sa.map; + da_map = &mm->sa.map; desc = lookup_free_seg(da_map, ERTS_PAGEALIGNED_SIZE); if (desc) { - if (mmap_state.reserve_physical(desc->start, ERTS_PAGEALIGNED_SIZE)) + if (mm->reserve_physical(desc->start, ERTS_PAGEALIGNED_SIZE)) ERTS_MMAP_SIZE_SC_SA_INC(ERTS_PAGEALIGNED_SIZE); else desc = NULL; @@ -1433,15 +1441,15 @@ alloc_desc_insert_free_seg(ErtsFreeSegMap *map, char* start, char* end) } if (desc) { char *da_end = desc->start + ERTS_PAGEALIGNED_SIZE; - add_free_desc_area(desc->start, da_end); + add_free_desc_area(mm, desc->start, da_end); if (da_end != desc->end) resize_free_seg(da_map, desc, da_end, desc->end); else { delete_free_seg(da_map, desc); - free_desc(desc); + free_desc(mm, desc); } - desc = alloc_desc(); + desc = alloc_desc(mm); ERTS_MMAP_ASSERT(desc); insert_free_seg(map, desc, start, end); return 0; @@ -1454,10 +1462,10 @@ alloc_desc_insert_free_seg(ErtsFreeSegMap *map, char* start, char* end) ptr = start + ERTS_PAGEALIGNED_SIZE; ERTS_MMAP_ASSERT(ptr <= end); - add_free_desc_area(start, ptr); + add_free_desc_area(mm, start, ptr); if (ptr != end) { - desc = alloc_desc(); + desc = alloc_desc(mm); ERTS_MMAP_ASSERT(desc); insert_free_seg(map, desc, ptr, end); } @@ -1466,46 +1474,46 @@ alloc_desc_insert_free_seg(ErtsFreeSegMap *map, char* start, char* end) } void * -erts_mmap(Uint32 flags, UWord *sizep) +erts_mmap(ErtsMemMapper* mm, Uint32 flags, UWord *sizep) { char *seg; UWord asize = ERTS_PAGEALIGNED_CEILING(*sizep); /* Map in premapped supercarrier */ - if (mmap_state.supercarrier && !(ERTS_MMAPFLG_OS_ONLY & flags)) { + if (mm->supercarrier && !(ERTS_MMAPFLG_OS_ONLY & flags)) { char *end; 
ErtsFreeSegDesc *desc; Uint32 superaligned = (ERTS_MMAPFLG_SUPERALIGNED & flags); - erts_smp_mtx_lock(&mmap_state.mtx); + erts_smp_mtx_lock(&mm->mtx); ERTS_MMAP_OP_START(*sizep); if (!superaligned) { - desc = lookup_free_seg(&mmap_state.sua.map, asize); + desc = lookup_free_seg(&mm->sua.map, asize); if (desc) { seg = desc->start; end = seg+asize; - if (!mmap_state.reserve_physical(seg, asize)) + if (!mm->reserve_physical(seg, asize)) goto supercarrier_reserve_failure; if (desc->end == end) { - delete_free_seg(&mmap_state.sua.map, desc); - free_desc(desc); + delete_free_seg(&mm->sua.map, desc); + free_desc(mm, desc); } else { ERTS_MMAP_ASSERT(end < desc->end); - resize_free_seg(&mmap_state.sua.map, desc, end, desc->end); + resize_free_seg(&mm->sua.map, desc, end, desc->end); } ERTS_MMAP_SIZE_SC_SUA_INC(asize); goto supercarrier_success; } - if (asize <= mmap_state.sua.bot - mmap_state.sa.top) { - if (!mmap_state.reserve_physical(mmap_state.sua.bot - asize, + if (asize <= mm->sua.bot - mm->sa.top) { + if (!mm->reserve_physical(mm->sua.bot - asize, asize)) goto supercarrier_reserve_failure; - mmap_state.sua.bot -= asize; - seg = mmap_state.sua.bot; + mm->sua.bot -= asize; + seg = mm->sua.bot; ERTS_MMAP_SIZE_SC_SUA_INC(asize); goto supercarrier_success; } @@ -1513,84 +1521,84 @@ erts_mmap(Uint32 flags, UWord *sizep) asize = ERTS_SUPERALIGNED_CEILING(asize); - desc = lookup_free_seg(&mmap_state.sa.map, asize); + desc = lookup_free_seg(&mm->sa.map, asize); if (desc) { char *start = seg = desc->start; seg = (char *) ERTS_SUPERALIGNED_CEILING(seg); end = seg+asize; - if (!mmap_state.reserve_physical(start, (UWord) (end - start))) + if (!mm->reserve_physical(start, (UWord) (end - start))) goto supercarrier_reserve_failure; ERTS_MMAP_SIZE_SC_SA_INC(asize); if (desc->end == end) { if (start != seg) - resize_free_seg(&mmap_state.sa.map, desc, start, seg); + resize_free_seg(&mm->sa.map, desc, start, seg); else { - delete_free_seg(&mmap_state.sa.map, desc); - free_desc(desc); + 
delete_free_seg(&mm->sa.map, desc); + free_desc(mm, desc); } } else { ERTS_MMAP_ASSERT(end < desc->end); - resize_free_seg(&mmap_state.sa.map, desc, end, desc->end); + resize_free_seg(&mm->sa.map, desc, end, desc->end); if (start != seg) { UWord ad_sz; - ad_sz = alloc_desc_insert_free_seg(&mmap_state.sua.map, + ad_sz = alloc_desc_insert_free_seg(mm, &mm->sua.map, start, seg); start += ad_sz; if (start != seg) - mmap_state.unreserve_physical(start, (UWord) (seg - start)); + mm->unreserve_physical(start, (UWord) (seg - start)); } } goto supercarrier_success; } if (superaligned) { - char *start = mmap_state.sa.top; + char *start = mm->sa.top; seg = (char *) ERTS_SUPERALIGNED_CEILING(start); - if (asize + (seg - start) <= mmap_state.sua.bot - start) { + if (asize + (seg - start) <= mm->sua.bot - start) { end = seg + asize; - if (!mmap_state.reserve_physical(start, (UWord) (end - start))) + if (!mm->reserve_physical(start, (UWord) (end - start))) goto supercarrier_reserve_failure; - mmap_state.sa.top = end; + mm->sa.top = end; ERTS_MMAP_SIZE_SC_SA_INC(asize); if (start != seg) { UWord ad_sz; - ad_sz = alloc_desc_insert_free_seg(&mmap_state.sua.map, + ad_sz = alloc_desc_insert_free_seg(mm, &mm->sua.map, start, seg); start += ad_sz; if (start != seg) - mmap_state.unreserve_physical(start, (UWord) (seg - start)); + mm->unreserve_physical(start, (UWord) (seg - start)); } goto supercarrier_success; } - desc = lookup_free_seg(&mmap_state.sua.map, asize + ERTS_SUPERALIGNED_SIZE); + desc = lookup_free_seg(&mm->sua.map, asize + ERTS_SUPERALIGNED_SIZE); if (desc) { char *org_start = desc->start; char *org_end = desc->end; seg = (char *) ERTS_SUPERALIGNED_CEILING(org_start); end = seg + asize; - if (!mmap_state.reserve_physical(seg, (UWord) (org_end - seg))) + if (!mm->reserve_physical(seg, (UWord) (org_end - seg))) goto supercarrier_reserve_failure; ERTS_MMAP_SIZE_SC_SUA_INC(asize); if (org_start != seg) { ERTS_MMAP_ASSERT(org_start < seg); - resize_free_seg(&mmap_state.sua.map, 
desc, org_start, seg); + resize_free_seg(&mm->sua.map, desc, org_start, seg); desc = NULL; } if (end != org_end) { UWord ad_sz = 0; ERTS_MMAP_ASSERT(end < org_end); if (desc) - resize_free_seg(&mmap_state.sua.map, desc, end, org_end); + resize_free_seg(&mm->sua.map, desc, end, org_end); else - ad_sz = alloc_desc_insert_free_seg(&mmap_state.sua.map, + ad_sz = alloc_desc_insert_free_seg(mm, &mm->sua.map, end, org_end); end += ad_sz; if (end != org_end) - mmap_state.unreserve_physical(end, + mm->unreserve_physical(end, (UWord) (org_end - end)); } goto supercarrier_success; @@ -1598,12 +1606,12 @@ erts_mmap(Uint32 flags, UWord *sizep) } ERTS_MMAP_OP_ABORT(); - erts_smp_mtx_unlock(&mmap_state.mtx); + erts_smp_mtx_unlock(&mm->mtx); } #if ERTS_HAVE_OS_MMAP /* Map using OS primitives */ - if (!(ERTS_MMAPFLG_SUPERCARRIER_ONLY & flags) && !mmap_state.no_os_mmap) { + if (!(ERTS_MMAPFLG_SUPERCARRIER_ONLY & flags) && !mm->no_os_mmap) { if (!(ERTS_MMAPFLG_SUPERALIGNED & flags)) { seg = os_mmap(NULL, asize, 0); if (!seg) @@ -1661,25 +1669,25 @@ supercarrier_success: #endif ERTS_MMAP_OP_END(seg, asize); - erts_smp_mtx_unlock(&mmap_state.mtx); + erts_smp_mtx_unlock(&mm->mtx); *sizep = asize; return (void *) seg; supercarrier_reserve_failure: - erts_smp_mtx_unlock(&mmap_state.mtx); + erts_smp_mtx_unlock(&mm->mtx); *sizep = 0; return NULL; } void -erts_munmap(Uint32 flags, void *ptr, UWord size) +erts_munmap(ErtsMemMapper* mm, Uint32 flags, void *ptr, UWord size) { ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(ptr)); ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(size)); if (!ERTS_MMAP_IN_SUPERCARRIER(ptr)) { - ERTS_MMAP_ASSERT(!mmap_state.no_os_mmap); + ERTS_MMAP_ASSERT(!mm->no_os_mmap); #if ERTS_HAVE_OS_MMAP ERTS_MUNMAP_OP_LCK(ptr, size); ERTS_MMAP_SIZE_OS_DEC(size); @@ -1692,45 +1700,45 @@ erts_munmap(Uint32 flags, void *ptr, UWord size) ErtsFreeSegDesc *prev, *next, *desc; UWord ad_sz = 0; - ERTS_MMAP_ASSERT(mmap_state.supercarrier); + ERTS_MMAP_ASSERT(mm->supercarrier); start = (char *) ptr; end = 
start + size; - erts_smp_mtx_lock(&mmap_state.mtx); + erts_smp_mtx_lock(&mm->mtx); ERTS_MUNMAP_OP(ptr, size); if (ERTS_MMAP_IN_SUPERALIGNED_AREA(ptr)) { - map = &mmap_state.sa.map; + map = &mm->sa.map; adjacent_free_seg(map, start, end, &prev, &next); ERTS_MMAP_SIZE_SC_SA_DEC(size); - if (end == mmap_state.sa.top) { + if (end == mm->sa.top) { ERTS_MMAP_ASSERT(!next); if (prev) { start = prev->start; delete_free_seg(map, prev); - free_desc(prev); + free_desc(mm, prev); } - mmap_state.sa.top = start; + mm->sa.top = start; goto supercarrier_success; } } else { - map = &mmap_state.sua.map; + map = &mm->sua.map; adjacent_free_seg(map, start, end, &prev, &next); ERTS_MMAP_SIZE_SC_SUA_DEC(size); - if (start == mmap_state.sua.bot) { + if (start == mm->sua.bot) { ERTS_MMAP_ASSERT(!prev); if (next) { end = next->end; delete_free_seg(map, next); - free_desc(next); + free_desc(mm, next); } - mmap_state.sua.bot = end; + mm->sua.bot = end; goto supercarrier_success; } } @@ -1742,7 +1750,7 @@ erts_munmap(Uint32 flags, void *ptr, UWord size) end = next->end; if (prev) { delete_free_seg(map, next); - free_desc(next); + free_desc(mm, next); goto save_prev; } desc = next; @@ -1756,7 +1764,7 @@ erts_munmap(Uint32 flags, void *ptr, UWord size) if (desc) resize_free_seg(map, desc, start, end); else - ad_sz = alloc_desc_insert_free_seg(map, start, end); + ad_sz = alloc_desc_insert_free_seg(mm, map, start, end); supercarrier_success: { UWord unres_sz; @@ -1764,30 +1772,32 @@ erts_munmap(Uint32 flags, void *ptr, UWord size) ERTS_MMAP_ASSERT(size >= ad_sz); unres_sz = size - ad_sz; if (unres_sz) - mmap_state.unreserve_physical(((char *) ptr) + ad_sz, unres_sz); + mm->unreserve_physical(((char *) ptr) + ad_sz, unres_sz); - erts_smp_mtx_unlock(&mmap_state.mtx); + erts_smp_mtx_unlock(&mm->mtx); } } } static void * -remap_move(Uint32 flags, void *ptr, UWord old_size, UWord *sizep) +remap_move(ErtsMemMapper* mm, + Uint32 flags, void *ptr, UWord old_size, UWord *sizep) { UWord size = *sizep; - 
void *new_ptr = erts_mmap(flags, &size); + void *new_ptr = erts_mmap(mm, flags, &size); if (!new_ptr) return NULL; *sizep = size; if (old_size < size) size = old_size; sys_memcpy(new_ptr, ptr, (size_t) size); - erts_munmap(flags, ptr, old_size); + erts_munmap(mm, flags, ptr, old_size); return new_ptr; } void * -erts_mremap(Uint32 flags, void *ptr, UWord old_size, UWord *sizep) +erts_mremap(ErtsMemMapper* mm, + Uint32 flags, void *ptr, UWord old_size, UWord *sizep) { void *new_ptr; Uint32 superaligned; @@ -1799,11 +1809,11 @@ erts_mremap(Uint32 flags, void *ptr, UWord old_size, UWord *sizep) if (!ERTS_MMAP_IN_SUPERCARRIER(ptr)) { - ERTS_MMAP_ASSERT(!mmap_state.no_os_mmap); + ERTS_MMAP_ASSERT(!mm->no_os_mmap); - if (!(ERTS_MMAPFLG_OS_ONLY & flags) && mmap_state.supercarrier) { - new_ptr = remap_move(ERTS_MMAPFLG_SUPERCARRIER_ONLY|flags, ptr, - old_size, sizep); + if (!(ERTS_MMAPFLG_OS_ONLY & flags) && mm->supercarrier) { + new_ptr = remap_move(mm, ERTS_MMAPFLG_SUPERCARRIER_ONLY|flags, + ptr, old_size, sizep); if (new_ptr) return new_ptr; } @@ -1850,7 +1860,7 @@ erts_mremap(Uint32 flags, void *ptr, UWord old_size, UWord *sizep) #endif #if ERTS_HAVE_OS_MREMAP if (superaligned) - return remap_move(flags, new_ptr, old_size, sizep); + return remap_move(mm, flags, new_ptr, old_size, sizep); else { new_ptr = os_mremap(ptr, old_size, asize, 0); if (!new_ptr) @@ -1872,10 +1882,10 @@ erts_mremap(Uint32 flags, void *ptr, UWord old_size, UWord *sizep) ErtsFreeSegDesc *prev, *next; UWord ad_sz = 0; - ERTS_MMAP_ASSERT(mmap_state.supercarrier); + ERTS_MMAP_ASSERT(mm->supercarrier); if (ERTS_MMAPFLG_OS_ONLY & flags) - return remap_move(flags, ptr, old_size, sizep); + return remap_move(mm, flags, ptr, old_size, sizep); superaligned = (ERTS_MMAPFLG_SUPERALIGNED & flags); @@ -1883,19 +1893,19 @@ erts_mremap(Uint32 flags, void *ptr, UWord old_size, UWord *sizep) ? 
ERTS_SUPERALIGNED_CEILING(*sizep) : ERTS_PAGEALIGNED_CEILING(*sizep)); - erts_smp_mtx_lock(&mmap_state.mtx); + erts_smp_mtx_lock(&mm->mtx); if (ERTS_MMAP_IN_SUPERALIGNED_AREA(ptr) - ? (!superaligned && lookup_free_seg(&mmap_state.sua.map, asize)) - : (superaligned && lookup_free_seg(&mmap_state.sa.map, asize))) { - erts_smp_mtx_unlock(&mmap_state.mtx); + ? (!superaligned && lookup_free_seg(&mm->sua.map, asize)) + : (superaligned && lookup_free_seg(&mm->sa.map, asize))) { + erts_smp_mtx_unlock(&mm->mtx); /* * Segment currently in wrong area (due to a previous memory * shortage), move it to the right area. * (remap_move() will succeed) */ - return remap_move(ERTS_MMAPFLG_SUPERCARRIER_ONLY|flags, ptr, - old_size, sizep); + return remap_move(mm, ERTS_MMAPFLG_SUPERCARRIER_ONLY|flags, + ptr, old_size, sizep); } ERTS_MREMAP_OP_START(ptr, old_size, *sizep); @@ -1917,18 +1927,18 @@ erts_mremap(Uint32 flags, void *ptr, UWord old_size, UWord *sizep) UWord unres_sz; new_ptr = ptr; if (!ERTS_MMAP_IN_SUPERALIGNED_AREA(ptr)) { - map = &mmap_state.sua.map; + map = &mm->sua.map; ERTS_MMAP_SIZE_SC_SUA_DEC(old_size - asize); } else { - if (end == mmap_state.sa.top) { - mmap_state.sa.top = new_end; - mmap_state.unreserve_physical(((char *) ptr) + asize, + if (end == mm->sa.top) { + mm->sa.top = new_end; + mm->unreserve_physical(((char *) ptr) + asize, old_size - asize); goto supercarrier_resize_success; } ERTS_MMAP_SIZE_SC_SA_DEC(old_size - asize); - map = &mmap_state.sa.map; + map = &mm->sa.map; } adjacent_free_seg(map, start, end, &prev, &next); @@ -1936,11 +1946,11 @@ erts_mremap(Uint32 flags, void *ptr, UWord old_size, UWord *sizep) if (next) resize_free_seg(map, next, new_end, next->end); else - ad_sz = alloc_desc_insert_free_seg(map, new_end, end); + ad_sz = alloc_desc_insert_free_seg(mm, map, new_end, end); ERTS_MMAP_ASSERT(old_size - asize >= ad_sz); unres_sz = old_size - asize - ad_sz; if (unres_sz) - mmap_state.unreserve_physical(((char *) ptr) + asize + ad_sz, + 
mm->unreserve_physical(((char *) ptr) + asize + ad_sz, unres_sz); goto supercarrier_resize_success; } @@ -1950,17 +1960,17 @@ erts_mremap(Uint32 flags, void *ptr, UWord old_size, UWord *sizep) ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(old_size)); ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(asize)); - adjacent_free_seg(&mmap_state.sua.map, start, end, &prev, &next); + adjacent_free_seg(&mm->sua.map, start, end, &prev, &next); if (next && new_end <= next->end) { - if (!mmap_state.reserve_physical(((char *) ptr) + old_size, + if (!mm->reserve_physical(((char *) ptr) + old_size, asize - old_size)) goto supercarrier_reserve_failure; if (new_end < next->end) - resize_free_seg(&mmap_state.sua.map, next, new_end, next->end); + resize_free_seg(&mm->sua.map, next, new_end, next->end); else { - delete_free_seg(&mmap_state.sua.map, next); - free_desc(next); + delete_free_seg(&mm->sua.map, next); + free_desc(mm, next); } new_ptr = ptr; ERTS_MMAP_SIZE_SC_SUA_INC(asize - old_size); @@ -1969,28 +1979,28 @@ erts_mremap(Uint32 flags, void *ptr, UWord old_size, UWord *sizep) } else { /* Superaligned area */ - if (end == mmap_state.sa.top) { - if (new_end <= mmap_state.sua.bot) { - if (!mmap_state.reserve_physical(((char *) ptr) + old_size, + if (end == mm->sa.top) { + if (new_end <= mm->sua.bot) { + if (!mm->reserve_physical(((char *) ptr) + old_size, asize - old_size)) goto supercarrier_reserve_failure; - mmap_state.sa.top = new_end; + mm->sa.top = new_end; new_ptr = ptr; ERTS_MMAP_SIZE_SC_SA_INC(asize - old_size); goto supercarrier_resize_success; } } else { - adjacent_free_seg(&mmap_state.sa.map, start, end, &prev, &next); + adjacent_free_seg(&mm->sa.map, start, end, &prev, &next); if (next && new_end <= next->end) { - if (!mmap_state.reserve_physical(((char *) ptr) + old_size, + if (!mm->reserve_physical(((char *) ptr) + old_size, asize - old_size)) goto supercarrier_reserve_failure; if (new_end < next->end) - resize_free_seg(&mmap_state.sa.map, next, new_end, next->end); + 
resize_free_seg(&mm->sa.map, next, new_end, next->end); else { - delete_free_seg(&mmap_state.sa.map, next); - free_desc(next); + delete_free_seg(&mm->sa.map, next); + free_desc(mm, next); } new_ptr = ptr; ERTS_MMAP_SIZE_SC_SA_INC(asize - old_size); @@ -2000,12 +2010,12 @@ erts_mremap(Uint32 flags, void *ptr, UWord old_size, UWord *sizep) } ERTS_MMAP_OP_ABORT(); - erts_smp_mtx_unlock(&mmap_state.mtx); + erts_smp_mtx_unlock(&mm->mtx); /* Failed to resize... */ } - return remap_move(flags, ptr, old_size, sizep); + return remap_move(mm, flags, ptr, old_size, sizep); supercarrier_resize_success: @@ -2022,25 +2032,24 @@ supercarrier_resize_success: #endif ERTS_MREMAP_OP_END(new_ptr, asize); - erts_smp_mtx_unlock(&mmap_state.mtx); + erts_smp_mtx_unlock(&mm->mtx); *sizep = asize; return new_ptr; supercarrier_reserve_failure: ERTS_MREMAP_OP_END(NULL, old_size); - erts_smp_mtx_unlock(&mmap_state.mtx); + erts_smp_mtx_unlock(&mm->mtx); *sizep = old_size; return NULL; } -int erts_mmap_in_supercarrier(void *ptr) +int erts_mmap_in_supercarrier(ErtsMemMapper* mm, void *ptr) { return ERTS_MMAP_IN_SUPERCARRIER(ptr); } - static struct { Eterm total; Eterm total_sa; @@ -2103,8 +2112,9 @@ static void hard_dbg_mseg_init(void); #endif void -erts_mmap_init(ErtsMMapInit *init) +erts_mmap_init(ErtsMemMapper* mm, ErtsMMapInit *init) { + static int is_first_call = 1; int virtual_map = 0; char *start = NULL, *end = NULL; UWord pagesize; @@ -2131,20 +2141,20 @@ erts_mmap_init(ErtsMMapInit *init) ERTS_MMAP_OP_RINGBUF_INIT(); - erts_have_erts_mmap = 0; - - mmap_state.supercarrier = 0; - mmap_state.reserve_physical = reserve_noop; - mmap_state.unreserve_physical = unreserve_noop; + mm->supercarrier = 0; + mm->reserve_physical = reserve_noop; + mm->unreserve_physical = unreserve_noop; #if HAVE_MMAP && !defined(MAP_ANON) - mmap_state.mmap_fd = open("/dev/zero", O_RDWR); - if (mmap_state.mmap_fd < 0) + mm->mmap_fd = open("/dev/zero", O_RDWR); + if (mm->mmap_fd < 0) erl_exit(-1, "erts_mmap: Failed to 
open /dev/zero\n"); #endif - erts_smp_mtx_init(&mmap_state.mtx, "erts_mmap"); - erts_mtx_init(&am.init_mutex, "mmap_init_atoms"); + erts_smp_mtx_init(&mm->mtx, "erts_mmap"); + if (is_first_call) { + erts_mtx_init(&am.init_mutex, "mmap_init_atoms"); + } #ifdef ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION if (init->virtual_range.start) { @@ -2160,8 +2170,8 @@ erts_mmap_init(ErtsMMapInit *init) sz = start - ptr; if (sz) os_munmap(end, sz); - mmap_state.reserve_physical = os_reserve_physical; - mmap_state.unreserve_physical = os_unreserve_physical; + mm->reserve_physical = os_reserve_physical; + mm->unreserve_physical = os_unreserve_physical; virtual_map = 1; } else @@ -2179,8 +2189,8 @@ erts_mmap_init(ErtsMMapInit *init) #ifdef ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION if (!init->scrpm) { start = os_mmap_virtual(NULL, sz); - mmap_state.reserve_physical = os_reserve_physical; - mmap_state.unreserve_physical = os_unreserve_physical; + mm->reserve_physical = os_reserve_physical; + mm->unreserve_physical = os_unreserve_physical; virtual_map = 1; } else @@ -2206,34 +2216,32 @@ erts_mmap_init(ErtsMMapInit *init) } #endif } - if (!mmap_state.no_os_mmap) - erts_have_erts_mmap |= ERTS_HAVE_ERTS_OS_MMAP; #endif - mmap_state.no.free_seg_descs = 0; - mmap_state.no.free_segs.curr = 0; - mmap_state.no.free_segs.max = 0; + mm->no.free_seg_descs = 0; + mm->no.free_segs.curr = 0; + mm->no.free_segs.max = 0; - mmap_state.size.supercarrier.total = 0; - mmap_state.size.supercarrier.used.total = 0; - mmap_state.size.supercarrier.used.sa = 0; - mmap_state.size.supercarrier.used.sua = 0; - mmap_state.size.os.used = 0; + mm->size.supercarrier.total = 0; + mm->size.supercarrier.used.total = 0; + mm->size.supercarrier.used.sa = 0; + mm->size.supercarrier.used.sua = 0; + mm->size.os.used = 0; - mmap_state.desc.new_area_hint = NULL; + mm->desc.new_area_hint = NULL; if (!start) { - mmap_state.sa.bot = NULL; - mmap_state.sua.top = NULL; - mmap_state.sa.bot = NULL; - mmap_state.sua.top = NULL; - 
mmap_state.no_os_mmap = 0; - mmap_state.supercarrier = 0; + mm->sa.bot = NULL; + mm->sua.top = NULL; + mm->sa.bot = NULL; + mm->sua.top = NULL; + mm->no_os_mmap = 0; + mm->supercarrier = 0; } else { size_t desc_size; - mmap_state.no_os_mmap = init->sco; + mm->no_os_mmap = init->sco; desc_size = init->scrfsd; if (desc_size < 100) @@ -2244,66 +2252,73 @@ erts_mmap_init(ErtsMMapInit *init) + ERTS_PAGEALIGNED_SIZE) > end - start) erl_exit(-1, "erts_mmap: No space for segments in super carrier\n"); - mmap_state.sa.bot = start; - mmap_state.sa.bot += desc_size; - mmap_state.sa.bot = (char *) ERTS_SUPERALIGNED_CEILING(mmap_state.sa.bot); - mmap_state.sa.top = mmap_state.sa.bot; - mmap_state.sua.top = end; - mmap_state.sua.bot = mmap_state.sua.top; + mm->sa.bot = start; + mm->sa.bot += desc_size; + mm->sa.bot = (char *) ERTS_SUPERALIGNED_CEILING(mm->sa.bot); + mm->sa.top = mm->sa.bot; + mm->sua.top = end; + mm->sua.bot = mm->sua.top; - mmap_state.size.supercarrier.used.total += (UWord) (mmap_state.sa.bot - start); + mm->size.supercarrier.used.total += (UWord) (mm->sa.bot - start); - mmap_state.desc.free_list = NULL; - mmap_state.desc.reserved = 0; + mm->desc.free_list = NULL; + mm->desc.reserved = 0; if (end == (void *) 0) { /* * Very unlikely, but we need a guarantee - * that `mmap_state.sua.top` always will + * that `mm->sua.top` always will * compare as larger than all segment pointers * into the super carrier... 
*/ - mmap_state.sua.top -= ERTS_PAGEALIGNED_SIZE; - mmap_state.size.supercarrier.used.total += ERTS_PAGEALIGNED_SIZE; + mm->sua.top -= ERTS_PAGEALIGNED_SIZE; + mm->size.supercarrier.used.total += ERTS_PAGEALIGNED_SIZE; #ifdef ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION - if (!virtual_map || os_reserve_physical(mmap_state.sua.top, ERTS_PAGEALIGNED_SIZE)) + if (!virtual_map || os_reserve_physical(mm->sua.top, ERTS_PAGEALIGNED_SIZE)) #endif - add_free_desc_area(mmap_state.sua.top, end); - mmap_state.desc.reserved += (end - mmap_state.sua.top) / sizeof(ErtsFreeSegDesc); + add_free_desc_area(mm, mm->sua.top, end); + mm->desc.reserved += (end - mm->sua.top) / sizeof(ErtsFreeSegDesc); } - mmap_state.size.supercarrier.total = (UWord) (mmap_state.sua.top - start); + mm->size.supercarrier.total = (UWord) (mm->sua.top - start); /* * Area before (and after) super carrier * will be used for free segment descritors. */ #ifdef ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION - if (virtual_map && !os_reserve_physical(start, mmap_state.sa.bot - start)) + if (virtual_map && !os_reserve_physical(start, mm->sa.bot - start)) erl_exit(-1, "erts_mmap: Failed to reserve physical memory for descriptors\n"); #endif - mmap_state.desc.unused_start = start; - mmap_state.desc.unused_end = mmap_state.sa.bot; - mmap_state.desc.reserved += ((mmap_state.desc.unused_end - start) + mm->desc.unused_start = start; + mm->desc.unused_end = mm->sa.bot; + mm->desc.reserved += ((mm->desc.unused_end - start) / sizeof(ErtsFreeSegDesc)); - init_free_seg_map(&mmap_state.sa.map, SA_SZ_ADDR_ORDER); - init_free_seg_map(&mmap_state.sua.map, SZ_REVERSE_ADDR_ORDER); + init_free_seg_map(&mm->sa.map, SA_SZ_ADDR_ORDER); + init_free_seg_map(&mm->sua.map, SZ_REVERSE_ADDR_ORDER); - mmap_state.supercarrier = 1; - erts_have_erts_mmap |= ERTS_HAVE_ERTS_SUPERCARRIER_MMAP; + mm->supercarrier = 1; - mmap_state.desc.new_area_hint = end; + mm->desc.new_area_hint = end; } #if !ERTS_HAVE_OS_MMAP - mmap_state.no_os_mmap = 1; + mm->no_os_mmap = 
1; #endif #ifdef HARD_DEBUG_MSEG hard_dbg_mseg_init(); #endif + +#if defined(ARCH_64) && defined(ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION) + if (mm == &erts_literal_mmapper) { + erts_literals_start = erts_literal_mmapper.sa.bot; + erts_literals_size = erts_literal_mmapper.sua.top - erts_literals_start; + } +#endif + is_first_call = 0; } @@ -2313,7 +2328,8 @@ add_2tup(Uint **hpp, Uint *szp, Eterm *lp, Eterm el1, Eterm el2) *lp = erts_bld_cons(hpp, szp, erts_bld_tuple(hpp, szp, 2, el1, el2), *lp); } -Eterm erts_mmap_info(int *print_to_p, +Eterm erts_mmap_info(ErtsMemMapper* mm, + int *print_to_p, void *print_to_arg, Eterm** hpp, Uint* szp, struct erts_mmap_info_struct* emis) @@ -2328,29 +2344,29 @@ Eterm erts_mmap_info(int *print_to_p, Eterm res = THE_NON_VALUE; if (!hpp) { - erts_smp_mtx_lock(&mmap_state.mtx); - emis->sizes[0] = mmap_state.size.supercarrier.total; - emis->sizes[1] = mmap_state.sa.top - mmap_state.sa.bot; - emis->sizes[2] = mmap_state.sua.top - mmap_state.sua.bot; - emis->sizes[3] = mmap_state.size.supercarrier.used.total; - emis->sizes[4] = mmap_state.size.supercarrier.used.sa; - emis->sizes[5] = mmap_state.size.supercarrier.used.sua; + erts_smp_mtx_lock(&mm->mtx); + emis->sizes[0] = mm->size.supercarrier.total; + emis->sizes[1] = mm->sa.top - mm->sa.bot; + emis->sizes[2] = mm->sua.top - mm->sua.bot; + emis->sizes[3] = mm->size.supercarrier.used.total; + emis->sizes[4] = mm->size.supercarrier.used.sa; + emis->sizes[5] = mm->size.supercarrier.used.sua; - emis->segs[0] = mmap_state.no.free_segs.curr; - emis->segs[1] = mmap_state.no.free_segs.max; - emis->segs[2] = mmap_state.no.free_seg_descs; - emis->segs[3] = mmap_state.desc.reserved; - emis->segs[4] = mmap_state.sa.map.nseg; - emis->segs[5] = mmap_state.sua.map.nseg; + emis->segs[0] = mm->no.free_segs.curr; + emis->segs[1] = mm->no.free_segs.max; + emis->segs[2] = mm->no.free_seg_descs; + emis->segs[3] = mm->desc.reserved; + emis->segs[4] = mm->sa.map.nseg; + emis->segs[5] = mm->sua.map.nseg; - 
emis->os_used = mmap_state.size.os.used; - erts_smp_mtx_unlock(&mmap_state.mtx); + emis->os_used = mm->size.os.used; + erts_smp_mtx_unlock(&mm->mtx); } if (print_to_p) { int to = *print_to_p; void *arg = print_to_arg; - if (mmap_state.supercarrier) { + if (mm->supercarrier) { const char* prefix = "supercarrier "; erts_print(to, arg, "%stotal size: %bpu\n", prefix, emis->sizes[0]); erts_print(to, arg, "%stotal sa size: %bpu\n", prefix, emis->sizes[1]); @@ -2365,7 +2381,7 @@ Eterm erts_mmap_info(int *print_to_p, erts_print(to, arg, "%ssa free segs: %bpu\n", prefix, emis->segs[4]); erts_print(to, arg, "%ssua free segs: %bpu\n", prefix, emis->segs[5]); } - if (!mmap_state.no_os_mmap) { + if (!mm->no_os_mmap) { erts_print(to, arg, "os mmap size used: %bpu\n", emis->os_used); } } @@ -2377,7 +2393,7 @@ Eterm erts_mmap_info(int *print_to_p, } lix = 0; - if (mmap_state.supercarrier) { + if (mm->supercarrier) { group[0] = erts_bld_atom_uword_2tup_list(hpp, szp, sizeof(size_tags)/sizeof(Eterm), size_tags, emis->sizes); @@ -2389,7 +2405,7 @@ Eterm erts_mmap_info(int *print_to_p, lix++; } - if (!mmap_state.no_os_mmap) { + if (!mm->no_os_mmap) { group[0] = erts_bld_atom_uword_2tup_list(hpp, szp, 1, &am.used, &emis->os_used); list[lix] = erts_bld_2tup_list(hpp, szp, 1, group_tags, group); @@ -2401,25 +2417,26 @@ Eterm erts_mmap_info(int *print_to_p, return res; } -Eterm erts_mmap_info_options(char *prefix, +Eterm erts_mmap_info_options(ErtsMemMapper* mm, + char *prefix, int *print_to_p, void *print_to_arg, Uint **hpp, Uint *szp) { - const UWord scs = mmap_state.sua.top - mmap_state.sa.bot; - const Eterm sco = mmap_state.no_os_mmap ? am_true : am_false; - const Eterm scrpm = (mmap_state.reserve_physical == reserve_noop) ? am_true : am_false; + const UWord scs = mm->sua.top - mm->sa.bot; + const Eterm sco = mm->no_os_mmap ? am_true : am_false; + const Eterm scrpm = (mm->reserve_physical == reserve_noop) ? 
am_true : am_false; Eterm res = THE_NON_VALUE; if (print_to_p) { int to = *print_to_p; void *arg = print_to_arg; erts_print(to, arg, "%sscs: %bpu\n", prefix, scs); - if (mmap_state.supercarrier) { + if (mm->supercarrier) { erts_print(to, arg, "%ssco: %T\n", prefix, sco); erts_print(to, arg, "%sscrpm: %T\n", prefix, scrpm); - erts_print(to, arg, "%sscrfsd: %beu\n", prefix, mmap_state.desc.reserved); + erts_print(to, arg, "%sscrfsd: %beu\n", prefix, mm->desc.reserved); } } @@ -2429,9 +2446,9 @@ Eterm erts_mmap_info_options(char *prefix, } res = NIL; - if (mmap_state.supercarrier) { + if (mm->supercarrier) { add_2tup(hpp, szp, &res, am.scrfsd, - erts_bld_uint(hpp,szp, mmap_state.desc.reserved)); + erts_bld_uint(hpp,szp, mm->desc.reserved)); add_2tup(hpp, szp, &res, am.scrpm, scrpm); add_2tup(hpp, szp, &res, am.sco, sco); } @@ -2441,9 +2458,9 @@ Eterm erts_mmap_info_options(char *prefix, } -Eterm erts_mmap_debug_info(Process* p) +Eterm erts_mmap_debug_info(ErtsMemMapper* mm, Process* p) { - if (mmap_state.supercarrier) { + if (mm->supercarrier) { ERTS_DECL_AM(sabot); ERTS_DECL_AM(satop); ERTS_DECL_AM(suabot); @@ -2453,18 +2470,17 @@ Eterm erts_mmap_debug_info(Process* p) UWord values[4]; Eterm *hp, *hp_end; Uint may_need; - const Uint PTR_BIG_SZ = HALFWORD_HEAP ? 
3 : 2; - - erts_smp_mtx_lock(&mmap_state.mtx); - values[0] = (UWord)mmap_state.sa.bot; - values[1] = (UWord)mmap_state.sa.top; - values[2] = (UWord)mmap_state.sua.bot; - values[3] = (UWord)mmap_state.sua.top; - sa_list = build_free_seg_list(p, &mmap_state.sa.map); - sua_list = build_free_seg_list(p, &mmap_state.sua.map); - erts_smp_mtx_unlock(&mmap_state.mtx); - - may_need = 4*(2+3+PTR_BIG_SZ) + 2*(2+3); + + erts_smp_mtx_lock(&mm->mtx); + values[0] = (UWord)mm->sa.bot; + values[1] = (UWord)mm->sa.top; + values[2] = (UWord)mm->sua.bot; + values[3] = (UWord)mm->sua.top; + sa_list = build_free_seg_list(p, &mm->sa.map); + sua_list = build_free_seg_list(p, &mm->sua.map); + erts_smp_mtx_unlock(&mm->mtx); + + may_need = 4*(2+3+2) + 2*(2+3); hp = HAlloc(p, may_need); hp_end = hp + may_need; diff --git a/erts/emulator/sys/common/erl_mmap.h b/erts/emulator/sys/common/erl_mmap.h index 66619c5161..61d912fd28 100644 --- a/erts/emulator/sys/common/erl_mmap.h +++ b/erts/emulator/sys/common/erl_mmap.h @@ -30,9 +30,6 @@ #define ERTS_MMAPFLG_SUPERCARRIER_ONLY (((Uint32) 1) << 1) #define ERTS_MMAPFLG_SUPERALIGNED (((Uint32) 1) << 2) -#define ERTS_HAVE_ERTS_OS_MMAP (1 << 0) -#define ERTS_HAVE_ERTS_SUPERCARRIER_MMAP (1 << 1) -extern int erts_have_erts_mmap; extern UWord erts_page_inv_mask; typedef struct { @@ -53,23 +50,29 @@ typedef struct { #define ERTS_MMAP_INIT_DEFAULT_INITER \ {{NULL, NULL}, {NULL, NULL}, 0, 1, (1 << 16), 1} -void *erts_mmap(Uint32 flags, UWord *sizep); -void erts_munmap(Uint32 flags, void *ptr, UWord size); -void *erts_mremap(Uint32 flags, void *ptr, UWord old_size, UWord *sizep); -int erts_mmap_in_supercarrier(void *ptr); -void erts_mmap_init(ErtsMMapInit*); +#define ERTS_MMAP_INIT_LITERAL_INITER \ + {{NULL, NULL}, {NULL, NULL}, 1024*1024*1024, 1, (1 << 16), 0} + +typedef struct ErtsMemMapper_ ErtsMemMapper; + +void *erts_mmap(ErtsMemMapper*, Uint32 flags, UWord *sizep); +void erts_munmap(ErtsMemMapper*, Uint32 flags, void *ptr, UWord size); +void 
*erts_mremap(ErtsMemMapper*, Uint32 flags, void *ptr, UWord old_size, UWord *sizep); +int erts_mmap_in_supercarrier(ErtsMemMapper*, void *ptr); +void erts_mmap_init(ErtsMemMapper*, ErtsMMapInit*); struct erts_mmap_info_struct { UWord sizes[6]; UWord segs[6]; UWord os_used; }; -Eterm erts_mmap_info(int *print_to_p, void *print_to_arg, +Eterm erts_mmap_info(ErtsMemMapper*, int *print_to_p, void *print_to_arg, Eterm** hpp, Uint* szp, struct erts_mmap_info_struct*); -Eterm erts_mmap_info_options(char *prefix, int *print_to_p, void *print_to_arg, +Eterm erts_mmap_info_options(ErtsMemMapper*, + char *prefix, int *print_to_p, void *print_to_arg, Uint **hpp, Uint *szp); struct process; -Eterm erts_mmap_debug_info(struct process*); +Eterm erts_mmap_debug_info(ErtsMemMapper*, struct process*); #define ERTS_SUPERALIGNED_SIZE \ (1 << ERTS_MMAP_SUPERALIGNED_BITS) @@ -121,6 +124,11 @@ Eterm erts_mmap_debug_info(struct process*); # define ERTS_HAVE_OS_MMAP 1 #endif +extern ErtsMemMapper erts_dflt_mmapper; +#if defined(ARCH_64) && defined(ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION) +extern ErtsMemMapper erts_literal_mmapper; +#endif + /*#define HARD_DEBUG_MSEG*/ #ifdef HARD_DEBUG_MSEG # define HARD_DBG_INSERT_MSEG hard_dbg_insert_mseg diff --git a/erts/emulator/sys/common/erl_mseg.c b/erts/emulator/sys/common/erl_mseg.c index 0d51aad863..20695899eb 100644 --- a/erts/emulator/sys/common/erl_mseg.c +++ b/erts/emulator/sys/common/erl_mseg.c @@ -99,17 +99,12 @@ static const int debruijn[32] = { static int atoms_initialized; -typedef struct mem_kind_t MemKind; - const ErtsMsegOpt_t erts_mseg_default_opt = { 1, /* Use cache */ 1, /* Preserv data */ 0, /* Absolute shrink threshold */ 0, /* Relative shrink threshold */ 0 /* Scheduler specific */ -#if HALFWORD_HEAP - ,0 /* need low memory */ -#endif }; @@ -142,7 +137,14 @@ struct cache_t_ { typedef struct ErtsMsegAllctr_t_ ErtsMsegAllctr_t; -struct mem_kind_t { +struct ErtsMsegAllctr_t_ { + int ix; + + int is_init_done; + int is_thread_safe; 
+ erts_mtx_t mtx; + + int is_cache_check_scheduled; cache_t cache[MAX_CACHE_SIZE]; cache_t cache_unpowered_node; @@ -168,29 +170,6 @@ struct mem_kind_t { } max_ever; } segments; - ErtsMsegAllctr_t *ma; - const char* name; - MemKind* next; -};/*MemKind*/ - -struct ErtsMsegAllctr_t_ { - int ix; - - int is_init_done; - int is_thread_safe; - erts_mtx_t mtx; - - int is_cache_check_scheduled; - - MemKind* mk_list; - -#if HALFWORD_HEAP - MemKind low_mem; - MemKind hi_mem; -#else - MemKind the_mem; -#endif - Uint max_cache_size; Uint abs_max_cache_bad_fit; Uint rel_max_cache_bad_fit; @@ -302,22 +281,17 @@ schedule_cache_check(ErtsMsegAllctr_t *ma) { /* #define ERTS_PRINT_ERTS_MMAP */ static ERTS_INLINE void * -mseg_create(ErtsMsegAllctr_t *ma, Uint flags, MemKind* mk, UWord *sizep) +mseg_create(ErtsMsegAllctr_t *ma, Uint flags, UWord *sizep) { #ifdef ERTS_PRINT_ERTS_MMAP UWord req_size = *sizep; #endif void *seg; Uint32 mmap_flags = 0; -#if HALFWORD_HEAP - mmap_flags |= ((mk == &ma->low_mem) - ? ERTS_MMAPFLG_SUPERCARRIER_ONLY - : ERTS_MMAPFLG_OS_ONLY); -#endif if (MSEG_FLG_IS_2POW(flags)) mmap_flags |= ERTS_MMAPFLG_SUPERALIGNED; - seg = erts_mmap(mmap_flags, sizep); + seg = erts_mmap(&erts_dflt_mmapper, mmap_flags, sizep); #ifdef ERTS_PRINT_ERTS_MMAP erts_fprintf(stderr, "%p = erts_mmap(%s, {%bpu, %bpu});\n", seg, @@ -331,18 +305,13 @@ mseg_create(ErtsMsegAllctr_t *ma, Uint flags, MemKind* mk, UWord *sizep) } static ERTS_INLINE void -mseg_destroy(ErtsMsegAllctr_t *ma, Uint flags, MemKind* mk, void *seg_p, UWord size) { +mseg_destroy(ErtsMsegAllctr_t *ma, Uint flags, void *seg_p, UWord size) { Uint32 mmap_flags = 0; -#if HALFWORD_HEAP - mmap_flags |= ((mk == &ma->low_mem) - ? 
ERTS_MMAPFLG_SUPERCARRIER_ONLY - : ERTS_MMAPFLG_OS_ONLY); -#endif if (MSEG_FLG_IS_2POW(flags)) mmap_flags |= ERTS_MMAPFLG_SUPERALIGNED; - erts_munmap(mmap_flags, seg_p, size); + erts_munmap(&erts_dflt_mmapper, mmap_flags, seg_p, size); #ifdef ERTS_PRINT_ERTS_MMAP erts_fprintf(stderr, "erts_munmap(%s, %p, %bpu);\n", (mmap_flags & ERTS_MMAPFLG_SUPERALIGNED) ? "sa" : "sua", @@ -353,22 +322,17 @@ mseg_destroy(ErtsMsegAllctr_t *ma, Uint flags, MemKind* mk, void *seg_p, UWord s } static ERTS_INLINE void * -mseg_recreate(ErtsMsegAllctr_t *ma, Uint flags, MemKind* mk, void *old_seg, UWord old_size, UWord *sizep) +mseg_recreate(ErtsMsegAllctr_t *ma, Uint flags, void *old_seg, UWord old_size, UWord *sizep) { #ifdef ERTS_PRINT_ERTS_MMAP UWord req_size = *sizep; #endif void *new_seg; Uint32 mmap_flags = 0; -#if HALFWORD_HEAP - mmap_flags |= ((mk == &ma->low_mem) - ? ERTS_MMAPFLG_SUPERCARRIER_ONLY - : ERTS_MMAPFLG_OS_ONLY); -#endif if (MSEG_FLG_IS_2POW(flags)) mmap_flags |= ERTS_MMAPFLG_SUPERALIGNED; - new_seg = erts_mremap(mmap_flags, old_seg, old_size, sizep); + new_seg = erts_mremap(&erts_dflt_mmapper, mmap_flags, old_seg, old_size, sizep); #ifdef ERTS_PRINT_ERTS_MMAP erts_fprintf(stderr, "%p = erts_mremap(%s, %p, %bpu, {%bpu, %bpu});\n", @@ -392,11 +356,8 @@ do { \ || erts_smp_thr_progress_is_blocking() \ || ERTS_IS_CRASH_DUMPING); \ } while (0) -#define ERTS_DBG_MK_CHK_THR_ACCESS(MK) \ - ERTS_DBG_MA_CHK_THR_ACCESS((MK)->ma) #else #define ERTS_DBG_MA_CHK_THR_ACCESS(MA) -#define ERTS_DBG_MK_CHK_THR_ACCESS(MK) #endif /* Cache interface */ @@ -409,10 +370,10 @@ static ERTS_INLINE void mseg_cache_clear_node(cache_t *c) { c->prev = c; } -static ERTS_INLINE int cache_bless_segment(MemKind *mk, void *seg, UWord size, Uint flags) { +static ERTS_INLINE int cache_bless_segment(ErtsMsegAllctr_t *ma, void *seg, UWord size, Uint flags) { cache_t *c; - ERTS_DBG_MK_CHK_THR_ACCESS(mk); + ERTS_DBG_MA_CHK_THR_ACCESS(ma); ASSERT(!MSEG_FLG_IS_2POW(flags) || (MSEG_FLG_IS_2POW(flags) && 
MAP_IS_ALIGNED(seg) && IS_2POW(size))); @@ -421,11 +382,11 @@ static ERTS_INLINE int cache_bless_segment(MemKind *mk, void *seg, UWord size, U * Large blocks has no such cache and it is up to mseg to cache them to speed things up. */ - if (!erts_circleq_is_empty(&(mk->cache_free))) { + if (!erts_circleq_is_empty(&(ma->cache_free))) { /* We have free slots, use one to cache the segment */ - c = erts_circleq_head(&(mk->cache_free)); + c = erts_circleq_head(&(ma->cache_free)); erts_circleq_remove(c); c->seg = seg; @@ -437,29 +398,28 @@ static ERTS_INLINE int cache_bless_segment(MemKind *mk, void *seg, UWord size, U ASSERT(ix < CACHE_AREAS); ASSERT((1 << (ix + MSEG_ALIGN_BITS)) == size); - erts_circleq_push_head(&(mk->cache_powered_node[ix]), c); + erts_circleq_push_head(&(ma->cache_powered_node[ix]), c); } else - erts_circleq_push_head(&(mk->cache_unpowered_node), c); + erts_circleq_push_head(&(ma->cache_unpowered_node), c); - mk->cache_size++; - ASSERT(mk->cache_size <= mk->ma->max_cache_size); + ma->cache_size++; return 1; - } else if (!MSEG_FLG_IS_2POW(flags) && !erts_circleq_is_empty(&(mk->cache_unpowered_node))) { + } else if (!MSEG_FLG_IS_2POW(flags) && !erts_circleq_is_empty(&(ma->cache_unpowered_node))) { /* No free slots. 
* Evict oldest slot from unpowered cache so we can cache an unpowered (sbc) segment */ - c = erts_circleq_tail(&(mk->cache_unpowered_node)); + c = erts_circleq_tail(&(ma->cache_unpowered_node)); erts_circleq_remove(c); - mseg_destroy(mk->ma, ERTS_MSEG_FLG_NONE, mk, c->seg, c->size); + mseg_destroy(ma, ERTS_MSEG_FLG_NONE, c->seg, c->size); mseg_cache_clear_node(c); c->seg = seg; c->size = size; - erts_circleq_push_head(&(mk->cache_unpowered_node), c); + erts_circleq_push_head(&(ma->cache_unpowered_node), c); return 1; } else if (!MSEG_FLG_IS_2POW(flags)) { @@ -473,20 +433,20 @@ static ERTS_INLINE int cache_bless_segment(MemKind *mk, void *seg, UWord size, U int i; for( i = 0; i < CACHE_AREAS; i++) { - if (erts_circleq_is_empty(&(mk->cache_powered_node[i]))) + if (erts_circleq_is_empty(&(ma->cache_powered_node[i]))) continue; - c = erts_circleq_tail(&(mk->cache_powered_node[i])); + c = erts_circleq_tail(&(ma->cache_powered_node[i])); erts_circleq_remove(c); - mseg_destroy(mk->ma, ERTS_MSEG_FLG_2POW, mk, c->seg, c->size); + mseg_destroy(ma, ERTS_MSEG_FLG_2POW, c->seg, c->size); mseg_cache_clear_node(c); c->seg = seg; c->size = size; - erts_circleq_push_head(&(mk->cache_unpowered_node), c); + erts_circleq_push_head(&(ma->cache_unpowered_node), c); return 1; } @@ -495,11 +455,11 @@ static ERTS_INLINE int cache_bless_segment(MemKind *mk, void *seg, UWord size, U return 0; } -static ERTS_INLINE void *cache_get_segment(MemKind *mk, UWord *size_p, Uint flags) { +static ERTS_INLINE void *cache_get_segment(ErtsMsegAllctr_t *ma, UWord *size_p, Uint flags) { UWord size = *size_p; - ERTS_DBG_MK_CHK_THR_ACCESS(mk); + ERTS_DBG_MA_CHK_THR_ACCESS(ma); if (MSEG_FLG_IS_2POW(flags)) { @@ -512,10 +472,10 @@ static ERTS_INLINE void *cache_get_segment(MemKind *mk, UWord *size_p, Uint flag for( i = ix; i < CACHE_AREAS; i++) { - if (erts_circleq_is_empty(&(mk->cache_powered_node[i]))) + if (erts_circleq_is_empty(&(ma->cache_powered_node[i]))) continue; - c = 
erts_circleq_head(&(mk->cache_powered_node[i])); + c = erts_circleq_head(&(ma->cache_powered_node[i])); erts_circleq_remove(c); ASSERT(IS_2POW(c->size)); @@ -524,31 +484,31 @@ static ERTS_INLINE void *cache_get_segment(MemKind *mk, UWord *size_p, Uint flag csize = c->size; seg = (char*) c->seg; - mk->cache_size--; - mk->cache_hits++; + ma->cache_size--; + ma->cache_hits++; /* link to free cache list */ mseg_cache_clear_node(c); - erts_circleq_push_head(&(mk->cache_free), c); + erts_circleq_push_head(&(ma->cache_free), c); - ASSERT(!(mk->cache_size < 0)); + ASSERT(!(ma->cache_size < 0)); if (csize != size) - mseg_destroy(mk->ma, ERTS_MSEG_FLG_2POW, mk, seg + size, csize - size); + mseg_destroy(ma, ERTS_MSEG_FLG_2POW, seg + size, csize - size); return seg; } } - else if (!erts_circleq_is_empty(&(mk->cache_unpowered_node))) { + else if (!erts_circleq_is_empty(&(ma->cache_unpowered_node))) { void *seg; cache_t *c; cache_t *best = NULL; UWord bdiff = 0; UWord csize; - UWord bad_max_abs = mk->ma->abs_max_cache_bad_fit; - UWord bad_max_rel = mk->ma->rel_max_cache_bad_fit; + UWord bad_max_abs = ma->abs_max_cache_bad_fit; + UWord bad_max_rel = ma->rel_max_cache_bad_fit; - erts_circleq_foreach(c, &(mk->cache_unpowered_node)) { + erts_circleq_foreach(c, &(ma->cache_unpowered_node)) { csize = c->size; if (csize >= size) { if (((csize - size)*100 < bad_max_rel*size) && (csize - size) < bad_max_abs ) { @@ -557,11 +517,11 @@ static ERTS_INLINE void *cache_get_segment(MemKind *mk, UWord *size_p, Uint flag erts_circleq_remove(c); - mk->cache_size--; - mk->cache_hits++; + ma->cache_size--; + ma->cache_hits++; mseg_cache_clear_node(c); - erts_circleq_push_head(&(mk->cache_free), c); + erts_circleq_push_head(&(ma->cache_free), c); *size_p = csize; @@ -584,7 +544,7 @@ static ERTS_INLINE void *cache_get_segment(MemKind *mk, UWord *size_p, Uint flag ASSERT(best->seg); ASSERT(best->size > 0); - mk->cache_hits++; + ma->cache_hits++; /* Use current cache placement for remaining segment 
space */ @@ -608,7 +568,7 @@ static ERTS_INLINE void *cache_get_segment(MemKind *mk, UWord *size_p, Uint flag * using callbacks from aux-work in the scheduler. */ -static ERTS_INLINE Uint mseg_drop_one_memkind_cache_size(MemKind *mk, Uint flags, cache_t *head) { +static ERTS_INLINE Uint mseg_drop_one_cache_size(ErtsMsegAllctr_t *ma, Uint flags, cache_t *head) { cache_t *c = NULL; c = erts_circleq_tail(head); @@ -617,19 +577,19 @@ static ERTS_INLINE Uint mseg_drop_one_memkind_cache_size(MemKind *mk, Uint flags if (erts_mtrace_enabled) erts_mtrace_crr_free(SEGTYPE, SEGTYPE, c->seg); - mseg_destroy(mk->ma, flags, mk, c->seg, c->size); + mseg_destroy(ma, flags, c->seg, c->size); mseg_cache_clear_node(c); - erts_circleq_push_head(&(mk->cache_free), c); + erts_circleq_push_head(&(ma->cache_free), c); - mk->segments.current.watermark--; - mk->cache_size--; + ma->segments.current.watermark--; + ma->cache_size--; - ASSERT( mk->cache_size >= 0 ); + ASSERT(ma->cache_size >= 0); - return mk->cache_size; + return ma->cache_size; } -static ERTS_INLINE Uint mseg_drop_memkind_cache_size(MemKind *mk, Uint flags, cache_t *head) { +static ERTS_INLINE Uint mseg_drop_cache_size(ErtsMsegAllctr_t *ma, Uint flags, cache_t *head) { cache_t *c = NULL; while (!erts_circleq_is_empty(head)) { @@ -640,58 +600,52 @@ static ERTS_INLINE Uint mseg_drop_memkind_cache_size(MemKind *mk, Uint flags, ca if (erts_mtrace_enabled) erts_mtrace_crr_free(SEGTYPE, SEGTYPE, c->seg); - mseg_destroy(mk->ma, flags, mk, c->seg, c->size); + mseg_destroy(ma, flags, c->seg, c->size); mseg_cache_clear_node(c); - erts_circleq_push_head(&(mk->cache_free), c); - - mk->segments.current.watermark--; - mk->cache_size--; + erts_circleq_push_head(&(ma->cache_free), c); + ma->segments.current.watermark--; + ma->cache_size--; } - ASSERT( mk->cache_size >= 0 ); + ASSERT(ma->cache_size >= 0); - return mk->cache_size; + return ma->cache_size; } -/* mseg_check_memkind_cache - * - Check if we can empty some cached segments in this - 
* MemKind. +/* mseg_check_cache + * - Check if we can empty some cached segments in this allocator */ -static Uint mseg_check_memkind_cache(MemKind *mk) { +static Uint mseg_check_cache(ErtsMsegAllctr_t *ma) { int i; - ERTS_DBG_MK_CHK_THR_ACCESS(mk); + ERTS_DBG_MA_CHK_THR_ACCESS(ma); for (i = 0; i < CACHE_AREAS; i++) { - if (!erts_circleq_is_empty(&(mk->cache_powered_node[i]))) - return mseg_drop_one_memkind_cache_size(mk, ERTS_MSEG_FLG_2POW, &(mk->cache_powered_node[i])); + if (!erts_circleq_is_empty(&(ma->cache_powered_node[i]))) + return mseg_drop_one_cache_size(ma, ERTS_MSEG_FLG_2POW, &(ma->cache_powered_node[i])); } - if (!erts_circleq_is_empty(&(mk->cache_unpowered_node))) - return mseg_drop_one_memkind_cache_size(mk, ERTS_MSEG_FLG_NONE, &(mk->cache_unpowered_node)); + if (!erts_circleq_is_empty(&(ma->cache_unpowered_node))) + return mseg_drop_one_cache_size(ma, ERTS_MSEG_FLG_NONE, &(ma->cache_unpowered_node)); return 0; } /* mseg_cache_check * - Check if we have some cache we can purge - * in any of the memkinds. */ static void mseg_cache_check(ErtsMsegAllctr_t *ma) { - MemKind* mk; Uint empty_cache = 1; ERTS_MSEG_LOCK(ma); - for (mk = ma->mk_list; mk; mk = mk->next) { - if (mseg_check_memkind_cache(mk)) - empty_cache = 0; - } + if (mseg_check_cache(ma)) + empty_cache = 0; /* If all MemKinds caches are empty, * remove aux-work callback @@ -709,7 +663,7 @@ static void mseg_cache_check(ErtsMsegAllctr_t *ma) { /* erts_mseg_cache_check * - This is a callback that is scheduled as aux-work from * schedulers and is called at some interval if we have a cache - * on this mseg-allocator and memkind. + * on this mseg-allocator. * - Purpose: Empty cache slowly so we don't collect mapped areas * and bloat memory. 
*/ @@ -719,42 +673,32 @@ void erts_mseg_cache_check(void) { } -/* *_mseg_clear_*_cache +/* mseg_clear_cache * Remove cached segments from the allocator completely */ -static void mseg_clear_memkind_cache(MemKind *mk) { + +static void mseg_clear_cache(ErtsMsegAllctr_t *ma) { int i; + ERTS_MSEG_LOCK(ma); + ERTS_DBG_MA_CHK_THR_ACCESS(ma); /* drop pow2 caches */ for (i = 0; i < CACHE_AREAS; i++) { - if (erts_circleq_is_empty(&(mk->cache_powered_node[i]))) + if (erts_circleq_is_empty(&(ma->cache_powered_node[i]))) continue; - mseg_drop_memkind_cache_size(mk, ERTS_MSEG_FLG_2POW, &(mk->cache_powered_node[i])); - ASSERT(erts_circleq_is_empty(&(mk->cache_powered_node[i]))); + mseg_drop_cache_size(ma, ERTS_MSEG_FLG_2POW, &(ma->cache_powered_node[i])); + ASSERT(erts_circleq_is_empty(&(ma->cache_powered_node[i]))); } /* drop varied caches */ - if (!erts_circleq_is_empty(&(mk->cache_unpowered_node))) - mseg_drop_memkind_cache_size(mk, ERTS_MSEG_FLG_NONE, &(mk->cache_unpowered_node)); - - ASSERT(erts_circleq_is_empty(&(mk->cache_unpowered_node))); - ASSERT(mk->cache_size == 0); -} - -static void mseg_clear_cache(ErtsMsegAllctr_t *ma) { - MemKind* mk; - - ERTS_MSEG_LOCK(ma); - ERTS_DBG_MA_CHK_THR_ACCESS(ma); + if (!erts_circleq_is_empty(&(ma->cache_unpowered_node))) + mseg_drop_cache_size(ma, ERTS_MSEG_FLG_NONE, &(ma->cache_unpowered_node)); - - for (mk = ma->mk_list; mk; mk = mk->next) { - mseg_clear_memkind_cache(mk); - } + ASSERT(erts_circleq_is_empty(&(ma->cache_unpowered_node))); + ASSERT(ma->cache_size == 0); INC_CC(ma, clear_cache); - ERTS_MSEG_UNLOCK(ma); } @@ -763,25 +707,12 @@ void erts_mseg_clear_cache(void) { mseg_clear_cache(ERTS_MSEG_ALLCTR_IX(0)); } - - -static ERTS_INLINE MemKind* memkind(ErtsMsegAllctr_t *ma, - const ErtsMsegOpt_t *opt) -{ -#if HALFWORD_HEAP - return opt->low_mem ? 
&ma->low_mem : &ma->hi_mem; -#else - return &ma->the_mem; -#endif -} - static void * mseg_alloc(ErtsMsegAllctr_t *ma, ErtsAlcType_t atype, UWord *size_p, Uint flags, const ErtsMsegOpt_t *opt) { UWord size; void *seg; - MemKind* mk = memkind(ma, opt); INC_CC(ma, alloc); @@ -795,10 +726,10 @@ mseg_alloc(ErtsMsegAllctr_t *ma, ErtsAlcType_t atype, UWord *size_p, } } - if (opt->cache && mk->cache_size > 0 && (seg = cache_get_segment(mk, &size, flags)) != NULL) + if (opt->cache && ma->cache_size > 0 && (seg = cache_get_segment(ma, &size, flags)) != NULL) goto done; - seg = mseg_create(ma, flags, mk, &size); + seg = mseg_create(ma, flags, &size); if (!seg) *size_p = 0; @@ -808,7 +739,7 @@ done: if (erts_mtrace_enabled) erts_mtrace_crr_alloc(seg, atype, ERTS_MTRACE_SEGMENT_ID, size); - ERTS_MSEG_ALLOC_STAT(mk,size); + ERTS_MSEG_ALLOC_STAT(ma,size); } return seg; @@ -819,11 +750,9 @@ static void mseg_dealloc(ErtsMsegAllctr_t *ma, ErtsAlcType_t atype, void *seg, UWord size, Uint flags, const ErtsMsegOpt_t *opt) { - MemKind* mk = memkind(ma, opt); + ERTS_MSEG_DEALLOC_STAT(ma,size); - ERTS_MSEG_DEALLOC_STAT(mk,size); - - if (opt->cache && cache_bless_segment(mk, seg, size, flags)) { + if (opt->cache && cache_bless_segment(ma, seg, size, flags)) { schedule_cache_check(ma); goto done; } @@ -831,7 +760,7 @@ mseg_dealloc(ErtsMsegAllctr_t *ma, ErtsAlcType_t atype, void *seg, UWord size, if (erts_mtrace_enabled) erts_mtrace_crr_free(atype, SEGTYPE, seg); - mseg_destroy(ma, flags, mk, seg, size); + mseg_destroy(ma, flags, seg, size); done: @@ -842,7 +771,6 @@ static void * mseg_realloc(ErtsMsegAllctr_t *ma, ErtsAlcType_t atype, void *seg, UWord old_size, UWord *new_size_p, Uint flags, const ErtsMsegOpt_t *opt) { - MemKind* mk; void *new_seg; UWord new_size; @@ -861,7 +789,6 @@ mseg_realloc(ErtsMsegAllctr_t *ma, ErtsAlcType_t atype, void *seg, return NULL; } - mk = memkind(ma, opt); new_seg = seg; if (!MSEG_FLG_IS_2POW(flags)) @@ -876,7 +803,7 @@ mseg_realloc(ErtsMsegAllctr_t *ma, 
ErtsAlcType_t atype, void *seg, if (new_size > old_size) { if (opt->preserv) { - new_seg = mseg_recreate(ma, flags, mk, (void *) seg, old_size, &new_size); + new_seg = mseg_recreate(ma, flags, (void *) seg, old_size, &new_size); if (!new_seg) new_size = old_size; } @@ -896,7 +823,7 @@ mseg_realloc(ErtsMsegAllctr_t *ma, ErtsAlcType_t atype, void *seg, new_size = old_size; } else { - new_seg = mseg_recreate(ma, flags, mk, (void *) seg, old_size, &new_size); + new_seg = mseg_recreate(ma, flags, (void *) seg, old_size, &new_size); if (!new_seg) new_size = old_size; } @@ -910,7 +837,7 @@ mseg_realloc(ErtsMsegAllctr_t *ma, ErtsAlcType_t atype, void *seg, ASSERT(!MSEG_FLG_IS_2POW(flags) || IS_2POW(new_size)); *new_size_p = new_size; - ERTS_MSEG_REALLOC_STAT(mk, old_size, new_size); + ERTS_MSEG_REALLOC_STAT(ma, old_size, new_size); return new_seg; } @@ -1070,7 +997,8 @@ info_options(ErtsMsegAllctr_t *ma, { Eterm res; - res = erts_mmap_info_options(prefix, print_to_p, print_to_arg, hpp, szp); + res = erts_mmap_info_options(&erts_dflt_mmapper, + prefix, print_to_p, print_to_arg, hpp, szp); if (print_to_p) { int to = *print_to_p; @@ -1180,63 +1108,63 @@ info_calls(ErtsMsegAllctr_t *ma, int *print_to_p, void *print_to_arg, Uint **hpp } static Eterm -info_status(ErtsMsegAllctr_t *ma, MemKind* mk, int *print_to_p, void *print_to_arg, +info_status(ErtsMsegAllctr_t *ma, int *print_to_p, void *print_to_arg, int begin_new_max_period, Uint **hpp, Uint *szp) { Eterm res = THE_NON_VALUE; - if (mk->segments.max_ever.no < mk->segments.max.no) - mk->segments.max_ever.no = mk->segments.max.no; - if (mk->segments.max_ever.sz < mk->segments.max.sz) - mk->segments.max_ever.sz = mk->segments.max.sz; + if (ma->segments.max_ever.no < ma->segments.max.no) + ma->segments.max_ever.no = ma->segments.max.no; + if (ma->segments.max_ever.sz < ma->segments.max.sz) + ma->segments.max_ever.sz = ma->segments.max.sz; if (print_to_p) { int to = *print_to_p; void *arg = print_to_arg; - erts_print(to, arg, 
"cached_segments: %beu\n", mk->cache_size); - erts_print(to, arg, "cache_hits: %beu\n", mk->cache_hits); + erts_print(to, arg, "cached_segments: %beu\n", ma->cache_size); + erts_print(to, arg, "cache_hits: %beu\n", ma->cache_hits); erts_print(to, arg, "segments: %beu %beu %beu\n", - mk->segments.current.no, mk->segments.max.no, mk->segments.max_ever.no); + ma->segments.current.no, ma->segments.max.no, ma->segments.max_ever.no); erts_print(to, arg, "segments_size: %beu %beu %beu\n", - mk->segments.current.sz, mk->segments.max.sz, mk->segments.max_ever.sz); + ma->segments.current.sz, ma->segments.max.sz, ma->segments.max_ever.sz); erts_print(to, arg, "segments_watermark: %beu\n", - mk->segments.current.watermark); + ma->segments.current.watermark); } if (hpp || szp) { res = NIL; add_2tup(hpp, szp, &res, am.segments_watermark, - bld_unstable_uint(hpp, szp, mk->segments.current.watermark)); + bld_unstable_uint(hpp, szp, ma->segments.current.watermark)); add_4tup(hpp, szp, &res, am.segments_size, - bld_unstable_uint(hpp, szp, mk->segments.current.sz), - bld_unstable_uint(hpp, szp, mk->segments.max.sz), - bld_unstable_uint(hpp, szp, mk->segments.max_ever.sz)); + bld_unstable_uint(hpp, szp, ma->segments.current.sz), + bld_unstable_uint(hpp, szp, ma->segments.max.sz), + bld_unstable_uint(hpp, szp, ma->segments.max_ever.sz)); add_4tup(hpp, szp, &res, am.segments, - bld_unstable_uint(hpp, szp, mk->segments.current.no), - bld_unstable_uint(hpp, szp, mk->segments.max.no), - bld_unstable_uint(hpp, szp, mk->segments.max_ever.no)); + bld_unstable_uint(hpp, szp, ma->segments.current.no), + bld_unstable_uint(hpp, szp, ma->segments.max.no), + bld_unstable_uint(hpp, szp, ma->segments.max_ever.no)); add_2tup(hpp, szp, &res, am.cache_hits, - bld_unstable_uint(hpp, szp, mk->cache_hits)); + bld_unstable_uint(hpp, szp, ma->cache_hits)); add_2tup(hpp, szp, &res, am.cached_segments, - bld_unstable_uint(hpp, szp, mk->cache_size)); + bld_unstable_uint(hpp, szp, ma->cache_size)); } if 
(begin_new_max_period) { - mk->segments.max.no = mk->segments.current.no; - mk->segments.max.sz = mk->segments.current.sz; + ma->segments.max.no = ma->segments.current.no; + ma->segments.max.sz = ma->segments.current.sz; } return res; } -static Eterm info_memkind(ErtsMsegAllctr_t *ma, MemKind* mk, int *print_to_p, void *print_to_arg, +static Eterm info_memkind(ErtsMsegAllctr_t *ma, int *print_to_p, void *print_to_arg, int begin_max_per, Uint **hpp, Uint *szp) { Eterm res = THE_NON_VALUE; @@ -1244,15 +1172,15 @@ static Eterm info_memkind(ErtsMsegAllctr_t *ma, MemKind* mk, int *print_to_p, vo Eterm values[3]; if (print_to_p) { - erts_print(*print_to_p, print_to_arg, "memory kind: %s\n", mk->name); + erts_print(*print_to_p, print_to_arg, "memory kind: %s\n", "all memory"); } if (hpp || szp) { atoms[0] = am.name; atoms[1] = am.status; atoms[2] = am.calls; - values[0] = erts_bld_string(hpp, szp, mk->name); + values[0] = erts_bld_string(hpp, szp, "all memory"); } - values[1] = info_status(ma, mk, print_to_p, print_to_arg, begin_max_per, hpp, szp); + values[1] = info_status(ma, print_to_p, print_to_arg, begin_max_per, hpp, szp); values[2] = info_calls(ma, print_to_p, print_to_arg, hpp, szp); if (hpp || szp) @@ -1261,7 +1189,6 @@ static Eterm info_memkind(ErtsMsegAllctr_t *ma, MemKind* mk, int *print_to_p, vo return res; } - static Eterm info_version(ErtsMsegAllctr_t *ma, int *print_to_p, void *print_to_arg, Uint **hpp, Uint *szp) { @@ -1326,12 +1253,7 @@ erts_mseg_info(int ix, ERTS_MSEG_LOCK(ma); ERTS_DBG_MA_CHK_THR_ACCESS(ma); -#if HALFWORD_HEAP - values[n++] = info_memkind(ma, &ma->low_mem, print_to_p, print_to_arg, begin_max_per, hpp, szp); - values[n++] = info_memkind(ma, &ma->hi_mem, print_to_p, print_to_arg, begin_max_per, hpp, szp); -#else - values[n++] = info_memkind(ma, &ma->the_mem, print_to_p, print_to_arg, begin_max_per, hpp, szp); -#endif + values[n++] = info_memkind(ma, print_to_p, print_to_arg, begin_max_per, hpp, szp); if (hpp || szp) res = 
bld_2tup_list(hpp, szp, n, atoms, values); @@ -1408,13 +1330,10 @@ Uint erts_mseg_no(const ErtsMsegOpt_t *opt) { ErtsMsegAllctr_t *ma = ERTS_MSEG_ALLCTR_OPT(opt); - MemKind* mk; - Uint n = 0; + Uint n; ERTS_MSEG_LOCK(ma); ERTS_DBG_MA_CHK_THR_ACCESS(ma); - for (mk=ma->mk_list; mk; mk=mk->next) { - n += mk->segments.current.no; - } + n = ma->segments.current.no; ERTS_MSEG_UNLOCK(ma); return n; } @@ -1426,16 +1345,16 @@ erts_mseg_unit_size(void) } -static void mem_kind_init(ErtsMsegAllctr_t *ma, MemKind* mk, const char* name) +static void mem_cache_init(ErtsMsegAllctr_t *ma) { int i; /* Clear all cache headers */ - mseg_cache_clear_node(&(mk->cache_free)); - mseg_cache_clear_node(&(mk->cache_unpowered_node)); + mseg_cache_clear_node(&(ma->cache_free)); + mseg_cache_clear_node(&(ma->cache_unpowered_node)); for (i = 0; i < CACHE_AREAS; i++) { - mseg_cache_clear_node(&(mk->cache_powered_node[i])); + mseg_cache_clear_node(&(ma->cache_powered_node[i])); } /* Populate cache free list */ @@ -1443,25 +1362,20 @@ static void mem_kind_init(ErtsMsegAllctr_t *ma, MemKind* mk, const char* name) ASSERT(ma->max_cache_size <= MAX_CACHE_SIZE); for (i = 0; i < ma->max_cache_size; i++) { - mseg_cache_clear_node(&(mk->cache[i])); - erts_circleq_push_head(&(mk->cache_free), &(mk->cache[i])); + mseg_cache_clear_node(&(ma->cache[i])); + erts_circleq_push_head(&(ma->cache_free), &(ma->cache[i])); } - mk->cache_size = 0; - mk->cache_hits = 0; - - mk->segments.current.watermark = 0; - mk->segments.current.no = 0; - mk->segments.current.sz = 0; - mk->segments.max.no = 0; - mk->segments.max.sz = 0; - mk->segments.max_ever.no = 0; - mk->segments.max_ever.sz = 0; - - mk->ma = ma; - mk->name = name; - mk->next = ma->mk_list; - ma->mk_list = mk; + ma->cache_size = 0; + ma->cache_hits = 0; + + ma->segments.current.watermark = 0; + ma->segments.current.no = 0; + ma->segments.current.sz = 0; + ma->segments.max.no = 0; + ma->segments.max.sz = 0; + ma->segments.max_ever.no = 0; + ma->segments.max_ever.sz 
= 0; } void @@ -1488,16 +1402,7 @@ erts_mseg_init(ErtsMsegInit_t *init) erts_mtx_init(&init_atoms_mutex, "mseg_init_atoms"); -#if HALFWORD_HEAP - if (sizeof(void *) != 8) - erl_exit(-1,"Halfword emulator cannot be run in 32bit mode"); - - init->mmap.virtual_range.start = (char *) sbrk(0); - init->mmap.virtual_range.end = (char *) 0x100000000UL; - init->mmap.sco = 0; -#endif - - erts_mmap_init(&init->mmap); + erts_mmap_init(&erts_dflt_mmapper, &init->dflt_mmap); if (!IS_2POW(GET_PAGE_SIZE)) erl_exit(ERTS_ABORT_EXIT, "erts_mseg: Unexpected page_size %beu\n", GET_PAGE_SIZE); @@ -1529,14 +1434,7 @@ erts_mseg_init(ErtsMsegInit_t *init) if (ma->max_cache_size > MAX_CACHE_SIZE) ma->max_cache_size = MAX_CACHE_SIZE; - ma->mk_list = NULL; - -#if HALFWORD_HEAP - mem_kind_init(ma, &ma->low_mem, "low memory"); - mem_kind_init(ma, &ma->hi_mem, "high memory"); -#else - mem_kind_init(ma, &ma->the_mem, "all memory"); -#endif + mem_cache_init(ma); sys_memzero((void *) &ma->calls, sizeof(ErtsMsegCalls)); } @@ -1545,13 +1443,8 @@ erts_mseg_init(ErtsMsegInit_t *init) static ERTS_INLINE Uint tot_cache_size(ErtsMsegAllctr_t *ma) { - MemKind* mk; - Uint sz = 0; ERTS_DBG_MA_CHK_THR_ACCESS(ma); - for (mk=ma->mk_list; mk; mk=mk->next) { - sz += mk->cache_size; - } - return sz; + return ma->cache_size; } /* diff --git a/erts/emulator/sys/common/erl_mseg.h b/erts/emulator/sys/common/erl_mseg.h index ba04e919fc..2acd8f8505 100644 --- a/erts/emulator/sys/common/erl_mseg.h +++ b/erts/emulator/sys/common/erl_mseg.h @@ -42,16 +42,6 @@ #if ERTS_HAVE_MSEG_SUPER_ALIGNED # define MSEG_ALIGN_BITS ERTS_MMAP_SUPERALIGNED_BITS -#else -/* If we don't use super aligned multiblock carriers - * we will mmap with page size alignment (and thus use corresponding - * align bits). - * - * Current implementation needs this to be a constant and - * only uses this for user dev testing so setting page size - * to 4096 (12 bits) is fine. 
- */ -# define MSEG_ALIGN_BITS (12) #endif #if HAVE_ERTS_MSEG @@ -69,7 +59,8 @@ typedef struct { Uint rmcbf; Uint mcs; Uint nos; - ErtsMMapInit mmap; + ErtsMMapInit dflt_mmap; + ErtsMMapInit literal_mmap; } ErtsMsegInit_t; #define ERTS_MSEG_INIT_DEFAULT_INITIALIZER \ @@ -78,7 +69,8 @@ typedef struct { 20, /* rmcbf: Relative max cache bad fit */ \ 10, /* mcs: Max cache size */ \ 1000, /* cci: Cache check interval */ \ - ERTS_MMAP_INIT_DEFAULT_INITER \ + ERTS_MMAP_INIT_DEFAULT_INITER, \ + ERTS_MMAP_INIT_LITERAL_INITER \ } typedef struct { @@ -87,9 +79,6 @@ typedef struct { UWord abs_shrink_th; UWord rel_shrink_th; int sched_spec; -#if HALFWORD_HEAP - int low_mem; -#endif } ErtsMsegOpt_t; extern const ErtsMsegOpt_t erts_mseg_default_opt; diff --git a/erts/emulator/sys/common/erl_poll.h b/erts/emulator/sys/common/erl_poll.h index bd3a46ef0f..bc2c681876 100644 --- a/erts/emulator/sys/common/erl_poll.h +++ b/erts/emulator/sys/common/erl_poll.h @@ -93,7 +93,7 @@ # if defined(ERTS_USE_POLL) # undef ERTS_POLL_USE_POLL # define ERTS_POLL_USE_POLL 1 -# elif !defined(__WIN32__) && !defined(__OSE__) +# elif !defined(__WIN32__) # undef ERTS_POLL_USE_SELECT # define ERTS_POLL_USE_SELECT 1 # endif @@ -104,31 +104,13 @@ typedef Uint32 ErtsPollEvents; #undef ERTS_POLL_EV_E2N -#if defined(__WIN32__) || defined(__OSE__) /* --- win32 or ose -------- */ +#if defined(__WIN32__) /* --- win32 --------------------------------------- */ #define ERTS_POLL_EV_IN 1 #define ERTS_POLL_EV_OUT 2 #define ERTS_POLL_EV_ERR 4 #define ERTS_POLL_EV_NVAL 8 -#ifdef __OSE__ - -typedef struct ErtsPollOseMsgList_ { - struct ErtsPollOseMsgList_ *next; - union SIGNAL *data; -} ErtsPollOseMsgList; - -struct erts_sys_fd_type { - SIGSELECT signo; - ErlDrvOseEventId id; - ErtsPollOseMsgList *msgs; - ErlDrvOseEventId (*resolve_signal)(union SIGNAL *sig); - ethr_mutex mtx; - void *extra; -}; - -#endif - #elif ERTS_POLL_USE_EPOLL /* --- epoll ------------------------------- */ #include <sys/epoll.h> diff --git 
a/erts/emulator/sys/ose/beam.lmconf b/erts/emulator/sys/ose/beam.lmconf deleted file mode 100644 index 4ad46b01d9..0000000000 --- a/erts/emulator/sys/ose/beam.lmconf +++ /dev/null @@ -1,26 +0,0 @@ -OSE_LM_STACK_SIZES=256,512,1024,2048,4096,8192,16384,65536 -OSE_LM_SIGNAL_SIZES=31,63,127,255,1023,4095,16383,65535 -OSE_LM_POOL_SIZE=0x200000 -OSE_LM_MAIN_NAME=main -OSE_LM_MAIN_STACK_SIZE=0xF000 -OSE_LM_MAIN_PRIORITY=20 -## Has to be of a type that allows MAM -OSE_LM_PROGRAM_TYPE=APP_RAM -OSE_LM_DATA_INIT=YES -OSE_LM_BSS_INIT=YES -OSE_LM_EXEC_MODEL=SHARED -HEAP_MAX_SIZE=1000000000 -HEAP_SMALL_BUF_INIT_SIZE=20971520 -HEAP_LARGE_BUF_THRESHOLD=16000000 -HEAP_LOCK_TYPE=2 - -ERTS_DEFAULT_PRIO=24 -ERTS_SCHEDULER_PRIO=24 -ERTS_ASYNC_PRIO=22 -ERTS_AUX_PRIO=24 -ERTS_SYS_MSG_DISPATCHER_PRIO=21 - -# Setting the environment variable EFS_RESOLVE_TMO on the block to 0. -# This will eliminiate delays when trying to open files on not mounted -# volumes. -EFS_RESOLVE_TMO=0 diff --git a/erts/emulator/sys/ose/driver_int.h b/erts/emulator/sys/ose/driver_int.h deleted file mode 100644 index 4a5b7171d1..0000000000 --- a/erts/emulator/sys/ose/driver_int.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * %CopyrightBegin% - * - * Copyright Ericsson AB 1997-2009. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * - * %CopyrightEnd% - */ -/* - * System dependant driver declarations - */ - -#ifndef __DRIVER_INT_H__ -#define __DRIVER_INT_H__ - -#ifdef HAVE_SYS_UIO_H -#include <sys/types.h> -#include <sys/uio.h> - -typedef struct iovec SysIOVec; - -#else - -typedef struct { - char* iov_base; - int iov_len; -} SysIOVec; - -#endif - -#endif diff --git a/erts/emulator/sys/ose/erl_main.c b/erts/emulator/sys/ose/erl_main.c deleted file mode 100644 index 877e85f43a..0000000000 --- a/erts/emulator/sys/ose/erl_main.c +++ /dev/null @@ -1,54 +0,0 @@ -/* - * %CopyrightBegin% - * - * Copyright Ericsson AB 2000-2009. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * %CopyrightEnd% - */ - -#ifdef HAVE_CONFIG_H -# include "config.h" -#endif -#include <stdlib.h> - -#include "sys.h" -#include "erl_vm.h" -#include "global.h" -#include "ose.h" - -int -main(int argc, char **argv) { - - (void)stdin;(void)stdout;(void)stderr; - - /* When starting using pm_create -c ARGV="-- -root ..", argv[0] is the first - part of ARGV and not the name of the executable. So we shuffle some - pointers here to make erl_start happy. 
*/ - if (argv[0][0] == '-') { - int i; - char **tmp_argv = malloc(sizeof(char*)*(argc+1)); - for (i = 0; i < argc; i++) - tmp_argv[i+1] = argv[i]; - tmp_argv[0] = "beam"; - erl_start(argc+1,tmp_argv); - free(tmp_argv); - } else { - erl_start(argc,argv); - } - - stop(current_process()); - - return 0; -} diff --git a/erts/emulator/sys/ose/erl_ose_sys.h b/erts/emulator/sys/ose/erl_ose_sys.h deleted file mode 100644 index d0cd3180bf..0000000000 --- a/erts/emulator/sys/ose/erl_ose_sys.h +++ /dev/null @@ -1,356 +0,0 @@ -/* - * %CopyrightBegin% - * - * Copyright Ericsson AB 1997-2011. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * %CopyrightEnd% - * - * This file handles differences between different Unix systems. - * This should be the only place with conditional compilation - * depending on the type of OS. 
- */ - -#ifndef _ERL_OSE_SYS_H -#define _ERL_OSE_SYS_H - -#include "ose.h" -#undef NIL -#include "ramlog.h" -#include "erts.sig" - -#include "fcntl.h" -#include "math.h" -#include "stdio.h" -#include "stdlib.h" -#include "string.h" -#include "sys/param.h" -#include "sys/time.h" -#include "time.h" -#include "dirent.h" -#include "ethread.h" - -/* FIXME: configuration options */ -#define ERTS_SCHED_MIN_SPIN 1 -#define ERTS_SCHED_ONLY_POLL_SCHED_1 1 -#define ERTS_SCHED_FAIR 1 -#define NO_SYSCONF 1 -#define OPEN_MAX FOPEN_MAX - -#define MAP_ANON MAP_ANONYMOUS - -#ifndef HAVE_MMAP -# define HAVE_MMAP 0 -#endif - -#if HAVE_MMAP -# include "sys/mman.h" -#endif - -/* - * Min number of async threads - */ -#define ERTS_MIN_NO_OF_ASYNC_THREADS 1 - -/* - * Our own type of "FD's" - */ -#define ERTS_SYS_FD_TYPE struct erts_sys_fd_type* -#define NO_FSTAT_ON_SYS_FD_TYPE 1 /* They are signals, not files */ - -#include "sys/stat.h" - -/* FIXME mremap is not defined in OSE - POSIX issue */ -extern void *mremap (void *__addr, size_t __old_len, size_t __new_len, - int __flags, ...); - -/* FIXME: mremap constants */ -#define MREMAP_MAYMOVE 1 -#define MREMAP_FIXED 2 - -typedef void *GETENV_STATE; - -/* -** For the erl_timer_sup module. -*/ -#define HAVE_GETHRTIME - -typedef long long SysHrTime; -extern SysHrTime sys_gethrtime(void); - -void sys_init_hrtime(void); - -typedef time_t erts_time_t; - -typedef struct timeval SysTimeval; - -#define sys_gettimeofday(Arg) ((void) gettimeofday((Arg), NULL)) - -typedef struct { - clock_t tms_utime; - clock_t tms_stime; - clock_t tms_cutime; - clock_t tms_cstime; -} SysTimes; - -extern int erts_ticks_per_sec; - -#define SYS_CLK_TCK (erts_ticks_per_sec) - -extern clock_t sys_times(SysTimes *buffer); - -/* No use in having other resolutions than 1 Ms. 
*/ -#define SYS_CLOCK_RESOLUTION 1 - -#define erts_isfinite finite - -#ifdef NO_FPE_SIGNALS - -#define erts_get_current_fp_exception() NULL -#ifdef ERTS_SMP -#define erts_thread_init_fp_exception() do{}while(0) -#endif -# define __ERTS_FP_CHECK_INIT(fpexnp) do {} while (0) -# define __ERTS_FP_ERROR(fpexnp, f, Action) if (!finite(f)) { Action; } else {} -# define __ERTS_FP_ERROR_THOROUGH(fpexnp, f, Action) __ERTS_FP_ERROR(fpexnp, f, Action) -# define __ERTS_SAVE_FP_EXCEPTION(fpexnp) -# define __ERTS_RESTORE_FP_EXCEPTION(fpexnp) - -#define erts_sys_block_fpe() 0 -#define erts_sys_unblock_fpe(x) do{}while(0) - -#else /* !NO_FPE_SIGNALS */ - -extern volatile unsigned long *erts_get_current_fp_exception(void); -#ifdef ERTS_SMP -extern void erts_thread_init_fp_exception(void); -#endif -# if (defined(__i386__) || defined(__x86_64__)) && defined(__GNUC__) -# define erts_fwait(fpexnp,f) \ - __asm__ __volatile__("fwait" : "=m"(*(fpexnp)) : "m"(f)) -# elif (defined(__powerpc__) || defined(__ppc__)) && defined(__GNUC__) -# define erts_fwait(fpexnp,f) \ - __asm__ __volatile__("" : "=m"(*(fpexnp)) : "fm"(f)) -# elif defined(__sparc__) && defined(__linux__) && defined(__GNUC__) -# define erts_fwait(fpexnp,f) \ - __asm__ __volatile__("" : "=m"(*(fpexnp)) : "em"(f)) -# else -# define erts_fwait(fpexnp,f) \ - __asm__ __volatile__("" : "=m"(*(fpexnp)) : "g"(f)) -# endif -# if (defined(__i386__) || defined(__x86_64__)) && defined(__GNUC__) - extern void erts_restore_fpu(void); -# else -# define erts_restore_fpu() /*empty*/ -# endif -# if (!defined(__GNUC__) || \ - (__GNUC__ < 2) || \ - (__GNUC__ == 2 && __GNUC_MINOR < 96)) && \ - !defined(__builtin_expect) -# define __builtin_expect(x, expected_value) (x) -# endif -static __inline__ int erts_check_fpe(volatile unsigned long *fp_exception, double f) -{ - erts_fwait(fp_exception, f); - if (__builtin_expect(*fp_exception == 0, 1)) - return 0; - *fp_exception = 0; - erts_restore_fpu(); - return 1; -} -# undef erts_fwait -# undef 
erts_restore_fpu -extern void erts_fp_check_init_error(volatile unsigned long *fp_exception); -static __inline__ void __ERTS_FP_CHECK_INIT(volatile unsigned long *fp_exception) -{ - if (__builtin_expect(*fp_exception == 0, 1)) - return; - erts_fp_check_init_error(fp_exception); -} -# define __ERTS_FP_ERROR(fpexnp, f, Action) do { if (erts_check_fpe((fpexnp),(f))) { Action; } } while (0) -# define __ERTS_SAVE_FP_EXCEPTION(fpexnp) unsigned long old_erl_fp_exception = *(fpexnp) -# define __ERTS_RESTORE_FP_EXCEPTION(fpexnp) \ - do { *(fpexnp) = old_erl_fp_exception; } while (0) - /* This is for library calls where we don't trust the external - code to always throw floating-point exceptions on errors. */ -static __inline__ int erts_check_fpe_thorough(volatile unsigned long *fp_exception, double f) -{ - return erts_check_fpe(fp_exception, f) || !finite(f); -} -# define __ERTS_FP_ERROR_THOROUGH(fpexnp, f, Action) \ - do { if (erts_check_fpe_thorough((fpexnp),(f))) { Action; } } while (0) - -int erts_sys_block_fpe(void); -void erts_sys_unblock_fpe(int); - -#endif /* !NO_FPE_SIGNALS */ - -#define ERTS_FP_CHECK_INIT(p) __ERTS_FP_CHECK_INIT(&(p)->fp_exception) -#define ERTS_FP_ERROR(p, f, A) __ERTS_FP_ERROR(&(p)->fp_exception, f, A) -#define ERTS_FP_ERROR_THOROUGH(p, f, A) __ERTS_FP_ERROR_THOROUGH(&(p)->fp_exception, f, A) - -/* FIXME: force HAVE_GETPAGESIZE and stub getpagesize */ -#ifndef HAVE_GETPAGESIZE -#define HAVE_GETPAGESIZE 1 -#endif - -extern int getpagesize(void); - -#ifndef HZ -#define HZ 60 -#endif - -/* OSE5 doesn't provide limits.h so a number of macros should be - * added manually */ - -#ifndef CHAR_BIT -#define CHAR_BIT 8 -#endif - -/* Minimum and maximum values a `signed int' can hold. 
*/ -#ifndef INT_MAX -#define INT_MAX 2147483647 -#endif - -#ifndef INT_MIN -#define INT_MIN (-INT_MAX - 1) -#endif - -#ifndef UINT_MAX -# define UINT_MAX 4294967295U -#endif - -/* -static void erts_ose_sys_send(union SIGNAL **signal,PROCESS dst, - char* file,int line) { - SIGSELECT **ziggy = (SIGSELECT**)signal; - printf("%s:%d 0x%x Send signal 0x%x(0x%x) to 0x%x\r\n", - file,line,current_process(),ziggy[0][0],*ziggy,dst); - send(signal,dst); -} -#define send(signal,dst) erts_ose_sys_send(signal,dst,__FILE__,__LINE__) - -static void erts_ose_sys_send_w_sender(union SIGNAL **signal, - PROCESS sender,PROCESS dst, - char* file,int line) { - SIGSELECT **ziggy = (SIGSELECT**)signal; - printf("%s:%d 0x%x Send signal 0x%x(0x%x) to 0x%x as 0x%x\r\n", - file,line,current_process(),ziggy[0][0],*ziggy,dst,sender); - send_w_sender(signal,sender,dst); -} -#define send_w_sender(signal,sender,dst) \ - erts_ose_sys_send_w_sender(signal,sender,dst,__FILE__,__LINE__) - - -static union SIGNAL *erts_ose_sys_receive(SIGSELECT *sigsel, - char *file, - int line) { - SIGSELECT *sig; - int i; - - printf("%s:%d 0x%x receive({%d,",file,line,current_process(),sigsel[0]); - for (i = 1; i < sigsel[0]; i++) - printf("0x%x, ",sigsel[i]); - if (sigsel[0] != 0) - printf("0x%x",sigsel[i]); - printf("})\n"); - sig = (SIGSELECT*)receive(sigsel); - printf("%s:%d 0x%x got 0x%x from 0x%x\n",file,line,current_process(), - *sig,sender((union SIGNAL**)(&sig))); - return (union SIGNAL*)sig; -} -#define receive(SIGSEL) erts_ose_sys_receive(SIGSEL,__FILE__,__LINE__) - -static union SIGNAL *erts_ose_sys_receive_w_tmo(OSTIME tmo,SIGSELECT *sigsel, - char *file,int line) { - SIGSELECT *sig; - int i; - if (tmo == 0) { - sig = (SIGSELECT*)receive_w_tmo(tmo,sigsel); - if (sig != NULL) { - printf("%s:%d 0x%x receive_w_tmo(0,{%d,",file,line,current_process(), - sigsel[0]); - for (i = 1; i < sigsel[0]; i++) - printf("0x%x, ",sigsel[i]); - if (sigsel[0] != 0) - printf("0x%x",sigsel[i]); - printf("})\n"); - printf("%s:%d 
0x%x got 0x%x from 0x%x\n",file,line,current_process(), - *sig,sender((union SIGNAL**)(&sig))); - } - } else { - printf("%s:%d 0x%x receive_w_tmo(%u,{%d,",file,line,current_process(),tmo, - sigsel[0]); - for (i = 1; i < sigsel[0]; i++) - printf("0x%x, ",sigsel[i]); - if (sigsel[0] != 0) - printf("0x%x",sigsel[i]); - printf("})\n"); - sig = (SIGSELECT*)receive_w_tmo(tmo,sigsel); - printf("%s:%d 0x%x got ",file,line,current_process()); - if (sig == NULL) - printf("TIMEOUT\n"); - else - printf("0x%x from 0x%x\n",*sig,sender((union SIGNAL**)(&sig))); - } - - return (union SIGNAL*)sig; -} - -#define receive_w_tmo(tmo,sigsel) erts_ose_sys_receive_w_tmo(tmo,sigsel, \ - __FILE__,__LINE__) - -static union SIGNAL *erts_ose_sys_receive_fsem(OSTIME tmo,SIGSELECT *sigsel, - OSFSEMVAL fsem, - char *file,int line) { - SIGSELECT *sig; - int i; - if (tmo == 0) { - sig = (SIGSELECT*)receive_fsem(tmo,sigsel,fsem); - if (sig != NULL && sig != OS_RCV_FSEM) { - printf("%s:%d 0x%x receive_fsem(0,{%d,",file,line,current_process(), - sigsel[0]); - for (i = 1; i < sigsel[0]; i++) - printf("0x%x, ",sigsel[i]); - if (sigsel[0] != 0) - printf("0x%x",sigsel[i]); - printf("},%d)\n",fsem); - printf("%s:%d 0x%x got 0x%x from 0x%x\n",file,line,current_process(), - *sig,sender((union SIGNAL**)(&sig))); - } - } else { - printf("%s:%d 0x%x receive_fsem(%u,{%d,",file,line,current_process(),tmo, - sigsel[0]); - for (i = 1; i < sigsel[0]; i++) - printf("0x%x, ",sigsel[i]); - if (sigsel[0] != 0) - printf("0x%x",sigsel[i]); - printf("},%d)\n",fsem); - sig = (SIGSELECT*)receive_fsem(tmo,sigsel,fsem); - printf("%s:%d 0x%x got ",file,line,current_process()); - if (sig == NULL) - printf("TIMEOUT\n"); - else if (sig == OS_RCV_FSEM) - printf("FSEM\n"); - else - printf("0x%x from 0x%x\n",*sig,sender((union SIGNAL**)(&sig))); - } - - return (union SIGNAL*)sig; -} - -#define receive_fsem(tmo,sigsel,fsem) \ - erts_ose_sys_receive_fsem(tmo,sigsel,fsem,__FILE__,__LINE__) -*/ -#endif /* _ERL_OSE_SYS_H */ diff --git 
a/erts/emulator/sys/ose/erl_ose_sys_ddll.c b/erts/emulator/sys/ose/erl_ose_sys_ddll.c deleted file mode 100644 index 5051f7fcc1..0000000000 --- a/erts/emulator/sys/ose/erl_ose_sys_ddll.c +++ /dev/null @@ -1,127 +0,0 @@ -/* - * %CopyrightBegin% - * - * Copyright Ericsson AB 2006-2013. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * %CopyrightEnd% - */ - -/* - * Interface functions to the dynamic linker using dl* functions. - * (No support in OSE, we use static linkage instead) - */ - -#ifdef HAVE_CONFIG_H -# include "config.h" -#endif - -#include "sys.h" -#include "erl_vm.h" -#include "global.h" - - -void erl_sys_ddll_init(void) { -} - -/* - * Open a shared object - */ -int erts_sys_ddll_open(const char *full_name, void **handle, ErtsSysDdllError* err) -{ - return ERL_DE_ERROR_NO_DDLL_FUNCTIONALITY; -} - -int erts_sys_ddll_open_noext(char *dlname, void **handle, ErtsSysDdllError* err) -{ - return ERL_DE_ERROR_NO_DDLL_FUNCTIONALITY; -} - -/* - * Find a symbol in the shared object - */ -int erts_sys_ddll_sym2(void *handle, const char *func_name, void **function, - ErtsSysDdllError* err) -{ - return ERL_DE_ERROR_NO_DDLL_FUNCTIONALITY; -} - -/* XXX:PaN These two will be changed with new driver interface! */ - -/* - * Load the driver init function, might appear under different names depending on object arch... 
- */ - -int erts_sys_ddll_load_driver_init(void *handle, void **function) -{ - void *fn; - int res; - if ((res = erts_sys_ddll_sym2(handle, "driver_init", &fn, NULL)) != ERL_DE_NO_ERROR) { - res = erts_sys_ddll_sym2(handle, "_driver_init", &fn, NULL); - } - if (res == ERL_DE_NO_ERROR) { - *function = fn; - } - return res; -} - -int erts_sys_ddll_load_nif_init(void *handle, void **function, ErtsSysDdllError* err) -{ - void *fn; - int res; - if ((res = erts_sys_ddll_sym2(handle, "nif_init", &fn, err)) != ERL_DE_NO_ERROR) { - res = erts_sys_ddll_sym2(handle, "_nif_init", &fn, err); - } - if (res == ERL_DE_NO_ERROR) { - *function = fn; - } - return res; -} - -/* - * Call the driver_init function, whatever it's really called, simple on unix... -*/ -void *erts_sys_ddll_call_init(void *function) { - void *(*initfn)(void) = function; - return (*initfn)(); -} -void *erts_sys_ddll_call_nif_init(void *function) { - return erts_sys_ddll_call_init(function); -} - - - -/* - * Close a chared object - */ -int erts_sys_ddll_close2(void *handle, ErtsSysDdllError* err) -{ - return ERL_DE_ERROR_NO_DDLL_FUNCTIONALITY; -} - - -/* - * Return string that describes the (current) error - */ -char *erts_sys_ddll_error(int code) -{ - return "Unspecified error"; -} - -void erts_sys_ddll_free_error(ErtsSysDdllError* err) -{ - if (err->str != NULL) { - erts_free(ERTS_ALC_T_DDLL_TMP_BUF, err->str); - } -} diff --git a/erts/emulator/sys/ose/erl_poll.c b/erts/emulator/sys/ose/erl_poll.c deleted file mode 100644 index 5cee582a00..0000000000 --- a/erts/emulator/sys/ose/erl_poll.c +++ /dev/null @@ -1,818 +0,0 @@ -/* - * %CopyrightBegin% - * - * Copyright Ericsson AB 2006-2012. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * %CopyrightEnd% - */ - -/* - * Description: Poll interface suitable for ERTS on OSE with or without - * SMP support. - * - * The interface is currently implemented using: - * - receive + receive_fsem - * - * Author: Lukas Larsson - */ - -#ifdef HAVE_CONFIG_H -# include "config.h" -#endif - -#include "erl_thr_progress.h" -#include "erl_driver.h" -#include "erl_alloc.h" -#include "erl_poll.h" - -#define NOFILE 4096 - -/* - * Some debug macros - */ - -/* #define HARDDEBUG -#define HARDTRACE*/ -#ifdef HARDDEBUG -#ifdef HARDTRACE -#define HARDTRACEF(X, ...) { fprintf(stderr, X, __VA_ARGS__); fprintf(stderr,"\r\n"); } -#else -#define HARDTRACEF(...) -#endif - -#else -#define HARDTRACEF(X,...) -#define HARDDEBUGF(...) 
-#endif - -#if 0 -#define ERTS_POLL_DEBUG_PRINT -#endif - -#if defined(DEBUG) && 0 -#define HARD_DEBUG -#endif - -# define SEL_ALLOC erts_alloc -# define SEL_REALLOC realloc_wrap -# define SEL_FREE erts_free - -#ifdef ERTS_SMP - -#define ERTS_POLLSET_LOCK(PS) \ - erts_smp_mtx_lock(&(PS)->mtx) -#define ERTS_POLLSET_UNLOCK(PS) \ - erts_smp_mtx_unlock(&(PS)->mtx) - -#else - -#define ERTS_POLLSET_LOCK(PS) -#define ERTS_POLLSET_UNLOCK(PS) - -#endif - -/* - * --- Data types ------------------------------------------------------------ - */ - -union SIGNAL { - SIGSELECT sig_no; -}; - -typedef struct erts_sigsel_item_ ErtsSigSelItem; - -struct erts_sigsel_item_ { - ErtsSigSelItem *next; - ErtsSysFdType fd; - ErtsPollEvents events; -}; - -typedef struct erts_sigsel_info_ ErtsSigSelInfo; - -struct erts_sigsel_info_ { - ErtsSigSelInfo *next; - SIGSELECT signo; - ErlDrvOseEventId (*decode)(union SIGNAL* sig); - ErtsSigSelItem *fds; -}; - -struct ErtsPollSet_ { - SIGSELECT *sigs; - ErtsSigSelInfo *info; - Uint sig_count; - Uint item_count; - PROCESS interrupt; - erts_atomic32_t wakeup_state; - erts_atomic64_t timeout_time; -#ifdef ERTS_SMP - erts_smp_mtx_t mtx; -#endif -}; - -static int max_fds = -1; - -static ERTS_INLINE void -init_timeout_time(ErtsPollSet ps) -{ - erts_atomic64_init_nob(&ps->timeout_time, - (erts_aint64_t) ERTS_MONOTONIC_TIME_MAX); -} - -static ERTS_INLINE void -set_timeout_time(ErtsPollSet ps, ErtsMonotonicTime time) -{ - erts_atomic64_set_relb(&ps->timeout_time, - (erts_aint64_t) time); -} - -static ERTS_INLINE ErtsMonotonicTime -get_timeout_time(ErtsPollSet ps) -{ - return (ErtsMonotonicTime) erts_atomic64_read_acqb(&ps->timeout_time); -} - -#define ERTS_POLL_NOT_WOKEN ((erts_aint32_t) (1 << 0)) -#define ERTS_POLL_WOKEN_INTR ((erts_aint32_t) (1 << 1)) -#define ERTS_POLL_WOKEN_TIMEDOUT ((erts_aint32_t) (1 << 2)) -#define ERTS_POLL_WOKEN_IO_READY ((erts_aint32_t) (1 << 3)) -#define ERTS_POLL_SLEEPING ((erts_aint32_t) (1 << 4)) - -/* signal list prototypes */ 
-static ErtsSigSelInfo *get_sigsel_info(ErtsPollSet ps, SIGSELECT signo); -static ErtsSigSelItem *get_sigsel_item(ErtsPollSet ps, ErtsSysFdType fd); -static ErtsSigSelInfo *add_sigsel_info(ErtsPollSet ps, ErtsSysFdType fd, - ErlDrvOseEventId (*decode)(union SIGNAL* sig)); -static ErtsSigSelItem *add_sigsel_item(ErtsPollSet ps, ErtsSysFdType fd, - ErlDrvOseEventId (*decode)(union SIGNAL* sig)); -static int del_sigsel_info(ErtsPollSet ps, ErtsSigSelInfo *info); -static int del_sigsel_item(ErtsPollSet ps, ErtsSigSelItem *item); -static int update_sigsel(ErtsPollSet ps); - -static ErtsSigSelInfo * -get_sigsel_info(ErtsPollSet ps, SIGSELECT signo) { - ErtsSigSelInfo *curr = ps->info; - while (curr != NULL) { - if (curr->signo == signo) - return curr; - curr = curr->next; - } - return NULL; -} - -static ErtsSigSelItem * -get_sigsel_item(ErtsPollSet ps, ErtsSysFdType fd) { - ErtsSigSelInfo *info = get_sigsel_info(ps,fd->signo); - ErtsSigSelItem *curr; - - if (info == NULL) - return NULL; - - curr = info->fds; - - while (curr != NULL) { - if (curr->fd->id == fd->id) { - ASSERT(curr->fd->signo == fd->signo); - return curr; - } - curr = curr->next; - } - return NULL; -} - -static ErtsSigSelInfo * -add_sigsel_info(ErtsPollSet ps, ErtsSysFdType fd, - ErlDrvOseEventId (*decode)(union SIGNAL* sig)) { - ErtsSigSelInfo *info = SEL_ALLOC(ERTS_ALC_T_POLLSET, - sizeof(ErtsSigSelInfo)); - info->next = ps->info; - info->fds = NULL; - info->signo = fd->signo; - info->decode = decode; - ps->info = info; - ps->sig_count++; - return info; -} - -static ErtsSigSelItem * -add_sigsel_item(ErtsPollSet ps, ErtsSysFdType fd, - ErlDrvOseEventId (*decode)(union SIGNAL* sig)) { - ErtsSigSelInfo *info = get_sigsel_info(ps,fd->signo); - ErtsSigSelItem *item = SEL_ALLOC(ERTS_ALC_T_POLLSET, - sizeof(ErtsSigSelItem)); - if (info == NULL) - info = add_sigsel_info(ps, fd, decode); - if (info->decode != decode) { - erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf(); - erts_dsprintf(dsbufp, 
"erts_poll_control() inconsistency: multiple resolve_signal functions for same signal (%d)\n", - fd->signo); - erts_send_error_to_logger_nogl(dsbufp); - } - ASSERT(info->decode == decode); - item->next = info->fds; - item->fd = fd; - item->events = 0; - info->fds = item; - ps->item_count++; - return item; -} - -static int del_sigsel_info(ErtsPollSet ps, ErtsSigSelInfo *info) { - ErtsSigSelInfo *curr, *prev; - - if (ps->info == info) { - ps->info = ps->info->next; - } else { - curr = ps->info->next; - prev = ps->info; - - while (curr != info) { - if (curr == NULL) - return 1; - prev = curr; - curr = curr->next; - } - prev->next = curr->next; - } - - ps->sig_count--; - SEL_FREE(ERTS_ALC_T_POLLSET, info); - return 0; -} - -static int del_sigsel_item(ErtsPollSet ps, ErtsSigSelItem *item) { - ErtsSigSelInfo *info = get_sigsel_info(ps,item->fd->signo); - ErtsSigSelItem *curr, *prev; - - ps->item_count--; - ASSERT(ps->item_count >= 0); - - if (info->fds == item) { - info->fds = info->fds->next; - SEL_FREE(ERTS_ALC_T_POLLSET,item); - if (info->fds == NULL) - return del_sigsel_info(ps,info); - return 0; - } - - curr = info->fds->next; - prev = info->fds; - - while (curr != item) { - if (curr == NULL) { - /* We did not find an item to delete so we have to - * increment item count again. 
- */ - ps->item_count++; - return 1; - } - prev = curr; - curr = curr->next; - } - prev->next = curr->next; - SEL_FREE(ERTS_ALC_T_POLLSET,item); - return 0; -} - -#ifdef ERTS_SMP - -static void update_redir_tables(ErtsPollSet ps) { - struct OS_redir_entry *redir_table; - PROCESS sched_1 = ERTS_SCHEDULER_IX(0)->tid.id; - int i; - redir_table = SEL_ALLOC(ERTS_ALC_T_POLLSET, - sizeof(struct OS_redir_entry)*(ps->sig_count+1)); - - redir_table[0].sig = ps->sig_count+1; - redir_table[0].pid = 0; - - for (i = 1; i < ps->sig_count+1; i++) { - redir_table[i].sig = ps->sigs[i]; - redir_table[i].pid = sched_1; - } - - for (i = 1; i < erts_no_schedulers; i++) { - ErtsSchedulerData *esdp = ERTS_SCHEDULER_IX(i); - set_redirection(esdp->tid.id,redir_table); - } - - SEL_FREE(ERTS_ALC_T_POLLSET,redir_table); -} - -#endif - -static int update_sigsel(ErtsPollSet ps) { - ErtsSigSelInfo *info = ps->info; - - int i; - - if (ps->sigs != NULL) - SEL_FREE(ERTS_ALC_T_POLLSET,ps->sigs); - - if (ps->sig_count == 0) { - /* If there are no signals we place a non-valid signal to make sure that - * we do not trigger on a any unrelated signals which are sent to the - * process. 
- */ - ps->sigs = SEL_ALLOC(ERTS_ALC_T_POLLSET,sizeof(SIGSELECT)*(2)); - ps->sigs[0] = 1; - ps->sigs[1] = ERTS_SIGNAL_INVALID; - return 0; - } - - ps->sigs = SEL_ALLOC(ERTS_ALC_T_POLLSET,sizeof(SIGSELECT)*(ps->sig_count+1)); - ps->sigs[0] = ps->sig_count; - - for (i = 1; info != NULL; i++, info = info->next) - ps->sigs[i] = info->signo; - -#ifdef ERTS_SMP - update_redir_tables(ps); -#endif - - return 0; -} - -static ERTS_INLINE void -wake_poller(ErtsPollSet ps) -{ - erts_aint32_t wakeup_state; - - ERTS_THR_MEMORY_BARRIER; - wakeup_state = erts_atomic32_read_nob(&ps->wakeup_state); - while (wakeup_state != ERTS_POLL_WOKEN_IO_READY - && wakeup_state != ERTS_POLL_WOKEN_INTR) { - erts_aint32_t act = erts_atomic32_cmpxchg_nob(&ps->wakeup_state, - ERTS_POLL_WOKEN_INTR, - wakeup_state); - if (act == wakeup_state) { - wakeup_state = act; - break; - } - wakeup_state = act; - } - if (wakeup_state == ERTS_POLL_SLEEPING) { - /* - * Since we don't know the internals of signal_fsem() we issue - * a memory barrier as a safety precaution ensuring that - * the store we just made to wakeup_state wont be reordered - * with loads in signal_fsem(). 
- */ - ERTS_THR_MEMORY_BARRIER; - signal_fsem(ps->interrupt); - } -} - -static ERTS_INLINE void -reset_interrupt(ErtsPollSet ps) -{ - /* We need to keep io-ready if set */ - erts_aint32_t wakeup_state = erts_atomic32_read_nob(&ps->wakeup_state); - while (wakeup_state != ERTS_POLL_NOT_WOKEN && - wakeup_state != ERTS_POLL_SLEEPING) { - erts_aint32_t act = erts_atomic32_cmpxchg_nob(&ps->wakeup_state, - ERTS_POLL_NOT_WOKEN, - wakeup_state); - if (wakeup_state == act) - break; - wakeup_state = act; - } - ERTS_THR_MEMORY_BARRIER; -} - -static ERTS_INLINE void -set_interrupt(ErtsPollSet ps) -{ - wake_poller(ps); -} - -void erts_poll_interrupt(ErtsPollSet ps,int set) { - HARDTRACEF("erts_poll_interrupt called!\n"); - - if (!set) - reset_interrupt(ps); - else - set_interrupt(ps); - -} - -void erts_poll_interrupt_timed(ErtsPollSet ps, - int set, - ErtsTimeoutTime timeout_time) { - HARDTRACEF("erts_poll_interrupt_timed called!\n"); - - if (!set) - reset_interrupt(ps); - else if (get_timeout_time(ps) > timeout_time) - set_interrupt(ps); -} - -ErtsPollEvents erts_poll_control(ErtsPollSet ps, ErtsSysFdType fd, - ErtsPollEvents pe, int on, int* do_wake) { - ErtsSigSelItem *curr; - ErtsPollEvents new_events; - int old_sig_count; - - HARDTRACEF( - "%ux: In erts_poll_control, fd = %d, pe = %d, on = %d, *do_wake = %d, curr = 0x%xu", - ps, fd, pe, on, do_wake, curr); - - ERTS_POLLSET_LOCK(ps); - - if (on && (pe & ERTS_POLL_EV_IN) && (pe & ERTS_POLL_EV_OUT)) { - /* Check to make sure both in and out are not used at the same time */ - new_events = ERTS_POLL_EV_NVAL; - goto done; - } - - curr = get_sigsel_item(ps, fd); - old_sig_count = ps->sig_count; - - if (curr == NULL && on) { - curr = add_sigsel_item(ps, fd, fd->resolve_signal); - } else if (curr == NULL && !on) { - new_events = ERTS_POLL_EV_NVAL; - goto done; - } - - new_events = curr->events; - - if (pe == 0) { - *do_wake = 0; - goto done; - } - - if (on) { - new_events |= pe; - curr->events = new_events; - } else { - new_events 
&= ~pe; - curr->events = new_events; - if (new_events == 0 && del_sigsel_item(ps, curr)) { - new_events = ERTS_POLL_EV_NVAL; - goto done; - } - } - - if (ps->sig_count != old_sig_count) { - if (update_sigsel(ps)) - new_events = ERTS_POLL_EV_NVAL; - } -done: - ERTS_POLLSET_UNLOCK(ps); - HARDTRACEF("%ux: Out erts_poll_control", ps); - return new_events; -} - -int erts_poll_wait(ErtsPollSet ps, - ErtsPollResFd pr[], - int *len, - ErtsMonotonicTime timeout_time) -{ - int res = ETIMEDOUT, no_fds, currid = 0; - OSTIME timeout; - union SIGNAL *sig; - ErtsMonotonicTime current_time, diff_time, timeout; - // HARDTRACEF("%ux: In erts_poll_wait",ps); - if (ps->interrupt == (PROCESS)0) - ps->interrupt = current_process(); - - ASSERT(current_process() == ps->interrupt); - ASSERT(get_fsem(current_process()) == 0); - ASSERT(erts_atomic32_read_nob(&ps->wakeup_state) & - (ERTS_POLL_NOT_WOKEN | ERTS_POLL_WOKEN_INTR)); - /* Max no of spots avable in pr */ - no_fds = *len; - - *len = 0; - - /* erts_printf("Entering erts_poll_wait(), timeout_time=%bps\n", - timeout_time); */ - - if (timeout_time == ERTS_POLL_NO_TIMEOUT) { - no_timeout: - timeout = (OSTIME) 0; - save_timeout_time = ERTS_MONOTONIC_TIME_MIN; - } - else { - ErtsMonotonicTime current_time, diff_time; - current_time = erts_get_monotonic_time(NULL); - diff_time = timeout_time - current_time; - if (diff_time <= 0) - goto no_timeout; - diff_time = (ERTS_MONOTONIC_TO_MSEC(diff_time - 1) + 1); - if (diff_time > INT_MAX) - diff_time = INT_MAX; - timeout = (OSTIME) diff_time; - save_timeout_time = current_time; - save_timeout_time += ERTS_MSEC_TO_MONOTONIC(diff_time); - } - - set_timeout_time(ps, save_timeout_time); - - while (currid < no_fds) { - if (timeout > 0) { - erts_aint32_t act = erts_atomic32_cmpxchg_nob(&ps->wakeup_state, - ERTS_POLL_SLEEPING, - ERTS_POLL_NOT_WOKEN); - if (act == ERTS_POLL_NOT_WOKEN) { -#ifdef ERTS_SMP - erts_thr_progress_prepare_wait(NULL); -#endif - sig = receive_fsem(timeout, ps->sigs, 1); -#ifdef 
ERTS_SMP - erts_thr_progress_finalize_wait(NULL); -#endif - } else { - ASSERT(act == ERTS_POLL_WOKEN_INTR); - sig = OS_RCV_FSEM; - } - } else - sig = receive_w_tmo(0, ps->sigs); - - if (sig == NULL) { - if (timeout > 0) { - erts_aint32_t act = erts_atomic32_cmpxchg_nob(&ps->wakeup_state, - ERTS_POLL_WOKEN_TIMEDOUT, - ERTS_POLL_SLEEPING); - if (act == ERTS_POLL_WOKEN_INTR) - /* Restore fsem as it was signaled but we got a timeout */ - wait_fsem(1); - } else - erts_atomic32_cmpxchg_nob(&ps->wakeup_state, - ERTS_POLL_WOKEN_TIMEDOUT, - ERTS_POLL_NOT_WOKEN); - break; - } else if (sig == OS_RCV_FSEM) { - ASSERT(erts_atomic32_read_nob(&ps->wakeup_state) == ERTS_POLL_WOKEN_INTR); - break; - } - { - ErtsSigSelInfo *info = get_sigsel_info(ps, sig->sig_no); - struct erts_sys_fd_type fd = { sig->sig_no, info->decode(sig) }; - ErtsSigSelItem *item = get_sigsel_item(ps, &fd); - - ASSERT(sig); - if (currid == 0 && timeout > 0) { - erts_aint32_t act = erts_atomic32_cmpxchg_nob(&ps->wakeup_state, - ERTS_POLL_WOKEN_IO_READY, - ERTS_POLL_SLEEPING); - if (act == ERTS_POLL_WOKEN_INTR) { - /* Restore fsem as it was signaled but we got a msg */ - wait_fsem(1); - act = erts_atomic32_cmpxchg_nob(&ps->wakeup_state, - ERTS_POLL_WOKEN_IO_READY, - ERTS_POLL_WOKEN_INTR); - } - } else if (currid == 0) { - erts_atomic32_set_nob(&ps->wakeup_state, - ERTS_POLL_WOKEN_IO_READY); - } - - if (item == NULL) { - erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf(); - erts_dsprintf( - dsbufp, - "erts_poll_wait() failed: found unkown signal id %d (signo %u) " - "(curr_proc 0x%x)\n", - fd.id, fd.signo, current_process()); - erts_send_error_to_logger_nogl(dsbufp); - timeout = 0; - /* Under normal circumstances the signal is deallocated by the - * driver that issued the select operation. 
But in this case - * there's no driver waiting for such signal so we have to - * deallocate it here */ - if (sig) - free_buf(&sig); - } else { - int i; - struct erts_sys_fd_type *fd = NULL; - ErtsPollOseMsgList *tl,*new; - - /* Check if this fd has already been triggered by a previous signal */ - for (i = 0; i < currid;i++) { - if (pr[i].fd == item->fd) { - fd = pr[i].fd; - pr[i].events |= item->events; - break; - } - } - - /* First time this fd is triggered */ - if (fd == NULL) { - pr[currid].fd = item->fd; - pr[currid].events = item->events; - fd = item->fd; - timeout = 0; - currid++; - } - - /* Insert new signal in approriate list */ - new = erts_alloc(ERTS_ALC_T_FD_SIG_LIST,sizeof(ErtsPollOseMsgList)); - new->next = NULL; - new->data = sig; - - ethr_mutex_lock(&fd->mtx); - tl = fd->msgs; - - if (tl == NULL) { - fd->msgs = new; - } else { - while (tl->next != NULL) - tl = tl->next; - tl->next = new; - } - ethr_mutex_unlock(&fd->mtx); - } - - } - } - - { - erts_aint32_t wakeup_state = erts_atomic32_read_nob(&ps->wakeup_state); - - switch (wakeup_state) { - case ERTS_POLL_WOKEN_IO_READY: - res = 0; - break; - case ERTS_POLL_WOKEN_INTR: - res = EINTR; - break; - case ERTS_POLL_WOKEN_TIMEDOUT: - res = ETIMEDOUT; - break; - case ERTS_POLL_NOT_WOKEN: - /* This happens when we get an invalid signal only */ - res = EINVAL; - break; - default: - res = 0; - erl_exit(ERTS_ABORT_EXIT, - "%s:%d: Internal error: Invalid wakeup_state=%d\n", - __FILE__, __LINE__, (int) wakeup_state); - } - } - - erts_atomic32_set_nob(&ps->wakeup_state, ERTS_POLL_NOT_WOKEN); - set_timeout_time(ps, ERTS_MONOTONIC_TIME_MAX); - - *len = currid; - - // HARDTRACEF("%ux: Out erts_poll_wait",ps); - return res; -} - -int erts_poll_max_fds(void) -{ - - HARDTRACEF("In/Out erts_poll_max_fds -> %d",max_fds); - return max_fds; -} - -void erts_poll_info(ErtsPollSet ps, - ErtsPollInfo *pip) -{ - Uint size = 0; - Uint num_events = 0; - - size += sizeof(struct ErtsPollSet_); - size += 
sizeof(ErtsSigSelInfo)*ps->sig_count; - size += sizeof(ErtsSigSelItem)*ps->item_count; - size += sizeof(SIGSELECT)*(ps->sig_count+1); - - pip->primary = "receive_fsem"; - - pip->fallback = NULL; - - pip->kernel_poll = NULL; - - pip->memory_size = size; - - pip->poll_set_size = num_events; - - pip->fallback_poll_set_size = 0; - - pip->lazy_updates = 0; - - pip->pending_updates = 0; - - pip->batch_updates = 0; - - pip->concurrent_updates = 0; - - - pip->max_fds = erts_poll_max_fds(); - HARDTRACEF("%ux: Out erts_poll_info",ps); - -} - -ErtsPollSet erts_poll_create_pollset(void) -{ - ErtsPollSet ps = SEL_ALLOC(ERTS_ALC_T_POLLSET, - sizeof(struct ErtsPollSet_)); - - ps->sigs = NULL; - ps->sig_count = 0; - ps->item_count = 0; - ps->info = NULL; - ps->interrupt = (PROCESS)0; - erts_atomic32_init_nob(&ps->wakeup_state, ERTS_POLL_NOT_WOKEN); - init_timeout_time(ps); -#ifdef ERTS_SMP - erts_smp_mtx_init(&ps->mtx, "pollset"); -#endif - update_sigsel(ps); - HARDTRACEF("%ux: Out erts_poll_create_pollset",ps); - return ps; -} - -void erts_poll_destroy_pollset(ErtsPollSet ps) -{ - ErtsSigSelInfo *info; - for (info = ps->info; ps->info != NULL; info = ps->info, ps->info = ps->info->next) { - ErtsSigSelItem *item; - for (item = info->fds; info->fds != NULL; item = info->fds, info->fds = info->fds->next) - SEL_FREE(ERTS_ALC_T_POLLSET, item); - SEL_FREE(ERTS_ALC_T_POLLSET, info); - } - - SEL_FREE(ERTS_ALC_T_POLLSET,ps->sigs); - -#ifdef ERTS_SMP - erts_smp_mtx_destroy(&ps->mtx); -#endif - - SEL_FREE(ERTS_ALC_T_POLLSET,ps); -} - -void erts_poll_init(void) -{ - HARDTRACEF("In %s", __FUNCTION__); - max_fds = 256; - - HARDTRACEF("Out %s", __FUNCTION__); -} - - -/* OSE driver functions */ - -union SIGNAL *erl_drv_ose_get_signal(ErlDrvEvent drv_ev) { - struct erts_sys_fd_type *ev = (struct erts_sys_fd_type *)drv_ev; - ethr_mutex_lock(&ev->mtx); - if (ev->msgs == NULL) { - ethr_mutex_unlock(&ev->mtx); - return NULL; - } else { - ErtsPollOseMsgList *msg = ev->msgs; - union SIGNAL *sig = 
(union SIGNAL*)msg->data; - ASSERT(msg->data); - ev->msgs = msg->next; - ethr_mutex_unlock(&ev->mtx); - erts_free(ERTS_ALC_T_FD_SIG_LIST,msg); - restore(sig); - return sig; - } -} - -ErlDrvEvent -erl_drv_ose_event_alloc(SIGSELECT signo, ErlDrvOseEventId id, - ErlDrvOseEventId (*resolve_signal)(union SIGNAL *sig), void *extra) { - struct erts_sys_fd_type *ev = erts_alloc(ERTS_ALC_T_DRV_EV, - sizeof(struct erts_sys_fd_type)); - ev->signo = signo; - ev->extra = extra; - ev->id = id; - ev->msgs = NULL; - ev->resolve_signal = resolve_signal; - ethr_mutex_init(&ev->mtx); - return (ErlDrvEvent)ev; -} - -void erl_drv_ose_event_free(ErlDrvEvent drv_ev) { - struct erts_sys_fd_type *ev = (struct erts_sys_fd_type *)drv_ev; - ASSERT(ev->msgs == NULL); - ethr_mutex_destroy(&ev->mtx); - erts_free(ERTS_ALC_T_DRV_EV,ev); -} - -void erl_drv_ose_event_fetch(ErlDrvEvent drv_ev, SIGSELECT *signo, - ErlDrvOseEventId *id, void **extra) { - struct erts_sys_fd_type *ev = (struct erts_sys_fd_type *)drv_ev; - if (signo) - *signo = ev->signo; - if (extra) - *extra = ev->extra; - if (id) - *id = ev->id; -} diff --git a/erts/emulator/sys/ose/erts.sig b/erts/emulator/sys/ose/erts.sig deleted file mode 100644 index 78b883ee6c..0000000000 --- a/erts/emulator/sys/ose/erts.sig +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef ERTS_OSE_SIGNALS -#define ERTS_OSE_SIGNALS - -#ifndef ERTS_OSE_SIGNAL_BASE -#define ERTS_OSE_SIGNAL_BASE 0x01900280 -#endif - -#define ERTS_SIGNAL_INVALID ERTS_OSE_SIGNAL_BASE -#define ERTS_SIGNAL_FD_DRV_CONFIG ERTS_OSE_SIGNAL_BASE+1 -#define ERTS_SIGNAL_FD_DRV_ASYNC ERTS_OSE_SIGNAL_BASE+2 -#define ERTS_SIGNAL_OSE_DRV_ATTACH ERTS_OSE_SIGNAL_BASE+3 -#define ERTS_SIGNAL_OSE_DRV_HUNT ERTS_OSE_SIGNAL_BASE+4 - -#define ERTS_SIGNAL_RUN_ERL_SETUP ERTS_OSE_SIGNAL_BASE+100 -#define ERTS_SIGNAL_RUN_ERL_DAEMON ERTS_OSE_SIGNAL_BASE+101 - -#endif diff --git a/erts/emulator/sys/ose/gcc_4.4.3_lm_ppc.lcf b/erts/emulator/sys/ose/gcc_4.4.3_lm_ppc.lcf deleted file mode 100644 index a19d23facf..0000000000 
--- a/erts/emulator/sys/ose/gcc_4.4.3_lm_ppc.lcf +++ /dev/null @@ -1,182 +0,0 @@ -/******************************************************************************* - * Copyright (C) 2013-2014 by Enea Software AB, - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- ******************************************************************************/ - -OUTPUT_FORMAT("elf32-powerpc", "elf32-powerpc", "elf32-powerpc") -OUTPUT_ARCH("powerpc") -ENTRY("crt0_lm") -MEMORY -{ - rom : ORIGIN = 0x01000000, LENGTH = 0x01000000 - ram : ORIGIN = 0x02000000, LENGTH = 0x01000000 -} -PHDRS -{ - ph_conf PT_LOAD ; - ph_rom PT_LOAD ; - ph_ram PT_LOAD ; -} -SECTIONS -{ - .text : - { - *(.text_first) - *(.text) - *(.text.*) - *(.stub) - *(oscode) - *(.init*) - *(.fini*) - *(.gnu.warning) - *(.gnu.linkonce.t.*) - *(.glue_7t) - *(.glue_7) - } > rom :ph_rom = 0 - .ose_sfk_biosentry : - { - *(.ose_sfk_biosentry) - } > rom :ph_rom - .ctors : - { - __CTOR_LIST__ = .; - *(.ctors) - *(SORT(.ctors.*)) - __CTOR_END__ = .; - } > rom :ph_rom - .dtors : - { - __DTOR_LIST__ = .; - *(.dtors) - *(SORT(.dtors.*)) - __DTOR_END__ = .; - } > rom :ph_rom - OSESYMS : - { - *(.osesyms) - } > rom :ph_rom - .rodata : - { - *(.rodata) - *(.rodata.*) - *(.gnu.linkonce.r.*) - } > rom :ph_rom - .eh_frame_hdr : - { - *(.eh_frame_hdr) - } > rom :ph_rom - .eh_frame : - { - __EH_FRAME_BEGIN__ = .; - *(.eh_frame) - LONG(0) - __EH_FRAME_END__ = .; - } > rom :ph_rom - .gcc_except_table : - { - *(.gcc_except_table .gcc_except_table.*) - } > rom :ph_rom - .sdata2 : - { - PROVIDE (_SDA2_BASE_ = .); - *(.sdata2) - *(.sdata2.*) - *(.gnu.linkonce.s2.*) - } > rom :ph_rom - .sbss2 : - { - *(.sbss2) - *(.sbss2.*) - *(.gnu.linkonce.sb2.*) - } > rom :ph_rom - LMCONF : - { - obj/?*?/ose_confd.o(.rodata) - *(LMCONF) - } > rom :ph_conf - .data : - { - LONG(0xDEADBABE) - *(.data) - *(.data.*) - *(.gnu.linkonce.d.*) - SORT(CONSTRUCTORS) - . 
= ALIGN(0x10); - } > ram :ph_ram = 0 - .sdata2 : - { - _SDA2_BASE_ = .; - *(.sdata2 .sdata2.* .gnu.linkonce.s2.*) - }> ram :ph_ram - .sdata : - { - PROVIDE (_SDA_BASE_ = .); - *(.sdata) - *(.sdata.*) - *(.gnu.linkonce.s.*) - } > ram :ph_ram - .sbss : - { - *(.sbss) - *(.sbss.*) - *(.scommon) - *(.gnu.linkonce.sb.*) - } > ram :ph_ram - .bss (NOLOAD) : - { - *(.bss) - *(.bss.*) - *(COMMON) - *(.gnu.linkonce.b.*) - *(.osvars) - } > ram :ph_ram - .ignore (NOLOAD) : - { - *(.rel.dyn) - } > ram :ph_ram - .debug 0 : { *(.debug) } - .line 0 : { *(.line) } - .debug_srcinfo 0 : { *(.debug_srcinfo) } - .debug_sfnames 0 : { *(.debug_sfnames) } - .debug_aranges 0 : { *(.debug_aranges) } - .debug_pubnames 0 : { *(.debug_pubnames) } - .debug_info 0 : { *(.debug_info) *(.gnu.linkonce.wi.*) } - .debug_abbrev 0 : { *(.debug_abbrev) } - .debug_line 0 : { *(.debug_line) } - .debug_frame 0 : { *(.debug_frame) } - .debug_str 0 : { *(.debug_str) } - .debug_loc 0 : { *(.debug_loc) } - .debug_macinfo 0 : { *(.debug_macinfo) } - .debug_weaknames 0 : { *(.debug_weaknames) } - .debug_funcnames 0 : { *(.debug_funcnames) } - .debug_typenames 0 : { *(.debug_typenames) } - .debug_varnames 0 : { *(.debug_varnames) } -} -__OSESYMS_START = ADDR(OSESYMS); -__OSESYMS_END = ADDR(OSESYMS) + SIZEOF(OSESYMS); diff --git a/erts/emulator/sys/ose/gcc_4.6.3_lm_ppc.lcf b/erts/emulator/sys/ose/gcc_4.6.3_lm_ppc.lcf deleted file mode 100644 index 3440c2961b..0000000000 --- a/erts/emulator/sys/ose/gcc_4.6.3_lm_ppc.lcf +++ /dev/null @@ -1,242 +0,0 @@ -/******************************************************************************* - * Copyright (C) 2013-2014 by Enea Software AB, - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. 
Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - ******************************************************************************/ - -OUTPUT_FORMAT("elf32-powerpc", "elf32-powerpc", "elf32-powerpc") -OUTPUT_ARCH("powerpc") - -ENTRY("crt0_lm") - -/* Note: - * You may have to increase the length of the "rom" memory region and the - * origin and length of the "ram" memory region below depending on the size - * of the code and data in your load module. 
- */ - -MEMORY -{ - conf : ORIGIN = 0x00100000, LENGTH = 0x00030000 - rom : ORIGIN = 0x01000000, LENGTH = 0x01000000 - ram : ORIGIN = 0x03000000, LENGTH = 0x01000000 -} - -PHDRS -{ - ph_conf PT_LOAD ; - ph_rom PT_LOAD ; - ph_ram PT_LOAD ; -} - -SECTIONS -{ -/*--------------------------------------------------------------------------- - * Load module configuration area - *-------------------------------------------------------------------------*/ - - /* Load module configuration section. */ - LMCONF : - { - obj/?*?/ose_confd.o(.rodata) - *(LMCONF) - } > conf :ph_conf - -/*--------------------------------------------------------------------------- - * Read-only area - *-------------------------------------------------------------------------*/ - - /* Code section. */ - .text : - { - *(.text) - *(.text.*) - *(.stub) - *(oscode) - *(.init*) - *(.fini*) - *(.gnu.warning) - *(.gnu.linkonce.t.*) - } > rom :ph_rom = 0 - - /* OSE symbols section. */ - OSESYMS : - { - *(.osesyms) - } > rom :ph_rom - - /* Read-only data section. */ - .rodata : - { - *(.rodata) - *(.rodata.*) - *(.gnu.linkonce.r.*) - } > rom :ph_rom - - /* C++ exception handling section. */ - .eh_frame : - { - __EH_FRAME_BEGIN__ = .; - *(.eh_frame) - LONG(0) - __EH_FRAME_END__ = .; - } > rom :ph_rom - - /* C++ exception handling section. */ - .gcc_except_table : - { - *(.gcc_except_table .gcc_except_table.*) - } > rom :ph_rom - - /* PowerPC EABI initialized read-only data section. */ - .sdata2 : - { - PROVIDE (_SDA2_BASE_ = .); - *(.sdata2) - *(.sdata2.*) - *(.gnu.linkonce.s2.*) - } > rom :ph_rom - - /* PowerPC EABI uninitialized read-only data section. 
*/ - .sbss2 : - { - *(.sbss2) - *(.sbss2.*) - *(.gnu.linkonce.sb2.*) - } > rom :ph_rom - -/*--------------------------------------------------------------------------- - * Read-write area - *-------------------------------------------------------------------------*/ - - /*------------------------------------------------------------------- - * Initialized data (copied by PM) - *-----------------------------------------------------------------*/ - - /* Data section. */ - .data : - { - *(.data) - *(.data.*) - *(.gnu.linkonce.d.*) - SORT(CONSTRUCTORS) - } > ram :ph_ram - - /* C++ constructor section. */ - .ctors : - { - __CTOR_LIST__ = .; - *(.ctors) - *(SORT(.ctors.*)) - __CTOR_END__ = .; - } > ram :ph_ram - - /* C++ destructor section. */ - .dtors : - { - __DTOR_LIST__ = .; - *(.dtors) - *(SORT(.dtors.*)) - __DTOR_END__ = .; - } > ram :ph_ram - - - /* Small data section. */ - .sdata ALIGN(0x10) : - { - PROVIDE (_SDA_BASE_ = .); - *(.sdata) - *(.sdata.*) - *(.gnu.linkonce.s.*) - } > ram :ph_ram - - /*------------------------------------------------------------------- - * Uninitialized data (cleared by PM) - *-----------------------------------------------------------------*/ - - /* Small bss section. */ - .sbss : - { - *(.sbss) - *(.sbss.*) - *(.scommon) - *(.gnu.linkonce.sb.*) - } > ram :ph_ram - - /* Bss section. */ - .bss : - { - *(.bss) - *(.bss.*) - *(COMMON) - *(.gnu.linkonce.b.*) - } > ram :ph_ram - -/*--------------------------------------------------------------------------- - * Debug information - *-------------------------------------------------------------------------*/ - - /* - * Stabs debug sections. - */ - - .stab 0 : { *(.stab) } - .stabstr 0 : { *(.stabstr) } - .stab.excl 0 : { *(.stab.excl) } - .stab.exclstr 0 : { *(.stab.exclstr) } - .stab.index 0 : { *(.stab.index) } - .stab.indexstr 0 : { *(.stab.indexstr) } - .comment 0 : { *(.comment) } - - /* - * DWARF debug sections. 
- */ - - /* DWARF 1 */ - .debug 0 : { *(.debug) } - .line 0 : { *(.line) } - /* GNU DWARF 1 extensions */ - .debug_srcinfo 0 : { *(.debug_srcinfo) } - .debug_sfnames 0 : { *(.debug_sfnames) } - /* DWARF 1.1 and DWARF 2 */ - .debug_aranges 0 : { *(.debug_aranges) } - .debug_pubnames 0 : { *(.debug_pubnames) } - /* DWARF 2 */ - .debug_info 0 : { *(.debug_info) *(.gnu.linkonce.wi.*) } - .debug_abbrev 0 : { *(.debug_abbrev) } - .debug_line 0 : { *(.debug_line) } - .debug_frame 0 : { *(.debug_frame) } - .debug_str 0 : { *(.debug_str) } - .debug_loc 0 : { *(.debug_loc) } - .debug_macinfo 0 : { *(.debug_macinfo) } - /* SGI/MIPS DWARF 2 extensions */ - .debug_weaknames 0 : { *(.debug_weaknames) } - .debug_funcnames 0 : { *(.debug_funcnames) } - .debug_typenames 0 : { *(.debug_typenames) } - .debug_varnames 0 : { *(.debug_varnames) } -} diff --git a/erts/emulator/sys/ose/sys.c b/erts/emulator/sys/ose/sys.c deleted file mode 100644 index bcd0ffa0b6..0000000000 --- a/erts/emulator/sys/ose/sys.c +++ /dev/null @@ -1,1847 +0,0 @@ -/* - * %CopyrightBegin% - * - * Copyright Ericsson AB 1996-2013. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * - * %CopyrightEnd% - */ - -#ifdef HAVE_CONFIG_H -# include "config.h" -#endif -#include "sys/time.h" -#include "time.h" -#include "sys/uio.h" -#include "termios.h" -#include "ctype.h" -#include "termios.h" - -#ifdef HAVE_FCNTL_H -#include "fcntl.h" -#endif - -#ifdef HAVE_SYS_IOCTL_H -#include "sys/ioctl.h" -#endif - -#define ERTS_WANT_BREAK_HANDLING -#define WANT_NONBLOCKING -#include "sys.h" -#include "erl_thr_progress.h" - -#ifdef USE_THREADS -#include "erl_threads.h" -#endif - -#include "erl_mseg.h" - -#include "unistd.h" -#include "efs.h" -#include "erl_printf.h" -#include "aio.h" -#include "pm.h" -#include "fcntl.h" - -/* Set the define to 1 to get some logging */ -#if 0 -#include "ramlog.h" -#define LOG(output) ramlog_printf output -#else -#define LOG(output) -#endif - -extern char **environ; -static erts_smp_rwmtx_t environ_rwmtx; -static PROCESS sig_proxy_pid = 0; - -#define MAX_VSIZE 16 /* Max number of entries allowed in an I/O - * vector sock_sendv(). - */ -/* - * Don't need global.h, but bif_table.h (included by bif.h), - * won't compile otherwise - */ -#include "global.h" -#include "bif.h" - -#include "erl_sys_driver.h" -#include "erl_check_io.h" -#include "erl_cpu_topology.h" - -/* The priority for reader/writer processes */ -#define FD_PROC_PRI get_pri(current_process()) - -typedef struct ErtsSysReportExit_ ErtsSysReportExit; -struct ErtsSysReportExit_ { - ErtsSysReportExit *next; - Eterm port; - int pid; - int ifd; - int ofd; - ErlDrvEvent attach_event; - ErlDrvEvent input_event; - ErlDrvEvent output_event; -}; - -/* This data is shared by these drivers - initialized by spawn_init() */ -static struct driver_data { - ErlDrvPort port_num; - int ofd; - int ifd; - int packet_bytes; - ErtsSysReportExit *report_exit; - int pid; - int alive; - int status; - ErlDrvEvent input_event; - ErlDrvEvent output_event; - struct aiocb aiocb; - FmHandle handle; - char *install_handle; -} *driver_data; /* indexed by fd */ - -struct async { - SIGSELECT signo; - 
ErlDrvTermData port; - ErlDrvTermData proc; - PROCESS spid; - PROCESS target; - Uint32 ref; -}; - -static ErtsSysReportExit *report_exit_list; -static ERTS_INLINE void report_exit_status(ErtsSysReportExit *rep, int status); - -extern int driver_interrupt(int, int); -extern void do_break(void); - -extern void erl_sys_args(int*, char**); - -/* The following two defs should probably be moved somewhere else */ - -extern void erts_sys_init_float(void); - -extern void erl_crash_dump(char* file, int line, char* fmt, ...); - -#define DIR_SEPARATOR_CHAR '/' - -#if defined(DEBUG) -#define ERL_BUILD_TYPE_MARKER ".debug" -#else /* opt */ -#define ERL_BUILD_TYPE_MARKER -#endif - -#define CHILD_SETUP_PROG_NAME "child_setup" ERL_BUILD_TYPE_MARKER - -#ifdef DEBUG -static int debug_log = 0; -#endif - -#ifdef ERTS_SMP -static erts_smp_atomic32_t have_prepared_crash_dump; -#define ERTS_PREPARED_CRASH_DUMP \ - ((int) erts_smp_atomic32_xchg_nob(&have_prepared_crash_dump, 1)) -#else -static volatile int have_prepared_crash_dump; -#define ERTS_PREPARED_CRASH_DUMP \ - (have_prepared_crash_dump++) -#endif - -static erts_smp_atomic_t sys_misc_mem_sz; - -#if defined(ERTS_SMP) -erts_mtx_t chld_stat_mtx; -#endif - -#if defined(ERTS_SMP) /* ------------------------------------------------- */ -#define CHLD_STAT_LOCK erts_mtx_lock(&chld_stat_mtx) -#define CHLD_STAT_UNLOCK erts_mtx_unlock(&chld_stat_mtx) - -#else /* ------------------------------------------------------------------- */ -#define CHLD_STAT_LOCK -#define CHLD_STAT_UNLOCK -static volatile int children_died; -#endif - -#define SET_AIO(REQ,FD,SIZE,BUFF) \ - memset(&(REQ),0,sizeof(REQ)); \ - (REQ).aio_fildes = FD; \ - (REQ).aio_offset = FM_POSITION_CURRENT; \ - (REQ).aio_nbytes = SIZE; \ - (REQ).aio_buf = BUFF; \ - (REQ).aio_sigevent.sigev_notify = SIGEV_NONE - -/* the first sizeof(struct aiocb *) bytes of the write buffer - * will contain the pointer to the aiocb struct, this needs - * to be freed between asynchronous writes. 
- * A write of 0 bytes is ignored. */ -#define WRITE_AIO(FD,SIZE,BUFF) do { \ - if (SIZE > 0) { \ - struct aiocb *write_req = driver_alloc(sizeof(struct aiocb)); \ - char *write_buff = driver_alloc((sizeof(char)*SIZE)+1+ \ - (sizeof(struct aiocb *))); \ - *(struct aiocb **)write_buff = (struct aiocb *)write_req; \ - write_buff += sizeof(struct aiocb *); \ - memcpy(write_buff,BUFF,SIZE+1); \ - SET_AIO(*write_req,FD,SIZE,write_buff); \ - if (aio_write(write_req)) \ - ramlog_printf("%s:%d: write failed with %d\n", \ - __FILE__,__LINE__,errno); \ - } \ -} while(0) - -/* free the write_buffer and write_req - * created in the WRITE_AIO() request macro */ -#define FREE_AIO(ptr) do { \ - struct aiocb *aiocb_ptr; \ - char *buffer_ptr; \ - aiocb_ptr = *(struct aiocb **)((ptr)-sizeof(struct aiocb *)); \ - buffer_ptr = (((char*)ptr)-sizeof(struct aiocb *)); \ - driver_free(aiocb_ptr); \ - driver_free(buffer_ptr); \ -} while(0) - -#define DISPATCH_AIO(sig) do { \ - if (aio_dispatch(sig)) \ - ramlog_printf("%s:%d: dispatch failed with %d\n", \ - __FILE__,__LINE__,errno); \ - } while(0) - -#define AIO_PIPE_SIZE 1024 - -/* debug print macros */ -#define DEBUG_RES 0 - -#ifdef DEBUG_RES -#define DEBUG_CHECK_RES(actual, expected) \ - do { \ - if (actual != expected ) { \ - ramlog_printf("Result check failed" \ - " got: 0x%08x expected:0x%08x\nat: %s:%d\n", \ - actual, expected, __FILE__, __LINE__); \ - abort(); /* This might perhaps be too harsh? 
*/ \ - } \ - } while(0) -#else -#define DEBUG_CHECK_RES -#endif - -static struct fd_data { - char pbuf[4]; /* hold partial packet bytes */ - int psz; /* size of pbuf */ - char *buf; - char *cpos; - int sz; - int remain; /* for input on fd */ -} *fd_data; /* indexed by fd */ - -/********************* General functions ****************************/ - -/* This is used by both the drivers and general I/O, must be set early */ -static int max_files = -1; - -/* - * a few variables used by the break handler - */ -#ifdef ERTS_SMP -erts_smp_atomic32_t erts_break_requested; -#define ERTS_SET_BREAK_REQUESTED \ - erts_smp_atomic32_set_nob(&erts_break_requested, (erts_aint32_t) 1) -#define ERTS_UNSET_BREAK_REQUESTED \ - erts_smp_atomic32_set_nob(&erts_break_requested, (erts_aint32_t) 0) -#else -volatile int erts_break_requested = 0; -#define ERTS_SET_BREAK_REQUESTED (erts_break_requested = 1) -#define ERTS_UNSET_BREAK_REQUESTED (erts_break_requested = 0) -#endif -/* set early so the break handler has access to initial mode */ -static struct termios initial_tty_mode; -static int replace_intr = 0; -/* assume yes initially, ttsl_init will clear it */ -int using_oldshell = 1; -static PROCESS get_signal_proxy_pid(void); - -static void -init_check_io(void) -{ - erts_init_check_io(); - max_files = erts_check_io_max_files(); -} - -#ifdef ERTS_POLL_NEED_ASYNC_INTERRUPT_SUPPORT -#define ERTS_CHK_IO_AS_INTR() erts_check_io_async_sig_interrupt() -#else -#define ERTS_CHK_IO_AS_INTR() erts_check_io_interrupt(1) -#endif -#define ERTS_CHK_IO_INTR erts_check_io_interrupt -#define ERTS_CHK_IO_INTR_TMD erts_check_io_interrupt_timed -#define ERTS_CHK_IO erts_check_io -#define ERTS_CHK_IO_SZ erts_check_io_size - - -void -erts_sys_schedule_interrupt(int set) -{ - ERTS_CHK_IO_INTR(set); -} - -#ifdef ERTS_SMP -void -erts_sys_schedule_interrupt_timed(int set, ErtsMonotonicTime timeout_time) -{ - ERTS_CHK_IO_INTR_TMD(set, timeout_time); -} -#endif - -Uint -erts_sys_misc_mem_sz(void) -{ - Uint res = 
ERTS_CHK_IO_SZ(); - res += erts_smp_atomic_read_mb(&sys_misc_mem_sz); - return res; -} - -/* - * reset the terminal to the original settings on exit - */ -void sys_tty_reset(int exit_code) -{ - if (using_oldshell && !replace_intr) { - SET_BLOCKING(0); - } - else if (isatty(0)) { - tcsetattr(0,TCSANOW,&initial_tty_mode); - } -} - -#ifdef USE_THREADS - -typedef struct { - int sched_bind_data; -} erts_thr_create_data_t; - -/* - * thr_create_prepare() is called in parent thread before thread creation. - * Returned value is passed as argument to thr_create_cleanup(). - */ -static void * -thr_create_prepare(void) -{ - erts_thr_create_data_t *tcdp; - - tcdp = erts_alloc(ERTS_ALC_T_TMP, sizeof(erts_thr_create_data_t)); - - tcdp->sched_bind_data = erts_sched_bind_atthrcreate_prepare(); - - return (void *) tcdp; -} - - -/* thr_create_cleanup() is called in parent thread after thread creation. */ -static void -thr_create_cleanup(void *vtcdp) -{ - erts_thr_create_data_t *tcdp = (erts_thr_create_data_t *) vtcdp; - - erts_sched_bind_atthrcreate_parent(tcdp->sched_bind_data); - - erts_free(ERTS_ALC_T_TMP, tcdp); -} - -static void -thr_create_prepare_child(void *vtcdp) -{ - erts_thr_create_data_t *tcdp = (erts_thr_create_data_t *) vtcdp; - -#ifdef ERTS_ENABLE_LOCK_COUNT - erts_lcnt_thread_setup(); -#endif - - erts_sched_bind_atthrcreate_child(tcdp->sched_bind_data); -} - -#endif /* #ifdef USE_THREADS */ - -/* The two functions below are stolen from win_con.c - They have to use malloc/free/realloc directly becasue - we want to do able to do erts_printf very early on. 
- */ -#define VPRINTF_BUF_INC_SIZE 128 -static erts_dsprintf_buf_t * -grow_vprintf_buf(erts_dsprintf_buf_t *dsbufp, size_t need) -{ - char *buf; - size_t size; - - ASSERT(dsbufp); - - if (!dsbufp->str) { - size = (((need + VPRINTF_BUF_INC_SIZE - 1) - / VPRINTF_BUF_INC_SIZE) - * VPRINTF_BUF_INC_SIZE); - buf = (char *) malloc(size * sizeof(char)); - } - else { - size_t free_size = dsbufp->size - dsbufp->str_len; - - if (need <= free_size) - return dsbufp; - - size = need - free_size + VPRINTF_BUF_INC_SIZE; - size = (((size + VPRINTF_BUF_INC_SIZE - 1) - / VPRINTF_BUF_INC_SIZE) - * VPRINTF_BUF_INC_SIZE); - size += dsbufp->size; - buf = (char *) realloc((void *) dsbufp->str, - size * sizeof(char)); - } - if (!buf) - return NULL; - if (buf != dsbufp->str) - dsbufp->str = buf; - dsbufp->size = size; - return dsbufp; -} - -static int erts_sys_ramlog_printf(char *format, va_list arg_list) -{ - int res,i; - erts_dsprintf_buf_t dsbuf = ERTS_DSPRINTF_BUF_INITER(grow_vprintf_buf); - res = erts_vdsprintf(&dsbuf, format, arg_list); - if (res >= 0) { - for (i = 0; i < dsbuf.str_len; i+= 50) - /* We print 50 characters at a time because otherwise - the ramlog looks broken */ - ramlog_printf("%.*s",dsbuf.str_len-50 < 0?dsbuf.str_len:50,dsbuf.str+i); - } - if (dsbuf.str) - free((void *) dsbuf.str); - return res; -} - -void -erts_sys_pre_init(void) -{ - erts_printf_add_cr_to_stdout = 1; - erts_printf_add_cr_to_stderr = 1; -#ifdef USE_THREADS - { - erts_thr_init_data_t eid = ERTS_THR_INIT_DATA_DEF_INITER; - - eid.thread_create_child_func = thr_create_prepare_child; - /* Before creation in parent */ - eid.thread_create_prepare_func = thr_create_prepare; - /* After creation in parent */ - eid.thread_create_parent_func = thr_create_cleanup, - - erts_thr_init(&eid); - - report_exit_list = NULL; - -#ifdef ERTS_ENABLE_LOCK_COUNT - erts_lcnt_init(); -#endif - -#if defined(ERTS_SMP) - erts_mtx_init(&chld_stat_mtx, "child_status"); -#endif - } -#ifdef ERTS_SMP - 
erts_smp_atomic32_init_nob(&erts_break_requested, 0); - erts_smp_atomic32_init_nob(&have_prepared_crash_dump, 0); -#else - erts_break_requested = 0; - have_prepared_crash_dump = 0; -#endif -#if !defined(ERTS_SMP) - children_died = 0; -#endif -#endif /* USE_THREADS */ - - erts_printf_stdout_func = erts_sys_ramlog_printf; - - erts_smp_atomic_init_nob(&sys_misc_mem_sz, 0); -} - -void -erl_sys_init(void) -{ - -#ifdef USE_SETLINEBUF - setlinebuf(stdout); -#else - setvbuf(stdout, (char *)NULL, _IOLBF, BUFSIZ); -#endif - - erts_sys_init_float(); - - /* we save this so the break handler can set and reset it properly */ - /* also so that we can reset on exit (break handler or not) */ - if (isatty(0)) { - tcgetattr(0,&initial_tty_mode); - } - tzset(); /* Required at least for NetBSD with localtime_r() */ -} - -static ERTS_INLINE int -prepare_crash_dump(int secs) -{ -#define NUFBUF (3) - int i, max; - char env[21]; /* enough to hold any 64-bit integer */ - size_t envsz; - /*DeclareTmpHeapNoproc(heap,NUFBUF);*/ - /*Eterm *hp = heap;*/ - /*Eterm list = NIL;*/ - int has_heart = 0; - - UseTmpHeapNoproc(NUFBUF); - - if (ERTS_PREPARED_CRASH_DUMP) - return 0; /* We have already been called */ - - - /* Positive secs means an alarm must be set - * 0 or negative means no alarm - * - * Set alarm before we try to write to a port - * we don't want to hang on a port write with - * no alarm. - * - */ - -#if 0 /*ose TBD!!!*/ - if (secs >= 0) { - alarm((unsigned int)secs); - } -#endif - - /* Make sure we unregister at epmd (unknown fd) and get at least - one free filedescriptor (for erl_crash.dump) */ - - max = max_files; - if (max < 1024) - max = 1024; - for (i = 3; i < max; i++) { - close(i); - } - - envsz = sizeof(env); - i = erts_sys_getenv__("ERL_CRASH_DUMP_NICE", env, &envsz); - if (i >= 0) { - int nice_val; - nice_val = i != 0 ? 
0 : atoi(env); - if (nice_val > 39) { - nice_val = 39; - } - set_pri(nice_val); - } - - UnUseTmpHeapNoproc(NUFBUF); -#undef NUFBUF - return has_heart; -} - -int erts_sys_prepare_crash_dump(int secs) -{ - return prepare_crash_dump(secs); -} - -static ERTS_INLINE void -break_requested(void) -{ - /* - * just set a flag - checked for and handled by - * scheduler threads erts_check_io() (not signal handler). - */ -#ifdef DEBUG - fprintf(stderr,"break!\n"); -#endif - if (ERTS_BREAK_REQUESTED) - erl_exit(ERTS_INTR_EXIT, ""); - - ERTS_SET_BREAK_REQUESTED; - ERTS_CHK_IO_AS_INTR(); /* Make sure we don't sleep in poll */ -} - -/* Disable break */ -void erts_set_ignore_break(void) { - -} - -/* Don't use ctrl-c for break handler but let it be - used by the shell instead (see user_drv.erl) */ -void erts_replace_intr(void) { - struct termios mode; - - if (isatty(0)) { - tcgetattr(0, &mode); - - /* here's an example of how to replace ctrl-c with ctrl-u */ - /* mode.c_cc[VKILL] = 0; - mode.c_cc[VINTR] = CKILL; */ - - mode.c_cc[VINTR] = 0; /* disable ctrl-c */ - tcsetattr(0, TCSANOW, &mode); - replace_intr = 1; - } -} - -void init_break_handler(void) -{ - -} - -int sys_max_files(void) -{ - return(max_files); -} - - -/************************** OS info *******************************/ - -/* Used by erlang:info/1. */ -/* (This code was formerly in drv.XXX/XXX_os_drv.c) */ - -char os_type[] = "ose"; - -void -os_flavor(char* namebuf, /* Where to return the name. */ - unsigned size) /* Size of name buffer. */ -{ -#if 0 - struct utsname uts; /* Information about the system. */ - char* s; - - (void) uname(&uts); - for (s = uts.sysname; *s; s++) { - if (isupper((int) *s)) { - *s = tolower((int) *s); - } - } - strcpy(namebuf, uts.sysname); -#else - strncpy(namebuf, "release", size); -#endif -} - -void -os_version(pMajor, pMinor, pBuild) -int* pMajor; /* Pointer to major version. */ -int* pMinor; /* Pointer to minor version. */ -int* pBuild; /* Pointer to build number. 
*/ -{ - *pMajor = 5; - *pMinor = 7; - *pBuild = 0; -} - -void init_getenv_state(GETENV_STATE *state) -{ - erts_smp_rwmtx_rlock(&environ_rwmtx); - *state = NULL; -} - -char **environ; /*ose - needs replacement*/ - -char *getenv_string(GETENV_STATE *state0) -{ - char **state = (char **) *state0; - char *cp; - - ERTS_SMP_LC_ASSERT(erts_smp_lc_rwmtx_is_rlocked(&environ_rwmtx)); - - if (state == NULL) - state = environ; - - cp = *state++; - *state0 = (GETENV_STATE) state; - - return cp; -} - -void fini_getenv_state(GETENV_STATE *state) -{ - *state = NULL; - erts_smp_rwmtx_runlock(&environ_rwmtx); -} - - -/************************** Port I/O *******************************/ - -/* I. Common stuff */ - -union SIGNAL { - SIGSELECT sig_no; - struct FmReadPtr fm_read_reply; - struct FmWritePtr fm_write_reply; - struct async async; -}; - -/* II. The spawn/fd drivers */ - -/* - * Decreasing the size of it below 16384 is not allowed. - */ -#define ERTS_SYS_READ_BUF_SZ (64*1024) - -/* Driver interfaces */ -static ErlDrvData spawn_start(ErlDrvPort, char*, SysDriverOpts*); -static ErlDrvData fd_start(ErlDrvPort, char*, SysDriverOpts*); -static ErlDrvSSizeT fd_control(ErlDrvData, unsigned int, char *, ErlDrvSizeT, - char **, ErlDrvSizeT); -static int spawn_init(void); -static void fd_stop(ErlDrvData); -static void erl_stop(ErlDrvData); -static void ready_input(ErlDrvData, ErlDrvEvent); -static void ready_output(ErlDrvData, ErlDrvEvent); -static void output(ErlDrvData, char*, ErlDrvSizeT); -static void stop_select(ErlDrvEvent, void*); - -static PROCESS -get_signal_proxy_pid(void) { - union SIGNAL *sig; - SIGSELECT any_sig[] = {1,ERTS_SIGNAL_OSE_DRV_ATTACH}; - - if (!sig_proxy_pid) { - sig = alloc(sizeof(union SIGNAL), ERTS_SIGNAL_OSE_DRV_ATTACH); - hunt("ose_signal_driver_proxy", 0, NULL, &sig); - sig = receive(any_sig); - sig_proxy_pid = sender(&sig); - free_buf(&sig); - } - ASSERT(sig_proxy_pid); - return sig_proxy_pid; -} - -static ErlDrvOseEventId -resolve_signal(union SIGNAL* 
sig) { - switch(sig->sig_no) { - - case FM_READ_PTR_REPLY: - return (ErlDrvOseEventId)sig->fm_read_reply.handle; - - case FM_WRITE_PTR_REPLY: - return (ErlDrvOseEventId)sig->fm_write_reply.handle; - - case ERTS_SIGNAL_OSE_DRV_ATTACH: - return (ErlDrvOseEventId)sig->async.target; - - default: - break; - } - return (ErlDrvOseEventId)-1; -} - -struct erl_drv_entry spawn_driver_entry = { - spawn_init, - spawn_start, - NULL, /* erl_stop, */ - output, - ready_input, - ready_output, - "spawn", - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - ERL_DRV_EXTENDED_MARKER, - ERL_DRV_EXTENDED_MAJOR_VERSION, - ERL_DRV_EXTENDED_MINOR_VERSION, - ERL_DRV_FLAG_USE_PORT_LOCKING, - NULL, NULL, - stop_select -}; -struct erl_drv_entry fd_driver_entry = { - NULL, - fd_start, - fd_stop, - output, - ready_input, - ready_output, - "fd", - NULL, - NULL, - fd_control, - NULL, - NULL, - NULL, /* ready_async */ - NULL, /* flush */ - NULL, /* call */ - NULL, /* event */ - ERL_DRV_EXTENDED_MARKER, - ERL_DRV_EXTENDED_MAJOR_VERSION, - ERL_DRV_EXTENDED_MINOR_VERSION, - 0, /* ERL_DRV_FLAGs */ - NULL, /* handle2 */ - NULL, /* process_exit */ - stop_select -}; - -static void -set_spawn_fd(int local_fd, int remote_fd, PROCESS remote_pid) { - PROCESS vm_pid; - FmHandle handle; - char env_val[55]; - char env_name[10]; - EfsStatus efs_res; - - /* get pid of pipevm and handle of chosen fd */ - efs_res = efs_examine_fd(local_fd, FLIB_FD_VMPID, &vm_pid, 0); - DEBUG_CHECK_RES(efs_res, EFS_SUCCESS); - - /* setup the file descriptor to buffer per line */ - efs_res = efs_config_fd(local_fd, FLIB_FD_BUFMODE, FM_BUFF_LINE, - FLIB_FD_BUFSIZE, 80, 0); - DEBUG_CHECK_RES(efs_res, EFS_SUCCESS); - - /* duplicate handle and set spawn pid owner */ - efs_res = efs_dup_to(local_fd, remote_pid, &handle); - DEBUG_CHECK_RES(efs_res, EFS_SUCCESS); - - sprintf(env_name, "FD%d", remote_fd); - - /* Syntax of the environment variable: - * "FD#" "<pid of pipevm>,<handle>,<buffer mode>,<buff size>,<omode>" */ - 
sprintf(env_val, "0x%lx,0x%lx,%lu,%lu,0x%x", - vm_pid, handle, - FM_BUFF_LINE, 80, - O_APPEND); - - set_env(remote_pid, env_name, env_val); -} - -static ErlDrvData -set_driver_data(ErlDrvPort port_num, - int ifd, - int ofd, - int packet_bytes, - int read_write, - int exit_status, - PROCESS pid) -{ - Port *prt; - ErtsSysReportExit *report_exit; - - prt = erts_drvport2port(port_num); - if (prt != ERTS_INVALID_ERL_DRV_PORT) { - prt->os_pid = pid; - } - - /* READ */ - if (read_write & DO_READ) { - EfsStatus res = efs_examine_fd(ifd, FLIB_FD_HANDLE, - &driver_data[ifd].handle, 0); - if (res != EFS_SUCCESS) - ramlog_printf("%s:%d: efs_examine_fd(%d) failed with %d\n", - __FILE__,__LINE__,ifd,errno); - driver_data[ifd].ifd = ifd; - driver_data[ifd].packet_bytes = packet_bytes; - driver_data[ifd].port_num = port_num; - driver_data[ifd].pid = pid; - - /* async read struct */ - memset(&driver_data[ifd].aiocb, 0, sizeof(struct aiocb)); - driver_data[ifd].aiocb.aio_buf = driver_alloc(AIO_PIPE_SIZE); - driver_data[ifd].aiocb.aio_fildes = ifd; - driver_data[ifd].aiocb.aio_nbytes = (packet_bytes?packet_bytes:AIO_PIPE_SIZE); - driver_data[ifd].alive = 1; - driver_data[ifd].status = 0; - driver_data[ifd].input_event = - erl_drv_ose_event_alloc(FM_READ_PTR_REPLY, - driver_data[ifd].handle, resolve_signal, - &driver_data[ifd].ifd); - - /* READ & WRITE */ - if (read_write & DO_WRITE) { - driver_data[ifd].ofd = ofd; - efs_examine_fd(ofd, FLIB_FD_HANDLE, &driver_data[ofd].handle, 0); - - driver_data[ifd].output_event = - erl_drv_ose_event_alloc(FM_WRITE_PTR_REPLY, - driver_data[ofd].handle, resolve_signal, - &driver_data[ofd].ofd); - driver_data[ofd].pid = pid; - if (ifd != ofd) { - driver_data[ofd] = driver_data[ifd]; - driver_data[ofd].aiocb.aio_buf = NULL; - } - } - else { /* READ ONLY */ - driver_data[ifd].ofd = -1; - } - - /* enable input event */ - (void) driver_select(port_num, driver_data[ifd].input_event, - (ERL_DRV_READ | ERL_DRV_USE), 1); - - if 
(aio_read(&driver_data[ifd].aiocb)) - ramlog_printf("%s:%d: aio_read(%d) failed with %d\n", - __FILE__,__LINE__,ifd,errno); - } - else { /* WRITE ONLY */ - efs_examine_fd(ofd, FLIB_FD_HANDLE, &driver_data[ofd].handle, 0); - driver_data[ofd].packet_bytes = packet_bytes; - driver_data[ofd].port_num = port_num; - driver_data[ofd].ofd = ofd; - driver_data[ofd].pid = pid; - driver_data[ofd].alive = 1; - driver_data[ofd].status = 0; - driver_data[ofd].output_event = - erl_drv_ose_event_alloc(FM_WRITE_PTR_REPLY, driver_data[ofd].handle, - resolve_signal, &driver_data[ofd].ofd); - driver_data[ofd].input_event = driver_data[ofd].output_event; - } - - /* this is used for spawned load modules, and is needed - * to properly uninstall them */ - if (exit_status) { - struct PmProgramInfo *info; - int install_handle_size; - union SIGNAL *sig; - PmStatus pm_status; - report_exit = erts_alloc(ERTS_ALC_T_PRT_REP_EXIT, - sizeof(ErtsSysReportExit)); - report_exit->next = report_exit_list; - report_exit->port = erts_drvport2id(port_num); - report_exit->pid = pid; - report_exit->ifd = (read_write & DO_READ) ? ifd : -1; - report_exit->ofd = (read_write & DO_WRITE) ? 
ofd : -1; - report_exit_list = report_exit; - report_exit->attach_event = - erl_drv_ose_event_alloc(ERTS_SIGNAL_OSE_DRV_ATTACH, pid, - resolve_signal, &driver_data[ifd].ifd); - - /* setup ifd and ofd report exit */ - driver_data[ifd].report_exit = report_exit; - driver_data[ofd].report_exit = report_exit; - - pm_status = ose_pm_program_info(pid, &info); - DEBUG_CHECK_RES(pm_status, PM_SUCCESS); - - install_handle_size = strlen(info->install_handle)+1; - driver_data[ifd].install_handle = driver_alloc(install_handle_size); - strcpy(driver_data[ifd].install_handle, - info->install_handle); - - free_buf((union SIGNAL **)&info); - - sig = alloc(sizeof(struct async), ERTS_SIGNAL_OSE_DRV_ATTACH); - sig->async.target = pid; - send(&sig, get_signal_proxy_pid()); - - /* this event will trigger when we receive an attach signal - * from the recently dead load module */ - (void)driver_select(port_num,report_exit->attach_event, DO_READ, 1); - } - else { - report_exit = NULL; - } - - /* the return value is the pointer to the driver_data struct we created - * in this function, it will be used in the drivers input - * and output functions */ - return (ErlDrvData)((!(read_write & DO_READ) && read_write & DO_WRITE) - ? 
&driver_data[ofd] - : &driver_data[ifd]); -} - -static int spawn_init() -{ - int i; - - driver_data = (struct driver_data *) - erts_alloc(ERTS_ALC_T_DRV_TAB, max_files * sizeof(struct driver_data)); - erts_smp_atomic_add_nob(&sys_misc_mem_sz, - max_files * sizeof(struct driver_data)); - - for (i = 0; i < max_files; i++) - driver_data[i].pid = -1; - - return 1; -} - -static void -init_fd_data(int fd, ErlDrvPort port_num) -{ - fd_data[fd].buf = NULL; - fd_data[fd].cpos = NULL; - fd_data[fd].remain = 0; - fd_data[fd].sz = 0; - fd_data[fd].psz = 0; -} - -/* FIXME write a decent text on pipes on ose */ -static ErlDrvData -spawn_start(ErlDrvPort port_num, char* name, SysDriverOpts* opts) -{ - int ifd[2]; - int ofd[2]; - static uint32_t ticker = 1; - PmStatus pm_status; - OSDOMAIN domain = PM_NEW_DOMAIN; - PROCESS progpid, mainbid, mainpid; - char *handle = NULL; - struct PmProgramInfo *info; - char *args = NULL; - char *tmp_handle; - ErlDrvData res = (ErlDrvData)-1; - int handle_size; - char *ptr; - - - args = driver_alloc(strlen(name)+1); - strcpy(args, name); - /* We need to handle name in three parts - * - install handle (must be unique) - * - install binary (needed for ose_pm_install_load_module()) - * - full path (as argument to the spawned applications env.var - */ - - /* full path including arguments */ - args = driver_alloc(strlen(name)+1); - strcpy(args, name); - - /* handle path */ - tmp_handle = strrchr(name, '/'); - if (tmp_handle == NULL) { - tmp_handle = name; - } - else { - tmp_handle++; - } - - /* handle args */ - ptr = strchr(tmp_handle, ' '); - if (ptr != NULL) { - *ptr = '\0'; - handle_size = ptr - tmp_handle; - } - else { - handle_size = strlen(name)+1; - } - - /* make room for ticker */ - handle_size += (ticker<10)?3:((ticker<100)?4:5); - handle = driver_alloc(handle_size); - - do { - snprintf(handle, handle_size, "%s_%d", tmp_handle, ticker); - pm_status = ose_pm_install_load_module(0, "ELF", name, handle, - 0, 0, NULL); - ticker++; - } while 
(pm_status == PM_EINSTALL_HANDLE_ALREADY_INSTALLED); - - if (pm_status != PM_SUCCESS) { - errno = ENOSYS; /* FIXME add comment */ - return ERL_DRV_ERROR_ERRNO; - } - - /* Create Program */ - pm_status = ose_pm_create_program(&domain, handle, 0, 0, - NULL, &progpid, &mainbid); - DEBUG_CHECK_RES(pm_status, PM_SUCCESS); - - /* Get the mainpid from the newly created program */ - pm_status = ose_pm_program_info(progpid, &info); - DEBUG_CHECK_RES(pm_status, PM_SUCCESS); - - mainpid = info->main_process; - free_buf ((union SIGNAL **)&info); - - /* pipevm needs to be started - * pipe will return 0 if success, -1 if not, - * errno will be set */ - if (pipe(ifd) != 0 || pipe(ofd) != 0) { - DEBUG_CHECK_RES(0, -1); - ASSERT(0); - } - - /* setup driver data */ - res = set_driver_data(port_num, ofd[0], ifd[1], opts->packet_bytes, - opts->read_write, 1 /* opts->exit_status */, progpid); - - /* init the fd_data array for read/write */ - init_fd_data(ofd[0], port_num); - init_fd_data(ifd[1], port_num); - - /* setup additional configurations - * for the spawned applications environment */ - if (args != NULL) { - set_env(progpid, "ARGV", args); - } - set_env(mainbid, "EFS_RESOLVE_TMO", 0); - set_spawn_fd(ifd[0], 0, mainpid); - set_spawn_fd(ofd[1], 1, mainpid); - set_spawn_fd(ofd[1], 2, mainpid); - - /* start the spawned program */ - pm_status = ose_pm_start_program(mainbid); - DEBUG_CHECK_RES(pm_status, PM_SUCCESS); - - /* close unused fd's */ - close(ifd[0]); - close(ofd[1]); - - if (handle) { - driver_free(handle); - } - - return (ErlDrvData)res; -} - -#define FD_DEF_HEIGHT 24 -#define FD_DEF_WIDTH 80 -/* Control op */ -#define FD_CTRL_OP_GET_WINSIZE 100 - -static int fd_get_window_size(int fd, Uint32 *width, Uint32 *height) -{ -#ifdef TIOCGWINSZ - struct winsize ws; - if (ioctl(fd,TIOCGWINSZ,&ws) == 0) { - *width = (Uint32) ws.ws_col; - *height = (Uint32) ws.ws_row; - return 0; - } -#endif - return -1; -} - -static ErlDrvSSizeT fd_control(ErlDrvData drv_data, - unsigned int 
command, - char *buf, ErlDrvSizeT len, - char **rbuf, ErlDrvSizeT rlen) -{ - struct driver_data *data = (struct driver_data *)drv_data; - char resbuff[2*sizeof(Uint32)]; - switch (command) { - case FD_CTRL_OP_GET_WINSIZE: - { - Uint32 w,h; - if (fd_get_window_size(data->ifd,&w,&h)) - return 0; - memcpy(resbuff,&w,sizeof(Uint32)); - memcpy(resbuff+sizeof(Uint32),&h,sizeof(Uint32)); - } - break; - default: - return 0; - } - if (rlen < 2*sizeof(Uint32)) { - *rbuf = driver_alloc(2*sizeof(Uint32)); - } - memcpy(*rbuf,resbuff,2*sizeof(Uint32)); - return 2*sizeof(Uint32); -} - -static ErlDrvData fd_start(ErlDrvPort port_num, char* name, - SysDriverOpts* opts) -{ - ErlDrvData res; - - CHLD_STAT_LOCK; - if (opts->read_write & DO_READ) { - init_fd_data(opts->ifd, port_num); - } - if (opts->read_write & DO_WRITE) { - init_fd_data(opts->ofd, port_num); - } - res = set_driver_data(port_num, opts->ifd, opts->ofd, - opts->packet_bytes, - opts->read_write, 0, -1); - CHLD_STAT_UNLOCK; - return res; -} - -static void clear_fd_data(int fd) -{ - if (fd_data[fd].sz > 0) { - erts_free(ERTS_ALC_T_FD_ENTRY_BUF, (void *) fd_data[fd].buf); - ASSERT(erts_smp_atomic_read_nob(&sys_misc_mem_sz) >= fd_data[fd].sz); - erts_smp_atomic_add_nob(&sys_misc_mem_sz, -1*fd_data[fd].sz); - } - fd_data[fd].buf = NULL; - fd_data[fd].sz = 0; - fd_data[fd].remain = 0; - fd_data[fd].cpos = NULL; - fd_data[fd].psz = 0; -} - -static void nbio_stop_fd(ErlDrvPort prt, ErlDrvEvent ev) -{ - int *fd; - driver_select(prt,ev,DO_READ|DO_WRITE,0); - erl_drv_ose_event_fetch(ev, NULL, NULL, (void **)&fd); - clear_fd_data(*fd); - SET_BLOCKING(*fd); -} - -static void fd_stop(ErlDrvData drv_data) /* Does not close the fds */ -{ - struct driver_data *data = (struct driver_data *)drv_data; - - if (data->ofd != -1) { - if (data->ifd != data->ofd) { /* read and write */ - nbio_stop_fd(data->port_num, data->input_event); - nbio_stop_fd(data->port_num, data->output_event); - } - else { /* write only */ - 
nbio_stop_fd(data->port_num, data->output_event); - } - } - else { /* read only */ - nbio_stop_fd(data->port_num, data->input_event); - } -} - - -static void erl_stop(ErlDrvData drv_data) -{ - struct driver_data *data = (struct driver_data *)drv_data; - - CHLD_STAT_LOCK; - data->pid = -1; - CHLD_STAT_UNLOCK; - - if (data->ofd != -1) { - if (data->ifd != data->ofd) { /* read and write */ - nbio_stop_fd(data->port_num, data->input_event); - nbio_stop_fd(data->port_num, data->output_event); - } - else { /* write only */ - nbio_stop_fd(data->port_num, data->output_event); - } - } - else { /* read only */ - nbio_stop_fd(data->port_num, data->input_event); - } - close(data->ifd); - close(data->ofd); -} - -/* The parameter e is a pointer to the driver_data structure - * related to the fd to be used as output */ -static void output(ErlDrvData drv_data, char* buf, ErlDrvSizeT len) -{ - ErlDrvSizeT sz; - char lb[4]; - char* lbp; - struct driver_data *data = (struct driver_data *)drv_data; - - if (((data->packet_bytes == 2) && - (len > 0xffff)) || (data->packet_bytes == 1 && len > 0xff)) { - driver_failure_posix(data->port_num, EINVAL); - return; /* -1; */ - } - put_int32(len, lb); - lbp = lb + (4-(data->packet_bytes)); - - if ((sz = driver_sizeq(data->port_num)) > 0) { - if (data->packet_bytes != 0) { - driver_enq(data->port_num, lbp, data->packet_bytes); - } - driver_enq(data->port_num, buf, len); - - if (sz + len + data->packet_bytes >= (1 << 13)) - set_busy_port(data->port_num, 1); - } - else { - char *pbbuf; - if (data->packet_bytes != 0) { - pbbuf = malloc(len + data->packet_bytes); - int i; - for (i = 0; i < data->packet_bytes; i++) { - *pbbuf++ = *lbp++; - } - strncpy(pbbuf, buf, len); - pbbuf -= data->packet_bytes; - } - driver_select(data->port_num, data->output_event, - ERL_DRV_WRITE|ERL_DRV_USE, 1); - WRITE_AIO(data->ofd, - (data->packet_bytes ? len+data->packet_bytes : len), - (data->packet_bytes ? 
pbbuf : buf)); - if (data->packet_bytes != 0) free(pbbuf); - } - return; /* 0; */ -} - -/* This function is being run when we in recieve - * either a read of 0 bytes, or the attach signal from a dying - * spawned load module */ -static int port_inp_failure(ErlDrvPort port_num, ErlDrvEvent ready_fd, int res) - /* Result: 0 (eof) or -1 (error) */ -{ - int *fd; - SIGSELECT sig_no; - ASSERT(res <= 0); - - erl_drv_ose_event_fetch(ready_fd,&sig_no, NULL, (void **)&fd); - /* As we need to handle two signals, we do this in two steps */ - if (driver_data[*fd].alive) { - report_exit_status(driver_data[*fd].report_exit, 0); /* status? */ - } - else { - driver_select(port_num,ready_fd,DO_READ|DO_WRITE,0); - clear_fd_data(*fd); - driver_report_exit(driver_data[*fd].port_num, driver_data[*fd].status); - /* As we do not really know if the spawn has crashed or exited nicely - * we do not check the result status of the following call.. FIXME - * can we handle this in a better way? */ - ose_pm_uninstall_load_module(driver_data[*fd].install_handle); - driver_free(driver_data[*fd].install_handle); - driver_free((void *)driver_data[*fd].aiocb.aio_buf); - - close(*fd); - } - - return 0; -} - -/* The parameter e is a pointer to the driver_data structure - * related to the fd to be used as output. 
- * ready_fd is the event that triggered this call to ready_input */ -static void ready_input(ErlDrvData drv_data, ErlDrvEvent ready_fd) -{ - int res; - Uint h; - char *buf; - union SIGNAL *sig; - struct driver_data *data = (struct driver_data *)drv_data; - - sig = erl_drv_ose_get_signal(ready_fd); - ASSERT(sig); - - - while (sig) { - /* If we've recieved an attach signal, we need to handle - * it in port_inp_failure */ - if (sig->sig_no == ERTS_SIGNAL_OSE_DRV_ATTACH) { - port_inp_failure(data->port_num, ready_fd, 0); - } - else { - res = sig->fm_read_reply.actual; - if (res == 0) { - port_inp_failure(data->port_num, ready_fd, res); - break; - } - - if (data->packet_bytes == 0) { - if (res < 0) { - if ((errno != EINTR) && (errno != ERRNO_BLOCK)) { - port_inp_failure(data->port_num, ready_fd, res); - } - } - else if (res == 0) { - /* read of 0 bytes, eof, otherside of pipe is assumed dead */ - port_inp_failure(data->port_num, ready_fd, res); - break; - } - else { - buf = driver_alloc(res); - memcpy(buf, (void *)data->aiocb.aio_buf, res); - driver_select(data->port_num, data->output_event, - ERL_DRV_WRITE|ERL_DRV_USE, 1); - driver_output(data->port_num, (char*) buf, res); - driver_free(buf); - } - /* clear the previous read */ - memset(data->aiocb.aio_buf, 0, res); - - /* issue a new read */ - DISPATCH_AIO(sig); - aio_read(&data->aiocb); - } - else if (data->packet_bytes && fd_data[data->ifd].remain > 0) { - /* we've read a partial package, or a header */ - - if (res == fd_data[data->ifd].remain) { /* we are done! */ - char *buf = data->aiocb.aio_buf; - int i; - - /* do we have anything buffered? 
*/ - if (fd_data[data->ifd].buf != NULL) { - memcpy(fd_data[data->ifd].buf + fd_data[data->ifd].sz, - buf, res); - buf = fd_data[data->ifd].buf; - } - - fd_data[data->ifd].sz += res; - driver_output(data->port_num, buf, (fd_data[data->ifd].sz>0?fd_data[data->ifd].sz:res)); - clear_fd_data(data->ifd); - - /* clear the previous read */ - memset(data->aiocb.aio_buf, 0, res); - - /* issue a new read */ - DISPATCH_AIO(sig); - data->aiocb.aio_nbytes = data->packet_bytes; - - if (data->aiocb.aio_buf == NULL) { - port_inp_failure(data->port_num, ready_fd, -1); - } - aio_read(&data->aiocb); - } - else if(res < fd_data[data->ifd].remain) { /* received part of a package */ - if (fd_data[data->ifd].sz == 0) { - - fd_data[data->ifd].sz += res; - memcpy(fd_data[data->ifd].buf, data->aiocb.aio_buf, res); - fd_data[data->ifd].remain -= res; - } - else { - memcpy(fd_data[data->ifd].buf + fd_data[data->ifd].sz, - data->aiocb.aio_buf, res); - fd_data[data->ifd].sz += res; - fd_data[data->ifd].remain -= res; - } - /* clear the previous read */ - memset(data->aiocb.aio_buf, 0, res); - - /* issue a new read */ - DISPATCH_AIO(sig); - data->aiocb.aio_nbytes = fd_data[data->ifd].remain; - - if (data->aiocb.aio_buf == NULL) { - port_inp_failure(data->port_num, ready_fd, -1); - } - aio_read(&data->aiocb); - } - } - else if (data->packet_bytes && fd_data[data->ifd].remain == 0) { /* we've recieved a header */ - - /* analyze the header FIXME */ - switch (data->packet_bytes) { - case 1: h = get_int8(data->aiocb.aio_buf); break; - case 2: h = get_int16(data->aiocb.aio_buf); break; - case 4: h = get_int32(data->aiocb.aio_buf); break; - } - - fd_data[data->ifd].buf = erts_alloc_fnf(ERTS_ALC_T_FD_ENTRY_BUF, h + data->packet_bytes); - fd_data[data->ifd].remain = ((h + data->packet_bytes) - res); - - /* clear the previous read */ - memset(data->aiocb.aio_buf, 0, data->packet_bytes); - - /* issue a new read */ - DISPATCH_AIO(sig); - data->aiocb.aio_nbytes = h; - - if (data->aiocb.aio_buf == NULL) { - 
port_inp_failure(data->port_num, ready_fd, -1); - } - aio_read(&data->aiocb); - } - } - sig = erl_drv_ose_get_signal(ready_fd); - } -} - - -/* The parameter e is a pointer to the driver_data structure - * related to the fd to be used as output. - * ready_fd is the event that triggered this call to ready_input */ -static void ready_output(ErlDrvData drv_data, ErlDrvEvent ready_fd) -{ - SysIOVec *iov; - int vlen; - int res; - union SIGNAL *sig; - struct driver_data *data = (struct driver_data *)drv_data; - - sig = erl_drv_ose_get_signal(ready_fd); - ASSERT(sig); - - while (sig != NULL) { - if (sig->fm_write_reply.actual <= 0) { - int status; - - status = efs_status_to_errno(sig->fm_write_reply.status); - driver_select(data->port_num, ready_fd, ERL_DRV_WRITE, 0); - DISPATCH_AIO(sig); - FREE_AIO(sig->fm_write_reply.buffer); - - driver_failure_posix(data->port_num, status); - } - else { /* written bytes > 0 */ - iov = driver_peekq(data->port_num, &vlen); - if (vlen > 0) { - DISPATCH_AIO(sig); - FREE_AIO(sig->fm_write_reply.buffer); - res = driver_deq(data->port_num, iov[0].iov_len); - if (res > 0) { - iov = driver_peekq(data->port_num, &vlen); - WRITE_AIO(data->ofd, iov[0].iov_len, iov[0].iov_base); - } - } - else if (vlen == 0) { - DISPATCH_AIO(sig); - FREE_AIO(sig->fm_write_reply.buffer); - } - - } - sig = erl_drv_ose_get_signal(ready_fd); - } -} - -static void stop_select(ErlDrvEvent ready_fd, void* _) -{ - int *fd; - erl_drv_ose_event_fetch(ready_fd, NULL, NULL, (void **)&fd); - erl_drv_ose_event_free(ready_fd); - close(*fd); -} - - -void erts_do_break_handling(void) -{ - struct termios temp_mode; - int saved = 0; - - /* - * Most functions that do_break() calls are intentionally not thread safe; - * therefore, make sure that all threads but this one are blocked before - * proceeding! 
- */ - erts_smp_thr_progress_block(); - - /* during break we revert to initial settings */ - /* this is done differently for oldshell */ - if (using_oldshell && !replace_intr) { - SET_BLOCKING(1); - } - else if (isatty(0)) { - tcgetattr(0,&temp_mode); - tcsetattr(0,TCSANOW,&initial_tty_mode); - saved = 1; - } - - /* call the break handling function, reset the flag */ - do_break(); - - fflush(stdout); - - /* after break we go back to saved settings */ - if (using_oldshell && !replace_intr) { - SET_NONBLOCKING(1); - } - else if (saved) { - tcsetattr(0,TCSANOW,&temp_mode); - } - - erts_smp_thr_progress_unblock(); -} - -static pid_t -getpid(void) -{ - return get_bid(current_process()); -} - -int getpagesize(void) -{ - return 1024; -} - - -/* Fills in the systems representation of the jam/beam process identifier. -** The Pid is put in STRING representation in the supplied buffer, -** no interpretatione of this should be done by the rest of the -** emulator. The buffer should be at least 21 bytes long. -*/ -void sys_get_pid(char *buffer, size_t buffer_size){ - pid_t p = getpid(); - /* Assume the pid is scalar and can rest in an unsigned long... 
*/ - erts_snprintf(buffer, buffer_size, "%lu",(unsigned long) p); -} - -int -erts_sys_putenv_raw(char *key, char *value) { - return erts_sys_putenv(key, value); -} -int -erts_sys_putenv(char *key, char *value) -{ - int res; - - erts_smp_rwmtx_rwlock(&environ_rwmtx); - res = set_env(get_bid(current_process()), key, - value); - erts_smp_rwmtx_rwunlock(&environ_rwmtx); - return res; -} - - -int -erts_sys_unsetenv(char *key) -{ - int res; - - erts_smp_rwmtx_rwlock(&environ_rwmtx); - res = set_env(get_bid(current_process()),key,NULL); - erts_smp_rwmtx_rwunlock(&environ_rwmtx); - - return res; -} - -int -erts_sys_getenv__(char *key, char *value, size_t *size) -{ - int res; - char *orig_value = get_env(get_bid(current_process()), key); - if (!orig_value) - res = -1; - else { - size_t len = sys_strlen(orig_value); - if (len >= *size) { - *size = len + 1; - res = 1; - } - else { - *size = len; - sys_memcpy((void *) value, (void *) orig_value, len+1); - res = 0; - } - free_buf((union SIGNAL **)&orig_value); - } - return res; -} - -int -erts_sys_getenv_raw(char *key, char *value, size_t *size) { - return erts_sys_getenv(key, value, size); -} - -/* - * erts_sys_getenv - * returns: - * -1, if environment key is not set with a value - * 0, if environment key is set and value fits into buffer res - * 1, if environment key is set but does not fit into buffer res - * res is set with the needed buffer res value - */ - -int -erts_sys_getenv(char *key, char *value, size_t *size) -{ - int res; - erts_smp_rwmtx_rlock(&environ_rwmtx); - res = erts_sys_getenv__(key, value, size); - erts_smp_rwmtx_runlock(&environ_rwmtx); - return res; -} - -void -sys_init_io(void) -{ - fd_data = (struct fd_data *) - erts_alloc(ERTS_ALC_T_FD_TAB, max_files * sizeof(struct fd_data)); - erts_smp_atomic_add_nob(&sys_misc_mem_sz, - max_files * sizeof(struct fd_data)); -} - -extern const char pre_loaded_code[]; -extern Preload pre_loaded[]; - -void erts_sys_alloc_init(void) -{ -} - -void 
*erts_sys_alloc(ErtsAlcType_t t, void *x, Uint sz) -{ - void *res = malloc((size_t) sz); -#if HAVE_ERTS_MSEG - if (!res) { - erts_mseg_clear_cache(); - return malloc((size_t) sz); - } -#endif - return res; -} - -void *erts_sys_realloc(ErtsAlcType_t t, void *x, void *p, Uint sz) -{ - void *res = realloc(p, (size_t) sz); -#if HAVE_ERTS_MSEG - if (!res) { - erts_mseg_clear_cache(); - return realloc(p, (size_t) sz); - } -#endif - return res; -} - -void erts_sys_free(ErtsAlcType_t t, void *x, void *p) -{ - free(p); -} - -/* Return a pointer to a vector of names of preloaded modules */ - -Preload* -sys_preloaded(void) -{ - return pre_loaded; -} - -/* Return a pointer to preloaded code for module "module" */ -unsigned char* -sys_preload_begin(Preload* p) -{ - return p->code; -} - -/* Clean up if allocated */ -void sys_preload_end(Preload* p) -{ - /* Nothing */ -} - -/* Read a key from console (?) */ - -int sys_get_key(fd) -int fd; -{ - int c; - unsigned char rbuf[64]; - - fflush(stdout); /* Flush query ??? */ - - if ((c = read(fd,rbuf,64)) <= 0) { - return c; - } - - return rbuf[0]; -} - - -#ifdef DEBUG - -extern int erts_initialized; -void -erl_assert_error(const char* expr, const char* func, - const char* file, int line) -{ - fflush(stdout); - fprintf(stderr, "%s:%d:%s() Assertion failed: %s\n", - file, line, func, expr); - fflush(stderr); - ramlog_printf("%s:%d:%s() Assertion failed: %s\n", - file, line, func, expr); - - abort(); -} - -void -erl_debug(char* fmt, ...) -{ - char sbuf[1024]; /* Temporary buffer. 
*/ - va_list va; - - if (debug_log) { - va_start(va, fmt); - vsprintf(sbuf, fmt, va); - va_end(va); - fprintf(stderr, "%s", sbuf); - } -} - -#endif /* DEBUG */ - -static ERTS_INLINE void -report_exit_status(ErtsSysReportExit *rep, int status) -{ - if (rep->ifd >= 0) { - driver_data[rep->ifd].alive = 0; - driver_data[rep->ifd].status = status; - } - if (rep->ofd >= 0) { - driver_data[rep->ofd].alive = 0; - driver_data[rep->ofd].status = status; - } - - erts_free(ERTS_ALC_T_PRT_REP_EXIT, rep); -} - -#define ERTS_REPORT_EXIT_STATUS report_exit_status - -/* - * Called from schedule() when it runs out of runnable processes, - * or when Erlang code has performed INPUT_REDUCTIONS reduction - * steps. runnable == 0 iff there are no runnable Erlang processes. - */ -void -erl_sys_schedule(int runnable) -{ - ASSERT(get_fsem(current_process()) == 0); -#ifdef ERTS_SMP - ASSERT(erts_get_scheduler_data()->no == 1); - ERTS_CHK_IO(!runnable); -#else - ERTS_CHK_IO( 1 ); -#endif - ASSERT(get_fsem(current_process()) == 0); - ERTS_SMP_LC_ASSERT(!erts_thr_progress_is_blocking()); -} - - -#ifdef ERTS_SMP - -void -erts_sys_main_thread(void) -{ - erts_thread_disable_fpe(); - - /* Become signal receiver thread... */ -#ifdef ERTS_ENABLE_LOCK_CHECK - erts_lc_set_thread_name("signal_receiver"); -#endif - - while (1) { - static const SIGSELECT sigsel[] = {0}; - union SIGNAL *msg = receive(sigsel); - - fprintf(stderr,"Main thread got message %d from 0x%x!!\r\n", - msg->sig_no, sender(&msg)); - free_buf(&msg); - } -} - -#endif /* ERTS_SMP */ - -void -erl_sys_args(int* argc, char** argv) -{ - int i, j; - - erts_smp_rwmtx_init(&environ_rwmtx, "environ"); - - init_check_io(); - - /* Handled arguments have been marked with NULL. Slide arguments - not handled towards the beginning of argv. 
*/ - for (i = 0, j = 0; i < *argc; i++) { - if (argv[i]) - argv[j++] = argv[i]; - } - *argc = j; - -} diff --git a/erts/emulator/sys/ose/sys_float.c b/erts/emulator/sys/ose/sys_float.c deleted file mode 100644 index 3d9abc6bd1..0000000000 --- a/erts/emulator/sys/ose/sys_float.c +++ /dev/null @@ -1,845 +0,0 @@ -/* - * %CopyrightBegin% - * - * Copyright Ericsson AB 2001-2013. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * %CopyrightEnd% - */ - -#ifdef HAVE_CONFIG_H -# include "config.h" -#endif - -#include "sys.h" -#include "global.h" -#include "erl_process.h" - - -#ifdef NO_FPE_SIGNALS - -void -erts_sys_init_float(void) -{ -# ifdef SIGFPE - sys_sigset(SIGFPE, SIG_IGN); /* Ignore so we can test for NaN and Inf */ -# endif -} - -#else /* !NO_FPE_SIGNALS */ - -#ifdef ERTS_SMP -static erts_tsd_key_t fpe_key; - -/* once-only initialisation early in the main thread (via erts_sys_init_float()) */ -static void erts_init_fp_exception(void) -{ - /* XXX: the wrappers prevent using a pthread destructor to - deallocate the key's value; so when/where do we do that? 
*/ - erts_tsd_key_create(&fpe_key); -} - -void erts_thread_init_fp_exception(void) -{ - unsigned long *fpe = erts_alloc(ERTS_ALC_T_FP_EXCEPTION, sizeof(*fpe)); - *fpe = 0L; - erts_tsd_set(fpe_key, fpe); -} - -static ERTS_INLINE volatile unsigned long *erts_thread_get_fp_exception(void) -{ - return (volatile unsigned long*)erts_tsd_get(fpe_key); -} -#else /* !SMP */ -#define erts_init_fp_exception() /*empty*/ -static volatile unsigned long fp_exception; -#define erts_thread_get_fp_exception() (&fp_exception) -#endif /* SMP */ - -volatile unsigned long *erts_get_current_fp_exception(void) -{ - Process *c_p; - - c_p = erts_get_current_process(); - if (c_p) - return &c_p->fp_exception; - return erts_thread_get_fp_exception(); -} - -static void set_current_fp_exception(unsigned long pc) -{ - volatile unsigned long *fpexnp = erts_get_current_fp_exception(); - ASSERT(fpexnp != NULL); - *fpexnp = pc; -} - -void erts_fp_check_init_error(volatile unsigned long *fpexnp) -{ - char buf[64]; - snprintf(buf, sizeof buf, "ERTS_FP_CHECK_INIT at %p: detected unhandled FPE at %p\r\n", - __builtin_return_address(0), (void*)*fpexnp); - if (write(2, buf, strlen(buf)) <= 0) - erl_exit(ERTS_ABORT_EXIT, "%s", buf); - *fpexnp = 0; -#if defined(__i386__) || defined(__x86_64__) - erts_restore_fpu(); -#endif -} - -/* Is there no standard identifier for Darwin/MacOSX ? 
*/ -#if defined(__APPLE__) && defined(__MACH__) && !defined(__DARWIN__) -#define __DARWIN__ 1 -#endif - -#if (defined(__i386__) || defined(__x86_64__)) && defined(__GNUC__) - -static void unmask_x87(void) -{ - unsigned short cw; - - __asm__ __volatile__("fstcw %0" : "=m"(cw)); - cw &= ~(0x01|0x04|0x08); /* unmask IM, ZM, OM */ - __asm__ __volatile__("fldcw %0" : : "m"(cw)); -} - -/* mask x87 FPE, return true if the previous state was unmasked */ -static int mask_x87(void) -{ - unsigned short cw; - int unmasked; - - __asm__ __volatile__("fstcw %0" : "=m"(cw)); - unmasked = (cw & (0x01|0x04|0x08)) == 0; - /* or just set cw = 0x37f */ - cw |= (0x01|0x04|0x08); /* mask IM, ZM, OM */ - __asm__ __volatile__("fldcw %0" : : "m"(cw)); - return unmasked; -} - -static void unmask_sse2(void) -{ - unsigned int mxcsr; - - __asm__ __volatile__("stmxcsr %0" : "=m"(mxcsr)); - mxcsr &= ~(0x003F|0x0680); /* clear exn flags, unmask OM, ZM, IM (not PM, UM, DM) */ - __asm__ __volatile__("ldmxcsr %0" : : "m"(mxcsr)); -} - -/* mask SSE2 FPE, return true if the previous state was unmasked */ -static int mask_sse2(void) -{ - unsigned int mxcsr; - int unmasked; - - __asm__ __volatile__("stmxcsr %0" : "=m"(mxcsr)); - unmasked = (mxcsr & 0x0680) == 0; - /* or just set mxcsr = 0x1f80 */ - mxcsr &= ~0x003F; /* clear exn flags */ - mxcsr |= 0x0680; /* mask OM, ZM, IM (not PM, UM, DM) */ - __asm__ __volatile__("ldmxcsr %0" : : "m"(mxcsr)); - return unmasked; -} - -#if defined(__x86_64__) - -static inline int cpu_has_sse2(void) { return 1; } - -#else /* !__x86_64__ */ - -/* - * Check if an x86-32 processor has SSE2. 
- */ -static unsigned int xor_eflags(unsigned int mask) -{ - unsigned int eax, edx; - - eax = mask; /* eax = mask */ - __asm__("pushfl\n\t" - "popl %0\n\t" /* edx = original EFLAGS */ - "xorl %0, %1\n\t" /* eax = mask ^ EFLAGS */ - "pushl %1\n\t" - "popfl\n\t" /* new EFLAGS = mask ^ original EFLAGS */ - "pushfl\n\t" - "popl %1\n\t" /* eax = new EFLAGS */ - "xorl %0, %1\n\t" /* eax = new EFLAGS ^ old EFLAGS */ - "pushl %0\n\t" - "popfl" /* restore original EFLAGS */ - : "=d"(edx), "=a"(eax) - : "1"(eax)); - return eax; -} - -static __inline__ unsigned int cpuid_eax(unsigned int op) -{ - unsigned int eax, save_ebx; - - /* In PIC mode i386 reserves EBX. So we must save - and restore it ourselves to not upset gcc. */ - __asm__( - "movl %%ebx, %1\n\t" - "cpuid\n\t" - "movl %1, %%ebx" - : "=a"(eax), "=m"(save_ebx) - : "0"(op) - : "cx", "dx"); - return eax; -} - -static __inline__ unsigned int cpuid_edx(unsigned int op) -{ - unsigned int eax, edx, save_ebx; - - /* In PIC mode i386 reserves EBX. So we must save - and restore it ourselves to not upset gcc. */ - __asm__( - "movl %%ebx, %2\n\t" - "cpuid\n\t" - "movl %2, %%ebx" - : "=a"(eax), "=d"(edx), "=m"(save_ebx) - : "0"(op) - : "cx"); - return edx; -} - -/* The AC bit, bit #18, is a new bit introduced in the EFLAGS - * register on the Intel486 processor to generate alignment - * faults. This bit cannot be set on the Intel386 processor. - */ -static __inline__ int is_386(void) -{ - return ((xor_eflags(1<<18) >> 18) & 1) == 0; -} - -/* Newer x86 processors have a CPUID instruction, as indicated by - * the ID bit (#21) in EFLAGS being modifiable. 
- */ -static __inline__ int has_CPUID(void) -{ - return (xor_eflags(1<<21) >> 21) & 1; -} - -static int cpu_has_sse2(void) -{ - unsigned int maxlev, features; - static int has_sse2 = -1; - - if (has_sse2 >= 0) - return has_sse2; - has_sse2 = 0; - - if (is_386()) - return 0; - if (!has_CPUID()) - return 0; - maxlev = cpuid_eax(0); - /* Intel A-step Pentium had a preliminary version of CPUID. - It also didn't have SSE2. */ - if ((maxlev & 0xFFFFFF00) == 0x0500) - return 0; - /* If max level is zero then CPUID cannot report any features. */ - if (maxlev == 0) - return 0; - features = cpuid_edx(1); - has_sse2 = (features & (1 << 26)) != 0; - - return has_sse2; -} -#endif /* !__x86_64__ */ - -static void unmask_fpe(void) -{ - __asm__ __volatile__("fnclex"); - unmask_x87(); - if (cpu_has_sse2()) - unmask_sse2(); -} - -static void unmask_fpe_conditional(int unmasked) -{ - if (unmasked) - unmask_fpe(); -} - -/* mask x86 FPE, return true if the previous state was unmasked */ -static int mask_fpe(void) -{ - int unmasked; - - unmasked = mask_x87(); - if (cpu_has_sse2()) - unmasked |= mask_sse2(); - return unmasked; -} - -void erts_restore_fpu(void) -{ - __asm__ __volatile__("fninit"); - unmask_x87(); - if (cpu_has_sse2()) - unmask_sse2(); -} - -#elif defined(__sparc__) && defined(__linux__) - -#if defined(__arch64__) -#define LDX "ldx" -#define STX "stx" -#else -#define LDX "ld" -#define STX "st" -#endif - -static void unmask_fpe(void) -{ - unsigned long fsr; - - __asm__(STX " %%fsr, %0" : "=m"(fsr)); - fsr &= ~(0x1FUL << 23); /* clear FSR[TEM] field */ - fsr |= (0x1AUL << 23); /* enable NV, OF, DZ exceptions */ - __asm__ __volatile__(LDX " %0, %%fsr" : : "m"(fsr)); -} - -static void unmask_fpe_conditional(int unmasked) -{ - if (unmasked) - unmask_fpe(); -} - -/* mask SPARC FPE, return true if the previous state was unmasked */ -static int mask_fpe(void) -{ - unsigned long fsr; - int unmasked; - - __asm__(STX " %%fsr, %0" : "=m"(fsr)); - unmasked = ((fsr >> 23) & 0x1A) == 
0x1A; - fsr &= ~(0x1FUL << 23); /* clear FSR[TEM] field */ - __asm__ __volatile__(LDX " %0, %%fsr" : : "m"(fsr)); - return unmasked; -} - -#elif (defined(__powerpc__) && defined(__linux__)) || (defined(__ppc__) && defined(__DARWIN__)) - -#if defined(__linux__) -#include <sys/prctl.h> - -static void set_fpexc_precise(void) -{ - if (prctl(PR_SET_FPEXC, PR_FP_EXC_PRECISE) < 0) { - perror("PR_SET_FPEXC"); - exit(1); - } -} - -#elif defined(__DARWIN__) - -#include <mach/mach.h> -#include <pthread.h> - -/* - * FE0 FE1 MSR bits - * 0 0 floating-point exceptions disabled - * 0 1 floating-point imprecise nonrecoverable - * 1 0 floating-point imprecise recoverable - * 1 1 floating-point precise mode - * - * Apparently: - * - Darwin 5.5 (MacOS X <= 10.1) starts with FE0 == FE1 == 0, - * and resets FE0 and FE1 to 0 after each SIGFPE. - * - Darwin 6.0 (MacOS X 10.2) starts with FE0 == FE1 == 1, - * and does not reset FE0 or FE1 after a SIGFPE. - */ -#define FE0_MASK (1<<11) -#define FE1_MASK (1<<8) - -/* a thread cannot get or set its own MSR bits */ -static void *fpu_fpe_enable(void *arg) -{ - thread_t t = *(thread_t*)arg; - struct ppc_thread_state state; - unsigned int state_size = PPC_THREAD_STATE_COUNT; - - if (thread_get_state(t, PPC_THREAD_STATE, (natural_t*)&state, &state_size) != KERN_SUCCESS) { - perror("thread_get_state"); - exit(1); - } - if ((state.srr1 & (FE1_MASK|FE0_MASK)) != (FE1_MASK|FE0_MASK)) { -#if 1 - /* This would also have to be performed in the SIGFPE handler - to work around the MSR reset older Darwin releases do. 
*/ - state.srr1 |= (FE1_MASK|FE0_MASK); - thread_set_state(t, PPC_THREAD_STATE, (natural_t*)&state, state_size); -#else - fprintf(stderr, "srr1 == 0x%08x, your Darwin is too old\n", state.srr1); - exit(1); -#endif - } - return NULL; /* Ok, we appear to be on Darwin 6.0 or later */ -} - -static void set_fpexc_precise(void) -{ - thread_t self = mach_thread_self(); - pthread_t enabler; - - if (pthread_create(&enabler, NULL, fpu_fpe_enable, &self)) { - perror("pthread_create"); - } else if (pthread_join(enabler, NULL)) { - perror("pthread_join"); - } -} - -#endif - -static void set_fpscr(unsigned int fpscr) -{ - union { - double d; - unsigned int fpscr[2]; - } u; - - u.fpscr[0] = 0xFFF80000; - u.fpscr[1] = fpscr; - __asm__ __volatile__("mtfsf 255,%0" : : "f"(u.d)); -} - -static unsigned int get_fpscr(void) -{ - union { - double d; - unsigned int fpscr[2]; - } u; - - __asm__("mffs %0" : "=f"(u.d)); - return u.fpscr[1]; -} - -static void unmask_fpe(void) -{ - set_fpexc_precise(); - set_fpscr(0x80|0x40|0x10); /* VE, OE, ZE; not UE or XE */ -} - -static void unmask_fpe_conditional(int unmasked) -{ - if (unmasked) - unmask_fpe(); -} - -/* mask PowerPC FPE, return true if the previous state was unmasked */ -static int mask_fpe(void) -{ - int unmasked; - - unmasked = (get_fpscr() & (0x80|0x40|0x10)) == (0x80|0x40|0x10); - set_fpscr(0x00); - return unmasked; -} - -#else - -static void unmask_fpe(void) -{ - fpsetmask(FP_X_INV | FP_X_OFL | FP_X_DZ); -} - -static void unmask_fpe_conditional(int unmasked) -{ - if (unmasked) - unmask_fpe(); -} - -/* mask IEEE FPE, return true if previous state was unmasked */ -static int mask_fpe(void) -{ - const fp_except unmasked_mask = FP_X_INV | FP_X_OFL | FP_X_DZ; - fp_except old_mask; - - old_mask = fpsetmask(0); - return (old_mask & unmasked_mask) == unmasked_mask; -} - -#endif - -#if (defined(__linux__) && (defined(__i386__) || defined(__x86_64__) || defined(__sparc__) || defined(__powerpc__))) || (defined(__DARWIN__) && (defined(__i386__) 
|| defined(__x86_64__) || defined(__ppc__))) || (defined(__FreeBSD__) && (defined(__x86_64__) || defined(__i386__))) || ((defined(__NetBSD__) || defined(__OpenBSD__)) && defined(__x86_64__)) || (defined(__sun__) && defined(__x86_64__)) - -#if defined(__linux__) && defined(__i386__) -#if !defined(X86_FXSR_MAGIC) -#define X86_FXSR_MAGIC 0x0000 -#endif -#elif defined(__FreeBSD__) && defined(__x86_64__) -#include <sys/types.h> -#include <machine/fpu.h> -#elif defined(__FreeBSD__) && defined(__i386__) -#include <sys/types.h> -#include <machine/npx.h> -#elif defined(__DARWIN__) -#include <machine/signal.h> -#elif defined(__OpenBSD__) && defined(__x86_64__) -#include <sys/types.h> -#include <machine/fpu.h> -#endif -#if !(defined(__OpenBSD__) && defined(__x86_64__)) -#include <ucontext.h> -#endif -#include <string.h> - -#if defined(__linux__) && defined(__x86_64__) -#define mc_pc(mc) ((mc)->gregs[REG_RIP]) -#elif defined(__linux__) && defined(__i386__) -#define mc_pc(mc) ((mc)->gregs[REG_EIP]) -#elif defined(__DARWIN__) && defined(__i386__) -#ifdef DARWIN_MODERN_MCONTEXT -#define mc_pc(mc) ((mc)->__ss.__eip) -#else -#define mc_pc(mc) ((mc)->ss.eip) -#endif -#elif defined(__DARWIN__) && defined(__x86_64__) -#ifdef DARWIN_MODERN_MCONTEXT -#define mc_pc(mc) ((mc)->__ss.__rip) -#else -#define mc_pc(mc) ((mc)->ss.rip) -#endif -#elif defined(__FreeBSD__) && defined(__x86_64__) -#define mc_pc(mc) ((mc)->mc_rip) -#elif defined(__FreeBSD__) && defined(__i386__) -#define mc_pc(mc) ((mc)->mc_eip) -#elif defined(__NetBSD__) && defined(__x86_64__) -#define mc_pc(mc) ((mc)->__gregs[_REG_RIP]) -#elif defined(__NetBSD__) && defined(__i386__) -#define mc_pc(mc) ((mc)->__gregs[_REG_EIP]) -#elif defined(__OpenBSD__) && defined(__x86_64__) -#define mc_pc(mc) ((mc)->sc_rip) -#elif defined(__sun__) && defined(__x86_64__) -#define mc_pc(mc) ((mc)->gregs[REG_RIP]) -#endif - -static void fpe_sig_action(int sig, siginfo_t *si, void *puc) -{ - ucontext_t *uc = puc; - unsigned long pc; - -#if 
defined(__linux__) -#if defined(__x86_64__) - mcontext_t *mc = &uc->uc_mcontext; - fpregset_t fpstate = mc->fpregs; - pc = mc_pc(mc); - /* A failed SSE2 instruction will restart. To avoid - looping we mask SSE2 exceptions now and unmask them - again later in erts_check_fpe()/erts_restore_fpu(). - On RISCs we update PC to skip the failed instruction, - but the ever increasing complexity of the x86 instruction - set encoding makes that a poor solution here. */ - fpstate->mxcsr = 0x1F80; - fpstate->swd &= ~0xFF; -#elif defined(__i386__) - mcontext_t *mc = &uc->uc_mcontext; - fpregset_t fpstate = mc->fpregs; - pc = mc_pc(mc); - if ((fpstate->status >> 16) == X86_FXSR_MAGIC) - ((struct _fpstate*)fpstate)->mxcsr = 0x1F80; - fpstate->sw &= ~0xFF; -#elif defined(__sparc__) && defined(__arch64__) - /* on SPARC the 3rd parameter points to a sigcontext not a ucontext */ - struct sigcontext *sc = (struct sigcontext*)puc; - pc = sc->sigc_regs.tpc; - sc->sigc_regs.tpc = sc->sigc_regs.tnpc; - sc->sigc_regs.tnpc += 4; -#elif defined(__sparc__) - /* on SPARC the 3rd parameter points to a sigcontext not a ucontext */ - struct sigcontext *sc = (struct sigcontext*)puc; - pc = sc->si_regs.pc; - sc->si_regs.pc = sc->si_regs.npc; - sc->si_regs.npc = (unsigned long)sc->si_regs.npc + 4; -#elif defined(__powerpc__) -#if defined(__powerpc64__) - mcontext_t *mc = &uc->uc_mcontext; - unsigned long *regs = &mc->gp_regs[0]; -#else - mcontext_t *mc = uc->uc_mcontext.uc_regs; - unsigned long *regs = &mc->gregs[0]; -#endif - pc = regs[PT_NIP]; - regs[PT_NIP] += 4; - regs[PT_FPSCR] = 0x80|0x40|0x10; /* VE, OE, ZE; not UE or XE */ -#endif -#elif defined(__DARWIN__) && (defined(__i386__) || defined(__x86_64__)) -#ifdef DARWIN_MODERN_MCONTEXT - mcontext_t mc = uc->uc_mcontext; - pc = mc_pc(mc); - mc->__fs.__fpu_mxcsr = 0x1F80; - *(unsigned short *)&mc->__fs.__fpu_fsw &= ~0xFF; -#else - mcontext_t mc = uc->uc_mcontext; - pc = mc_pc(mc); - mc->fs.fpu_mxcsr = 0x1F80; - *(unsigned short *)&mc->fs.fpu_fsw 
&= ~0xFF; -#endif /* DARWIN_MODERN_MCONTEXT */ -#elif defined(__DARWIN__) && defined(__ppc__) - mcontext_t mc = uc->uc_mcontext; - pc = mc->ss.srr0; - mc->ss.srr0 += 4; - mc->fs.fpscr = 0x80|0x40|0x10; -#elif defined(__FreeBSD__) && defined(__x86_64__) - mcontext_t *mc = &uc->uc_mcontext; - struct savefpu *savefpu = (struct savefpu*)&mc->mc_fpstate; - struct envxmm *envxmm = &savefpu->sv_env; - pc = mc_pc(mc); - envxmm->en_mxcsr = 0x1F80; - envxmm->en_sw &= ~0xFF; -#elif defined(__FreeBSD__) && defined(__i386__) - mcontext_t *mc = &uc->uc_mcontext; - union savefpu *savefpu = (union savefpu*)&mc->mc_fpstate; - pc = mc_pc(mc); - if (mc->mc_fpformat == _MC_FPFMT_XMM) { - struct envxmm *envxmm = &savefpu->sv_xmm.sv_env; - envxmm->en_mxcsr = 0x1F80; - envxmm->en_sw &= ~0xFF; - } else { - struct env87 *env87 = &savefpu->sv_87.sv_env; - env87->en_sw &= ~0xFF; - } -#elif defined(__NetBSD__) && defined(__x86_64__) - mcontext_t *mc = &uc->uc_mcontext; - struct fxsave64 *fxsave = (struct fxsave64 *)&mc->__fpregs; - pc = mc_pc(mc); - fxsave->fx_mxcsr = 0x1F80; - fxsave->fx_fsw &= ~0xFF; -#elif defined(__NetBSD__) && defined(__i386__) - mcontext_t *mc = &uc->uc_mcontext; - pc = mc_pc(mc); - if (uc->uc_flags & _UC_FXSAVE) { - struct envxmm *envxmm = (struct envxmm *)&mc->__fpregs; - envxmm->en_mxcsr = 0x1F80; - envxmm->en_sw &= ~0xFF; - } else { - struct env87 *env87 = (struct env87 *)&mc->__fpregs; - env87->en_sw &= ~0xFF; - } -#elif defined(__OpenBSD__) && defined(__x86_64__) - struct fxsave64 *fxsave = uc->sc_fpstate; - pc = mc_pc(uc); - fxsave->fx_mxcsr = 0x1F80; - fxsave->fx_fsw &= ~0xFF; -#elif defined(__sun__) && defined(__x86_64__) - mcontext_t *mc = &uc->uc_mcontext; - struct fpchip_state *fpstate = &mc->fpregs.fp_reg_set.fpchip_state; - pc = mc_pc(mc); - fpstate->mxcsr = 0x1F80; - fpstate->sw &= ~0xFF; -#endif -#if 0 - { - char buf[64]; - snprintf(buf, sizeof buf, "%s: FPE at %p\r\n", __FUNCTION__, (void*)pc); - write(2, buf, strlen(buf)); - } -#endif - 
set_current_fp_exception(pc); -} - -static void erts_thread_catch_fp_exceptions(void) -{ - struct sigaction act; - memset(&act, 0, sizeof act); - act.sa_sigaction = fpe_sig_action; - act.sa_flags = SA_SIGINFO; - sigaction(SIGFPE, &act, NULL); - unmask_fpe(); -} - -#else /* !((__linux__ && (__i386__ || __x86_64__ || __powerpc__)) || (__DARWIN__ && (__i386__ || __x86_64__ || __ppc__))) */ - -static void fpe_sig_handler(int sig) -{ - set_current_fp_exception(1); /* XXX: convert to sigaction so we can get the trap PC */ -} - -static void erts_thread_catch_fp_exceptions(void) -{ - sys_sigset(SIGFPE, fpe_sig_handler); - unmask_fpe(); -} - -#endif /* (__linux__ && (__i386__ || __x86_64__ || __powerpc__)) || (__DARWIN__ && (__i386__ || __x86_64__ || __ppc__))) */ - -/* once-only initialisation early in the main thread */ -void erts_sys_init_float(void) -{ - erts_init_fp_exception(); - erts_thread_catch_fp_exceptions(); - erts_printf_block_fpe = erts_sys_block_fpe; - erts_printf_unblock_fpe = erts_sys_unblock_fpe; -} - -#endif /* NO_FPE_SIGNALS */ - -void erts_thread_init_float(void) -{ -#ifdef ERTS_SMP - /* This allows Erlang schedulers to leave Erlang-process context - and still have working FP exceptions. XXX: is this needed? */ - erts_thread_init_fp_exception(); -#endif - -#ifndef NO_FPE_SIGNALS - /* NOTE: - * erts_thread_disable_fpe() is called in all threads at - * creation. We at least need to call unmask_fpe() - */ -#if defined(__DARWIN__) || defined(__FreeBSD__) - /* Darwin (7.9.0) does not appear to propagate FP exception settings - to a new thread from its parent. So if we want FP exceptions, we - must manually re-enable them in each new thread. - FreeBSD 6.1 appears to suffer from a similar issue. 
*/ - erts_thread_catch_fp_exceptions(); -#else - unmask_fpe(); -#endif - -#endif -} - -void erts_thread_disable_fpe(void) -{ -#if !defined(NO_FPE_SIGNALS) - (void)mask_fpe(); -#endif -} - -#if !defined(NO_FPE_SIGNALS) -int erts_sys_block_fpe(void) -{ - return mask_fpe(); -} - -void erts_sys_unblock_fpe(int unmasked) -{ - unmask_fpe_conditional(unmasked); -} -#endif - -/* The following check is incorporated from the Vee machine */ - -#define ISDIGIT(d) ((d) >= '0' && (d) <= '9') - -/* - ** Convert a double to ascii format 0.dddde[+|-]ddd - ** return number of characters converted or -1 if error. - ** - ** These two functions should maybe use localeconv() to pick up - ** the current radix character, but since it is uncertain how - ** expensive such a system call is, and since no-one has heard - ** of other radix characters than '.' and ',' an ad-hoc - ** low execution time solution is used instead. - */ - -int -sys_double_to_chars_ext(double fp, char *buffer, size_t buffer_size, size_t decimals) -{ - char *s = buffer; - - if (erts_snprintf(buffer, buffer_size, "%.*e", decimals, fp) >= buffer_size) - return -1; - /* Search upto decimal point */ - if (*s == '+' || *s == '-') s++; - while (ISDIGIT(*s)) s++; - if (*s == ',') *s++ = '.'; /* Replace ',' with '.' */ - /* Scan to end of string */ - while (*s) s++; - return s-buffer; /* i.e strlen(buffer) */ -} - -/* Float conversion */ - -int -sys_chars_to_double(char* buf, double* fp) -{ -#ifndef NO_FPE_SIGNALS - volatile unsigned long *fpexnp = erts_get_current_fp_exception(); -#endif - char *s = buf, *t, *dp; - - /* Robert says that something like this is what he really wanted: - * (The [.,] radix test is NOT what Robert wanted - it was added later) - * - * 7 == sscanf(Tbuf, "%[+-]%[0-9][.,]%[0-9]%[eE]%[+-]%[0-9]%s", ....); - * if (*s2 == 0 || *s3 == 0 || *s4 == 0 || *s6 == 0 || *s7) - * break; - */ - - /* Scan string to check syntax. */ - if (*s == '+' || *s == '-') s++; - if (!ISDIGIT(*s)) /* Leading digits. 
*/ - return -1; - while (ISDIGIT(*s)) s++; - if (*s != '.' && *s != ',') /* Decimal part. */ - return -1; - dp = s++; /* Remember decimal point pos just in case */ - if (!ISDIGIT(*s)) - return -1; - while (ISDIGIT(*s)) s++; - if (*s == 'e' || *s == 'E') { - /* There is an exponent. */ - s++; - if (*s == '+' || *s == '-') s++; - if (!ISDIGIT(*s)) - return -1; - while (ISDIGIT(*s)) s++; - } - if (*s) /* That should be it */ - return -1; - -#ifdef NO_FPE_SIGNALS - errno = 0; -#endif - __ERTS_FP_CHECK_INIT(fpexnp); - *fp = strtod(buf, &t); - __ERTS_FP_ERROR_THOROUGH(fpexnp, *fp, return -1); - if (t != s) { /* Whole string not scanned */ - /* Try again with other radix char */ - *dp = (*dp == '.') ? ',' : '.'; - errno = 0; - __ERTS_FP_CHECK_INIT(fpexnp); - *fp = strtod(buf, &t); - __ERTS_FP_ERROR_THOROUGH(fpexnp, *fp, return -1); - } - -#ifdef NO_FPE_SIGNALS - if (errno == ERANGE) { - if (*fp == HUGE_VAL || *fp == -HUGE_VAL) { - /* overflow, should give error */ - return -1; - } else if (t == s && *fp == 0.0) { - /* This should give 0.0 - OTP-7178 */ - errno = 0; - - } else if (*fp == 0.0) { - return -1; - } - } -#endif - return 0; -} - -int -matherr(struct exception *exc) -{ -#if !defined(NO_FPE_SIGNALS) - volatile unsigned long *fpexnp = erts_get_current_fp_exception(); - if (fpexnp != NULL) - *fpexnp = (unsigned long)__builtin_return_address(0); -#endif - return 1; -} diff --git a/erts/emulator/sys/ose/sys_time.c b/erts/emulator/sys/ose/sys_time.c deleted file mode 100644 index 5dac75956a..0000000000 --- a/erts/emulator/sys/ose/sys_time.c +++ /dev/null @@ -1,57 +0,0 @@ -/* - * %CopyrightBegin% - * - * Copyright Ericsson AB 2005-2009. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * %CopyrightEnd% - */ - -#ifdef HAVE_CONFIG_H -# include "config.h" -#endif - -#include "sys.h" -#include "global.h" - -/******************* Routines for time measurement *********************/ - -int erts_ticks_per_sec = 0; /* Will be SYS_CLK_TCK in erl_unix_sys.h */ - -int sys_init_time(void) -{ - return SYS_CLOCK_RESOLUTION; -} - -clock_t sys_times(SysTimes *now) { - now->tms_utime = now->tms_stime = now->tms_cutime = now->tms_cstime = 0; - return 0; -} - -static OSTICK last_tick_count = 0; -static SysHrTime wrap = 0; -static OSTICK us_per_tick; - -void sys_init_hrtime() { - us_per_tick = system_tick(); -} - -SysHrTime sys_gethrtime() { - OSTICK ticks = get_ticks(); - if (ticks < (SysHrTime) last_tick_count) { - wrap += 1ULL << 32; - } - last_tick_count = ticks; - return ((((SysHrTime) ticks) + wrap) * 1000*us_per_tick); -} diff --git a/erts/emulator/sys/unix/erl_child_setup.c b/erts/emulator/sys/unix/erl_child_setup.c index a3c5c20641..4e61530cf1 100644 --- a/erts/emulator/sys/unix/erl_child_setup.c +++ b/erts/emulator/sys/unix/erl_child_setup.c @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 2002-2009. All Rights Reserved. + * Copyright Ericsson AB 2002-2015. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,109 +19,233 @@ */ /* - * After a vfork() (or fork()) the child exec()s to this program which - * sets up the child and exec()s to the user program (see spawn_start() - * in sys.c and ticket OTP-4389). 
+ * This program is started at erts startup and all forks that + * have to be done are done in here. This is done for a couple + * of reasons: + * - Allow usage of fork without a memory explosion. + * -- we do not want to use vfork, as it blocks the VM + * until the execv is done, and if the program that + * is to be executed is on an NFS that is unavailable, + * the execv can block for a very long time. + * -- we cannot do fork inside the VM as that would temporarily + * duplicate the memory usage of the VM per parallel exec. + * + * Some implementation notes: + * - A single Unix Domain Socket is set up between the VM and + * this program. Over that UDS the file descriptors that should + * be used to talk to the child program are sent. + * The actual command to execute, together with options and the + * environment, is sent over the pipe represented by the + * file descriptors mentioned above. We don't send the + * command over the UDS as that would increase the likelihood + * that its buffer would be full. + * + * - Since it is this program that execv's, it has to take care of + * all the SIGCHLD signals that the child programs generate. The + * signals are received and the pid+exit reason is sent as data + * on the UDS to the VM. The VM is then able to map the pid to the + * port of the child program that just exited and deliver the status + * code if requested.
*/ #ifdef HAVE_CONFIG_H # include "config.h" #endif -#define NEED_CHILD_SETUP_DEFINES -#include "sys.h" -#include "erl_misc_utils.h" +#include <stdlib.h> +#include <stdio.h> +#include <sys/wait.h> -#ifdef SIG_SIGSET /* Old SysV */ -void sys_sigrelease(int sig) +#define WANT_NONBLOCKING + +#include "erl_driver.h" +#include "sys_uds.h" +#include "hash.h" +#include "erl_child_setup.h" + +#define SET_CLOEXEC(fd) fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC) + +#if defined(__ANDROID__) +#define SHELL "/system/bin/sh" +#else +#define SHELL "/bin/sh" +#endif /* __ANDROID__ */ + +//#define HARD_DEBUG +#ifdef HARD_DEBUG +#define DEBUG_PRINT(fmt, ...) fprintf(stderr, fmt "\r\n", ##__VA_ARGS__) +#else +#define DEBUG_PRINT(fmt, ...) +#endif + +#define ABORT(fmt, ...) do { \ + fprintf(stderr, "erl_child_setup: " fmt "\r\n", ##__VA_ARGS__); \ + abort(); \ + } while(0) + +#ifdef DEBUG +void +erl_assert_error(const char* expr, const char* func, const char* file, int line) { - sigrelse(sig); + fflush(stdout); + fprintf(stderr, "%s:%d:%s() Assertion failed: %s\n", + file, line, func, expr); + fflush(stderr); + abort(); } -#else /* !SIG_SIGSET */ -#ifdef SIG_SIGNAL /* Old BSD */ -sys_sigrelease(int sig) +#endif + +void sys_sigblock(int sig) { - sigsetmask(sigblock(0) & ~sigmask(sig)); + sigset_t mask; + + sigemptyset(&mask); + sigaddset(&mask, sig); + sigprocmask(SIG_BLOCK, &mask, (sigset_t *)NULL); } -#else /* !SIG_SIGNAL */ /* The True Way - POSIX!:-) */ + void sys_sigrelease(int sig) { sigset_t mask; - sigemptyset(&mask); sigaddset(&mask, sig); sigprocmask(SIG_UNBLOCK, &mask, (sigset_t *)NULL); } -#endif /* !SIG_SIGNAL */ -#endif /* !SIG_SIGSET */ - -#if defined(__ANDROID__) -static int system_properties_fd(void); -#endif /* __ANDROID__ */ -#if defined(__ANDROID__) -#define SHELL "/system/bin/sh" -#else -#define SHELL "/bin/sh" -#endif /* __ANDROID__ */ +static void add_os_pid_to_port_id_mapping(Eterm, pid_t); +static Eterm get_port_id(pid_t); +static int 
forker_hash_init(void); +static int max_files = -1; +static int sigchld_pipe[2]; -int -main(int argc, char *argv[]) +static int +start_new_child(int pipes[]) { - int i, from, to; - int erts_spawn_executable = 0; + int size, res, i, pos = 0; + char *buff, *o_buff; + + char *cmd, *wd, **new_environ, **args = NULL; + + Sint cnt, flags; - /* OBSERVE! - * Keep child setup after fork() (implemented in sys.c) up to date - * if changes are made here. - */ + /* only child executes here */ - if (argc != CS_ARGV_NO_OF_ARGS) { - if (argc < CS_ARGV_NO_OF_ARGS) { - return 1; - } else { - erts_spawn_executable = 1; - } + do { + res = read(pipes[0], (char*)&size, sizeof(size)); + } while(res < 0 && (errno == EINTR || errno == ERRNO_BLOCK)); + + if (res <= 0) { + goto child_error; } - if (strcmp("false", argv[CS_ARGV_UNBIND_IX]) != 0) - if (erts_unbind_from_cpu_str(argv[CS_ARGV_UNBIND_IX]) != 0) - return 1; + buff = malloc(size); + + DEBUG_PRINT("size = %d", size); + + do { + if ((res = read(pipes[0], buff + pos, size - pos)) < 0) { + if (errno == ERRNO_BLOCK || errno == EINTR) + continue; + goto child_error; + } + if (res == 0) { + errno = EPIPE; + goto child_error; + } + pos += res; + } while(size - pos != 0); + + o_buff = buff; + + flags = get_int32(buff); + buff += sizeof(Sint32); - for (i = 0; i < CS_ARGV_NO_OF_DUP2_OPS; i++) { - if (argv[CS_ARGV_DUP2_OP_IX(i)][0] == '-' - && argv[CS_ARGV_DUP2_OP_IX(i)][1] == '\0') - break; - if (sscanf(argv[CS_ARGV_DUP2_OP_IX(i)], "%d:%d", &from, &to) != 2) - return 1; - if (dup2(from, to) < 0) - return 1; + DEBUG_PRINT("flags = %d", flags); + + cmd = buff; + buff += strlen(buff) + 1; + if (*buff == '\0') { + wd = NULL; + } else { + wd = buff; + buff += strlen(buff) + 1; } + buff++; - if (sscanf(argv[CS_ARGV_FD_CR_IX], "%d:%d", &from, &to) != 2) - return 1; + DEBUG_PRINT("wd = %s", wd); -#if defined(HAVE_CLOSEFROM) - closefrom(from); -#elif defined(__ANDROID__) - if (from <= to) { - int spfd = system_properties_fd(); - for (i = from; i <= to; 
i++) { - if (i != spfd) { - (void) close(i); - } - } + cnt = get_int32(buff); + buff += sizeof(Sint32); + new_environ = malloc(sizeof(char*)*(cnt + 1)); + + DEBUG_PRINT("env_len = %ld", cnt); + for (i = 0; i < cnt; i++, buff++) { + new_environ[i] = buff; + while(*buff != '\0') buff++; } -#else /* !__ANDROID__ */ - for (i = from; i <= to; i++) { - (void) close(i); + new_environ[cnt] = NULL; + + if (o_buff + size != buff) { + /* This is a spawn executable call */ + cnt = get_int32(buff); + buff += sizeof(Sint32); + args = malloc(sizeof(char*)*(cnt + 1)); + for (i = 0; i < cnt; i++, buff++) { + args[i] = buff; + while(*buff != '\0') buff++; + } + args[cnt] = NULL; } -#endif /* HAVE_CLOSEFROM */ - if (!(argv[CS_ARGV_WD_IX][0] == '.' && argv[CS_ARGV_WD_IX][1] == '\0') - && chdir(argv[CS_ARGV_WD_IX]) < 0) - return 1; + if (o_buff + size != buff) { + errno = EINVAL; + goto child_error; + } + + DEBUG_PRINT("read ack"); + do { + ErtsSysForkerProto proto; + res = read(pipes[0], &proto, sizeof(proto)); + if (res > 0) { + ASSERT(proto.action == ErtsSysForkerProtoAction_Ack); + ASSERT(res == sizeof(proto)); + } + } while(res < 0 && (errno == EINTR || errno == ERRNO_BLOCK)); + if (res < 1) { + errno = EPIPE; + goto child_error; + } + + DEBUG_PRINT("Do that forking business: '%s'\n",cmd); + + /* When the dup2'ing below is done, only + fd's 0, 1, 2 and maybe 3, 4 should survive the + exec. All other fds (i.e. the unix domain sockets + and stray pipe ends) should have CLOEXEC set on them + so they will be closed when the exec happens */ + if (flags & FORKER_FLAG_USE_STDIO) { + /* stdin for process */ + if (flags & FORKER_FLAG_DO_WRITE && + dup2(pipes[0], 0) < 0) + goto child_error; + /* stdout for process */ + if (flags & FORKER_FLAG_DO_READ && + dup2(pipes[1], 1) < 0) + goto child_error; + } + else { /* XXX will fail if pipes[0] == 4 (unlikely..) 
*/ + if (flags & FORKER_FLAG_DO_READ && dup2(pipes[1], 4) < 0) + goto child_error; + if (flags & FORKER_FLAG_DO_WRITE && dup2(pipes[0], 3) < 0) + goto child_error; + } + + if (dup2(pipes[2], 2) < 0) + goto child_error; + + if (wd && chdir(wd) < 0) + goto child_error; #if defined(USE_SETPGRP_NOARGS) /* SysV */ (void) setpgrp(); @@ -131,34 +255,301 @@ main(int argc, char *argv[]) (void) setsid(); #endif + close(pipes[0]); + close(pipes[1]); + close(pipes[2]); + sys_sigrelease(SIGCHLD); - sys_sigrelease(SIGINT); - sys_sigrelease(SIGUSR1); - - if (erts_spawn_executable) { - if (argv[CS_ARGV_NO_OF_ARGS + 1] == NULL) { - execl(argv[CS_ARGV_NO_OF_ARGS],argv[CS_ARGV_NO_OF_ARGS], - (char *) NULL); - } else { - execv(argv[CS_ARGV_NO_OF_ARGS],&(argv[CS_ARGV_NO_OF_ARGS + 1])); - } + + if (args) { + /* spawn_executable */ + execve(cmd, args, new_environ); } else { - execl(SHELL, "sh", "-c", argv[CS_ARGV_CMD_IX], (char *) NULL); + execle(SHELL, "sh", "-c", cmd, (char *) NULL, new_environ); } - return 1; +child_error: + DEBUG_PRINT("exec error: %d\r\n",errno); + _exit(128 + errno); +} + + +/* + * [OTP-3906] + * Solaris signal management gets confused when threads are used and a + * lot of child processes dies. The confusion results in that SIGCHLD + * signals aren't delivered to the emulator which in turn results in + * a lot of defunct processes in the system. + * + * The problem seems to appear when a signal is frequently + * blocked/unblocked at the same time as the signal is frequently + * propagated. The child waiter thread is a workaround for this problem. + * The SIGCHLD signal is always blocked (in all threads), and the child + * waiter thread fetches the signal by a call to sigwait(). See + * child_waiter(). + * + * This should be a non-issue since the fork:ing was moved outside of + * the emulator into erl_child_setup. I'm leaving the comment here + * for posterity. 
*/ + +static void handle_sigchld(int sig) { + int buff[2], res; + + sys_sigblock(SIGCHLD); + + while ((buff[0] = waitpid((pid_t)(-1), buff+1, WNOHANG)) > 0) { + do { + res = write(sigchld_pipe[1], buff, sizeof(buff)); + } while (res < 0 && errno == EINTR); + if (res <= 0) + ABORT("Failed to write to sigchld_pipe (%d): %d (%d)", sigchld_pipe[1], res, errno); + DEBUG_PRINT("Reap child %d (%d)", buff[0], buff[1]); + } + + sys_sigrelease(SIGCHLD); } #if defined(__ANDROID__) static int system_properties_fd(void) { - int fd; + static int fd = -2; char *env; + if (fd != -2) return fd; env = getenv("ANDROID_PROPERTY_WORKSPACE"); if (!env) { + fd = -1; return -1; } fd = atoi(env); return fd; } #endif /* __ANDROID__ */ + +int +main(int argc, char *argv[]) +{ + /* This fd should be open from beam */ + int uds_fd = 3, max_fd = 3; +#ifndef HAVE_CLOSEFROM + int i; +#endif + struct sigaction sa; + + if (argc < 1 || sscanf(argv[1],"%d",&max_files) != 1) { + ABORT("Invalid arguments to child_setup"); + } + +/* We close all fds except the uds from beam. + All other fds from now on will have the + CLOEXEC flags set on them. This means that we + only have to close a very limited number of fds + after we fork before the exec. */ +#if defined(HAVE_CLOSEFROM) + closefrom(4); +#else + for (i = 4; i < max_files; i++) +#if defined(__ANDROID__) + if (i != system_properties_fd()) +#endif + (void) close(i); +#endif + + if (pipe(sigchld_pipe) < 0) { + ABORT("Failed to setup sigchld pipe (%d)", errno); + } + + SET_CLOEXEC(sigchld_pipe[0]); + SET_CLOEXEC(sigchld_pipe[1]); + + max_fd = max_fd < sigchld_pipe[0] ? 
sigchld_pipe[0] : max_fd; + + sa.sa_handler = &handle_sigchld; + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_RESTART | SA_NOCLDSTOP; + if (sigaction(SIGCHLD, &sa, 0) == -1) { + perror(0); + exit(1); + } + + forker_hash_init(); + + SET_CLOEXEC(uds_fd); + + DEBUG_PRINT("Starting forker %d", max_files); + + while (1) { + fd_set read_fds; + int res; + FD_ZERO(&read_fds); + FD_SET(uds_fd, &read_fds); + FD_SET(sigchld_pipe[0], &read_fds); + DEBUG_PRINT("child_setup selecting on %d, %d (%d)", + uds_fd, sigchld_pipe[0], max_fd); + res = select(max_fd+1, &read_fds, NULL, NULL, NULL); + + if (res < 0) { + if (errno == EINTR) continue; + ABORT("Select failed: %d (%d)",res, errno); + } + + if (FD_ISSET(uds_fd, &read_fds)) { + int pipes[3], res, os_pid; + ErtsSysForkerProto proto; + errno = 0; + if ((res = sys_uds_read(uds_fd, (char*)&proto, sizeof(proto), + pipes, 3, MSG_DONTWAIT)) < 0) { + if (errno == EINTR) + continue; + DEBUG_PRINT("erl_child_setup failed to read from uds: %d, %d", res, errno); + _exit(0); + } + + if (res == 0) { + DEBUG_PRINT("uds was closed!"); + _exit(0); + } + /* Since we use unix domain sockets and send the entire data in + one go we *should* get the entire payload at once. 
*/ + ASSERT(res == sizeof(proto)); + ASSERT(proto.action == ErtsSysForkerProtoAction_Start); + + sys_sigblock(SIGCHLD); + + errno = 0; + + os_pid = fork(); + if (os_pid == 0) + start_new_child(pipes); + + add_os_pid_to_port_id_mapping(proto.u.start.port_id, os_pid); + + /* We write an ack here, but expect the reply on + the pipes[0] inside the fork */ + proto.action = ErtsSysForkerProtoAction_Go; + proto.u.go.os_pid = os_pid; + proto.u.go.error_number = errno; + while (write(pipes[1], &proto, sizeof(proto)) < 0 && errno == EINTR) + ; /* remove gcc warning */ + +#ifdef FORKER_PROTO_START_ACK + proto.action = ErtsSysForkerProtoAction_StartAck; + while (write(uds_fd, &proto, sizeof(proto)) < 0 && errno == EINTR) + ; /* remove gcc warning */ +#endif + + sys_sigrelease(SIGCHLD); + close(pipes[0]); + close(pipes[1]); + close(pipes[2]); + } + + if (FD_ISSET(sigchld_pipe[0], &read_fds)) { + int ibuff[2]; + ErtsSysForkerProto proto; + res = read(sigchld_pipe[0], ibuff, sizeof(ibuff)); + if (res <= 0) { + if (errno == EINTR) + continue; + ABORT("Failed to read from sigchld pipe: %d (%d)", res, errno); + } + + proto.u.sigchld.port_id = get_port_id((pid_t)(ibuff[0])); + + if (proto.u.sigchld.port_id == THE_NON_VALUE) + continue; /* exit status report not requested */ + + proto.action = ErtsSysForkerProtoAction_SigChld; + proto.u.sigchld.error_number = ibuff[1]; + DEBUG_PRINT("send %s to %d", buff, uds_fd); + if (write(uds_fd, &proto, sizeof(proto)) < 0) { + if (errno == EINTR) + continue; + /* The uds was close, which most likely means that the VM + has exited. This will be detected when we try to read + from the uds_fd. 
*/ + DEBUG_PRINT("Failed to write to uds: %d (%d)", uds_fd, errno); + } + } + } + return 1; +} + +typedef struct exit_status { + HashBucket hb; + pid_t os_pid; + Eterm port_id; +} ErtsSysExitStatus; + +static Hash *forker_hash; + +static void add_os_pid_to_port_id_mapping(Eterm port_id, pid_t os_pid) +{ + if (port_id != THE_NON_VALUE) { + /* exit status report requested */ + ErtsSysExitStatus es; + es.os_pid = os_pid; + es.port_id = port_id; + hash_put(forker_hash, &es); + } +} + +static Eterm get_port_id(pid_t os_pid) +{ + ErtsSysExitStatus est, *es; + Eterm port_id; + est.os_pid = os_pid; + es = hash_remove(forker_hash, &est); + if (!es) return THE_NON_VALUE; + port_id = es->port_id; + free(es); + return port_id; +} + +static int fcmp(void *a, void *b) +{ + ErtsSysExitStatus *sa = a; + ErtsSysExitStatus *sb = b; + return !(sa->os_pid == sb->os_pid); +} + +static HashValue fhash(void *e) +{ + ErtsSysExitStatus *se = e; + Uint32 val = se->os_pid; + val = (val+0x7ed55d16) + (val<<12); + val = (val^0xc761c23c) ^ (val>>19); + val = (val+0x165667b1) + (val<<5); + val = (val+0xd3a2646c) ^ (val<<9); + val = (val+0xfd7046c5) + (val<<3); + val = (val^0xb55a4f09) ^ (val>>16); + return val; +} + +static void *falloc(void *e) +{ + ErtsSysExitStatus *se = e; + ErtsSysExitStatus *ne = malloc(sizeof(ErtsSysExitStatus)); + ne->os_pid = se->os_pid; + ne->port_id = se->port_id; + return ne; +} + +static void *meta_alloc(int type, size_t size) { return malloc(size); } +static void meta_free(int type, void *p) { free(p); } + +static int forker_hash_init(void) +{ + HashFunctions forker_hash_functions; + forker_hash_functions.hash = fhash; + forker_hash_functions.cmp = fcmp; + forker_hash_functions.alloc = falloc; + forker_hash_functions.free = free; + forker_hash_functions.meta_alloc = meta_alloc; + forker_hash_functions.meta_free = meta_free; + forker_hash_functions.meta_print = NULL; + + forker_hash = hash_new(0, "forker_hash", + 16, forker_hash_functions); + + return 1; +} diff 
--git a/erts/emulator/sys/unix/erl_child_setup.h b/erts/emulator/sys/unix/erl_child_setup.h new file mode 100644 index 0000000000..a28b136bfc --- /dev/null +++ b/erts/emulator/sys/unix/erl_child_setup.h @@ -0,0 +1,77 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2015-2015. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * %CopyrightEnd% + * + * This file defines the interface between erts and child_setup. + */ + +#ifndef _ERL_UNIX_FORKER_H +#define _ERL_UNIX_FORKER_H + +#include "sys.h" + +#ifdef __FreeBSD__ +/* The FreeBSD sendmsg man page explicitly states that + you should not close fds before they are known + to have reached the other side, so this Ack protects + against that. 
*/ +#define FORKER_PROTO_START_ACK 1 +#endif + +#define FORKER_ARGV_NO_OF_ARGS 3 +#define FORKER_ARGV_PROGNAME_IX 0 /* Program name */ +#define FORKER_ARGV_MAX_FILES 1 /* max_files */ + +#define FORKER_FLAG_USE_STDIO (1 << 0) /* dup the pipe to stdin/stdout */ +#define FORKER_FLAG_EXIT_STATUS (1 << 1) /* send the exit status to parent */ +#define FORKER_FLAG_DO_READ (1 << 2) /* dup write fd */ +#define FORKER_FLAG_DO_WRITE (1 << 3) /* dup read fd */ + +#if SIZEOF_VOID_P == SIZEOF_LONG +typedef unsigned long ErtsSysPortId; +#elif SIZEOF_VOID_P == SIZEOF_INT +typedef unsigned int ErtsSysPortId; +#elif SIZEOF_VOID_P == SIZEOF_LONG_LONG +typedef unsigned long long ErtsSysPortId; +#endif + +typedef struct ErtsSysForkerProto_ { + enum { + ErtsSysForkerProtoAction_Start, + ErtsSysForkerProtoAction_StartAck, + ErtsSysForkerProtoAction_Go, + ErtsSysForkerProtoAction_SigChld, + ErtsSysForkerProtoAction_Ack + } action; + union { + struct { + ErtsSysPortId port_id; + int fds[3]; + } start; + struct { + pid_t os_pid; + int error_number; + } go; + struct { + ErtsSysPortId port_id; + int error_number; + } sigchld; + } u; +} ErtsSysForkerProto; + +#endif /* #ifndef _ERL_UNIX_FORKER_H */ diff --git a/erts/emulator/sys/unix/erl_unix_sys.h b/erts/emulator/sys/unix/erl_unix_sys.h index 8d4e98bf3a..0352ee1b3c 100644 --- a/erts/emulator/sys/unix/erl_unix_sys.h +++ b/erts/emulator/sys/unix/erl_unix_sys.h @@ -30,9 +30,7 @@ #include <limits.h> #include <stdlib.h> #include <string.h> -#ifndef QNX #include <memory.h> -#endif #if defined(__sun__) && defined(__SVR4) && !defined(__EXTENSIONS__) # define __EXTENSIONS__ @@ -92,11 +90,6 @@ #include <ieeefp.h> #endif -#ifdef QNX -#include <process.h> -#include <sys/qnx_glob.h> -#endif - #include <pwd.h> #ifndef HZ @@ -136,13 +129,6 @@ # define ERTS_POLL_NEED_ASYNC_INTERRUPT_SUPPORT #endif -#ifndef ENABLE_CHILD_WAITER_THREAD -# ifdef ERTS_SMP -# define ERTS_SMP_SCHEDULERS_NEED_TO_CHECK_CHILDREN -void erts_check_children(void); -# endif -#endif - 
typedef void *GETENV_STATE; /* @@ -310,7 +296,6 @@ typedef void (*SIGFUNC)(int); extern SIGFUNC sys_signal(int, SIGFUNC); extern void sys_sigrelease(int); extern void sys_sigblock(int); -extern void sys_stop_cat(void); /* * Handling of floating point exceptions. @@ -425,19 +410,6 @@ void erts_sys_unblock_fpe(int); #define ERTS_FP_ERROR_THOROUGH(p, f, A) __ERTS_FP_ERROR_THOROUGH(&(p)->fp_exception, f, A) -#ifdef NEED_CHILD_SETUP_DEFINES -/* The child setup argv[] */ -#define CS_ARGV_PROGNAME_IX 0 /* Program name */ -#define CS_ARGV_UNBIND_IX 1 /* Unbind from cpu */ -#define CS_ARGV_WD_IX 2 /* Working directory */ -#define CS_ARGV_CMD_IX 3 /* Command */ -#define CS_ARGV_FD_CR_IX 4 /* Fd close range */ -#define CS_ARGV_DUP2_OP_IX(N) ((N) + 5) /* dup2 operations */ - -#define CS_ARGV_NO_OF_DUP2_OPS 3 /* Number of dup2 ops */ -#define CS_ARGV_NO_OF_ARGS 8 /* Number of arguments */ -#endif /* #ifdef NEED_CHILD_SETUP_DEFINES */ - /* Threads */ #ifdef USE_THREADS extern int init_async(int); diff --git a/erts/emulator/sys/unix/sys.c b/erts/emulator/sys/unix/sys.c index 8d7da3e47e..2ad5f3b4d5 100644 --- a/erts/emulator/sys/unix/sys.c +++ b/erts/emulator/sys/unix/sys.c @@ -49,7 +49,6 @@ #include <sys/ioctl.h> #endif -#define NEED_CHILD_SETUP_DEFINES #define ERTS_WANT_BREAK_HANDLING #define ERTS_WANT_GOT_SIGUSR1 #define WANT_NONBLOCKING /* must define this to pull in defs from sys.h */ @@ -67,7 +66,7 @@ #include "erl_mseg.h" extern char **environ; -static erts_smp_rwmtx_t environ_rwmtx; +erts_smp_rwmtx_t environ_rwmtx; #define MAX_VSIZE 16 /* Max number of entries allowed in an I/O * vector sock_sendv(). 
@@ -76,89 +75,12 @@ static erts_smp_rwmtx_t environ_rwmtx; * Don't need global.h, but bif_table.h (included by bif.h), * won't compile otherwise */ -#include "global.h" +#include "global.h" #include "bif.h" -#include "erl_sys_driver.h" #include "erl_check_io.h" #include "erl_cpu_topology.h" -#ifndef DISABLE_VFORK -#define DISABLE_VFORK 0 -#endif - -#if defined IOV_MAX -#define MAXIOV IOV_MAX -#elif defined UIO_MAXIOV -#define MAXIOV UIO_MAXIOV -#else -#define MAXIOV 16 -#endif - -#ifdef USE_THREADS -# ifdef ENABLE_CHILD_WAITER_THREAD -# define CHLDWTHR ENABLE_CHILD_WAITER_THREAD -# else -# define CHLDWTHR 0 -# endif -# define FDBLOCK 1 -#else -# define CHLDWTHR 0 -# define FDBLOCK 0 -#endif -/* - * [OTP-3906] - * Solaris signal management gets confused when threads are used and a - * lot of child processes dies. The confusion results in that SIGCHLD - * signals aren't delivered to the emulator which in turn results in - * a lot of defunct processes in the system. - * - * The problem seems to appear when a signal is frequently - * blocked/unblocked at the same time as the signal is frequently - * propagated. The child waiter thread is a workaround for this problem. - * The SIGCHLD signal is always blocked (in all threads), and the child - * waiter thread fetches the signal by a call to sigwait(). See - * child_waiter(). 
- */ - -typedef struct ErtsSysReportExit_ ErtsSysReportExit; -struct ErtsSysReportExit_ { - ErtsSysReportExit *next; - Eterm port; - int pid; - int ifd; - int ofd; -#if CHLDWTHR && !defined(ERTS_SMP) - int status; -#endif -}; - -/* Used by the fd driver iff the fd could not be set to non-blocking */ -typedef struct ErtsSysBlocking_ { - ErlDrvPDL pdl; - int res; - int err; - unsigned int pkey; -} ErtsSysBlocking; - - -/* This data is shared by these drivers - initialized by spawn_init() */ -static struct driver_data { - ErlDrvPort port_num; - int ofd, packet_bytes; - ErtsSysReportExit *report_exit; - int pid; - int alive; - int status; - int terminating; - ErtsSysBlocking *blocking; -} *driver_data; /* indexed by fd */ - -static ErtsSysReportExit *report_exit_list; -#if CHLDWTHR && !defined(ERTS_SMP) -static ErtsSysReportExit *report_exit_transit_list; -#endif - extern int driver_interrupt(int, int); extern void do_break(void); @@ -170,33 +92,6 @@ extern void erts_sys_init_float(void); extern void erl_crash_dump(char* file, int line, char* fmt, ...); -#define DIR_SEPARATOR_CHAR '/' - -#if defined(__ANDROID__) -#define SHELL "/system/bin/sh" -#else -#define SHELL "/bin/sh" -#endif /* __ANDROID__ */ - - -#if defined(DEBUG) -#define ERL_BUILD_TYPE_MARKER ".debug" -#elif defined(PURIFY) -#define ERL_BUILD_TYPE_MARKER ".purify" -#elif defined(QUANTIFY) -#define ERL_BUILD_TYPE_MARKER ".quantify" -#elif defined(PURECOV) -#define ERL_BUILD_TYPE_MARKER ".purecov" -#elif defined(VALGRIND) -#define ERL_BUILD_TYPE_MARKER ".valgrind" -#else /* opt */ -#define ERL_BUILD_TYPE_MARKER -#endif - -#define CHILD_SETUP_PROG_NAME "child_setup" ERL_BUILD_TYPE_MARKER -#if !DISABLE_VFORK -static char *child_setup_prog; -#endif #ifdef DEBUG static int debug_log = 0; @@ -220,7 +115,7 @@ static volatile int have_prepared_crash_dump; (have_prepared_crash_dump++) #endif -static erts_smp_atomic_t sys_misc_mem_sz; +erts_smp_atomic_t sys_misc_mem_sz; #if defined(ERTS_SMP) static void 
smp_sig_notify(char c); @@ -233,46 +128,6 @@ static int sig_suspend_fds[2] = {-1, -1}; jmp_buf erts_sys_sigsegv_jmp; -#if CHLDWTHR || defined(ERTS_SMP) -erts_mtx_t chld_stat_mtx; -#endif -#if CHLDWTHR -static erts_tid_t child_waiter_tid; -/* chld_stat_mtx is used to protect against concurrent accesses - of the driver_data fields pid, alive, and status. */ -erts_cnd_t chld_stat_cnd; -static long children_alive; -#define CHLD_STAT_LOCK erts_mtx_lock(&chld_stat_mtx) -#define CHLD_STAT_UNLOCK erts_mtx_unlock(&chld_stat_mtx) -#define CHLD_STAT_WAIT erts_cnd_wait(&chld_stat_cnd, &chld_stat_mtx) -#define CHLD_STAT_SIGNAL erts_cnd_signal(&chld_stat_cnd) -#elif defined(ERTS_SMP) /* ------------------------------------------------- */ -#define CHLD_STAT_LOCK erts_mtx_lock(&chld_stat_mtx) -#define CHLD_STAT_UNLOCK erts_mtx_unlock(&chld_stat_mtx) - -#else /* ------------------------------------------------------------------- */ -#define CHLD_STAT_LOCK -#define CHLD_STAT_UNLOCK -static volatile int children_died; -#endif - - -static struct fd_data { - char pbuf[4]; /* hold partial packet bytes */ - int psz; /* size of pbuf */ - char *buf; - char *cpos; - int sz; - int remain; /* for input on fd */ -} *fd_data; /* indexed by fd */ - -/* static FUNCTION(int, write_fill, (int, char*, int)); unused? */ -static void note_child_death(int, int); - -#if CHLDWTHR -static void* child_waiter(void *); -#endif - static int crashdump_companion_cube_fd = -1; /********************* General functions ****************************/ @@ -453,9 +308,10 @@ MALLOC_USE_HASH(1); #ifdef USE_THREADS #ifdef ERTS_THR_HAVE_SIG_FUNCS + /* * Child thread inherits parents signal mask at creation. In order to - * guarantee that the main thread will receive all SIGINT, SIGCHLD, and + * guarantee that the main thread will receive all SIGINT, and * SIGUSR1 signals sent to the process, we block these signals in the * parent thread when creating a new thread. 
*/ @@ -551,14 +407,11 @@ erts_sys_pre_init(void) #ifdef ERTS_THR_HAVE_SIG_FUNCS sigemptyset(&thr_create_sigmask); sigaddset(&thr_create_sigmask, SIGINT); /* block interrupt */ - sigaddset(&thr_create_sigmask, SIGCHLD); /* block child signals */ sigaddset(&thr_create_sigmask, SIGUSR1); /* block user defined signal */ #endif erts_thr_init(&eid); - report_exit_list = NULL; - #ifdef ERTS_ENABLE_LOCK_COUNT erts_lcnt_init(); #endif @@ -569,17 +422,6 @@ erts_sys_pre_init(void) #ifdef USE_THREADS -#if CHLDWTHR || defined(ERTS_SMP) - erts_mtx_init(&chld_stat_mtx, "child_status"); -#endif -#if CHLDWTHR -#ifndef ERTS_SMP - report_exit_transit_list = NULL; -#endif - erts_cnd_init(&chld_stat_cnd); - children_alive = 0; -#endif - #ifdef ERTS_SMP erts_smp_atomic32_init_nob(&erts_break_requested, 0); erts_smp_atomic32_init_nob(&erts_got_sigusr1, 0); @@ -589,9 +431,6 @@ erts_sys_pre_init(void) erts_got_sigusr1 = 0; have_prepared_crash_dump = 0; #endif -#if !CHLDWTHR && !defined(ERTS_SMP) - children_died = 0; -#endif #endif /* USE_THREADS */ @@ -628,39 +467,6 @@ erts_sys_pre_init(void) void erl_sys_init(void) { -#if !DISABLE_VFORK - { - int res; - char bindir[MAXPATHLEN]; - size_t bindirsz = sizeof(bindir); - Uint csp_path_sz; - - res = erts_sys_getenv_raw("BINDIR", bindir, &bindirsz); - if (res != 0) { - if (res < 0) - erl_exit(-1, - "Environment variable BINDIR is not set\n"); - if (res > 0) - erl_exit(-1, - "Value of environment variable BINDIR is too large\n"); - } - if (bindir[0] != DIR_SEPARATOR_CHAR) - erl_exit(-1, - "Environment variable BINDIR does not contain an" - " absolute path\n"); - csp_path_sz = (strlen(bindir) - + 1 /* DIR_SEPARATOR_CHAR */ - + sizeof(CHILD_SETUP_PROG_NAME) - + 1); - child_setup_prog = erts_alloc(ERTS_ALC_T_CS_PROG_PATH, csp_path_sz); - erts_smp_atomic_add_nob(&sys_misc_mem_sz, csp_path_sz); - erts_snprintf(child_setup_prog, csp_path_sz, - "%s%c%s", - bindir, - DIR_SEPARATOR_CHAR, - CHILD_SETUP_PROG_NAME); - } -#endif #ifdef USE_SETLINEBUF 
setlinebuf(stdout); @@ -978,43 +784,6 @@ int sys_max_files(void) return(max_files); } -static void block_signals(void) -{ -#if !CHLDWTHR - sys_sigblock(SIGCHLD); -#endif -#ifndef ERTS_SMP - sys_sigblock(SIGINT); -#ifndef ETHR_UNUSABLE_SIGUSRX - sys_sigblock(SIGUSR1); -#endif /* #ifndef ETHR_UNUSABLE_SIGUSRX */ -#endif /* #ifndef ERTS_SMP */ - -#if defined(ERTS_SMP) && !defined(ETHR_UNUSABLE_SIGUSRX) - sys_sigblock(ERTS_SYS_SUSPEND_SIGNAL); -#endif - -} - -static void unblock_signals(void) -{ - /* Update erl_child_setup.c if changed */ -#if !CHLDWTHR - sys_sigrelease(SIGCHLD); -#endif -#ifndef ERTS_SMP - sys_sigrelease(SIGINT); -#ifndef ETHR_UNUSABLE_SIGUSRX - sys_sigrelease(SIGUSR1); -#endif /* #ifndef ETHR_UNUSABLE_SIGUSRX */ -#endif /* #ifndef ERTS_SMP */ - -#if defined(ERTS_SMP) && !defined(ETHR_UNUSABLE_SIGUSRX) - sys_sigrelease(ERTS_SYS_SUSPEND_SIGNAL); -#endif - -} - /************************** OS info *******************************/ /* Used by erlang:info/1. */ @@ -1102,1502 +871,6 @@ void fini_getenv_state(GETENV_STATE *state) erts_smp_rwmtx_runlock(&environ_rwmtx); } - -/************************** Port I/O *******************************/ - - - -/* I. Common stuff */ - -/* - * Decreasing the size of it below 16384 is not allowed. - */ - -/* II. 
The spawn/fd/vanilla drivers */ - -#define ERTS_SYS_READ_BUF_SZ (64*1024) - -/* Driver interfaces */ -static ErlDrvData spawn_start(ErlDrvPort, char*, SysDriverOpts*); -static ErlDrvData fd_start(ErlDrvPort, char*, SysDriverOpts*); -#if FDBLOCK -static void fd_async(void *); -static void fd_ready_async(ErlDrvData drv_data, ErlDrvThreadData thread_data); -#endif -static ErlDrvSSizeT fd_control(ErlDrvData, unsigned int, char *, ErlDrvSizeT, - char **, ErlDrvSizeT); -static ErlDrvData vanilla_start(ErlDrvPort, char*, SysDriverOpts*); -static int spawn_init(void); -static void fd_stop(ErlDrvData); -static void fd_flush(ErlDrvData); -static void stop(ErlDrvData); -static void ready_input(ErlDrvData, ErlDrvEvent); -static void ready_output(ErlDrvData, ErlDrvEvent); -static void output(ErlDrvData, char*, ErlDrvSizeT); -static void outputv(ErlDrvData, ErlIOVec*); -static void stop_select(ErlDrvEvent, void*); - -struct erl_drv_entry spawn_driver_entry = { - spawn_init, - spawn_start, - stop, - output, - ready_input, - ready_output, - "spawn", - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - ERL_DRV_EXTENDED_MARKER, - ERL_DRV_EXTENDED_MAJOR_VERSION, - ERL_DRV_EXTENDED_MINOR_VERSION, - ERL_DRV_FLAG_USE_PORT_LOCKING, - NULL, NULL, - stop_select -}; -struct erl_drv_entry fd_driver_entry = { - NULL, - fd_start, - fd_stop, - output, - ready_input, - ready_output, - "fd", - NULL, - NULL, - fd_control, - NULL, - outputv, -#if FDBLOCK - fd_ready_async, /* ready_async */ -#else - NULL, -#endif - fd_flush, /* flush */ - NULL, /* call */ - NULL, /* event */ - ERL_DRV_EXTENDED_MARKER, - ERL_DRV_EXTENDED_MAJOR_VERSION, - ERL_DRV_EXTENDED_MINOR_VERSION, - 0, /* ERL_DRV_FLAGs */ - NULL, /* handle2 */ - NULL, /* process_exit */ - stop_select -}; -struct erl_drv_entry vanilla_driver_entry = { - NULL, - vanilla_start, - stop, - output, - ready_input, - ready_output, - "vanilla", - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, /* flush */ - NULL, /* call */ - 
NULL, /* event */ - ERL_DRV_EXTENDED_MARKER, - ERL_DRV_EXTENDED_MAJOR_VERSION, - ERL_DRV_EXTENDED_MINOR_VERSION, - 0, /* ERL_DRV_FLAGs */ - NULL, /* handle2 */ - NULL, /* process_exit */ - stop_select -}; - -/* Handle SIGCHLD signals. */ -#if (defined(SIG_SIGSET) || defined(SIG_SIGNAL)) -static RETSIGTYPE onchld(void) -#else -static RETSIGTYPE onchld(int signum) -#endif -{ -#if CHLDWTHR - ASSERT(0); /* We should *never* catch a SIGCHLD signal */ -#elif defined(ERTS_SMP) - smp_sig_notify('C'); -#else - children_died = 1; - ERTS_CHK_IO_AS_INTR(); /* Make sure we don't sleep in poll */ -#endif -} - -static int set_blocking_data(struct driver_data *dd) { - - dd->blocking = erts_alloc(ERTS_ALC_T_SYS_BLOCKING, sizeof(ErtsSysBlocking)); - - erts_smp_atomic_add_nob(&sys_misc_mem_sz, sizeof(ErtsSysBlocking)); - - dd->blocking->pdl = driver_pdl_create(dd->port_num); - dd->blocking->res = 0; - dd->blocking->err = 0; - dd->blocking->pkey = driver_async_port_key(dd->port_num); - - return 1; -} - -static int set_driver_data(ErlDrvPort port_num, - int ifd, - int ofd, - int packet_bytes, - int read_write, - int exit_status, - int pid, - int is_blocking) -{ - Port *prt; - ErtsSysReportExit *report_exit; - - if (!exit_status) - report_exit = NULL; - else { - report_exit = erts_alloc(ERTS_ALC_T_PRT_REP_EXIT, - sizeof(ErtsSysReportExit)); - report_exit->next = report_exit_list; - report_exit->port = erts_drvport2id(port_num); - report_exit->pid = pid; - report_exit->ifd = read_write & DO_READ ? ifd : -1; - report_exit->ofd = read_write & DO_WRITE ? 
ofd : -1; -#if CHLDWTHR && !defined(ERTS_SMP) - report_exit->status = 0; -#endif - report_exit_list = report_exit; - } - - prt = erts_drvport2port(port_num); - if (prt != ERTS_INVALID_ERL_DRV_PORT) - prt->os_pid = pid; - - if (read_write & DO_READ) { - driver_data[ifd].packet_bytes = packet_bytes; - driver_data[ifd].port_num = port_num; - driver_data[ifd].report_exit = report_exit; - driver_data[ifd].pid = pid; - driver_data[ifd].alive = 1; - driver_data[ifd].status = 0; - driver_data[ifd].terminating = 0; - driver_data[ifd].blocking = NULL; - if (read_write & DO_WRITE) { - driver_data[ifd].ofd = ofd; - if (is_blocking && FDBLOCK) - if (!set_blocking_data(driver_data+ifd)) - return -1; - if (ifd != ofd) - driver_data[ofd] = driver_data[ifd]; /* structure copy */ - } else { /* DO_READ only */ - driver_data[ifd].ofd = -1; - } - (void) driver_select(port_num, ifd, (ERL_DRV_READ|ERL_DRV_USE), 1); - return(ifd); - } else { /* DO_WRITE only */ - driver_data[ofd].packet_bytes = packet_bytes; - driver_data[ofd].port_num = port_num; - driver_data[ofd].report_exit = report_exit; - driver_data[ofd].ofd = ofd; - driver_data[ofd].pid = pid; - driver_data[ofd].alive = 1; - driver_data[ofd].status = 0; - driver_data[ofd].terminating = 0; - driver_data[ofd].blocking = NULL; - if (is_blocking && FDBLOCK) - if (!set_blocking_data(driver_data+ofd)) - return -1; - return(ofd); - } -} - -static int spawn_init() -{ - int i; -#if CHLDWTHR - erts_thr_opts_t thr_opts = ERTS_THR_OPTS_DEFAULT_INITER; - - thr_opts.detached = 0; - thr_opts.suggested_stack_size = 0; /* Smallest possible */ - thr_opts.name = "child_waiter"; -#endif - - sys_signal(SIGPIPE, SIG_IGN); /* Ignore - we'll handle the write failure */ - driver_data = (struct driver_data *) - erts_alloc(ERTS_ALC_T_DRV_TAB, max_files * sizeof(struct driver_data)); - erts_smp_atomic_add_nob(&sys_misc_mem_sz, - max_files * sizeof(struct driver_data)); - - for (i = 0; i < max_files; i++) - driver_data[i].pid = -1; - -#if CHLDWTHR - 
sys_sigblock(SIGCHLD); -#endif - - sys_signal(SIGCHLD, onchld); /* Reap children */ - -#if CHLDWTHR - erts_thr_create(&child_waiter_tid, child_waiter, NULL, &thr_opts); -#endif - - return 1; -} - -static void close_pipes(int ifd[2], int ofd[2], int read_write) -{ - if (read_write & DO_READ) { - (void) close(ifd[0]); - (void) close(ifd[1]); - } - if (read_write & DO_WRITE) { - (void) close(ofd[0]); - (void) close(ofd[1]); - } -} - -static void init_fd_data(int fd, ErlDrvPort port_num) -{ - fd_data[fd].buf = NULL; - fd_data[fd].cpos = NULL; - fd_data[fd].remain = 0; - fd_data[fd].sz = 0; - fd_data[fd].psz = 0; -} - -static char **build_unix_environment(char *block) -{ - int i; - int j; - int len; - char *cp; - char **cpp; - char** old_env; - - ERTS_SMP_LC_ASSERT(erts_smp_lc_rwmtx_is_rlocked(&environ_rwmtx)); - - cp = block; - len = 0; - while (*cp != '\0') { - cp += strlen(cp) + 1; - len++; - } - old_env = environ; - while (*old_env++ != NULL) { - len++; - } - - cpp = (char **) erts_alloc_fnf(ERTS_ALC_T_ENVIRONMENT, - sizeof(char *) * (len+1)); - if (cpp == NULL) { - return NULL; - } - - cp = block; - len = 0; - while (*cp != '\0') { - cpp[len] = cp; - cp += strlen(cp) + 1; - len++; - } - - i = len; - for (old_env = environ; *old_env; old_env++) { - char* old = *old_env; - - for (j = 0; j < len; j++) { - char *s, *t; - - s = cpp[j]; - t = old; - while (*s == *t && *s != '=') { - s++, t++; - } - if (*s == '=' && *t == '=') { - break; - } - } - - if (j == len) { /* New version not found */ - cpp[len++] = old; - } - } - - for (j = 0; j < i; ) { - size_t last = strlen(cpp[j])-1; - if (cpp[j][last] == '=' && strchr(cpp[j], '=') == cpp[j]+last) { - cpp[j] = cpp[--len]; - if (len < i) { - i--; - } else { - j++; - } - } - else { - j++; - } - } - - cpp[len] = NULL; - return cpp; -} - -/* - [arndt] In most Unix systems, including Solaris 2.5, 'fork' allocates memory - in swap space for the child of a 'fork', whereas 'vfork' does not do this. 
- The natural call to use here is therefore 'vfork'. Due to a bug in - 'vfork' in Solaris 2.5 (apparently fixed in 2.6), using 'vfork' - can be dangerous in what seems to be these circumstances: - If the child code under a vfork sets the signal action to SIG_DFL - (or SIG_IGN) - for any signal which was previously set to a signal handler, the - state of the parent is clobbered, so that the later arrival of - such a signal yields a sigsegv in the parent. If the signal was - not set to a signal handler, but ignored, all seems to work. - If you change the forking code below, beware of this. - */ - -static ErlDrvData spawn_start(ErlDrvPort port_num, char* name, SysDriverOpts* opts) -{ -#define CMD_LINE_PREFIX_STR "exec " -#define CMD_LINE_PREFIX_STR_SZ (sizeof(CMD_LINE_PREFIX_STR) - 1) - - int ifd[2], ofd[2], len, pid, i; - char **volatile new_environ; /* volatile since a vfork() then cannot - cause 'new_environ' to be clobbered - in the parent process. */ - int saved_errno; - long res; - char *cmd_line; -#ifndef QNX - int unbind; -#endif -#if !DISABLE_VFORK - int no_vfork; - size_t no_vfork_sz = sizeof(no_vfork); - - no_vfork = (erts_sys_getenv_raw("ERL_NO_VFORK", - (char *) &no_vfork, - &no_vfork_sz) >= 0); -#endif - - switch (opts->read_write) { - case DO_READ: - if (pipe(ifd) < 0) - return ERL_DRV_ERROR_ERRNO; - if (ifd[0] >= max_files) { - close_pipes(ifd, ofd, opts->read_write); - errno = EMFILE; - return ERL_DRV_ERROR_ERRNO; - } - ofd[1] = -1; /* keep purify happy */ - break; - case DO_WRITE: - if (pipe(ofd) < 0) return ERL_DRV_ERROR_ERRNO; - if (ofd[1] >= max_files) { - close_pipes(ifd, ofd, opts->read_write); - errno = EMFILE; - return ERL_DRV_ERROR_ERRNO; - } - ifd[0] = -1; /* keep purify happy */ - break; - case DO_READ|DO_WRITE: - if (pipe(ifd) < 0) return ERL_DRV_ERROR_ERRNO; - errno = EMFILE; /* default for next two conditions */ - if (ifd[0] >= max_files || pipe(ofd) < 0) { - close_pipes(ifd, ofd, DO_READ); - return ERL_DRV_ERROR_ERRNO; - } - if (ofd[1] 
>= max_files) { - close_pipes(ifd, ofd, opts->read_write); - errno = EMFILE; - return ERL_DRV_ERROR_ERRNO; - } - break; - default: - ASSERT(0); - return ERL_DRV_ERROR_GENERAL; - } - - if (opts->spawn_type == ERTS_SPAWN_EXECUTABLE) { - /* started with spawn_executable, not with spawn */ - len = strlen(name); - cmd_line = (char *) erts_alloc_fnf(ERTS_ALC_T_TMP, len + 1); - if (!cmd_line) { - close_pipes(ifd, ofd, opts->read_write); - errno = ENOMEM; - return ERL_DRV_ERROR_ERRNO; - } - memcpy((void *) cmd_line,(void *) name, len); - cmd_line[len] = '\0'; - if (access(cmd_line,X_OK) != 0) { - int save_errno = errno; - erts_free(ERTS_ALC_T_TMP, cmd_line); - errno = save_errno; - return ERL_DRV_ERROR_ERRNO; - } - } else { - /* make the string suitable for giving to "sh" */ - len = strlen(name); - cmd_line = (char *) erts_alloc_fnf(ERTS_ALC_T_TMP, - CMD_LINE_PREFIX_STR_SZ + len + 1); - if (!cmd_line) { - close_pipes(ifd, ofd, opts->read_write); - errno = ENOMEM; - return ERL_DRV_ERROR_ERRNO; - } - memcpy((void *) cmd_line, - (void *) CMD_LINE_PREFIX_STR, - CMD_LINE_PREFIX_STR_SZ); - memcpy((void *) (cmd_line + CMD_LINE_PREFIX_STR_SZ), (void *) name, len); - cmd_line[CMD_LINE_PREFIX_STR_SZ + len] = '\0'; - } - - erts_smp_rwmtx_rlock(&environ_rwmtx); - - if (opts->envir == NULL) { - new_environ = environ; - } else if ((new_environ = build_unix_environment(opts->envir)) == NULL) { - erts_smp_rwmtx_runlock(&environ_rwmtx); - erts_free(ERTS_ALC_T_TMP, (void *) cmd_line); - errno = ENOMEM; - return ERL_DRV_ERROR_ERRNO; - } - -#ifndef QNX - /* Block child from SIGINT and SIGUSR1. Must be before fork() - to be safe. */ - block_signals(); - - CHLD_STAT_LOCK; - - unbind = erts_sched_bind_atfork_prepare(); - -#if !DISABLE_VFORK - /* See fork/vfork discussion before this function. */ - if (no_vfork) { -#endif - - DEBUGF(("Using fork\n")); - pid = fork(); - - if (pid == 0) { - /* The child! Setup child... 
*/ - - if (erts_sched_bind_atfork_child(unbind) != 0) - goto child_error; - - /* OBSERVE! - * Keep child setup after vfork() (implemented below and in - * erl_child_setup.c) up to date if changes are made here. - */ - - if (opts->use_stdio) { - if (opts->read_write & DO_READ) { - /* stdout for process */ - if (dup2(ifd[1], 1) < 0) - goto child_error; - if(opts->redir_stderr) - /* stderr for process */ - if (dup2(ifd[1], 2) < 0) - goto child_error; - } - if (opts->read_write & DO_WRITE) - /* stdin for process */ - if (dup2(ofd[0], 0) < 0) - goto child_error; - } - else { /* XXX will fail if ofd[0] == 4 (unlikely..) */ - if (opts->read_write & DO_READ) - if (dup2(ifd[1], 4) < 0) - goto child_error; - if (opts->read_write & DO_WRITE) - if (dup2(ofd[0], 3) < 0) - goto child_error; - } - -#if defined(HAVE_CLOSEFROM) - closefrom(opts->use_stdio ? 3 : 5); -#else - for (i = opts->use_stdio ? 3 : 5; i < max_files; i++) - (void) close(i); -#endif - - if (opts->wd && chdir(opts->wd) < 0) - goto child_error; - -#if defined(USE_SETPGRP_NOARGS) /* SysV */ - (void) setpgrp(); -#elif defined(USE_SETPGRP) /* BSD */ - (void) setpgrp(0, getpid()); -#else /* POSIX */ - (void) setsid(); -#endif - - unblock_signals(); - - if (opts->spawn_type == ERTS_SPAWN_EXECUTABLE) { - if (opts->argv == NULL) { - execle(cmd_line,cmd_line,(char *) NULL, new_environ); - } else { - if (opts->argv[0] == erts_default_arg0) { - opts->argv[0] = cmd_line; - } - execve(cmd_line, opts->argv, new_environ); - if (opts->argv[0] == cmd_line) { - opts->argv[0] = erts_default_arg0; - } - } - } else { - execle(SHELL, "sh", "-c", cmd_line, (char *) NULL, new_environ); - } - child_error: - _exit(1); - } -#if !DISABLE_VFORK - } -#define ENOUGH_BYTES (44) - else { /* Use vfork() */ - char **cs_argv= erts_alloc(ERTS_ALC_T_TMP,(CS_ARGV_NO_OF_ARGS + 1)* - sizeof(char *)); - char fd_close_range[ENOUGH_BYTES]; /* 44 bytes are enough to */ - char dup2_op[CS_ARGV_NO_OF_DUP2_OPS][ENOUGH_BYTES]; /* hold any "%d:%d" string */ - /* 
on a 64-bit machine. */ - - /* Setup argv[] for the child setup program (implemented in - erl_child_setup.c) */ - i = 0; - if (opts->use_stdio) { - if (opts->read_write & DO_READ){ - /* stdout for process */ - erts_snprintf(&dup2_op[i++][0], ENOUGH_BYTES, "%d:%d", ifd[1], 1); - if(opts->redir_stderr) - /* stderr for process */ - erts_snprintf(&dup2_op[i++][0], ENOUGH_BYTES, "%d:%d", ifd[1], 2); - } - if (opts->read_write & DO_WRITE) - /* stdin for process */ - erts_snprintf(&dup2_op[i++][0], ENOUGH_BYTES, "%d:%d", ofd[0], 0); - } else { /* XXX will fail if ofd[0] == 4 (unlikely..) */ - if (opts->read_write & DO_READ) - erts_snprintf(&dup2_op[i++][0], ENOUGH_BYTES, "%d:%d", ifd[1], 4); - if (opts->read_write & DO_WRITE) - erts_snprintf(&dup2_op[i++][0], ENOUGH_BYTES, "%d:%d", ofd[0], 3); - } - for (; i < CS_ARGV_NO_OF_DUP2_OPS; i++) - strcpy(&dup2_op[i][0], "-"); - erts_snprintf(fd_close_range, ENOUGH_BYTES, "%d:%d", opts->use_stdio ? 3 : 5, max_files-1); - - cs_argv[CS_ARGV_PROGNAME_IX] = child_setup_prog; - cs_argv[CS_ARGV_WD_IX] = opts->wd ? 
opts->wd : "."; - cs_argv[CS_ARGV_UNBIND_IX] = erts_sched_bind_atvfork_child(unbind); - cs_argv[CS_ARGV_FD_CR_IX] = fd_close_range; - for (i = 0; i < CS_ARGV_NO_OF_DUP2_OPS; i++) - cs_argv[CS_ARGV_DUP2_OP_IX(i)] = &dup2_op[i][0]; - - if (opts->spawn_type == ERTS_SPAWN_EXECUTABLE) { - int num = 0; - int j = 0; - if (opts->argv != NULL) { - for(; opts->argv[num] != NULL; ++num) - ; - } - cs_argv = erts_realloc(ERTS_ALC_T_TMP,cs_argv, (CS_ARGV_NO_OF_ARGS + 1 + num + 1) * sizeof(char *)); - cs_argv[CS_ARGV_CMD_IX] = "-"; - cs_argv[CS_ARGV_NO_OF_ARGS] = cmd_line; - if (opts->argv != NULL) { - for (;opts->argv[j] != NULL; ++j) { - if (opts->argv[j] == erts_default_arg0) { - cs_argv[CS_ARGV_NO_OF_ARGS + 1 + j] = cmd_line; - } else { - cs_argv[CS_ARGV_NO_OF_ARGS + 1 + j] = opts->argv[j]; - } - } - } - cs_argv[CS_ARGV_NO_OF_ARGS + 1 + j] = NULL; - } else { - cs_argv[CS_ARGV_CMD_IX] = cmd_line; /* Command */ - cs_argv[CS_ARGV_NO_OF_ARGS] = NULL; - } - DEBUGF(("Using vfork\n")); - pid = vfork(); - - if (pid == 0) { - /* The child! */ - - /* Observe! - * OTP-4389: The child setup program (implemented in - * erl_child_setup.c) will perform the necessary setup of the - * child before it execs to the user program. This because - * vfork() only allow an *immediate* execve() or _exit() in the - * child. 
- */ - execve(child_setup_prog, cs_argv, new_environ); - _exit(1); - } - erts_free(ERTS_ALC_T_TMP,cs_argv); - } -#undef ENOUGH_BYTES -#endif - - erts_sched_bind_atfork_parent(unbind); - - if (pid == -1) { - saved_errno = errno; - CHLD_STAT_UNLOCK; - erts_smp_rwmtx_runlock(&environ_rwmtx); - erts_free(ERTS_ALC_T_TMP, (void *) cmd_line); - unblock_signals(); - close_pipes(ifd, ofd, opts->read_write); - errno = saved_errno; - return ERL_DRV_ERROR_ERRNO; - } -#else /* QNX */ - if (opts->use_stdio) { - if (opts->read_write & DO_READ) - qnx_spawn_options.iov[1] = ifd[1]; /* stdout for process */ - if (opts->read_write & DO_WRITE) - qnx_spawn_options.iov[0] = ofd[0]; /* stdin for process */ - } - else { - if (opts->read_write & DO_READ) - qnx_spawn_options.iov[4] = ifd[1]; - if (opts->read_write & DO_WRITE) - qnx_spawn_options.iov[3] = ofd[0]; - } - /* Close fds on exec */ - for (i = 3; i < max_files; i++) - fcntl(i, F_SETFD, 1); - - qnx_spawn_options.flags = _SPAWN_SETSID; - if ((pid = spawnl(P_NOWAIT, SHELL, SHELL, "-c", cmd_line, - (char *) 0)) < 0) { - erts_free(ERTS_ALC_T_TMP, (void *) cmd_line); - reset_qnx_spawn(); - erts_smp_rwmtx_runlock(&environ_rwmtx); - close_pipes(ifd, ofd, opts->read_write); - return ERL_DRV_ERROR_GENERAL; - } - reset_qnx_spawn(); -#endif /* QNX */ - - erts_free(ERTS_ALC_T_TMP, (void *) cmd_line); - - if (new_environ != environ) - erts_free(ERTS_ALC_T_ENVIRONMENT, (void *) new_environ); - - if (opts->read_write & DO_READ) - (void) close(ifd[1]); - if (opts->read_write & DO_WRITE) - (void) close(ofd[0]); - - if (opts->read_write & DO_READ) { - SET_NONBLOCKING(ifd[0]); - init_fd_data(ifd[0], port_num); - } - if (opts->read_write & DO_WRITE) { - SET_NONBLOCKING(ofd[1]); - init_fd_data(ofd[1], port_num); - } - - res = set_driver_data(port_num, ifd[0], ofd[1], opts->packet_bytes, - opts->read_write, opts->exit_status, pid, 0); - /* Don't unblock SIGCHLD until now, since the call above must - first complete putting away the info about our new 
subprocess. */ - unblock_signals(); - -#if CHLDWTHR - ASSERT(children_alive >= 0); - - if (!(children_alive++)) - CHLD_STAT_SIGNAL; /* Wake up child waiter thread if no children - was alive before we fork()ed ... */ -#endif - /* Don't unlock chld_stat_mtx until now of the same reason as above */ - CHLD_STAT_UNLOCK; - - erts_smp_rwmtx_runlock(&environ_rwmtx); - - return (ErlDrvData)res; -#undef CMD_LINE_PREFIX_STR -#undef CMD_LINE_PREFIX_STR_SZ -} - -#ifdef QNX -static reset_qnx_spawn() -{ - int i; - - /* Reset qnx_spawn_options */ - qnx_spawn_options.flags = 0; - qnx_spawn_options.iov[0] = 0xff; - qnx_spawn_options.iov[1] = 0xff; - qnx_spawn_options.iov[2] = 0xff; - qnx_spawn_options.iov[3] = 0xff; -} -#endif - -#define FD_DEF_HEIGHT 24 -#define FD_DEF_WIDTH 80 -/* Control op */ -#define FD_CTRL_OP_GET_WINSIZE 100 - -static int fd_get_window_size(int fd, Uint32 *width, Uint32 *height) -{ -#ifdef TIOCGWINSZ - struct winsize ws; - if (ioctl(fd,TIOCGWINSZ,&ws) == 0) { - *width = (Uint32) ws.ws_col; - *height = (Uint32) ws.ws_row; - return 0; - } -#endif - return -1; -} - -static ErlDrvSSizeT fd_control(ErlDrvData drv_data, - unsigned int command, - char *buf, ErlDrvSizeT len, - char **rbuf, ErlDrvSizeT rlen) -{ - int fd = (int)(long)drv_data; - char resbuff[2*sizeof(Uint32)]; - switch (command) { - case FD_CTRL_OP_GET_WINSIZE: - { - Uint32 w,h; - if (fd_get_window_size(fd,&w,&h)) - return 0; - memcpy(resbuff,&w,sizeof(Uint32)); - memcpy(resbuff+sizeof(Uint32),&h,sizeof(Uint32)); - } - break; - default: - return 0; - } - if (rlen < 2*sizeof(Uint32)) { - *rbuf = driver_alloc(2*sizeof(Uint32)); - } - memcpy(*rbuf,resbuff,2*sizeof(Uint32)); - return 2*sizeof(Uint32); -} - -static ErlDrvData fd_start(ErlDrvPort port_num, char* name, - SysDriverOpts* opts) -{ - ErlDrvData res; - int non_blocking = 0; - - if (((opts->read_write & DO_READ) && opts->ifd >= max_files) || - ((opts->read_write & DO_WRITE) && opts->ofd >= max_files)) - return ERL_DRV_ERROR_GENERAL; - - /* - * 
Historical: - * - * "Note about nonblocking I/O. - * - * At least on Solaris, setting the write end of a TTY to nonblocking, - * will set the input end to nonblocking as well (and vice-versa). - * If erl is run in a pipeline like this: cat | erl - * the input end of the TTY will be the standard input of cat. - * And cat is not prepared to handle nonblocking I/O." - * - * Actually, the reason for this is not that the tty itself gets set - * in non-blocking mode, but that the "input end" (cat's stdin) and - * the "output end" (erlang's stdout) are typically the "same" file - * descriptor, dup()'ed from a single fd by one of this process' - * ancestors. - * - * The workaround for this problem used to be a rather bad kludge, - * interposing an extra process ("internal cat") between erlang's - * stdout and the original stdout, allowing erlang to set its stdout - * in non-blocking mode without affecting the stdin of the preceding - * process in the pipeline - and being a kludge, it caused all kinds - * of weird problems. - * - * So, this is the current logic: - * - * The only reason to set non-blocking mode on the output fd at all is - * if it's something that can cause a write() to block, of course, - * i.e. primarily if it points to a tty, socket, pipe, or fifo. - * - * If we don't set non-blocking mode when we "should" have, and output - * becomes blocked, the entire runtime system will be suspended - this - * is normally bad of course, and can happen fairly "easily" - e.g. user - * hits ^S on tty - but doesn't necessarily happen. - * - * If we do set non-blocking mode when we "shouldn't" have, the runtime - * system will end up seeing EOF on the input fd (due to the preceding - * process dying), which typically will cause the entire runtime system - * to terminate immediately (due to whatever erlang process is seeing - * the EOF taking it as a signal to halt the system). This is *very* bad. - * - * I.e. 
we should take a conservative approach, and only set non- - * blocking mode when we a) need to, and b) are reasonably certain - * that it won't be a problem. And as in the example above, the problem - * occurs when input fd and output fd point to different "things". - * - * However, determining that they are not just the same "type" of - * "thing", but actually the same instance of that type of thing, is - * unreasonably complex in many/most cases. - * - * Also, with pipes, sockets, and fifos it's far from obvious that the - * user *wants* non-blocking output: If you're running erlang inside - * some complex pipeline, you're probably not running a real-time system - * that must never stop, but rather *want* it to suspend if the output - * channel is "full". - * - * So, the bottom line: We will only set the output fd non-blocking if - * it points to a tty, and either a) the input fd also points to a tty, - * or b) we can make sure that setting the output fd non-blocking - * doesn't interfere with someone else's input, via a somewhat milder - * kludge than the above. - * - * Also keep in mind that while this code is almost exclusively run as - * a result of an erlang open_port({fd,0,1}, ...), that isn't the only - * case - it can be called with any old pre-existing file descriptors, - * the relations between which (if they're even two) we can only guess - * at - still, we try our best... - * - * Added note OTP 18: Some systems seem to use stdout/stderr to log data - * using unix pipes, so we cannot allow the system to block on a write. - * Therefore we use an async thread to write the data to fd's that could - * not be set to non-blocking. When no async threads are available we - * fall back on the old behaviour. - * - * Also the guarantee about what is delivered to the OS has changed. - * Pre 18 the fd driver did no flushing of data before terminating. - * Now it does. 
This is because we want to be able to guarantee that things - * such as escripts and friends really have outputted all data before - * terminating. This could potentially block the termination of the system - * for a very long time, but if the user wants to terminate fast she should - * use erlang:halt with flush=false. - */ - - if (opts->read_write & DO_READ) { - init_fd_data(opts->ifd, port_num); - } - if (opts->read_write & DO_WRITE) { - init_fd_data(opts->ofd, port_num); - - /* If we don't have a read end, all bets are off - no non-blocking. */ - if (opts->read_write & DO_READ) { - - if (isatty(opts->ofd)) { /* output fd is a tty:-) */ - - if (isatty(opts->ifd)) { /* input fd is also a tty */ - - /* To really do this "right", we should also check that - input and output fd point to the *same* tty - but - this seems like overkill; ttyname() isn't for free, - and this is a very common case - and it's hard to - imagine a scenario where setting non-blocking mode - here would cause problems - go ahead and do it. */ - - non_blocking = 1; - SET_NONBLOCKING(opts->ofd); - - } else { /* output fd is a tty, input fd isn't */ - - /* This is a "problem case", but also common (see the - example above) - i.e. it makes sense to try a bit - harder before giving up on non-blocking mode: Try to - re-open the tty that the output fd points to, and if - successful replace the original one with the "new" fd - obtained this way, and set *that* one in non-blocking - mode. (Yes, this is a kludge.) - - However, re-opening the tty may fail in a couple of - (unusual) cases: - - 1) The name of the tty (or an equivalent one, i.e. - same major/minor number) can't be found, because - it actually lives somewhere other than /dev (or - wherever ttyname() looks for it), and isn't - equivalent to any of those that do live in the - "standard" place - this should be *very* unusual. 
- - 2) Permissions on the tty don't allow us to open it - - it's perfectly possible to have an fd open to an - object whose permissions wouldn't allow us to open - it. This is not as unusual as it sounds, one case - is if the user has su'ed to someone else (not - root) - we have a read/write fd open to the tty - (because it has been inherited all the way down - here), but we have neither read nor write - permission for the tty. - - In these cases, we finally give up, and don't set the - output fd in non-blocking mode. */ - - char *tty; - int nfd; - - if ((tty = ttyname(opts->ofd)) != NULL && - (nfd = open(tty, O_WRONLY)) != -1) { - dup2(nfd, opts->ofd); - close(nfd); - non_blocking = 1; - SET_NONBLOCKING(opts->ofd); - } - } - } - } - } - CHLD_STAT_LOCK; - res = (ErlDrvData)(long)set_driver_data(port_num, opts->ifd, opts->ofd, - opts->packet_bytes, - opts->read_write, 0, -1, - !non_blocking); - CHLD_STAT_UNLOCK; - return res; -} - -static void clear_fd_data(int fd) -{ - if (fd_data[fd].sz > 0) { - erts_free(ERTS_ALC_T_FD_ENTRY_BUF, (void *) fd_data[fd].buf); - ASSERT(erts_smp_atomic_read_nob(&sys_misc_mem_sz) >= fd_data[fd].sz); - erts_smp_atomic_add_nob(&sys_misc_mem_sz, -1*fd_data[fd].sz); - } - fd_data[fd].buf = NULL; - fd_data[fd].sz = 0; - fd_data[fd].remain = 0; - fd_data[fd].cpos = NULL; - fd_data[fd].psz = 0; -} - -static void nbio_stop_fd(ErlDrvPort prt, int fd) -{ - driver_select(prt,fd,DO_READ|DO_WRITE,0); - clear_fd_data(fd); - SET_BLOCKING(fd); -} - -static void fd_stop(ErlDrvData ev) /* Does not close the fds */ -{ - int ofd; - int fd = (int)(long)ev; - ErlDrvPort prt = driver_data[fd].port_num; - -#if FDBLOCK - if (driver_data[fd].blocking) { - erts_free(ERTS_ALC_T_SYS_BLOCKING,driver_data[fd].blocking); - driver_data[fd].blocking = NULL; - erts_smp_atomic_add_nob(&sys_misc_mem_sz, -1*sizeof(ErtsSysBlocking)); - } -#endif - - nbio_stop_fd(prt, fd); - ofd = driver_data[fd].ofd; - if (ofd != fd && ofd != -1) - nbio_stop_fd(prt, ofd); -} - -static void 
fd_flush(ErlDrvData fd) -{ - if (!driver_data[(int)(long)fd].terminating) - driver_data[(int)(long)fd].terminating = 1; -} - -static ErlDrvData vanilla_start(ErlDrvPort port_num, char* name, - SysDriverOpts* opts) -{ - int flags, fd; - ErlDrvData res; - - flags = (opts->read_write == DO_READ ? O_RDONLY : - opts->read_write == DO_WRITE ? O_WRONLY|O_CREAT|O_TRUNC : - O_RDWR|O_CREAT); - if ((fd = open(name, flags, 0666)) < 0) - return ERL_DRV_ERROR_GENERAL; - if (fd >= max_files) { - close(fd); - return ERL_DRV_ERROR_GENERAL; - } - SET_NONBLOCKING(fd); - init_fd_data(fd, port_num); - - CHLD_STAT_LOCK; - res = (ErlDrvData)(long)set_driver_data(port_num, fd, fd, - opts->packet_bytes, - opts->read_write, 0, -1, 0); - CHLD_STAT_UNLOCK; - return res; -} - -/* Note that driver_data[fd].ifd == fd if the port was opened for reading, */ -/* otherwise (i.e. write only) driver_data[fd].ofd = fd. */ - -static void stop(ErlDrvData fd) -{ - ErlDrvPort prt; - int ofd; - - prt = driver_data[(int)(long)fd].port_num; - nbio_stop_fd(prt, (int)(long)fd); - - ofd = driver_data[(int)(long)fd].ofd; - if (ofd != (int)(long)fd && (int)(long)ofd != -1) - nbio_stop_fd(prt, ofd); - else - ofd = -1; - - CHLD_STAT_LOCK; - - /* Mark as unused. 
*/ - driver_data[(int)(long)fd].pid = -1; - - CHLD_STAT_UNLOCK; - - /* SMP note: Close has to be last thing done (open file descriptors work - as locks on driver_data[] entries) */ - driver_select(prt, (int)(long)fd, ERL_DRV_USE, 0); /* close(fd); */ - if (ofd >= 0) { - driver_select(prt, (int)(long)ofd, ERL_DRV_USE, 0); /* close(ofd); */ - } -} - -/* used by fd_driver */ -static void outputv(ErlDrvData e, ErlIOVec* ev) -{ - int fd = (int)(long)e; - ErlDrvPort ix = driver_data[fd].port_num; - int pb = driver_data[fd].packet_bytes; - int ofd = driver_data[fd].ofd; - ssize_t n; - ErlDrvSizeT sz; - char lb[4]; - char* lbp; - ErlDrvSizeT len = ev->size; - - /* (len > ((unsigned long)-1 >> (4-pb)*8)) */ - /* if (pb >= 0 && (len & (((ErlDrvSizeT)1 << (pb*8))) - 1) != len) {*/ - if (((pb == 2) && (len > 0xffff)) || (pb == 1 && len > 0xff)) { - driver_failure_posix(ix, EINVAL); - return; /* -1; */ - } - /* Handles 0 <= pb <= 4 only */ - put_int32((Uint32) len, lb); - lbp = lb + (4-pb); - - ev->iov[0].iov_base = lbp; - ev->iov[0].iov_len = pb; - ev->size += pb; - - if (driver_data[fd].blocking && FDBLOCK) - driver_pdl_lock(driver_data[fd].blocking->pdl); - - if ((sz = driver_sizeq(ix)) > 0) { - driver_enqv(ix, ev, 0); - - if (driver_data[fd].blocking && FDBLOCK) - driver_pdl_unlock(driver_data[fd].blocking->pdl); - - if (sz + ev->size >= (1 << 13)) - set_busy_port(ix, 1); - } - else if (!driver_data[fd].blocking || !FDBLOCK) { - /* We try to write directly if the fd in non-blocking */ - int vsize = ev->vsize > MAX_VSIZE ? 
MAX_VSIZE : ev->vsize; - - n = writev(ofd, (const void *) (ev->iov), vsize); - if (n == ev->size) - return; /* 0;*/ - if (n < 0) { - if ((errno != EINTR) && (errno != ERRNO_BLOCK)) { - driver_failure_posix(ix, errno); - return; /* -1;*/ - } - n = 0; - } - driver_enqv(ix, ev, n); /* n is the skip value */ - driver_select(ix, ofd, ERL_DRV_WRITE|ERL_DRV_USE, 1); - } -#if FDBLOCK - else { - if (ev->size != 0) { - driver_enqv(ix, ev, 0); - driver_pdl_unlock(driver_data[fd].blocking->pdl); - driver_async(ix, &driver_data[fd].blocking->pkey, - fd_async, driver_data+fd, NULL); - } else { - driver_pdl_unlock(driver_data[fd].blocking->pdl); - } - } -#endif - /* return 0;*/ -} - -/* Used by spawn_driver and vanilla driver */ -static void output(ErlDrvData e, char* buf, ErlDrvSizeT len) -{ - int fd = (int)(long)e; - ErlDrvPort ix = driver_data[fd].port_num; - int pb = driver_data[fd].packet_bytes; - int ofd = driver_data[fd].ofd; - ssize_t n; - ErlDrvSizeT sz; - char lb[4]; - char* lbp; - struct iovec iv[2]; - - /* (len > ((unsigned long)-1 >> (4-pb)*8)) */ - if (((pb == 2) && (len > 0xffff)) || (pb == 1 && len > 0xff)) { - driver_failure_posix(ix, EINVAL); - return; /* -1; */ - } - put_int32(len, lb); - lbp = lb + (4-pb); - - if ((sz = driver_sizeq(ix)) > 0) { - driver_enq(ix, lbp, pb); - driver_enq(ix, buf, len); - if (sz + len + pb >= (1 << 13)) - set_busy_port(ix, 1); - } - else { - iv[0].iov_base = lbp; - iv[0].iov_len = pb; /* should work for pb=0 */ - iv[1].iov_base = buf; - iv[1].iov_len = len; - n = writev(ofd, iv, 2); - if (n == pb+len) - return; /* 0; */ - if (n < 0) { - if ((errno != EINTR) && (errno != ERRNO_BLOCK)) { - driver_failure_posix(ix, errno); - return; /* -1; */ - } - n = 0; - } - if (n < pb) { - driver_enq(ix, lbp+n, pb-n); - driver_enq(ix, buf, len); - } - else { - n -= pb; - driver_enq(ix, buf+n, len-n); - } - driver_select(ix, ofd, ERL_DRV_WRITE|ERL_DRV_USE, 1); - } - return; /* 0; */ -} - -static int port_inp_failure(ErlDrvPort port_num, int 
ready_fd, int res) - /* Result: 0 (eof) or -1 (error) */ -{ - int err = errno; - - ASSERT(res <= 0); - (void) driver_select(port_num, ready_fd, ERL_DRV_READ|ERL_DRV_WRITE, 0); - clear_fd_data(ready_fd); - - if (driver_data[ready_fd].blocking && FDBLOCK) { - driver_pdl_lock(driver_data[ready_fd].blocking->pdl); - if (driver_sizeq(driver_data[ready_fd].port_num) > 0) { - driver_pdl_unlock(driver_data[ready_fd].blocking->pdl); - /* We have stuff in the output queue, so we just - set the state to terminating and wait for fd_async_ready - to terminate the port */ - if (res == 0) - driver_data[ready_fd].terminating = 2; - else - driver_data[ready_fd].terminating = -err; - return 0; - } - driver_pdl_unlock(driver_data[ready_fd].blocking->pdl); - } - - if (res == 0) { - if (driver_data[ready_fd].report_exit) { - CHLD_STAT_LOCK; - - if (driver_data[ready_fd].alive) { - /* - * We have eof and want to report exit status, but the process - * hasn't exited yet. When it does report_exit_status() will - * driver_select() this fd which will make sure that we get - * back here with driver_data[ready_fd].alive == 0 and - * driver_data[ready_fd].status set. - */ - CHLD_STAT_UNLOCK; - return 0; - } - else { - int status = driver_data[ready_fd].status; - CHLD_STAT_UNLOCK; - - /* We need not be prepared for stopped/continued processes. 
*/ - if (WIFSIGNALED(status)) - status = 128 + WTERMSIG(status); - else - status = WEXITSTATUS(status); - - driver_report_exit(driver_data[ready_fd].port_num, status); - } - } - driver_failure_eof(port_num); - } else { - driver_failure_posix(port_num, err); - } - return 0; -} - -/* fd is the drv_data that is returned from the */ -/* initial start routine */ -/* ready_fd is the descriptor that is ready to read */ - -static void ready_input(ErlDrvData e, ErlDrvEvent ready_fd) -{ - int fd = (int)(long)e; - ErlDrvPort port_num; - int packet_bytes; - int res; - Uint h; - - port_num = driver_data[fd].port_num; - packet_bytes = driver_data[fd].packet_bytes; - - - if (packet_bytes == 0) { - byte *read_buf = (byte *) erts_alloc(ERTS_ALC_T_SYS_READ_BUF, - ERTS_SYS_READ_BUF_SZ); - res = read(ready_fd, read_buf, ERTS_SYS_READ_BUF_SZ); - if (res < 0) { - if ((errno != EINTR) && (errno != ERRNO_BLOCK)) - port_inp_failure(port_num, ready_fd, res); - } - else if (res == 0) - port_inp_failure(port_num, ready_fd, res); - else - driver_output(port_num, (char*) read_buf, res); - erts_free(ERTS_ALC_T_SYS_READ_BUF, (void *) read_buf); - } - else if (fd_data[ready_fd].remain > 0) { /* We try to read the remainder */ - /* space is allocated in buf */ - res = read(ready_fd, fd_data[ready_fd].cpos, - fd_data[ready_fd].remain); - if (res < 0) { - if ((errno != EINTR) && (errno != ERRNO_BLOCK)) - port_inp_failure(port_num, ready_fd, res); - } - else if (res == 0) { - port_inp_failure(port_num, ready_fd, res); - } - else if (res == fd_data[ready_fd].remain) { /* we're done */ - driver_output(port_num, fd_data[ready_fd].buf, - fd_data[ready_fd].sz); - clear_fd_data(ready_fd); - } - else { /* if (res < fd_data[ready_fd].remain) */ - fd_data[ready_fd].cpos += res; - fd_data[ready_fd].remain -= res; - } - } - else if (fd_data[ready_fd].remain == 0) { /* clean fd */ - byte *read_buf = (byte *) erts_alloc(ERTS_ALC_T_SYS_READ_BUF, - ERTS_SYS_READ_BUF_SZ); - /* We make one read attempt and see what 
happens */ - res = read(ready_fd, read_buf, ERTS_SYS_READ_BUF_SZ); - if (res < 0) { - if ((errno != EINTR) && (errno != ERRNO_BLOCK)) - port_inp_failure(port_num, ready_fd, res); - } - else if (res == 0) { /* eof */ - port_inp_failure(port_num, ready_fd, res); - } - else if (res < packet_bytes - fd_data[ready_fd].psz) { - memcpy(fd_data[ready_fd].pbuf+fd_data[ready_fd].psz, - read_buf, res); - fd_data[ready_fd].psz += res; - } - else { /* if (res >= packet_bytes) */ - unsigned char* cpos = read_buf; - int bytes_left = res; - - while (1) { - int psz = fd_data[ready_fd].psz; - char* pbp = fd_data[ready_fd].pbuf + psz; - - while(bytes_left && (psz < packet_bytes)) { - *pbp++ = *cpos++; - bytes_left--; - psz++; - } - - if (psz < packet_bytes) { - fd_data[ready_fd].psz = psz; - break; - } - fd_data[ready_fd].psz = 0; - - switch (packet_bytes) { - case 1: h = get_int8(fd_data[ready_fd].pbuf); break; - case 2: h = get_int16(fd_data[ready_fd].pbuf); break; - case 4: h = get_int32(fd_data[ready_fd].pbuf); break; - default: ASSERT(0); return; /* -1; */ - } - - if (h <= (bytes_left)) { - driver_output(port_num, (char*) cpos, h); - cpos += h; - bytes_left -= h; - continue; - } - else { /* The last message we got was split */ - char *buf = erts_alloc_fnf(ERTS_ALC_T_FD_ENTRY_BUF, h); - if (!buf) { - errno = ENOMEM; - port_inp_failure(port_num, ready_fd, -1); - } - else { - erts_smp_atomic_add_nob(&sys_misc_mem_sz, h); - sys_memcpy(buf, cpos, bytes_left); - fd_data[ready_fd].buf = buf; - fd_data[ready_fd].sz = h; - fd_data[ready_fd].remain = h - bytes_left; - fd_data[ready_fd].cpos = buf + bytes_left; - } - break; - } - } - } - erts_free(ERTS_ALC_T_SYS_READ_BUF, (void *) read_buf); - } -} - - -/* fd is the drv_data that is returned from the */ -/* initial start routine */ -/* ready_fd is the descriptor that is ready to read */ - -static void ready_output(ErlDrvData e, ErlDrvEvent ready_fd) -{ - int fd = (int)(long)e; - ErlDrvPort ix = driver_data[fd].port_num; - int n; - struct 
iovec* iv; - int vsize; - - - if ((iv = (struct iovec*) driver_peekq(ix, &vsize)) == NULL) { - driver_select(ix, ready_fd, ERL_DRV_WRITE, 0); - if (driver_data[fd].terminating) - driver_failure_atom(driver_data[fd].port_num,"normal"); - return; /* 0; */ - } - vsize = vsize > MAX_VSIZE ? MAX_VSIZE : vsize; - if ((n = writev(ready_fd, iv, vsize)) > 0) { - if (driver_deq(ix, n) == 0) - set_busy_port(ix, 0); - } - else if (n < 0) { - if (errno == ERRNO_BLOCK || errno == EINTR) - return; /* 0; */ - else { - int res = errno; - driver_select(ix, ready_fd, ERL_DRV_WRITE, 0); - driver_failure_posix(ix, res); - return; /* -1; */ - } - } - return; /* 0; */ -} - -static void stop_select(ErlDrvEvent fd, void* _) -{ - close((int)fd); -} - -#if FDBLOCK - -static void -fd_async(void *async_data) -{ - int res; - struct driver_data *dd = (struct driver_data*)async_data; - SysIOVec *iov0; - SysIOVec *iov; - int iovlen; - int err = 0; - /* much of this code is stolen from efile_drv:invoke_writev */ - driver_pdl_lock(dd->blocking->pdl); - iov0 = driver_peekq(dd->port_num, &iovlen); - iovlen = iovlen < MAXIOV ? 
iovlen : MAXIOV; - iov = erts_alloc_fnf(ERTS_ALC_T_SYS_WRITE_BUF, - sizeof(SysIOVec)*iovlen); - if (!iov) { - res = -1; - err = ENOMEM; - driver_pdl_unlock(dd->blocking->pdl); - } else { - memcpy(iov,iov0,iovlen*sizeof(SysIOVec)); - driver_pdl_unlock(dd->blocking->pdl); - - do { - res = writev(dd->ofd, iov, iovlen); - } while (res < 0 && errno == EINTR); - if (res < 0) - err = errno; - - erts_free(ERTS_ALC_T_SYS_WRITE_BUF, iov); - } - dd->blocking->res = res; - dd->blocking->err = err; -} - -void fd_ready_async(ErlDrvData drv_data, - ErlDrvThreadData thread_data) { - struct driver_data *dd = (struct driver_data *)thread_data; - ErlDrvPort port_num = dd->port_num; - - ASSERT(dd->blocking); - ASSERT(dd == (driver_data + (int)(long)drv_data)); - - if (dd->blocking->res > 0) { - driver_pdl_lock(dd->blocking->pdl); - if (driver_deq(port_num, dd->blocking->res) == 0) { - driver_pdl_unlock(dd->blocking->pdl); - set_busy_port(port_num, 0); - if (dd->terminating) { - /* The port is has been ordered to terminate - from either fd_flush or port_inp_failure */ - if (dd->terminating == 1) - driver_failure_atom(port_num, "normal"); - else if (dd->terminating == 2) - driver_failure_eof(port_num); - else if (dd->terminating < 0) - driver_failure_posix(port_num, -dd->terminating); - return; /* -1; */ - } - } else { - driver_pdl_unlock(dd->blocking->pdl); - /* still data left to write in queue */ - driver_async(port_num, &dd->blocking->pkey, fd_async, dd, NULL); - return /* 0; */; - } - } else if (dd->blocking->res < 0) { - if (dd->blocking->err == ERRNO_BLOCK) { - set_busy_port(port_num, 1); - /* still data left to write in queue */ - driver_async(port_num, &dd->blocking->pkey, fd_async, dd, NULL); - } else - driver_failure_posix(port_num, dd->blocking->err); - return; /* -1; */ - } - return; /* 0; */ -} - -#endif - void erts_do_break_handling(void) { struct termios temp_mode; @@ -2738,10 +1011,6 @@ erts_sys_unsetenv(char *key) void sys_init_io(void) { - fd_data = (struct fd_data *) 
- erts_alloc(ERTS_ALC_T_FD_TAB, max_files * sizeof(struct fd_data)); - erts_smp_atomic_add_nob(&sys_misc_mem_sz, - max_files * sizeof(struct fd_data)); } #if (0) /* unused? */ @@ -2935,179 +1204,6 @@ erl_debug(char* fmt, ...) #endif /* DEBUG */ -static ERTS_INLINE void -report_exit_status(ErtsSysReportExit *rep, int status) -{ - Port *pp; -#ifdef ERTS_SMP - CHLD_STAT_UNLOCK; - pp = erts_thr_id2port_sflgs(rep->port, - ERTS_PORT_SFLGS_INVALID_DRIVER_LOOKUP); - CHLD_STAT_LOCK; -#else - pp = erts_id2port_sflgs(rep->port, - NULL, - 0, - ERTS_PORT_SFLGS_INVALID_DRIVER_LOOKUP); -#endif - if (pp) { - if (rep->ifd >= 0) { - driver_data[rep->ifd].alive = 0; - driver_data[rep->ifd].status = status; - (void) driver_select(ERTS_Port2ErlDrvPort(pp), - rep->ifd, - (ERL_DRV_READ|ERL_DRV_USE), - 1); - } - if (rep->ofd >= 0) { - driver_data[rep->ofd].alive = 0; - driver_data[rep->ofd].status = status; - (void) driver_select(ERTS_Port2ErlDrvPort(pp), - rep->ofd, - (ERL_DRV_WRITE|ERL_DRV_USE), - 1); - } -#ifdef ERTS_SMP - erts_thr_port_release(pp); -#else - erts_port_release(pp); -#endif - } - erts_free(ERTS_ALC_T_PRT_REP_EXIT, rep); -} - -#if !CHLDWTHR /* ---------------------------------------------------------- */ - -#define ERTS_REPORT_EXIT_STATUS report_exit_status - -static int check_children(void) -{ - int res = 0; - int pid; - int status; - -#ifndef ERTS_SMP - if (children_died) -#endif - { - sys_sigblock(SIGCHLD); - CHLD_STAT_LOCK; - while ((pid = waitpid(-1, &status, WNOHANG)) > 0) - note_child_death(pid, status); -#ifndef ERTS_SMP - children_died = 0; -#endif - CHLD_STAT_UNLOCK; - sys_sigrelease(SIGCHLD); - res = 1; - } - return res; -} - -#ifdef ERTS_SMP - -void -erts_check_children(void) -{ - (void) check_children(); -} - -#endif - -#elif CHLDWTHR && defined(ERTS_SMP) /* ------------------------------------- */ - -#define ERTS_REPORT_EXIT_STATUS report_exit_status - -#define check_children() (0) - - -#else /* CHLDWTHR && !defined(ERTS_SMP) 
------------------------------------ */ - -#define ERTS_REPORT_EXIT_STATUS initiate_report_exit_status - -static ERTS_INLINE void -initiate_report_exit_status(ErtsSysReportExit *rep, int status) -{ - rep->next = report_exit_transit_list; - rep->status = status; - report_exit_transit_list = rep; - erts_sys_schedule_interrupt(1); -} - -static int check_children(void) -{ - int res; - ErtsSysReportExit *rep; - CHLD_STAT_LOCK; - rep = report_exit_transit_list; - res = rep != NULL; - while (rep) { - ErtsSysReportExit *curr_rep = rep; - rep = rep->next; - report_exit_status(curr_rep, curr_rep->status); - } - report_exit_transit_list = NULL; - CHLD_STAT_UNLOCK; - return res; -} - -#endif /* ------------------------------------------------------------------ */ - -static void note_child_death(int pid, int status) -{ - ErtsSysReportExit **repp = &report_exit_list; - ErtsSysReportExit *rep = report_exit_list; - - while (rep) { - if (pid == rep->pid) { - *repp = rep->next; - ERTS_REPORT_EXIT_STATUS(rep, status); - break; - } - repp = &rep->next; - rep = rep->next; - } -} - -#if CHLDWTHR - -static void * -child_waiter(void *unused) -{ - int pid; - int status; - -#ifdef ERTS_ENABLE_LOCK_CHECK - erts_lc_set_thread_name("child waiter"); -#endif - - while(1) { -#ifdef DEBUG - int waitpid_errno; -#endif - pid = waitpid(-1, &status, 0); -#ifdef DEBUG - waitpid_errno = errno; -#endif - CHLD_STAT_LOCK; - if (pid < 0) { - ASSERT(waitpid_errno == ECHILD); - } - else { - children_alive--; - ASSERT(children_alive >= 0); - note_child_death(pid, status); - } - while (!children_alive) - CHLD_STAT_WAIT; /* Wait for children to wait on... :) */ - CHLD_STAT_UNLOCK; - } - - return NULL; -} - -#endif - /* * Called from schedule() when it runs out of runnable processes, * or when Erlang code has performed INPUT_REDUCTIONS reduction @@ -3116,13 +1212,8 @@ child_waiter(void *unused) void erl_sys_schedule(int runnable) { -#ifdef ERTS_SMP ERTS_CHK_IO(!runnable); -#else - ERTS_CHK_IO(runnable ? 
0 : !check_children()); -#endif ERTS_SMP_LC_ASSERT(!erts_thr_progress_is_blocking()); - (void) check_children(); } @@ -3150,10 +1241,6 @@ smp_sig_notify(char c) static void * signal_dispatcher_thread_func(void *unused) { -#if !CHLDWTHR - int initialized = 0; - int notify_check_children = 0; -#endif #ifdef ERTS_ENABLE_LOCK_CHECK erts_lc_set_thread_name("signal_dispatcher"); #endif @@ -3191,19 +1278,7 @@ signal_dispatcher_thread_func(void *unused) */ switch (buf[i]) { case 0: /* Emulator initialized */ -#if !CHLDWTHR - initialized = 1; - if (!notify_check_children) -#endif - break; -#if !CHLDWTHR - case 'C': /* SIGCHLD */ - if (initialized) - erts_smp_notify_check_children_needed(); - else - notify_check_children = 1; - break; -#endif + break; case 'I': /* SIGINT */ break_requested(); break; diff --git a/erts/emulator/sys/unix/sys_drivers.c b/erts/emulator/sys/unix/sys_drivers.c new file mode 100644 index 0000000000..2a7cd91265 --- /dev/null +++ b/erts/emulator/sys/unix/sys_drivers.c @@ -0,0 +1,1862 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 1996-2014. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * %CopyrightEnd% + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#ifdef ISC32 +#define _POSIX_SOURCE +#define _XOPEN_SOURCE +#endif + +#include <sys/times.h> /* ! 
*/ +#include <time.h> +#include <signal.h> +#include <sys/wait.h> +#include <sys/uio.h> +#include <termios.h> +#include <ctype.h> +#include <sys/utsname.h> +#include <sys/select.h> +#include <arpa/inet.h> + +#ifdef ISC32 +#include <sys/bsdtypes.h> +#endif + +#include <termios.h> +#ifdef HAVE_FCNTL_H +#include <fcntl.h> +#endif +#ifdef HAVE_SYS_IOCTL_H +#include <sys/ioctl.h> +#endif + +#define WANT_NONBLOCKING /* must define this to pull in defs from sys.h */ +#include "sys.h" + +#ifdef USE_THREADS +#include "erl_threads.h" +#endif + +extern char **environ; +extern erts_smp_rwmtx_t environ_rwmtx; + +extern erts_smp_atomic_t sys_misc_mem_sz; + +static Eterm forker_port; + +#define MAX_VSIZE 16 /* Max number of entries allowed in an I/O + * vector sock_sendv(). + */ +/* + * Don't need global.h, but erl_cpu_topology.h won't compile otherwise + */ +#include "global.h" +#include "erl_cpu_topology.h" + +#include "erl_sys_driver.h" +#include "sys_uds.h" + +#include "erl_child_setup.h" + +#if defined IOV_MAX +#define MAXIOV IOV_MAX +#elif defined UIO_MAXIOV +#define MAXIOV UIO_MAXIOV +#else +#define MAXIOV 16 +#endif + +#ifdef USE_THREADS +# define FDBLOCK 1 +#else +# define FDBLOCK 0 +#endif + +/* Used by the fd driver iff the fd could not be set to non-blocking */ +typedef struct ErtsSysBlocking_ { + ErlDrvPDL pdl; + int res; + int err; + unsigned int pkey; +} ErtsSysBlocking; + +typedef struct fd_data { + int fd; + char pbuf[4]; /* hold partial packet bytes */ + int psz; /* size of pbuf */ + char *buf; + char *cpos; + int sz; + int remain; /* for input on fd */ +} ErtsSysFdData; + +typedef struct driver_data { + ErlDrvPort port_num; + ErtsSysFdData *ofd; + ErtsSysFdData *ifd; + int packet_bytes; + int pid; + int alive; + int status; + int terminating; + ErtsSysBlocking *blocking; +} ErtsSysDriverData; + +#define DIR_SEPARATOR_CHAR '/' + +#if defined(__ANDROID__) +#define SHELL "/system/bin/sh" +#else +#define SHELL "/bin/sh" +#endif /* __ANDROID__ */ + +#if 
defined(DEBUG) +#define ERL_BUILD_TYPE_MARKER ".debug" +#elif defined(PURIFY) +#define ERL_BUILD_TYPE_MARKER ".purify" +#elif defined(QUANTIFY) +#define ERL_BUILD_TYPE_MARKER ".quantify" +#elif defined(PURECOV) +#define ERL_BUILD_TYPE_MARKER ".purecov" +#elif defined(VALGRIND) +#define ERL_BUILD_TYPE_MARKER ".valgrind" +#else /* opt */ +#define ERL_BUILD_TYPE_MARKER +#endif + +#ifdef DEBUG +#define close(fd) do { int res = close(fd); ASSERT(res > -1); } while(0) +#endif + +#define CHILD_SETUP_PROG_NAME "erl_child_setup" ERL_BUILD_TYPE_MARKER + +// #define HARD_DEBUG +#ifdef HARD_DEBUG +#define driver_select(port_num, fd, flags, onoff) \ + do { \ + if (((flags) & ERL_DRV_READ) && onoff) \ + fprintf(stderr,"%010d %p: read select %d\r\n", __LINE__, port_num, (int)fd); \ + if (((flags) & ERL_DRV_WRITE) && onoff) \ + fprintf(stderr,"%010d %p: writ select %d\r\n", __LINE__, port_num, (int)fd); \ + if (((flags) & ERL_DRV_READ) && !onoff) \ + fprintf(stderr,"%010d %p: read unsele %d\r\n", __LINE__, port_num, (int)fd); \ + if (((flags) & ERL_DRV_WRITE) && !onoff) \ + fprintf(stderr,"%010d %p: writ unsele %d\r\n", __LINE__, port_num, (int)fd); \ + driver_select_nkp(port_num, fd, flags, onoff); \ + } while(0) +#endif + +/* + * Decreasing the size of it below 16384 is not allowed. + */ + +#define ERTS_SYS_READ_BUF_SZ (64*1024) + +/* I. 
Initialization */ + +void +erl_sys_late_init(void) +{ + SysDriverOpts opts; +#ifdef ERTS_SMP + Port *port; +#endif + + sys_signal(SIGPIPE, SIG_IGN); /* Ignore - we'll handle the write failure */ + + opts.packet_bytes = 0; + opts.use_stdio = 1; + opts.redir_stderr = 0; + opts.read_write = 0; + opts.hide_window = 0; + opts.wd = NULL; + opts.envir = NULL; + opts.exit_status = 0; + opts.overlapped_io = 0; + opts.spawn_type = ERTS_SPAWN_ANY; + opts.argv = NULL; + opts.parallelism = erts_port_parallelism; + +#ifdef ERTS_SMP + port = +#endif + erts_open_driver(&forker_driver, make_internal_pid(0), "forker", &opts, NULL, NULL); +#ifdef ERTS_SMP + erts_mtx_unlock(port->lock); +#endif +} + +/* II. Prototypes */ + +/* II.I Spawn prototypes */ +static ErlDrvData spawn_start(ErlDrvPort, char*, SysDriverOpts*); +static ErlDrvSSizeT spawn_control(ErlDrvData, unsigned int, char *, + ErlDrvSizeT, char **, ErlDrvSizeT); + +/* II.II Vanilla prototypes */ +static ErlDrvData vanilla_start(ErlDrvPort, char*, SysDriverOpts*); + + +/* II.III FD prototypes */ +static ErlDrvData fd_start(ErlDrvPort, char*, SysDriverOpts*); +#if FDBLOCK +static void fd_async(void *); +static void fd_ready_async(ErlDrvData drv_data, ErlDrvThreadData thread_data); +#endif +static ErlDrvSSizeT fd_control(ErlDrvData, unsigned int, char *, ErlDrvSizeT, + char **, ErlDrvSizeT); +static void fd_stop(ErlDrvData); +static void fd_flush(ErlDrvData); + +/* II.IV Common prototypes */ +static void stop(ErlDrvData); +static void ready_input(ErlDrvData, ErlDrvEvent); +static void ready_output(ErlDrvData, ErlDrvEvent); +static void output(ErlDrvData, char*, ErlDrvSizeT); +static void outputv(ErlDrvData, ErlIOVec*); +static void stop_select(ErlDrvEvent, void*); + +/* II.V Forker prototypes */ +static ErlDrvData forker_start(ErlDrvPort, char*, SysDriverOpts*); +static void forker_stop(ErlDrvData); +static void forker_ready_input(ErlDrvData, ErlDrvEvent); +static void forker_ready_output(ErlDrvData, ErlDrvEvent); +static 
ErlDrvSSizeT forker_control(ErlDrvData, unsigned int, char *, + ErlDrvSizeT, char **, ErlDrvSizeT); + +/* III Driver entries */ + +/* III.I The spawn driver */ +struct erl_drv_entry spawn_driver_entry = { + NULL, + spawn_start, + stop, + output, + ready_input, + ready_output, + "spawn", + NULL, + NULL, + spawn_control, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + ERL_DRV_EXTENDED_MARKER, + ERL_DRV_EXTENDED_MAJOR_VERSION, + ERL_DRV_EXTENDED_MINOR_VERSION, + ERL_DRV_FLAG_USE_PORT_LOCKING | ERL_DRV_FLAG_USE_INIT_ACK, + NULL, NULL, + stop_select +}; + +/* III.II The fd driver */ +struct erl_drv_entry fd_driver_entry = { + NULL, + fd_start, + fd_stop, + output, + ready_input, + ready_output, + "fd", + NULL, + NULL, + fd_control, + NULL, + outputv, +#if FDBLOCK + fd_ready_async, /* ready_async */ +#else + NULL, +#endif + fd_flush, /* flush */ + NULL, /* call */ + NULL, /* event */ + ERL_DRV_EXTENDED_MARKER, + ERL_DRV_EXTENDED_MAJOR_VERSION, + ERL_DRV_EXTENDED_MINOR_VERSION, + 0, /* ERL_DRV_FLAGs */ + NULL, /* handle2 */ + NULL, /* process_exit */ + stop_select +}; + +/* III.III The vanilla driver */ +struct erl_drv_entry vanilla_driver_entry = { + NULL, + vanilla_start, + stop, + output, + ready_input, + ready_output, + "vanilla", + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, /* flush */ + NULL, /* call */ + NULL, /* event */ + ERL_DRV_EXTENDED_MARKER, + ERL_DRV_EXTENDED_MAJOR_VERSION, + ERL_DRV_EXTENDED_MINOR_VERSION, + 0, /* ERL_DRV_FLAGs */ + NULL, /* handle2 */ + NULL, /* process_exit */ + stop_select +}; + +/* III.III The forker driver */ +struct erl_drv_entry forker_driver_entry = { + NULL, + forker_start, + forker_stop, + NULL, + forker_ready_input, + forker_ready_output, + "spawn_forker", + NULL, + NULL, + forker_control, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + ERL_DRV_EXTENDED_MARKER, + ERL_DRV_EXTENDED_MAJOR_VERSION, + ERL_DRV_EXTENDED_MINOR_VERSION, + 0, + NULL, NULL, + stop_select +}; + +/* Untility functions */ + +static int 
set_blocking_data(ErtsSysDriverData *dd) { + + dd->blocking = erts_alloc(ERTS_ALC_T_SYS_BLOCKING, sizeof(ErtsSysBlocking)); + + erts_smp_atomic_add_nob(&sys_misc_mem_sz, sizeof(ErtsSysBlocking)); + + dd->blocking->pdl = driver_pdl_create(dd->port_num); + dd->blocking->res = 0; + dd->blocking->err = 0; + dd->blocking->pkey = driver_async_port_key(dd->port_num); + + return 1; +} + +static void init_fd_data(ErtsSysFdData *fd_data, int fd) +{ + fd_data->fd = fd; + fd_data->buf = NULL; + fd_data->cpos = NULL; + fd_data->remain = 0; + fd_data->sz = 0; + fd_data->psz = 0; +} + +static ErtsSysDriverData * +create_driver_data(ErlDrvPort port_num, + int ifd, + int ofd, + int packet_bytes, + int read_write, + int exit_status, + int pid, + int is_blocking) +{ + Port *prt; + ErtsSysDriverData *driver_data; + char *data; + int size = sizeof(ErtsSysDriverData); + + if (read_write & DO_READ) + size += sizeof(ErtsSysFdData); + + if ((read_write & DO_WRITE) && + ((ifd != ofd || ofd == -1) || !(read_write & DO_READ))) + size += sizeof(ErtsSysFdData); + + data = erts_alloc(ERTS_ALC_T_DRV_TAB,size); + erts_smp_atomic_add_nob(&sys_misc_mem_sz, size); + + driver_data = (ErtsSysDriverData*)data; + data += sizeof(*driver_data); + + prt = erts_drvport2port(port_num); + if (prt != ERTS_INVALID_ERL_DRV_PORT) + prt->os_pid = pid; + + driver_data->packet_bytes = packet_bytes; + driver_data->port_num = port_num; + driver_data->pid = pid; + driver_data->alive = exit_status ? 
1 : 0; + driver_data->status = 0; + driver_data->terminating = 0; + driver_data->blocking = NULL; + + if (read_write & DO_READ) { + driver_data->ifd = (ErtsSysFdData*)data; + data += sizeof(*driver_data->ifd); + init_fd_data(driver_data->ifd, ifd); + driver_select(port_num, ifd, (ERL_DRV_READ|ERL_DRV_USE), 1); + } else { + driver_data->ifd = NULL; + } + + if (read_write & DO_WRITE) { + if (ofd != -1 && ifd == ofd && read_write & DO_READ) { + /* This is for when ifd and ofd are the same fd */ + driver_data->ofd = driver_data->ifd; + } else { + driver_data->ofd = (ErtsSysFdData*)data; + data += sizeof(*driver_data->ofd); + init_fd_data(driver_data->ofd, ofd); + } + if (is_blocking && FDBLOCK) + if (!set_blocking_data(driver_data)) { + erts_free(ERTS_ALC_T_DRV_TAB, driver_data); + return NULL; + } + } else { + driver_data->ofd = NULL; + } + + return driver_data; +} + +/* Spawn driver */ + +static void close_pipes(int ifd[2], int ofd[2]) +{ + close(ifd[0]); + close(ifd[1]); + close(ofd[0]); + close(ofd[1]); +} + +static char **build_unix_environment(char *block) +{ + int i; + int j; + int len; + char *cp; + char **cpp; + char** old_env; + + ERTS_SMP_LC_ASSERT(erts_smp_lc_rwmtx_is_rlocked(&environ_rwmtx)); + + cp = block; + len = 0; + while (*cp != '\0') { + cp += strlen(cp) + 1; + len++; + } + old_env = environ; + while (*old_env++ != NULL) { + len++; + } + + cpp = (char **) erts_alloc_fnf(ERTS_ALC_T_ENVIRONMENT, + sizeof(char *) * (len+1)); + if (cpp == NULL) { + return NULL; + } + + cp = block; + len = 0; + while (*cp != '\0') { + cpp[len] = cp; + cp += strlen(cp) + 1; + len++; + } + + i = len; + for (old_env = environ; *old_env; old_env++) { + char* old = *old_env; + + for (j = 0; j < len; j++) { + char *s, *t; + + /* check if cpp[j] equals old + before the = sign, + i.e. 
+ "TMPDIR=/tmp/" */ + s = cpp[j]; + t = old; + while (*s == *t && *s != '=') { + s++, t++; + } + if (*s == '=' && *t == '=') { + break; + } + } + + if (j == len) { /* New version not found */ + cpp[len++] = old; + } + } + + for (j = 0; j < i; ) { + size_t last = strlen(cpp[j])-1; + if (cpp[j][last] == '=' && strchr(cpp[j], '=') == cpp[j]+last) { + cpp[j] = cpp[--len]; + if (len < i) { + i--; + } else { + j++; + } + } + else { + j++; + } + } + + cpp[len] = NULL; + return cpp; +} + +static ErlDrvData spawn_start(ErlDrvPort port_num, char* name, + SysDriverOpts* opts) +{ +#define CMD_LINE_PREFIX_STR "exec " +#define CMD_LINE_PREFIX_STR_SZ (sizeof(CMD_LINE_PREFIX_STR) - 1) + + int len; + char **new_environ; + ErtsSysDriverData *dd; + char *cmd_line; + char wd_buff[MAXPATHLEN+1]; + char *wd; + int ifd[2], ofd[2], stderrfd; + + if (pipe(ifd) < 0) return ERL_DRV_ERROR_ERRNO; + errno = EMFILE; /* default for next three conditions */ + if (ifd[0] >= sys_max_files() || pipe(ofd) < 0) { + close(ifd[0]); + close(ifd[1]); + return ERL_DRV_ERROR_ERRNO; + } + if (ofd[1] >= sys_max_files()) { + close_pipes(ifd, ofd); + errno = EMFILE; + return ERL_DRV_ERROR_ERRNO; + } + + SET_NONBLOCKING(ifd[0]); + SET_NONBLOCKING(ofd[1]); + + stderrfd = opts->redir_stderr ? 
ifd[1] : dup(2); + + if (stderrfd >= sys_max_files() || stderrfd < 0) { + close_pipes(ifd, ofd); + if (stderrfd > -1) + close(stderrfd); + return ERL_DRV_ERROR_ERRNO; + } + + if (opts->spawn_type == ERTS_SPAWN_EXECUTABLE) { + /* started with spawn_executable, not with spawn */ + len = strlen(name); + cmd_line = (char *) erts_alloc_fnf(ERTS_ALC_T_TMP, len + 1); + if (!cmd_line) { + close_pipes(ifd, ofd); + errno = ENOMEM; + return ERL_DRV_ERROR_ERRNO; + } + memcpy((void *) cmd_line,(void *) name, len); + cmd_line[len] = '\0'; + len = len + 1; + if (access(cmd_line,X_OK) != 0) { + int save_errno = errno; + erts_free(ERTS_ALC_T_TMP, cmd_line); + close_pipes(ifd, ofd); + errno = save_errno; + return ERL_DRV_ERROR_ERRNO; + } + } else { + /* make the string suitable for giving to "sh" */ + len = strlen(name); + cmd_line = (char *) erts_alloc_fnf(ERTS_ALC_T_TMP, + CMD_LINE_PREFIX_STR_SZ + len + 1); + if (!cmd_line) { + close_pipes(ifd, ofd); + errno = ENOMEM; + return ERL_DRV_ERROR_ERRNO; + } + memcpy((void *) cmd_line, + (void *) CMD_LINE_PREFIX_STR, + CMD_LINE_PREFIX_STR_SZ); + memcpy((void *) (cmd_line + CMD_LINE_PREFIX_STR_SZ), (void *) name, len); + cmd_line[CMD_LINE_PREFIX_STR_SZ + len] = '\0'; + len = CMD_LINE_PREFIX_STR_SZ + len + 1; + } + + erts_smp_rwmtx_rlock(&environ_rwmtx); + + if (opts->envir == NULL) { + new_environ = environ; + } else if ((new_environ = build_unix_environment(opts->envir)) == NULL) { + erts_smp_rwmtx_runlock(&environ_rwmtx); + close_pipes(ifd, ofd); + erts_free(ERTS_ALC_T_TMP, (void *) cmd_line); + errno = ENOMEM; + return ERL_DRV_ERROR_ERRNO; + } + + if (opts->wd == NULL) { + if ((wd = getcwd(wd_buff, MAXPATHLEN+1)) == NULL) { + /* on some OSs this call opens a fd in the + background which means that this can + return EMFILE */ + int err = errno; + close_pipes(ifd, ofd); + erts_free(ERTS_ALC_T_TMP, (void *) cmd_line); + if (new_environ != environ) + erts_free(ERTS_ALC_T_ENVIRONMENT, (void *) new_environ); + 
erts_smp_rwmtx_runlock(&environ_rwmtx); + errno = err; + return ERL_DRV_ERROR_ERRNO; + } + } else { + wd = opts->wd; + } + + { + struct iovec *io_vector; + int iov_len = 5; + char nullbuff[] = "\0"; + int j, i = 0, res; + Sint32 buffsz = 0, env_len = 0, argv_len = 0, + flags = (opts->use_stdio ? FORKER_FLAG_USE_STDIO : 0) + | (opts->exit_status ? FORKER_FLAG_EXIT_STATUS : 0) + | (opts->read_write & DO_READ ? FORKER_FLAG_DO_READ : 0) + | (opts->read_write & DO_WRITE ? FORKER_FLAG_DO_WRITE : 0); + + /* count number of elements in environment */ + while(new_environ[env_len] != NULL) + env_len++; + iov_len += 1 + env_len; /* num envs including size int */ + + /* count number of element in argument list */ + if (opts->spawn_type == ERTS_SPAWN_EXECUTABLE) { + if (opts->argv != NULL) { + while(opts->argv[argv_len] != NULL) + argv_len++; + } else { + argv_len++; + } + iov_len += 1 + argv_len; /* num argvs including size int */ + } + + io_vector = erts_alloc_fnf(ERTS_ALC_T_TMP, sizeof(struct iovec) * iov_len); + + if (!io_vector) { + close_pipes(ifd, ofd); + erts_smp_rwmtx_runlock(&environ_rwmtx); + erts_free(ERTS_ALC_T_TMP, (void *) cmd_line); + if (new_environ != environ) + erts_free(ERTS_ALC_T_ENVIRONMENT, (void *) new_environ); + errno = ENOMEM; + return ERL_DRV_ERROR_ERRNO; + } + + io_vector[i].iov_base = (void*)&buffsz; + io_vector[i++].iov_len = sizeof(buffsz); + + io_vector[i].iov_base = (void*)&flags; + flags = htonl(flags); + io_vector[i++].iov_len = sizeof(flags); + buffsz += sizeof(flags); + + io_vector[i].iov_base = cmd_line; + io_vector[i++].iov_len = len; + buffsz += len; + + io_vector[i].iov_base = wd; + io_vector[i].iov_len = strlen(io_vector[i].iov_base) + 1; + buffsz += io_vector[i++].iov_len; + + io_vector[i].iov_base = nullbuff; + io_vector[i++].iov_len = 1; + buffsz += io_vector[i-1].iov_len; + + io_vector[i].iov_base = (void*)&env_len; + env_len = htonl(env_len); + io_vector[i++].iov_len = sizeof(env_len); + buffsz += io_vector[i-1].iov_len; + + for 
(j = 0; new_environ[j] != NULL; j++) { + io_vector[i].iov_base = new_environ[j]; + io_vector[i++].iov_len = strlen(new_environ[j]) + 1; + buffsz += io_vector[i-1].iov_len; + } + + /* only append arguments if this was a spawn_executable */ + if (opts->spawn_type == ERTS_SPAWN_EXECUTABLE) { + + io_vector[i].iov_base = (void*)&argv_len; + argv_len = htonl(argv_len); + io_vector[i++].iov_len = sizeof(argv_len); + buffsz += io_vector[i-1].iov_len; + + if (opts->argv) { + /* If there are arguments we copy in the references to + them into the iov */ + for (j = 0; opts->argv[j]; j++) { + if (opts->argv[j] == erts_default_arg0) + io_vector[i].iov_base = cmd_line; + else + io_vector[i].iov_base = opts->argv[j]; + io_vector[i].iov_len = strlen(io_vector[i].iov_base) + 1; + buffsz += io_vector[i++].iov_len; + } + } else { + io_vector[i].iov_base = cmd_line; + io_vector[i].iov_len = strlen(io_vector[i].iov_base) + 1; + buffsz += io_vector[i++].iov_len; + } + } + + /* we send the request to do the fork */ + if ((res = writev(ofd[1], io_vector, iov_len > MAXIOV ? MAXIOV : iov_len)) < 0) { + if (errno == ERRNO_BLOCK) { + res = 0; + } else { + int err = errno; + close_pipes(ifd, ofd); + erts_free(ERTS_ALC_T_TMP, io_vector); + if (new_environ != environ) + erts_free(ERTS_ALC_T_ENVIRONMENT, (void *) new_environ); + erts_smp_rwmtx_runlock(&environ_rwmtx); + erts_free(ERTS_ALC_T_TMP, (void *) cmd_line); + errno = err; + return ERL_DRV_ERROR_ERRNO; + } + } + + if (res < buffsz) { + /* we only wrote part of the command payload. Enqueue the rest. 
*/ + for (i = 0; i < iov_len; i++) { + driver_enq(port_num, io_vector[i].iov_base, io_vector[i].iov_len); + } + driver_deq(port_num, res); + driver_select(port_num, ofd[1], ERL_DRV_WRITE|ERL_DRV_USE, 1); + } + + erts_free(ERTS_ALC_T_TMP, io_vector); + } + + erts_free(ERTS_ALC_T_TMP, (void *) cmd_line); + + if (new_environ != environ) + erts_free(ERTS_ALC_T_ENVIRONMENT, (void *) new_environ); + + erts_smp_rwmtx_runlock(&environ_rwmtx); + + dd = create_driver_data(port_num, ifd[0], ofd[1], opts->packet_bytes, + DO_WRITE | DO_READ, opts->exit_status, + 0, 0); + + { + /* send ofd[0] + ifd[1] + stderrfd to forker port */ + ErtsSysForkerProto *proto = + erts_alloc(ERTS_ALC_T_DRV_CTRL_DATA, + sizeof(ErtsSysForkerProto)); + memset(proto, 0, sizeof(ErtsSysForkerProto)); + proto->action = ErtsSysForkerProtoAction_Start; + proto->u.start.fds[0] = ofd[0]; + proto->u.start.fds[1] = ifd[1]; + proto->u.start.fds[2] = stderrfd; + proto->u.start.port_id = opts->exit_status ? erts_drvport2id(port_num) : THE_NON_VALUE; + if (erl_drv_port_control(forker_port, 'S', (char*)proto, sizeof(*proto))) { + /* The forker port has been killed, we close both fd's which will + make open_port throw an epipe error */ + close(ofd[0]); + close(ifd[1]); + } + } + + /* we set these fds to negative to mark if + they should be closed after the handshake */ + if (!(opts->read_write & DO_READ)) + dd->ifd->fd *= -1; + + if (!(opts->read_write & DO_WRITE)) + dd->ofd->fd *= -1; + + return (ErlDrvData)dd; +#undef CMD_LINE_PREFIX_STR +#undef CMD_LINE_PREFIX_STR_SZ +} + +static ErlDrvSSizeT spawn_control(ErlDrvData e, unsigned int cmd, char *buf, + ErlDrvSizeT len, char **rbuf, ErlDrvSizeT rlen) +{ + ErtsSysDriverData *dd = (ErtsSysDriverData*)e; + ErtsSysForkerProto *proto = (ErtsSysForkerProto *)buf; + + ASSERT(len == sizeof(*proto)); + ASSERT(proto->action == ErtsSysForkerProtoAction_SigChld); + + dd->status = proto->u.sigchld.error_number; + dd->alive = -1; + + if (dd->ifd) + driver_select(dd->port_num, 
abs(dd->ifd->fd), ERL_DRV_READ | ERL_DRV_USE, 1); + + if (dd->ofd) + driver_select(dd->port_num, abs(dd->ofd->fd), ERL_DRV_WRITE | ERL_DRV_USE, 1); + + return 0; +} + +#define FD_DEF_HEIGHT 24 +#define FD_DEF_WIDTH 80 +/* Control op */ +#define FD_CTRL_OP_GET_WINSIZE 100 + +static int fd_get_window_size(int fd, Uint32 *width, Uint32 *height) +{ +#ifdef TIOCGWINSZ + struct winsize ws; + if (ioctl(fd,TIOCGWINSZ,&ws) == 0) { + *width = (Uint32) ws.ws_col; + *height = (Uint32) ws.ws_row; + return 0; + } +#endif + return -1; +} + +static ErlDrvSSizeT fd_control(ErlDrvData drv_data, + unsigned int command, + char *buf, ErlDrvSizeT len, + char **rbuf, ErlDrvSizeT rlen) +{ + int fd = (int)(long)drv_data; + char resbuff[2*sizeof(Uint32)]; + switch (command) { + case FD_CTRL_OP_GET_WINSIZE: + { + Uint32 w,h; + if (fd_get_window_size(fd,&w,&h)) + return 0; + memcpy(resbuff,&w,sizeof(Uint32)); + memcpy(resbuff+sizeof(Uint32),&h,sizeof(Uint32)); + } + break; + default: + return 0; + } + if (rlen < 2*sizeof(Uint32)) { + *rbuf = driver_alloc(2*sizeof(Uint32)); + } + memcpy(*rbuf,resbuff,2*sizeof(Uint32)); + return 2*sizeof(Uint32); +} + +static ErlDrvData fd_start(ErlDrvPort port_num, char* name, + SysDriverOpts* opts) +{ + int non_blocking = 0; + + if (((opts->read_write & DO_READ) && opts->ifd >= sys_max_files()) || + ((opts->read_write & DO_WRITE) && opts->ofd >= sys_max_files())) + return ERL_DRV_ERROR_GENERAL; + + /* + * Historical: + * + * "Note about nonblocking I/O. + * + * At least on Solaris, setting the write end of a TTY to nonblocking, + * will set the input end to nonblocking as well (and vice-versa). + * If erl is run in a pipeline like this: cat | erl + * the input end of the TTY will be the standard input of cat. + * And cat is not prepared to handle nonblocking I/O." 
+ * + * Actually, the reason for this is not that the tty itself gets set + * in non-blocking mode, but that the "input end" (cat's stdin) and + * the "output end" (erlang's stdout) are typically the "same" file + * descriptor, dup()'ed from a single fd by one of this process' + * ancestors. + * + * The workaround for this problem used to be a rather bad kludge, + * interposing an extra process ("internal cat") between erlang's + * stdout and the original stdout, allowing erlang to set its stdout + * in non-blocking mode without affecting the stdin of the preceding + * process in the pipeline - and being a kludge, it caused all kinds + * of weird problems. + * + * So, this is the current logic: + * + * The only reason to set non-blocking mode on the output fd at all is + * if it's something that can cause a write() to block, of course, + * i.e. primarily if it points to a tty, socket, pipe, or fifo. + * + * If we don't set non-blocking mode when we "should" have, and output + * becomes blocked, the entire runtime system will be suspended - this + * is normally bad of course, and can happen fairly "easily" - e.g. user + * hits ^S on tty - but doesn't necessarily happen. + * + * If we do set non-blocking mode when we "shouldn't" have, the runtime + * system will end up seeing EOF on the input fd (due to the preceding + * process dying), which typically will cause the entire runtime system + * to terminate immediately (due to whatever erlang process is seeing + * the EOF taking it as a signal to halt the system). This is *very* bad. + * + * I.e. we should take a conservative approach, and only set non- + * blocking mode when we a) need to, and b) are reasonably certain + * that it won't be a problem. And as in the example above, the problem + * occurs when input fd and output fd point to different "things". 
+ * + * However, determining that they are not just the same "type" of + * "thing", but actually the same instance of that type of thing, is + * unreasonably complex in many/most cases. + * + * Also, with pipes, sockets, and fifos it's far from obvious that the + * user *wants* non-blocking output: If you're running erlang inside + * some complex pipeline, you're probably not running a real-time system + * that must never stop, but rather *want* it to suspend if the output + * channel is "full". + * + * So, the bottom line: We will only set the output fd non-blocking if + * it points to a tty, and either a) the input fd also points to a tty, + * or b) we can make sure that setting the output fd non-blocking + * doesn't interfere with someone else's input, via a somewhat milder + * kludge than the above. + * + * Also keep in mind that while this code is almost exclusively run as + * a result of an erlang open_port({fd,0,1}, ...), that isn't the only + * case - it can be called with any old pre-existing file descriptors, + * the relations between which (if they're even two) we can only guess + * at - still, we try our best... + * + * Added note OTP 18: Some systems seem to use stdout/stderr to log data + * using unix pipes, so we cannot allow the system to block on a write. + * Therefore we use an async thread to write the data to fd's that could + * not be set to non-blocking. When no async threads are available we + * fall back on the old behaviour. + * + * Also the guarantee about what is delivered to the OS has changed. + * Pre 18 the fd driver did no flushing of data before terminating. + * Now it does. This is because we want to be able to guarantee that things + * such as escripts and friends really have outputted all data before + * terminating. This could potentially block the termination of the system + * for a very long time, but if the user wants to terminate fast she should + * use erlang:halt with flush=false. 
+ */ + + /* Try to figure out if we can use non-blocking writes */ + if (opts->read_write & DO_WRITE) { + + /* If we don't have a read end, all bets are off - no non-blocking. */ + if (opts->read_write & DO_READ) { + + if (isatty(opts->ofd)) { /* output fd is a tty:-) */ + + if (isatty(opts->ifd)) { /* input fd is also a tty */ + + /* To really do this "right", we should also check that + input and output fd point to the *same* tty - but + this seems like overkill; ttyname() isn't for free, + and this is a very common case - and it's hard to + imagine a scenario where setting non-blocking mode + here would cause problems - go ahead and do it. */ + + non_blocking = 1; + SET_NONBLOCKING(opts->ofd); + + } else { /* output fd is a tty, input fd isn't */ + + /* This is a "problem case", but also common (see the + example above) - i.e. it makes sense to try a bit + harder before giving up on non-blocking mode: Try to + re-open the tty that the output fd points to, and if + successful replace the original one with the "new" fd + obtained this way, and set *that* one in non-blocking + mode. (Yes, this is a kludge.) + + However, re-opening the tty may fail in a couple of + (unusual) cases: + + 1) The name of the tty (or an equivalent one, i.e. + same major/minor number) can't be found, because + it actually lives somewhere other than /dev (or + wherever ttyname() looks for it), and isn't + equivalent to any of those that do live in the + "standard" place - this should be *very* unusual. + + 2) Permissions on the tty don't allow us to open it - + it's perfectly possible to have an fd open to an + object whose permissions wouldn't allow us to open + it. This is not as unusual as it sounds, one case + is if the user has su'ed to someone else (not + root) - we have a read/write fd open to the tty + (because it has been inherited all the way down + here), but we have neither read nor write + permission for the tty. 
+ + In these cases, we finally give up, and don't set the + output fd in non-blocking mode. */ + + char *tty; + int nfd; + + if ((tty = ttyname(opts->ofd)) != NULL && + (nfd = open(tty, O_WRONLY)) != -1) { + dup2(nfd, opts->ofd); + close(nfd); + non_blocking = 1; + SET_NONBLOCKING(opts->ofd); + } + } + } + } + } + return (ErlDrvData)create_driver_data(port_num, opts->ifd, opts->ofd, + opts->packet_bytes, + opts->read_write, 0, -1, + !non_blocking); +} + +static void clear_fd_data(ErtsSysFdData *fdd) +{ + if (fdd->sz > 0) { + erts_free(ERTS_ALC_T_FD_ENTRY_BUF, (void *) fdd->buf); + ASSERT(erts_smp_atomic_read_nob(&sys_misc_mem_sz) >= fdd->sz); + erts_smp_atomic_add_nob(&sys_misc_mem_sz, -1*fdd->sz); + } + fdd->buf = NULL; + fdd->sz = 0; + fdd->remain = 0; + fdd->cpos = NULL; + fdd->psz = 0; +} + +static void nbio_stop_fd(ErlDrvPort prt, ErtsSysFdData *fdd) +{ + driver_select(prt, abs(fdd->fd), DO_READ|DO_WRITE, 0); + clear_fd_data(fdd); + SET_BLOCKING(abs(fdd->fd)); + +} + +static void fd_stop(ErlDrvData ev) /* Does not close the fds */ +{ + ErtsSysDriverData* dd = (ErtsSysDriverData*)ev; + ErlDrvPort prt = dd->port_num; + int sz = sizeof(ErtsSysDriverData); + +#if FDBLOCK + if (dd->blocking) { + erts_free(ERTS_ALC_T_SYS_BLOCKING, dd->blocking); + dd->blocking = NULL; + sz += sizeof(ErtsSysBlocking); + } +#endif + + if (dd->ifd) { + sz += sizeof(ErtsSysFdData); + nbio_stop_fd(prt, dd->ifd); + } + if (dd->ofd && dd->ofd != dd->ifd) { + sz += sizeof(ErtsSysFdData); + nbio_stop_fd(prt, dd->ofd); + } + + erts_free(ERTS_ALC_T_DRV_TAB, dd); + erts_smp_atomic_add_nob(&sys_misc_mem_sz, -sz); +} + +static void fd_flush(ErlDrvData ev) +{ + ErtsSysDriverData* dd = (ErtsSysDriverData*)ev; + if (!dd->terminating) + dd->terminating = 1; +} + +static ErlDrvData vanilla_start(ErlDrvPort port_num, char* name, + SysDriverOpts* opts) +{ + int flags, fd; + ErlDrvData res; + + flags = (opts->read_write == DO_READ ? O_RDONLY : + opts->read_write == DO_WRITE ? 
O_WRONLY|O_CREAT|O_TRUNC : + O_RDWR|O_CREAT); + if ((fd = open(name, flags, 0666)) < 0) + return ERL_DRV_ERROR_GENERAL; + if (fd >= sys_max_files()) { + close(fd); + return ERL_DRV_ERROR_GENERAL; + } + SET_NONBLOCKING(fd); + + res = (ErlDrvData)(long)create_driver_data(port_num, fd, fd, + opts->packet_bytes, + opts->read_write, 0, -1, 0); + return res; +} + +/* Note that driver_data[fd].ifd == fd if the port was opened for reading, */ +/* otherwise (i.e. write only) driver_data[fd].ofd = fd. */ + +static void stop(ErlDrvData ev) +{ + ErtsSysDriverData* dd = (ErtsSysDriverData*)ev; + ErlDrvPort prt = dd->port_num; + + if (dd->ifd) { + nbio_stop_fd(prt, dd->ifd); + driver_select(prt, abs(dd->ifd->fd), ERL_DRV_USE, 0); /* close(ifd); */ + } + + if (dd->ofd && dd->ofd != dd->ifd) { + nbio_stop_fd(prt, dd->ofd); + driver_select(prt, abs(dd->ofd->fd), ERL_DRV_USE, 0); /* close(ofd); */ + } + + erts_free(ERTS_ALC_T_DRV_TAB, dd); +} + +/* used by fd_driver */ +static void outputv(ErlDrvData e, ErlIOVec* ev) +{ + ErtsSysDriverData *dd = (ErtsSysDriverData*)e; + ErlDrvPort ix = dd->port_num; + int pb = dd->packet_bytes; + int ofd = dd->ofd ? 
dd->ofd->fd : -1; + ssize_t n; + ErlDrvSizeT sz; + char lb[4]; + char* lbp; + ErlDrvSizeT len = ev->size; + + /* (len > ((unsigned long)-1 >> (4-pb)*8)) */ + /* if (pb >= 0 && (len & (((ErlDrvSizeT)1 << (pb*8))) - 1) != len) {*/ + if (((pb == 2) && (len > 0xffff)) || (pb == 1 && len > 0xff)) { + driver_failure_posix(ix, EINVAL); + return; /* -1; */ + } + /* Handles 0 <= pb <= 4 only */ + put_int32((Uint32) len, lb); + lbp = lb + (4-pb); + + ev->iov[0].iov_base = lbp; + ev->iov[0].iov_len = pb; + ev->size += pb; + + if (dd->blocking && FDBLOCK) + driver_pdl_lock(dd->blocking->pdl); + + if ((sz = driver_sizeq(ix)) > 0) { + driver_enqv(ix, ev, 0); + + if (dd->blocking && FDBLOCK) + driver_pdl_unlock(dd->blocking->pdl); + + if (sz + ev->size >= (1 << 13)) + set_busy_port(ix, 1); + } + else if (!dd->blocking || !FDBLOCK) { + /* We try to write directly if the fd in non-blocking */ + int vsize = ev->vsize > MAX_VSIZE ? MAX_VSIZE : ev->vsize; + + n = writev(ofd, (const void *) (ev->iov), vsize); + if (n == ev->size) + return; /* 0;*/ + if (n < 0) { + if ((errno != EINTR) && (errno != ERRNO_BLOCK)) { + driver_failure_posix(ix, errno); + return; /* -1;*/ + } + n = 0; + } + driver_enqv(ix, ev, n); /* n is the skip value */ + driver_select(ix, ofd, ERL_DRV_WRITE|ERL_DRV_USE, 1); + } +#if FDBLOCK + else { + if (ev->size != 0) { + driver_enqv(ix, ev, 0); + driver_pdl_unlock(dd->blocking->pdl); + driver_async(ix, &dd->blocking->pkey, + fd_async, dd, NULL); + } else { + driver_pdl_unlock(dd->blocking->pdl); + } + } +#endif + /* return 0;*/ +} + +/* Used by spawn_driver and vanilla driver */ +static void output(ErlDrvData e, char* buf, ErlDrvSizeT len) +{ + ErtsSysDriverData *dd = (ErtsSysDriverData*)e; + ErlDrvPort ix = dd->port_num; + int pb = dd->packet_bytes; + int ofd = dd->ofd ? 
dd->ofd->fd : -1; + ssize_t n; + ErlDrvSizeT sz; + char lb[4]; + char* lbp; + struct iovec iv[2]; + + /* (len > ((unsigned long)-1 >> (4-pb)*8)) */ + if (((pb == 2) && (len > 0xffff)) + || (pb == 1 && len > 0xff) + || dd->pid == 0 /* Attempt at output before port is ready */) { + driver_failure_posix(ix, EINVAL); + return; /* -1; */ + } + put_int32(len, lb); + lbp = lb + (4-pb); + + if ((sz = driver_sizeq(ix)) > 0) { + driver_enq(ix, lbp, pb); + driver_enq(ix, buf, len); + if (sz + len + pb >= (1 << 13)) + set_busy_port(ix, 1); + } + else { + iv[0].iov_base = lbp; + iv[0].iov_len = pb; /* should work for pb=0 */ + iv[1].iov_base = buf; + iv[1].iov_len = len; + n = writev(ofd, iv, 2); + if (n == pb+len) + return; /* 0; */ + if (n < 0) { + if ((errno != EINTR) && (errno != ERRNO_BLOCK)) { + driver_failure_posix(ix, errno); + return; /* -1; */ + } + n = 0; + } + if (n < pb) { + driver_enq(ix, lbp+n, pb-n); + driver_enq(ix, buf, len); + } + else { + n -= pb; + driver_enq(ix, buf+n, len-n); + } + driver_select(ix, ofd, ERL_DRV_WRITE|ERL_DRV_USE, 1); + } + return; /* 0; */ +} + +static int port_inp_failure(ErtsSysDriverData *dd, int res) + /* Result: 0 (eof) or -1 (error) */ +{ + int err = errno; + + ASSERT(res <= 0); + if (dd->ifd) { + driver_select(dd->port_num, dd->ifd->fd, ERL_DRV_READ|ERL_DRV_WRITE, 0); + clear_fd_data(dd->ifd); + } + + if (dd->blocking && FDBLOCK) { + driver_pdl_lock(dd->blocking->pdl); + if (driver_sizeq(dd->port_num) > 0) { + driver_pdl_unlock(dd->blocking->pdl); + /* We have stuff in the output queue, so we just + set the state to terminating and wait for fd_async_ready + to terminate the port */ + if (res == 0) + dd->terminating = 2; + else + dd->terminating = -err; + return 0; + } + driver_pdl_unlock(dd->blocking->pdl); + } + + if (res == 0) { + if (dd->alive == 1) { + /* + * We have eof and want to report exit status, but the process + * hasn't exited yet. 
When it does ready_input will + * driver_select() this fd which will make sure that we get + * back here with dd->alive == -1 and dd->status set. + */ + return 0; + } + else if (dd->alive == -1) { + int status = dd->status; + + /* We need not be prepared for stopped/continued processes. */ + if (WIFSIGNALED(status)) + status = 128 + WTERMSIG(status); + else + status = WEXITSTATUS(status); + driver_report_exit(dd->port_num, status); + } + driver_failure_eof(dd->port_num); + } else if (dd->ifd) { + erl_drv_init_ack(dd->port_num, ERL_DRV_ERROR_ERRNO); + } else { + driver_failure_posix(dd->port_num, err); + } + return 0; +} + +/* fd is the drv_data that is returned from the */ +/* initial start routine */ +/* ready_fd is the descriptor that is ready to read */ + +static void ready_input(ErlDrvData e, ErlDrvEvent ready_fd) +{ + ErtsSysDriverData *dd = (ErtsSysDriverData*)e; + ErlDrvPort port_num; + int packet_bytes; + int res; + Uint h; + + port_num = dd->port_num; + packet_bytes = dd->packet_bytes; + + ASSERT(abs(dd->ifd->fd) == ready_fd); + + if (dd->pid == 0) { + /* the pid is sent from erl_child_setup. spawn driver only. */ + ErtsSysForkerProto proto; + int res; + + if((res = read(ready_fd, &proto, sizeof(proto))) <= 0) { + /* hmm, child setup seems to have closed the pipe too early... + we close the port as there is not much else we can do */ + if (res < 0 && errno == ERRNO_BLOCK) + return; + driver_select(port_num, ready_fd, ERL_DRV_READ, 0); + if (res == 0) + errno = EPIPE; + port_inp_failure(dd, -1); + return; + } + + ASSERT(proto.action == ErtsSysForkerProtoAction_Go); + dd->pid = proto.u.go.os_pid; + + if (dd->pid == -1) { + /* Setup failed! The only reason why this should happen is if + the fork fails. 
*/ + errno = proto.u.go.error_number; + port_inp_failure(dd, -1); + return; + } + + proto.action = ErtsSysForkerProtoAction_Ack; + + if (driver_sizeq(port_num) > 0) { + driver_enq(port_num, (char*)&proto, sizeof(proto)); + } else { + if (write(abs(dd->ofd->fd), &proto, sizeof(proto)) < 0) + if (errno == ERRNO_BLOCK || errno == EINTR) + driver_enq(port_num, (char*)&proto, sizeof(proto)); + /* do nothing on failure here. If the ofd is broken, then + the ifd will probably also be broken and trigger + a port_inp_failure */ + } + + if (dd->ifd->fd < 0) { + driver_select(port_num, abs(dd->ifd->fd), ERL_DRV_READ|ERL_DRV_USE, 0); + erts_smp_atomic_add_nob(&sys_misc_mem_sz, -sizeof(ErtsSysFdData)); + dd->ifd = NULL; + } + + if (dd->ofd->fd < 0 || driver_sizeq(port_num) > 0) + /* we select in order to close fd or write to queue, + child setup will close this fd if fd < 0 */ + driver_select(port_num, abs(dd->ofd->fd), ERL_DRV_WRITE|ERL_DRV_USE, 1); + + erl_drv_set_os_pid(port_num, dd->pid); + erl_drv_init_ack(port_num, e); + return; + } + + if (packet_bytes == 0) { + byte *read_buf = (byte *) erts_alloc(ERTS_ALC_T_SYS_READ_BUF, + ERTS_SYS_READ_BUF_SZ); + res = read(ready_fd, read_buf, ERTS_SYS_READ_BUF_SZ); + if (res < 0) { + if ((errno != EINTR) && (errno != ERRNO_BLOCK)) + port_inp_failure(dd, res); + } + else if (res == 0) + port_inp_failure(dd, res); + else + driver_output(port_num, (char*) read_buf, res); + erts_free(ERTS_ALC_T_SYS_READ_BUF, (void *) read_buf); + } + else if (dd->ifd->remain > 0) { /* We try to read the remainder */ + /* space is allocated in buf */ + res = read(ready_fd, dd->ifd->cpos, + dd->ifd->remain); + if (res < 0) { + if ((errno != EINTR) && (errno != ERRNO_BLOCK)) + port_inp_failure(dd, res); + } + else if (res == 0) { + port_inp_failure(dd, res); + } + else if (res == dd->ifd->remain) { /* we're done */ + driver_output(port_num, dd->ifd->buf, + dd->ifd->sz); + clear_fd_data(dd->ifd); + } + else { /* if (res < dd->ifd->remain) */ + dd->ifd->cpos 
+= res; + dd->ifd->remain -= res; + } + } + else if (dd->ifd->remain == 0) { /* clean fd */ + byte *read_buf = (byte *) erts_alloc(ERTS_ALC_T_SYS_READ_BUF, + ERTS_SYS_READ_BUF_SZ); + /* We make one read attempt and see what happens */ + res = read(ready_fd, read_buf, ERTS_SYS_READ_BUF_SZ); + if (res < 0) { + if ((errno != EINTR) && (errno != ERRNO_BLOCK)) + port_inp_failure(dd, res); + } + else if (res == 0) { /* eof */ + port_inp_failure(dd, res); + } + else if (res < packet_bytes - dd->ifd->psz) { + memcpy(dd->ifd->pbuf+dd->ifd->psz, + read_buf, res); + dd->ifd->psz += res; + } + else { /* if (res >= packet_bytes) */ + unsigned char* cpos = read_buf; + int bytes_left = res; + + while (1) { + int psz = dd->ifd->psz; + char* pbp = dd->ifd->pbuf + psz; + + while(bytes_left && (psz < packet_bytes)) { + *pbp++ = *cpos++; + bytes_left--; + psz++; + } + + if (psz < packet_bytes) { + dd->ifd->psz = psz; + break; + } + dd->ifd->psz = 0; + + switch (packet_bytes) { + case 1: h = get_int8(dd->ifd->pbuf); break; + case 2: h = get_int16(dd->ifd->pbuf); break; + case 4: h = get_int32(dd->ifd->pbuf); break; + default: ASSERT(0); return; /* -1; */ + } + + if (h <= (bytes_left)) { + driver_output(port_num, (char*) cpos, h); + cpos += h; + bytes_left -= h; + continue; + } + else { /* The last message we got was split */ + char *buf = erts_alloc_fnf(ERTS_ALC_T_FD_ENTRY_BUF, h); + if (!buf) { + errno = ENOMEM; + port_inp_failure(dd, -1); + } + else { + erts_smp_atomic_add_nob(&sys_misc_mem_sz, h); + sys_memcpy(buf, cpos, bytes_left); + dd->ifd->buf = buf; + dd->ifd->sz = h; + dd->ifd->remain = h - bytes_left; + dd->ifd->cpos = buf + bytes_left; + } + break; + } + } + } + erts_free(ERTS_ALC_T_SYS_READ_BUF, (void *) read_buf); + } +} + + +/* fd is the drv_data that is returned from the */ +/* initial start routine */ +/* ready_fd is the descriptor that is ready to read */ + +static void ready_output(ErlDrvData e, ErlDrvEvent ready_fd) +{ + ErtsSysDriverData *dd = 
(ErtsSysDriverData*)e; + ErlDrvPort ix = dd->port_num; + int n; + struct iovec* iv; + int vsize; + + if ((iv = (struct iovec*) driver_peekq(ix, &vsize)) == NULL) { + driver_select(ix, ready_fd, ERL_DRV_WRITE, 0); + if (dd->pid > 0 && dd->ofd->fd < 0) { + /* The port was opened with 'in' option, which means we + should close the output fd as soon as the command has + been sent. */ + driver_select(ix, ready_fd, ERL_DRV_WRITE|ERL_DRV_USE, 0); + erts_smp_atomic_add_nob(&sys_misc_mem_sz, -sizeof(ErtsSysFdData)); + dd->ofd = NULL; + } + if (dd->terminating) + driver_failure_atom(dd->port_num,"normal"); + return; /* 0; */ + } + vsize = vsize > MAX_VSIZE ? MAX_VSIZE : vsize; + if ((n = writev(ready_fd, iv, vsize)) > 0) { + if (driver_deq(ix, n) == 0) + set_busy_port(ix, 0); + } + else if (n < 0) { + if (errno == ERRNO_BLOCK || errno == EINTR) + return; /* 0; */ + else { + int res = errno; + driver_select(ix, ready_fd, ERL_DRV_WRITE, 0); + driver_failure_posix(ix, res); + return; /* -1; */ + } + } + return; /* 0; */ +} + +static void stop_select(ErlDrvEvent fd, void* _) +{ + close((int)fd); +} + +#if FDBLOCK + +static void +fd_async(void *async_data) +{ + int res; + ErtsSysDriverData *dd = (ErtsSysDriverData *)async_data; + SysIOVec *iov0; + SysIOVec *iov; + int iovlen; + int err = 0; + /* much of this code is stolen from efile_drv:invoke_writev */ + driver_pdl_lock(dd->blocking->pdl); + iov0 = driver_peekq(dd->port_num, &iovlen); + iovlen = iovlen < MAXIOV ? 
iovlen : MAXIOV; + iov = erts_alloc_fnf(ERTS_ALC_T_SYS_WRITE_BUF, + sizeof(SysIOVec)*iovlen); + if (!iov) { + res = -1; + err = ENOMEM; + driver_pdl_unlock(dd->blocking->pdl); + } else { + memcpy(iov,iov0,iovlen*sizeof(SysIOVec)); + driver_pdl_unlock(dd->blocking->pdl); + + do { + res = writev(dd->ofd->fd, iov, iovlen); + } while (res < 0 && errno == EINTR); + /* Record errno only when writev failed; err keeps its initial 0 on + success, and is captured before erts_free() below runs. */ + if (res < 0) + err = errno; + + erts_free(ERTS_ALC_T_SYS_WRITE_BUF, iov); + } + dd->blocking->res = res; + dd->blocking->err = err; +} + +void fd_ready_async(ErlDrvData drv_data, + ErlDrvThreadData thread_data) { + ErtsSysDriverData *dd = (ErtsSysDriverData *)thread_data; + ErlDrvPort port_num = dd->port_num; + + ASSERT(dd->blocking); + + if (dd->blocking->res > 0) { + driver_pdl_lock(dd->blocking->pdl); + if (driver_deq(port_num, dd->blocking->res) == 0) { + driver_pdl_unlock(dd->blocking->pdl); + set_busy_port(port_num, 0); + if (dd->terminating) { + /* The port has been ordered to terminate + from either fd_flush or port_inp_failure */ + if (dd->terminating == 1) + driver_failure_atom(port_num, "normal"); + else if (dd->terminating == 2) + driver_failure_eof(port_num); + else if (dd->terminating < 0) + driver_failure_posix(port_num, -dd->terminating); + return; /* -1; */ + } + } else { + driver_pdl_unlock(dd->blocking->pdl); + /* still data left to write in queue */ + driver_async(port_num, &dd->blocking->pkey, fd_async, dd, NULL); + return /* 0; */; + } + } else if (dd->blocking->res < 0) { + if (dd->blocking->err == ERRNO_BLOCK) { + set_busy_port(port_num, 1); + /* still data left to write in queue */ + driver_async(port_num, &dd->blocking->pkey, fd_async, dd, NULL); + } else + driver_failure_posix(port_num, dd->blocking->err); + return; /* -1; */ + } + return; /* 0; */ +} + +#endif + +/* Forker driver */ + +static int forker_fd; + +static ErlDrvData forker_start(ErlDrvPort port_num, char* name, + SysDriverOpts* opts) +{ + + int i; + int fds[2]; + int res, unbind; + char 
bindir[MAXPATHLEN]; + size_t bindirsz = sizeof(bindir); + Uint csp_path_sz; + char *child_setup_prog; + + forker_port = erts_drvport2id(port_num); + + res = erts_sys_getenv_raw("BINDIR", bindir, &bindirsz); + if (res != 0) { + if (res < 0) + erl_exit(-1, + "Environment variable BINDIR is not set\n"); + if (res > 0) + erl_exit(-1, + "Value of environment variable BINDIR is too large\n"); + } + if (bindir[0] != DIR_SEPARATOR_CHAR) + erl_exit(-1, + "Environment variable BINDIR does not contain an" + " absolute path\n"); + csp_path_sz = (strlen(bindir) + + 1 /* DIR_SEPARATOR_CHAR */ + + sizeof(CHILD_SETUP_PROG_NAME) + + 1); + child_setup_prog = erts_alloc(ERTS_ALC_T_CS_PROG_PATH, csp_path_sz); + erts_snprintf(child_setup_prog, csp_path_sz, + "%s%c%s", + bindir, + DIR_SEPARATOR_CHAR, + CHILD_SETUP_PROG_NAME); + if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds) < 0) { + erl_exit(ERTS_ABORT_EXIT, + "Could not open unix domain socket in spawn_init: %d\n", + errno); + } + + forker_fd = fds[0]; + + unbind = erts_sched_bind_atfork_prepare(); + + i = fork(); + + if (i == 0) { + /* The child */ + char *cs_argv[FORKER_ARGV_NO_OF_ARGS] = + {CHILD_SETUP_PROG_NAME, NULL, NULL}; + char buff[128]; + + erts_sched_bind_atfork_child(unbind); + + snprintf(buff, 128, "%d", sys_max_files()); + cs_argv[FORKER_ARGV_MAX_FILES] = buff; + + /* We preallocate fd 3 for the uds fd */ + if (fds[1] != 3) { + dup2(fds[1], 3); + } + +#if defined(USE_SETPGRP_NOARGS) /* SysV */ + (void) setpgrp(); +#elif defined(USE_SETPGRP) /* BSD */ + (void) setpgrp(0, getpid()); +#else /* POSIX */ + (void) setsid(); +#endif + + execv(child_setup_prog, cs_argv); + _exit(1); + } + + erts_sched_bind_atfork_parent(unbind); + + erts_free(ERTS_ALC_T_CS_PROG_PATH, child_setup_prog); + + close(fds[1]); + + SET_NONBLOCKING(forker_fd); + + driver_select(port_num, forker_fd, ERL_DRV_READ|ERL_DRV_USE, 1); + + return (ErlDrvData)port_num; +} + +static void forker_stop(ErlDrvData e) +{ + /* we probably should do something here, + the 
port has been closed by the user. */ +} + +/* Reads one ErtsSysForkerProto message from erl_child_setup into a heap + allocated buffer and acts on its action field. The paths below that + return without passing the buffer to erl_drv_port_control() free it. */ +static void forker_ready_input(ErlDrvData e, ErlDrvEvent fd) +{ + int res; + ErtsSysForkerProto *proto; + + proto = erts_alloc(ERTS_ALC_T_DRV_CTRL_DATA, sizeof(*proto)); + + if ((res = read(fd, proto, sizeof(*proto))) < 0) { + if (errno == ERRNO_BLOCK) { + /* nothing to read yet; release the unused buffer */ + erts_free(ERTS_ALC_T_DRV_CTRL_DATA, proto); + return; + } + erl_exit(ERTS_DUMP_EXIT, "Failed to read from erl_child_setup: %d\n", errno); + } + + if (res == 0) + erl_exit(ERTS_DUMP_EXIT, "erl_child_setup closed\n"); + + ASSERT(res == sizeof(*proto)); + +#ifdef FORKER_PROTO_START_ACK + if (proto->action == ErtsSysForkerProtoAction_StartAck) { + /* Ideally we would like to not have to ack each Start + command being sent over the uds, but it would seem + that some operating systems (only observed on FreeBSD) + throw away data on the uds when the socket becomes full, + so we have to. + */ + ErlDrvPort port_num = (ErlDrvPort)e; + int vlen; + SysIOVec *iov = driver_peekq(port_num, &vlen); + /* head of the driver queue: the Start message being acked */ + ErtsSysForkerProto *queued = (ErtsSysForkerProto *)iov[0].iov_base; + + close(queued->u.start.fds[0]); + close(queued->u.start.fds[1]); + if (queued->u.start.fds[1] != queued->u.start.fds[2]) + close(queued->u.start.fds[2]); + + driver_deq(port_num, sizeof(*queued)); + + if (driver_sizeq(port_num) > 0) + driver_select(port_num, forker_fd, ERL_DRV_WRITE|ERL_DRV_USE, 1); + + /* the ack message itself is not passed on, so free it here */ + erts_free(ERTS_ALC_T_DRV_CTRL_DATA, proto); + } else +#endif + { + ASSERT(proto->action == ErtsSysForkerProtoAction_SigChld); + + /* ideally this would be a port_command call, but as command is + already used by the spawn_driver, we use control instead. + Note that when using erl_drv_port_control it is an asynchronous + control. 
*/ + erl_drv_port_control(proto->u.sigchld.port_id, 'S', + (char*)proto, sizeof(*proto)); + } + +} + +static void forker_ready_output(ErlDrvData e, ErlDrvEvent fd) +{ + ErlDrvPort port_num = (ErlDrvPort)e; + +#ifndef FORKER_PROTO_START_ACK + while (driver_sizeq(port_num) > 0) { +#endif + int vlen; + SysIOVec *iov = driver_peekq(port_num, &vlen); + ErtsSysForkerProto *proto = (ErtsSysForkerProto *)iov[0].iov_base; + ASSERT(iov[0].iov_len >= (sizeof(*proto))); + if (sys_uds_write(forker_fd, (char*)proto, sizeof(*proto), + proto->u.start.fds, 3, 0) < 0) { + if (errno == ERRNO_BLOCK) + return; + erl_exit(ERTS_DUMP_EXIT, "Failed to write to erl_child_setup: %d\n", errno); + } +#ifndef FORKER_PROTO_START_ACK + close(proto->u.start.fds[0]); + close(proto->u.start.fds[1]); + if (proto->u.start.fds[1] != proto->u.start.fds[2]) + close(proto->u.start.fds[2]); + driver_deq(port_num, sizeof(*proto)); + } +#endif + + driver_select(port_num, forker_fd, ERL_DRV_WRITE, 0); +} + +static ErlDrvSSizeT forker_control(ErlDrvData e, unsigned int cmd, char *buf, + ErlDrvSizeT len, char **rbuf, ErlDrvSizeT rlen) +{ + ErtsSysForkerProto *proto = (ErtsSysForkerProto *)buf; + ErlDrvPort port_num = (ErlDrvPort)e; + int res; + + driver_enq(port_num, buf, len); + if (driver_sizeq(port_num) > sizeof(*proto)) { + return 0; + } + + if ((res = sys_uds_write(forker_fd, (char*)proto, sizeof(*proto), + proto->u.start.fds, 3, 0)) < 0) { + if (errno == ERRNO_BLOCK) { + driver_select(port_num, forker_fd, ERL_DRV_WRITE|ERL_DRV_USE, 1); + return 0; + } + erl_exit(ERTS_DUMP_EXIT, "Failed to write to erl_child_setup: %d\n", errno); + } + +#ifndef FORKER_PROTO_START_ACK + ASSERT(res == sizeof(*proto)); + close(proto->u.start.fds[0]); + close(proto->u.start.fds[1]); + if (proto->u.start.fds[1] != proto->u.start.fds[2]) + close(proto->u.start.fds[2]); + driver_deq(port_num, sizeof(*proto)); +#endif + + return 0; +} diff --git a/erts/emulator/sys/unix/sys_uds.c b/erts/emulator/sys/unix/sys_uds.c new file mode 
100644 index 0000000000..015d0346a1 --- /dev/null +++ b/erts/emulator/sys/unix/sys_uds.c @@ -0,0 +1,155 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2002-2009. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * %CopyrightEnd% + */ + +#include "sys_uds.h" + +int +sys_uds_readv(int fd, struct iovec *iov, size_t iov_len, + int *fds, int fd_count, int flags) { + struct msghdr msg; + struct cmsghdr *cmsg = NULL; + char ancillary_buff[256] = {0}; + int res, i = 0; + + /* setup a place to fill in message contents */ + memset(&msg, 0, sizeof(struct msghdr)); + msg.msg_iov = iov; + msg.msg_iovlen = iov_len; + + /* provide space for the ancillary data */ + msg.msg_control = ancillary_buff; + msg.msg_controllen = sizeof(ancillary_buff); + + if((res = recvmsg(fd, &msg, flags)) < 0) { +#if defined(__APPLE__) && defined(__MACH__) && !defined(__DARWIN__) + /* When some OS X versions run out of fd's + they give EMSGSIZE instead of EMFILE. + We remap this as we want the correct + error to appear for the user */ + if (errno == EMSGSIZE) + errno = EMFILE; +#endif + return res; + } + + if((msg.msg_flags & MSG_CTRUNC) == MSG_CTRUNC) + { + /* We assume that we have given enough space for any header + that are sent to us. So the only remaining reason to get + this flag set is if the caller has run out of file descriptors. 
+ */ + errno = EMFILE; + return -1; + } + + for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg) ) { + if ((cmsg->cmsg_level == SOL_SOCKET) && + (cmsg->cmsg_type == SCM_RIGHTS)) { + int *cmsg_data = (int *)CMSG_DATA(cmsg); + while ((char*)cmsg_data < (char*)cmsg + cmsg->cmsg_len) { + if (i < fd_count) { + fds[i++] = *cmsg_data++; + } else { + /* for some strange reason, we have received more FD's + than we wanted... close them if we are not running + debug. */ + if(i >= fd_count) abort(); + close(*cmsg_data++); + } + } + } + } + + return res; +} + +int +sys_uds_read(int fd, char *buff, size_t len, + int *fds, int fd_count, int flags) { + struct iovec iov; + iov.iov_base = buff; + iov.iov_len = len; + return sys_uds_readv(fd, &iov, 1, fds, fd_count, flags); +} + + +/* Sends the data in iov together with fd_count file descriptors from fds + as SCM_RIGHTS ancillary data using sendmsg(). Returns the sendmsg() + result, or -1 with errno set to ENOMEM if the flatten buffer cannot be + allocated. */ +int +sys_uds_writev(int fd, struct iovec *iov, size_t iov_len, + int *fds, int fd_count, int flags) { + + struct msghdr msg; + struct cmsghdr *cmsg = NULL; + int res, i; + + /* initialize socket message */ + memset(&msg, 0, sizeof(struct msghdr)); + + /* We flatten the iov if it is too long */ + if (iov_len > MAXIOV) { + int size = 0; + char *buff; + for (i = 0; i < iov_len; i++) + size += iov[i].iov_len; + buff = malloc(size); + if (!buff) { + /* fail before iov is modified or anything else is allocated */ + errno = ENOMEM; + return -1; + } + + for (i = 0; i < iov_len; i++) { + memcpy(buff, iov[i].iov_base, iov[i].iov_len); + buff += iov[i].iov_len; + } + + iov[0].iov_base = buff - size; + iov[0].iov_len = size; + msg.msg_iov = iov; + msg.msg_iovlen = 1; + } else { + msg.msg_iov = iov; + msg.msg_iovlen = iov_len; + } + + /* initialize the ancillary data */ + msg.msg_control = calloc(1, CMSG_SPACE(sizeof(int) * fd_count)); + msg.msg_controllen = CMSG_SPACE(sizeof(int) * fd_count); + + /* copy the fd array into the ancillary data */ + cmsg = CMSG_FIRSTHDR(&msg); + if(!cmsg) abort(); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + cmsg->cmsg_len = CMSG_LEN(sizeof(int) * fd_count); + memcpy(CMSG_DATA(cmsg), fds, sizeof(int) * fd_count); + + res = sendmsg(fd, &msg, flags); + + if 
(iov_len > MAXIOV) + free(iov[0].iov_base); + + free(msg.msg_control); + + return res; +} + +int +sys_uds_write(int fd, char *buff, size_t len, + int *fds, int fd_count, int flags) { + struct iovec iov; + iov.iov_base = buff; + iov.iov_len = len; + return sys_uds_writev(fd, &iov, 1, fds, fd_count, flags); +} diff --git a/erts/emulator/sys/unix/sys_uds.h b/erts/emulator/sys/unix/sys_uds.h new file mode 100644 index 0000000000..844a2804d8 --- /dev/null +++ b/erts/emulator/sys/unix/sys_uds.h @@ -0,0 +1,57 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2002-2009. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * %CopyrightEnd% + */ + +#ifndef _ERL_UNIX_UDS_H +#define _ERL_UNIX_UDS_H + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#if defined(__sun__) && !defined(_XOPEN_SOURCE) +#define _XOPEN_SOURCE 500 +#endif + +#include <limits.h> + +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/uio.h> + +#if defined IOV_MAX +#define MAXIOV IOV_MAX +#elif defined UIO_MAXIOV +#define MAXIOV UIO_MAXIOV +#else +#define MAXIOV 16 +#endif + +#include "sys.h" + +int sys_uds_readv(int fd, struct iovec *iov, size_t iov_len, + int *fds, int fd_count, int flags); +int sys_uds_read(int fd, char *buff, size_t len, + int *fds, int fd_count, int flags); +int sys_uds_writev(int fd, struct iovec *iov, size_t iov_len, + int *fds, int fd_count, int flags); +int sys_uds_write(int fd, char *buff, size_t len, + int *fds, int fd_count, int flags); + +#endif /* #ifndef _ERL_UNIX_UDS_H */ diff --git a/erts/emulator/sys/win32/sys.c b/erts/emulator/sys/win32/sys.c index fce76db28f..76ce25916a 100644 --- a/erts/emulator/sys/win32/sys.c +++ b/erts/emulator/sys/win32/sys.c @@ -1334,10 +1334,8 @@ spawn_start(ErlDrvPort port_num, char* utf8_name, SysDriverOpts* opts) retval = set_driver_data(dp, hFromChild, hToChild, opts->read_write, opts->exit_status); if (retval != ERL_DRV_ERROR_GENERAL && retval != ERL_DRV_ERROR_ERRNO) { - Port *prt = erts_drvport2port(port_num); - /* We assume that this cannot generate a negative number */ - ASSERT(prt != ERTS_INVALID_ERL_DRV_PORT); - prt->os_pid = (SWord) pid; + /* We assume that this cannot generate a negative number */ + erl_drv_set_os_pid(port_num, pid); } } @@ -1528,8 +1526,8 @@ create_child_process * Parse out the program name from the command line (it can be quoted and * contain spaces). 
*/ - newcmdline = (wchar_t *) erts_alloc(ERTS_ALC_T_TMP, 2048*sizeof(wchar_t)); cmdlength = parse_command(origcmd); + newcmdline = (wchar_t *) erts_alloc(ERTS_ALC_T_TMP, (MAX_PATH+wcslen(origcmd)-cmdlength)*sizeof(wchar_t)); thecommand = (wchar_t *) erts_alloc(ERTS_ALC_T_TMP, (cmdlength+1)*sizeof(wchar_t)); wcsncpy(thecommand, origcmd, cmdlength); thecommand[cmdlength] = L'\0'; @@ -3273,6 +3271,12 @@ void erl_sys_init(void) } void +erl_sys_late_init(void) +{ + /* do nothing */ +} + +void erts_sys_schedule_interrupt(int set) { erts_check_io_interrupt(set); |