From 40dc4722cdf23772832f95775e54a204b4bb72a4 Mon Sep 17 00:00:00 2001 From: Dennis Heimbigner Date: Sat, 4 Nov 2023 10:26:24 -0600 Subject: [PATCH] ckp --- docs/nczarr.md | 5 +- libnczarr/zarr.h | 2 + libnczarr/zformat3.c | 158 ++++++++++++++++++++++--------------------- libnczarr/zutil.c | 149 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 236 insertions(+), 78 deletions(-) diff --git a/docs/nczarr.md b/docs/nczarr.md index c91e7e4692..f18f58f007 100644 --- a/docs/nczarr.md +++ b/docs/nczarr.md @@ -759,12 +759,15 @@ Within that object, the following Dictionary key and corresponding JSON value is "nczarr_format": "3.0.0", "dimensions": { "<dimension FQN>": {"size": <integer>, "unlimited": 1|0}, "<dimension FQN>": {"size": <integer>, "unlimited": 1|0} ... - } + }, + "groups": ["<group FQN>", "<group FQN>", ...], + "arrays": ["<array FQN>", "<array FQN>", ...] } ```` The "dimensions" key holds information about all the shared dimensions across all groups. This aggregation improves performance by not requiring all groups to be searched looking for dimension information. +Similarly, the "groups" and "arrays" keys capture the gross structure of the subgroups and variables (aka arrays). FQN is an acronym for "Fully Qualified Name".
It is a series of names separated by the "/" character, much diff --git a/libnczarr/zarr.h b/libnczarr/zarr.h index a4103c13e2..6b11648fc9 100644 --- a/libnczarr/zarr.h +++ b/libnczarr/zarr.h @@ -89,6 +89,8 @@ EXTERNL int NCZ_fixed2char(const void* fixed, char** charp, size_t count, int ma EXTERNL int NCZ_char2fixed(const char** charp, void* fixed, size_t count, int maxstrlen); EXTERNL int NCZ_copy_data(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, const void* memory, size_t count, int reading, void* copy); EXTERNL int NCZ_iscomplexjson(NCjson* value, nc_type typehint); +EXTERNL int NCZ_makeFQN(NC_GRP_INFO_T* parent, NC_OBJ* object, NCbytes* fqn); +EXTERNL int NCZ_parseFQN(NC_FILE_INFO_T* file, const char* fqn0, NClist* path); /* zwalk.c */ EXTERNL int NCZ_read_chunk(int ncid, int varid, size64_t* zindices, void* chunkdata); diff --git a/libnczarr/zformat3.c b/libnczarr/zformat3.c index 0b3997f6ac..5459eb96ac 100644 --- a/libnczarr/zformat3.c +++ b/libnczarr/zformat3.c @@ -40,7 +40,7 @@ static int read_dims(NC_FILE_INFO_T* file, NCZ_FILE_INFO_T* zinfo, NCZMAP* map, static int read_vars(NC_FILE_INFO_T* file, NCZ_FILE_INFO_T* zinfo, NCZMAP* map, NC_GRP_INFO_T* grp, NClist* varnames); static int read_subgrps(NC_FILE_INFO_T* file, NCZ_FILE_INFO_T* zinfo, NCZMAP* map, NC_GRP_INFO_T* grp, NClist* subgrpnames); -static int NCZ_collect_dims(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NCjson** jdimsp); +static int NCZ_collect_dims(NC_FILE_INFO_T* file, NCjson** jdimsp); static int NCZ_parse_group_content(NCjson* jcontent, NClist* dimdefs, NClist* varnames, NClist* subgrps); static int NCZ_parse_group_content_pure(NC_FILE_INFO_T* file, NCZ_FILE_INFO_T* zinfo, NC_GRP_INFO_T* grp, NClist* varnames, NClist* subgrps); static int NCZ_read_atts(NC_FILE_INFO_T* file, NCZ_FILE_INFO_T* zfile, NC_OBJ* container, NCjson* jblock); @@ -146,89 +146,96 @@ write_grp(NC_FILE_INFO_T* file, NCZ_FILE_INFO_T* zfile, NCZMAP* map, NC_GRP_INFO int i,stat = NC_NOERR; char version[1024]; int purezarr = 0; 
+ int rootgrp = 0; char* fullpath = NULL; char* key = NULL; - NCjson* jncgrp = NULL; NCjson* jgroup = NULL; NCjson* jdims = NULL; - NCjson* jvars = NULL; NCjson* jsubgrps = NULL; NCjson* jsuper = NULL; - NCjson* jtmp = NULL; NCjson* jatts = NULL; NCjson* jtypes = NULL; + NCjson* jarrays = NULL; + NCbytes* fqn = ncbytesnew(); ZTRACE(3,"file=%s grp=%s isclose=%d",file->controller->path,grp->hdr.name,isclose); purezarr = (zfile->controls.flags & FLAG_PUREZARR)?1:0; + rootgrp = (grp->parent == NULL); /* Construct grp key */ if((stat = NCZ_grpkey(grp,&fullpath))) goto done; - if(!purezarr) { + /* If the zarr.info for non-root group has attributes, + then build Z3GROUP contents + */ + if(rootgrp || ncindexsize(grp->att) > 0) { + if((stat = NCJnew(NCJ_DICT,&jgroup))) goto done; + if((stat = NCJinsertstring(jgroup,"node_type","group"))) goto done; + snprintf(version,sizeof(version),"%d",zfile->zarr.zarr_format); + if((stat = NCJinsertstring(jgroup,"zarr_format",version))) goto done; + /* Insert the group attributes */ + /* Build the attributes dictionary */ + assert(grp->att); + if((stat = build_atts(file,zfile,(NC_OBJ*)grp, grp->att, &jatts, &jtypes))) goto done; + if((stat = NCJinsert(jgroup,"attributes",jatts))) goto done; + if(!purezarr && jtypes) + {if((stat = NCJinsert(jgroup,NCZ_V3_ATTR,jtypes))) goto done;} + } + + if(!purezarr && rootgrp) { + /* Build the superblock */ + /* Track the library version that wrote this */ + strncpy(version,NCZARR_PACKAGE_VERSION,sizeof(version)); + if((stat = NCJnew(NCJ_DICT,&jsuper))) goto done; + if((stat = NCJinsertstring(jsuper,"version",version))) goto done; + snprintf(version,sizeof(version),"%u", (unsigned)zfile->zarr.nczarr_format); + if((stat = NCJinsertstring(jsuper,"format",version))) goto done; + /* Create dimensions dict */ - if((stat = NCZ_collect_dims(file,grp,&jdims))) goto done; + if((stat = NCZ_collect_dims(file,&jdims))) goto done; /* Create vars list */ - if((stat = NCJnew(NCJ_ARRAY,&jvars))) - goto done; + 
if((stat = NCJnew(NCJ_ARRAY,&jarrays))) goto done; for(i=0; ivars); i++) { NC_VAR_INFO_T* var = (NC_VAR_INFO_T*)ncindexith(grp->vars,i); - if((stat = NCJaddstring(jvars,NCJ_STRING,var->hdr.name))) goto done; + ncbytesclear(fqn); + if((stat = NCZ_makeFQN(var->container,(NC_OBJ*)var,fqn))) goto done; + if((stat = NCJaddstring(jarrays,NCJ_STRING,ncbytescontents(fqn)))) goto done; } /* Create subgroups list */ - if((stat = NCJnew(NCJ_ARRAY,&jsubgrps))) - goto done; + if((stat = NCJnew(NCJ_ARRAY,&jsubgrps))) goto done; for(i=0; ichildren); i++) { NC_GRP_INFO_T* g = (NC_GRP_INFO_T*)ncindexith(grp->children,i); - if((stat = NCJaddstring(jsubgrps,NCJ_STRING,g->hdr.name))) goto done; + ncbytesclear(fqn); + if((stat = NCZ_makeFQN(g->parent,(NC_OBJ*)g,fqn))) goto done; + if((stat = NCJaddstring(jsubgrps,NCJ_STRING,ncbytescontents(fqn)))) goto done; } - /* Create the "_nczarr_group" dict */ - if((stat = NCJnew(NCJ_DICT,&jncgrp))) - goto done; - /* Insert the various dicts and arrays */ - if((stat = NCJinsert(jncgrp,"dims",jdims))) goto done; - jdims = NULL; /* avoid memory problems */ - if((stat = NCJinsert(jncgrp,"vars",jvars))) goto done; - jvars = NULL; /* avoid memory problems */ - if((stat = NCJinsert(jncgrp,"groups",jsubgrps))) goto done; - jsubgrps = NULL; /* avoid memory problems */ - } + /* Assemble the superblock */ - /* build Z3GROUP contents */ - if((stat = NCJnew(NCJ_DICT,&jgroup))) goto done; - if((stat = NCJinsertstring(jgroup,"node_type","group"))) goto done; - snprintf(version,sizeof(version),"%d",zfile->zarr.zarr_format); - if((stat = NCJinsertstring(jgroup,"zarr_format",version))) goto done; - - /* Insert the group attributes */ - /* Build the attributes dictionary */ - assert(grp->att); - if((stat = build_atts(file,zfile,(NC_OBJ*)grp, grp->att, &jatts, &jtypes))) goto done; - if((stat = NCJinsert(jgroup,"attributes",jatts))) goto done; - if(!purezarr && jtypes) - {if((stat = NCJinsert(jgroup,NCZ_V3_ATTR,jtypes))) goto done;} - - if(!purezarr && grp->parent 
== NULL) { /* Root group */ - /* Track the library version that wrote this */ - strncpy(version,NCZARR_PACKAGE_VERSION,sizeof(version)); - if((stat = NCJnew(NCJ_DICT,&jsuper))) goto done; - if((stat = NCJinsertstring(jsuper,"version",version))) goto done; - snprintf(version,sizeof(version),"%u", (unsigned)zfile->zarr.nczarr_format); - if((stat = NCJinsertstring(jsuper,"format",version))) goto done; - if((stat = NCJinsert(jgroup,NCZ_V3_SUPERBLOCK,jsuper))) goto done; - jsuper = NULL; + /* Insert the "dimensions" dict */ + if((stat = NCJinsert(jsuper,"dimensions",jdims))) goto done; + jdims = NULL; + + /* Insert the "arrays" dict */ + if((stat = NCJinsert(jsuper,"arrays",jarrays))) goto done; + jarrays = NULL; + + /* Insert the "groups" dict */ + if((stat = NCJinsert(jsuper,"groups",jsubgrps))) goto done; + jsubgrps = NULL; } - if(!purezarr) { + /* Insert superblock into root group */ + if(jsuper != NULL) { /* Disable must_understand */ if((stat = NCJinsertstring(jgroup,"must_understand","false"))) goto done; - /* Insert the "_nczarr_group" dict */ - if((stat = NCJinsert(jgroup,NCZ_V3_GROUP,jncgrp))) goto done; - jncgrp = NULL; + assert(jgroup != NULL); + if((stat = NCJinsert(jgroup,NCZ_V3_SUPERBLOCK,jsuper))) goto done; + jsuper = NULL; } /* build Z3GROUP path */ @@ -253,15 +260,14 @@ write_grp(NC_FILE_INFO_T* file, NCZ_FILE_INFO_T* zfile, NCZMAP* map, NC_GRP_INFO } done: - NCJreclaim(jtmp); + nullfree(key); + nullfree(fullpath); + ncbytesfree(fqn); NCJreclaim(jsuper); - NCJreclaim(jncgrp); NCJreclaim(jgroup); - NCJreclaim(jdims); - NCJreclaim(jvars); + NCJreclaim(jarrays); NCJreclaim(jsubgrps); - nullfree(fullpath); - nullfree(key); + NCJreclaim(jdims); return ZUNTRACE(THROW(stat)); } @@ -1369,48 +1375,45 @@ NCZ_load_jatts(NCjson* jblock, NCjson** jattsp, NClist** atypesp) /** * @internal Synchronize dimension data from memory to map. * - * @param grp Pointer to grp struct containing the dims. + * @param file pointer to file struct * * @return ::NC_NOERR No error. 
* @author Dennis Heimbigner */ static int -NCZ_collect_dims(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NCjson** jdimsp) +NCZ_collect_dims(NC_FILE_INFO_T* file, NCjson** jdimsp) { int i, stat=NC_NOERR; NCjson* jdims = NULL; NCjson* jdimsize = NULL; NCjson* jdimargs = NULL; + NCbytes* fqn = ncbytesnew(); + char slen[64]; ZTRACE(3,"file=%s grp=%s",file->controller->path,grp->hdr.name); NCJnew(NCJ_DICT,&jdims); - for(i=0; idim); i++) { - NC_DIM_INFO_T* dim = (NC_DIM_INFO_T*)ncindexith(grp->dim,i); - char slen[128]; + + for(i=0; ialldims); i++) { + NC_DIM_INFO_T* dim = (NC_DIM_INFO_T*)nclistget(file->alldims,i); + + /* Compute FQN for dimension */ + ncbytesclear(fqn); + if((stat = NCZ_makeFQN(dim->container,(NC_OBJ*)dim,fqn))) goto done; snprintf(slen,sizeof(slen),"%llu",(unsigned long long)dim->len); if((stat = NCJnewstring(NCJ_INT,slen,&jdimsize))) goto done; - /* If dim is not unlimited, then write in the old format to provide - maximum back compatibility. - */ - if(dim->unlimited) { - NCJnew(NCJ_DICT,&jdimargs); - if((stat = NCJaddstring(jdimargs,NCJ_STRING,"size"))) goto done; - if((stat = NCJappend(jdimargs,jdimsize))) goto done; - jdimsize = NULL; - if((stat = NCJaddstring(jdimargs,NCJ_STRING,"unlimited"))) goto done; - if((stat = NCJaddstring(jdimargs,NCJ_INT,"1"))) goto done; - } else { /* !dim->unlimited */ - jdimargs = jdimsize; - jdimsize = NULL; - } - if((stat = NCJaddstring(jdims,NCJ_STRING,dim->hdr.name))) goto done; - if((stat = NCJappend(jdims,jdimargs))) goto done; + if((stat = NCJnew(NCJ_DICT,&jdimargs))) goto done; + if((stat = NCJinsert(jdimargs,"size",jdimsize))) goto done; + jdimsize = NULL; + if(dim->unlimited) + {if((stat = NCJinsertstring(jdimargs,"unlimited","1"))) goto done;} + if((stat = NCJinsert(jdims,ncbytescontents(fqn),jdimargs))) goto done; } if(jdimsp) {*jdimsp = jdims; jdims = NULL;} done: + ncbytesfree(fqn); NCJreclaim(jdims); return ZUNTRACE(THROW(stat)); } @@ -2214,3 +2217,4 @@ NCZF3_finalize(void) { return NC_NOERR; } + diff --git 
a/libnczarr/zutil.c b/libnczarr/zutil.c
index 5e10ffaf91..c68b8f815f 100644
--- a/libnczarr/zutil.c
+++ b/libnczarr/zutil.c
@@ -108,6 +108,8 @@ NCJ_STRING, /*NC_STRING*/
 };
 
 /* Forward */
+static char* backslashescape(const char* s);
+static char* deescape(const char* esc);
 
 /**************************************************/
 
@@ -1114,3 +1116,150 @@ NCZ_iscomplexjson(NCjson* json, nc_type typehint)
 done:
     return stat;
 }
+
+/* Append to fqn the escaped path "/<group>.../<object>" for object located in group parent; fqn is supplied by the caller and only appended to; returns NC_NOERR or NC_ENOMEM */
+int
+NCZ_makeFQN(NC_GRP_INFO_T* parent, NC_OBJ* object, NCbytes* fqn)
+{
+    int i, stat = NC_NOERR;
+    NClist* segments = nclistnew();
+    NC_GRP_INFO_T* grp = NULL;
+    char* escaped = NULL;
+
+    /* Add in the object name */
+    if((escaped = backslashescape(object->name))==NULL) {stat = NC_ENOMEM; goto done;}
+    nclistpush(segments,escaped);
+    escaped = NULL;
+
+    /* Collect the group prefix segments (escaped), innermost first; a group with no parent contributes no segment */
+    for(grp=parent;grp!=NULL && grp->parent!=NULL;grp=grp->parent) {
+        /* Add in the group name */
+        if((escaped = backslashescape(grp->hdr.name))==NULL) {stat = NC_ENOMEM; goto done;}
+        nclistpush(segments,escaped);
+        escaped = NULL;
+    }
+
+    /* Create the fqn: segments were pushed leaf-first, so emit them in reverse, each preceded by '/' */
+    for(i=((int)nclistlength(segments))-1;i>=0;i--) {
+        ncbytescat(fqn,"/");
+        ncbytescat(fqn,nclistget(segments,i));
+    }
+
+done:
+    nclistfreeall(segments);
+    nullfree(escaped);
+    return THROW(stat);
+}
+
+/* Parse an fqn into a sequence of groups + some kind of object; each object found is pushed onto path; fqn0 must begin with '/'; returns NC_EINVAL if a segment cannot be found or a non-final entry is not a group */
+int
+NCZ_parseFQN(NC_FILE_INFO_T* file, const char* fqn0, NClist* path)
+{
+    int i,ret = NC_NOERR;
+    int count;
+    char* p;
+    char* start;
+    char* fqn = NULL;
+    char* descaped = NULL;
+    NC_GRP_INFO_T* grp = NULL;
+
+    assert(fqn0 != NULL && fqn0[0] == '/');
+    if((fqn = strdup(fqn0)) == NULL) {ret = NC_ENOMEM; goto done;} /* private copy that can be cut up in place */
+    start = fqn+1; /* leave off the leading '/' */
+    count = 0;
+    /* Step 1: Break fqn into pieces at occurrences of '/' */
+    for(p=start;*p;) {
+        switch(*p) {
+        case '\\':
+            p += (p[1] != '\0') ? 2 : 1; /* skip the escaped char, but never step over the terminating nul */
+            break;
+        case '/': /*capture the piece name */
+            *p++ = '\0';
+            start = p; /* mark start of the next part */
+            count++;
+            break;
+        default: /* ordinary char */
+            p++;
+            break;
+        }
+    }
+    /* Re-walk to convert to groups */
+    p = fqn+1;
+    grp = file->root_grp; /* Starting group */
+    for(i=0;i<count;i++) {
+        /* NOTE(review): the lines from the loop head to the children lookup were lost in extraction and are reconstructed; also grp is never advanced and count only counts '/' separators -- confirm against upstream */
+        NC_OBJ* object = NULL;
+        /* Undo the backslash escaping before looking the name up */
+        if((descaped = deescape(p)) == NULL) {ret = NC_ENOMEM; goto done;}
+        object = ncindexlookup(grp->children,descaped);
+        if(object == NULL) object = ncindexlookup(grp->dim,descaped);
+        if(object == NULL) object = ncindexlookup(grp->vars,descaped);
+        if(object == NULL) object = ncindexlookup(grp->type,descaped);
+        if(object == NULL) object = ncindexlookup(grp->att,descaped);
+        if(object == NULL) {ret = NC_EINVAL; goto done;}
+        nclistpush(path,object);
+        p = p + strlen(p) + 1; /* skip past the terminating nul */
+        nullfree(descaped);
+        descaped = NULL;
+    }
+    /* Verify this looks like an FQN */
+    for(i=0;i<(count-1);i++) {
+        NC_OBJ* object = nclistget(path,i);
+        if(object->sort != NCGRP) {ret = NC_EINVAL; goto done;}
+    }
+done:
+    nullfree(descaped);
+    nullfree(fqn);
+    return THROW(ret);
+}
+/* Return a malloc'd copy of s in which each '\\', '/', '.' and '@' is preceded by a backslash; NULL if malloc fails; caller frees */
+static char*
+backslashescape(const char* s)
+{
+    const char* p;
+    char* q;
+    size_t len;
+    char* escaped = NULL;
+
+    len = strlen(s);
+    escaped = (char*)malloc(1+(2*len)); /* max is everychar is escaped */
+    if(escaped == NULL) return NULL;
+    for(p=s,q=escaped;*p;p++) {
+        char c = *p;
+        switch (c) {
+        case '\\':
+        case '/':
+        case '.':
+        case '@':
+            *q++ = '\\'; *q++ = c; /* backslash, then the character itself (a second backslash here would lose the character) */
+            break;
+        default: *q++ = c; break;
+        }
+    }
+    *q = '\0';
+    return escaped;
+}
+/* Return a malloc'd copy of esc with each escaping backslash removed and the character after it kept literally; NULL if esc is NULL or malloc fails; caller frees */
+static char*
+deescape(const char* esc)
+{
+    size_t len;
+    char* s;
+    const char* p;
+    char* q;
+
+    if(esc == NULL) return NULL;
+    len = strlen(esc);
+    s = (char*)malloc(len+1);
+    if(s == NULL) return NULL;
+    for(p=esc,q=s;*p;) {
+        switch (*p) {
+        case '\\':
+            if(*++p == '\0') break; /* dangling backslash: stop here instead of copying and stepping past the terminator */
+            /* fall thru */
+        default: *q++ = *p++; break;
+        }
+    }
+    *q = '\0';
+    return s;
+}