/* =============================================================================
    File Name : nxingest_parse.cpp
    Version   : 1.8
    Component : nxingest
    Developer : Laurent Lerusse
                e-Science Center - Facility Support - Data Management Group
    Purpose   : nxingest extract the metadata from a NeXus file to create an
                XML file according to a mapping file.

    Revision History :
        Version 1.0 05/06/2007 First version.
        Version 1.1 13/06/2007 Remove trailing white space around string.
        Version 1.3 31/08/2007 Bug correction in test. (Logical OR and not
                               bitwise OR)
                               Add a filesize calculation function.
        Version 1.4 04/09/2007 The parameters will not be created if there is
                               no value attached to it.
        Version 1.6 06/09/2007 Bug correction. Connected to changes of
                               version 1.4.
                               Bug Correction. Difference of path between
                               linux / and windows \
        Version 1.7 12/09/2007 Bug correction in parseMix function when
                               parseSpecial return a null string.
        Version 1.8 12/09/2007 Add the capability to split a string according
                               to ' ', ',' and ';' to store it in several tag
                               with the same name e.g. keyword.
                               The mapping is like that :
                               name nexus:/{NXentry}/title | fix: , | nexus:/{NXentry}/notes

    Copyright : nxingest - extraction of metadata from NeXus files into a xml
                document.
                Copyright (C) 2007 STFC e-Science Center

    This program is free software; you can redistribute it and/or modify it
    under the terms of the GNU General Public License as published by the Free
    Software Foundation; version 2 of the License.

    This program is distributed in the hope that it will be useful, but WITHOUT
    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
    more details.
============================================================================= */
#include "nxingest_parse.h"

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>

// Kind of value element emitted by readParam (string_value / numeric_value).
enum param_type { STR, NUM};

// Maximum number of '|' separated parts handled by parseMix.
static const int MAX_MIX_PARTS = 32;

// Copy at most dstSize-1 characters of src into dst and always terminate dst.
static void copyBounded(char *dst, const char *src, size_t dstSize)
{
    if(dst == 0 || dstSize == 0)
        return;
    if(src == 0)
        src = "";
    strncpy(dst, src, dstSize - 1);
    dst[dstSize - 1] = 0;
}

// Copy every attribute of 'from' except "type" onto 'to'. Nothing is copied
// when 'from' carries at most one attribute.
static void copyAttributesExceptType(mxml_node_t *from, mxml_node_t *to)
{
    if(from->value.element.num_attrs <= 1)
        return;
    for(int i = 0; i < from->value.element.num_attrs; i++)
    {
        if(strcmp(from->value.element.attrs[i].name, "type") != 0)
            mxmlElementSetAttr(to, from->value.element.attrs[i].name, from->value.element.attrs[i].value);
    }
}

// *****************************************************************************
//    Function : parseXml
//
//      Walks the mapping tree below topNode, starting after inNode, and builds
//      the matching output tree below *outNode. Elements are dispatched on
//      their "type" attribute :
//          tbl          a new output element is created and its sub tree is
//                       parsed recursively into it.
//          user_tbl     same as tbl, but the sub tree is parsed once for each
//                       user, advancing with nx.nextUser() until it returns -1.
//          keyword_tag  handled by readKeyword.
//          tag          handled by readRecord.
//          param_str    handled by readParam (string value).
//          param_num    handled by readParam (numeric value).
//      Elements without a "type" attribute, or with another type, are only
//      walked through.
//
//      Returns the last mapping node visited by this call, or 0 when no node
//      was visited.
//
// *****************************************************************************
mxml_node_t *parseXml(mxml_node_t *inNode, mxml_node_t *topNode, mxml_node_t **outNode, NxClass nx)
{
    Log log;
    mxml_node_t *inNextNode = 0;
    int type_descent = MXML_DESCEND;

    while( (inNode = mxmlWalkNext(inNode, topNode, type_descent)) != NULL )
    {
        inNextNode = inNode;    // Get the last inNode to pass back to the calling function

        if(inNode->type == MXML_ELEMENT)
        {
            type_descent = MXML_DESCEND;
            const char *type = mxmlElementGetAttr(inNode, "type");
            if(type != 0)
            {
                log.set("parseXml", "Found element : ", inNode->value.element.name).printLevel(NXING_LOG_DEBUG);

                if(strcmp(type, "tbl") == 0 )           // Simple table.
                {
                    log.set("parseXml", "Get new table", inNode->value.element.name).printLevel(NXING_LOG_DEBUG);
                    mxml_node_t *tableOut = mxmlNewElement(*outNode, inNode->value.element.name );
                    // add the attributes of the input node :
                    copyAttributesExceptType(inNode, tableOut);
                    parseXml(inNode, inNode, &tableOut, nx);
                    // The recursion has consumed the whole sub tree, so carry on
                    // with whatever follows this table. The node returned by the
                    // recursion is deliberately not adopted : it is 0 for an
                    // empty table and may be an element whose children would
                    // then be walked a second time at this level.
                    type_descent = MXML_NO_DESCEND;
                }
                else if(strcmp(type, "user_tbl") == 0 ) // User table. There may be several user. Need to loop over the different NXusers.
                {
                    do
                    {
                        log.set("parseXml", type, "Start", inNode->value.element.name).printLevel(NXING_LOG_DEBUG);
                        mxml_node_t *userOut = mxmlNewElement(*outNode, inNode->value.element.name );
                        copyAttributesExceptType(inNode, userOut);
                        log.set("parseXml", type, "Element added", inNode->value.element.name).printLevel(NXING_LOG_DEBUG);
                        // The user table itself is the top of the recursive walk so
                        // that the walk stops at the end of the table instead of
                        // running on into the elements that follow it.
                        parseXml(inNode, inNode, &userOut, nx);
                        log.set("parseXml", type, "Parsed", inNode->value.element.name).printLevel(NXING_LOG_DEBUG);
                    } while(nx.nextUser() != -1);
                    // Skip the sub tree that has just been handled once per user.
                    type_descent = MXML_NO_DESCEND;
                }
                else if(strcmp(type, "keyword_tag") == 0 ) // Tag split into several keywords
                {
                    log.set("parseXml", type, "Read", inNode->value.element.name).printLevel(NXING_LOG_DEBUG);
                    inNode = readKeyword(inNode, outNode, nx);
                    type_descent = MXML_NO_DESCEND;
                }
                else if(strcmp(type, "tag") == 0 )      // simple Tag record
                {
                    log.set("parseXml", type, "Read", inNode->value.element.name).printLevel(NXING_LOG_DEBUG);
                    inNode = readRecord(inNode, outNode, nx);
                    type_descent = MXML_NO_DESCEND;
                }
                else if(strcmp(type, "param_str") == 0 ) // Parameter with a string value
                {
                    log.set("parseXml", type, "Read", inNode->value.element.name).printLevel(NXING_LOG_DEBUG);
                    inNode = readParam(inNode, outNode, nx, STR);
                    type_descent = MXML_NO_DESCEND;
                }
                else if(strcmp(type, "param_num") == 0 ) // Parameter with a numeric value
                {
                    log.set("parseXml", type, "Read", inNode->value.element.name).printLevel(NXING_LOG_DEBUG);
                    inNode = readParam(inNode, outNode, nx, NUM);
                    type_descent = MXML_NO_DESCEND;
                }
            }
        }
        else if(inNode->type == MXML_TEXT
                && inNode->value.text.string != 0
                && strlen(inNode->value.text.string) > 0)
        {
            log.set("parseXml", "Unexpected text (may be comments)", inNode->value.text.string).printLevel(NXING_LOG_DEBUG);
        }
    }
    return (inNextNode);
}

// *****************************************************************************
//    Function : readRecord
//
//      Reads the "icat_name" and "value" children of a tag element and, when
//      both are non empty, adds an element called after the icat_name to
//      *outNode with the resolved value as text. The value is resolved by
//      getValue.
//
//      Returns the node it was given.
//
// *****************************************************************************
mxml_node_t * readRecord(mxml_node_t *inNode, mxml_node_t **outNode, NxClass nx)
{
    Log log;
    mxml_node_t *inNextNode = inNode;
    mxml_node_t *topNode = inNode;
    char name[NXING_BIG_SIZE] = "";
    char value[NXING_BIG_SIZE] = "";

    while( (inNode = mxmlWalkNext(inNode, topNode, MXML_DESCEND)) != NULL )
    {
        if(inNode->type != MXML_ELEMENT)
            continue;

        if(strcmp(inNode->value.element.name, "icat_name") == 0)
        {
            mxmlGetItem(inNode, name);
            log.set("readRecord", "ICAT name", name).printLevel(NXING_LOG_DEBUG);
        }
        else if(strcmp(inNode->value.element.name, "value") == 0)
        {
            // A value that could not be resolved is treated as empty, as in readParam.
            if(getValue(inNode, nx, value) == 0)
                value[0] = 0;
        }
    }

    if(strlen(name) > 0 && strlen(value) > 0)
    {
        mxml_node_t *recordNode = mxmlNewElement(*outNode, name );
        mxmlNewText( recordNode, 0, value);
    }
    return inNextNode;
}

// *****************************************************************************
//    Function : readKeyword
//
//      Reads the "icat_name" and "value" children of a keyword_tag element,
//      splits the resolved value on ' ', ',' and ';' and, for every non empty
//      piece other than "=", adds to *outNode one element named after the
//      keyword_tag element, holding one element named after the icat_name,
//      holding the piece as text.
//
//      Returns the node it was given.
//
// *****************************************************************************
mxml_node_t * readKeyword(mxml_node_t *inNode, mxml_node_t **outNode, NxClass nx)
{
    Log log;
    mxml_node_t *inNextNode = inNode;
    mxml_node_t *topNode = inNode;
    char name[NXING_BIG_SIZE] = "";
    char value[NXING_BIG_SIZE] = "";
    char keywordStr[NXING_BIG_SIZE] = "";
    char keys[] = " ,;";

    copyBounded(keywordStr, inNode->value.element.name, sizeof(keywordStr));

    while( (inNode = mxmlWalkNext(inNode, topNode, MXML_DESCEND)) != NULL )
    {
        if(inNode->type != MXML_ELEMENT)
            continue;

        if(strcmp(inNode->value.element.name, "icat_name") == 0)
        {
            mxmlGetItem(inNode, name);
            log.set("readKeyword", "ICAT name", name).printLevel(NXING_LOG_DEBUG);
        }
        else if(strcmp(inNode->value.element.name, "value") == 0)
        {
            // A value that could not be resolved is treated as empty, as in readParam.
            if(getValue(inNode, nx, value) == 0)
                value[0] = 0;
        }
    }

    if(strlen(name) > 0 && strlen(value) > 0)
    {
        char *piece = value;
        while(piece != 0)
        {
            // Cut the current piece at the first separator, remembering where
            // the next piece starts (0 when the end of the value is reached).
            size_t len = strcspn(piece, keys);
            char *nextPiece = 0;
            if(piece[len] != 0)
            {
                piece[len] = 0;
                nextPiece = piece + len + 1;
            }

            if(len > 0 && strcmp(piece, "=") != 0)
            {
                mxml_node_t *keywordNode = mxmlNewElement(*outNode, keywordStr );
                mxml_node_t *nameNode = mxmlNewElement(keywordNode, name );
                mxmlNewText( nameNode, 0, piece);
            }
            piece = nextPiece;
        }
    }
    return inNextNode;
}

// *****************************************************************************
//    Function : readParam
//
//      Reads the "icat_name", "value", "units" and "description" children of
//      a parameter element. When both name and value are non empty, adds to
//      *outNode a "parameter" element holding :
//          name                            the icat_name
//          string_value or numeric_value   the value, according to paramType
//                                          (STR or NUM)
//          units                           the units, or "N/A" when empty
//          description                     only when a description was found
//      value, units and description are resolved by getValue.
//
//      Returns the node it was given.
//
// *****************************************************************************
mxml_node_t * readParam(mxml_node_t *inNode, mxml_node_t **outNode, NxClass nx, int paramType)
{
    Log log;
    mxml_node_t *inNextNode = inNode;
    mxml_node_t *topNode = inNode;
    char str[NXING_BIG_SIZE] = "";
    std::string name;
    std::string value;
    std::string units;
    std::string description;

    while( (inNode = mxmlWalkNext(inNode, topNode, MXML_DESCEND)) != NULL )
    {
        if(inNode->type != MXML_ELEMENT)
            continue;

        if(strcmp(inNode->value.element.name, "icat_name") == 0)
        {
            char *item = mxmlGetItem(inNode, str);
            if(item == 0)
            {
                // Nothing could be read : log and store an empty name.
                str[0] = 0;
                item = str;
            }
            log.set("readParam", "ICAT name", item).printLevel(NXING_LOG_DEBUG);
            name = item;
        }
        else if(strcmp(inNode->value.element.name, "value") == 0)
        {
            // The raw mapping expression is logged before it is resolved.
            char *raw = mxmlGetItem(inNode, str);
            log.set("readParam", "Value ", raw != 0 ? raw : str).printLevel(NXING_LOG_DEBUG);
            char *resolved = getValue(inNode, nx, str);
            if(resolved != 0) value = resolved;
            else              value = "";
        }
        else if(strcmp(inNode->value.element.name, "units") == 0)
        {
            char *raw = mxmlGetItem(inNode, str);
            log.set("readParam", "Units", raw != 0 ? raw : str).printLevel(NXING_LOG_DEBUG);
            char *resolved = getValue(inNode, nx, str);
            if(resolved != 0) units = resolved;
            else              units = "";
        }
        else if(strcmp(inNode->value.element.name, "description") == 0)
        {
            char *raw = mxmlGetItem(inNode, str);
            log.set("readParam", "Description", raw != 0 ? raw : str).printLevel(NXING_LOG_DEBUG);
            char *resolved = getValue(inNode, nx, str);
            if(resolved != 0) description = resolved;
            else              description = "";
        }
    }

    if(value.size() > 0 && name.size() > 0)
    {
        mxml_node_t *paramNode = mxmlNewElement(*outNode, "parameter" );

        // Name
        mxml_node_t *fieldNode = mxmlNewElement(paramNode, "name" );
        mxmlNewText( fieldNode, 0, name.c_str());

        // Value
        if(paramType == STR)
            fieldNode = mxmlNewElement(paramNode, "string_value" );
        else
            fieldNode = mxmlNewElement(paramNode, "numeric_value" );
        mxmlNewText( fieldNode, 0, value.c_str());

        // Units
        fieldNode = mxmlNewElement(paramNode, "units" );
        if(units.size() > 0)
            mxmlNewText( fieldNode, 0, units.c_str());
        else
            mxmlNewText( fieldNode, 0, "N/A");

        // Description
        if(description.size() > 0)
        {
            fieldNode = mxmlNewElement(paramNode, "description" );
            mxmlNewText( fieldNode, 0, description.c_str());
        }
    }
    return inNextNode;
}

// *****************************************************************************
//    Function : getValue
//
//      Resolves the content of a mapping element into str according to the
//      beginning of its "type" attribute :
//          nexus    the content is a NeXus path read with nx.readTag.
//          fix      the content is used as it is.
//          special  the content is resolved by parseSpecial.
//          mix      the content is resolved by parseMix.
//      For any other type, or when the attribute is missing, str is set to
//      "Unknown type". Trailing spaces are removed from the result.
//
//      Returns the resolved string, or 0 when the resolver returned 0.
//
// *****************************************************************************
char* getValue(mxml_node_t *inNode, NxClass nx, char* str)
{
    char buff[NXING_BIG_SIZE] = "";
    const char *type = mxmlElementGetAttr(inNode, "type");

    if(type == 0)                               // No type attribute at all.
        strcpy(str, "Unknown type");
    else if(strncmp(type, "nexus", 5) == 0)
        str = nx.readTag(mxmlGetItem(inNode, buff), str, -1);
    else if(strncmp(type, "fix", 3) == 0)
        str = mxmlGetItem(inNode, str);
    else if(strncmp(type, "special", 6) == 0)
        str = parseSpecial(mxmlGetItem(inNode, buff), str, nx);
    else if(strncmp(type, "mix", 3) == 0)
        str = parseMix(mxmlGetItem(inNode, buff), str, nx);
    else
        strcpy(str, "Unknown type");

    // Remove trailing white spaces. The length is tracked explicitly so that an
    // empty or all-space string never leads to an access before the buffer.
    if(str != 0)
    {
        size_t len = strlen(str);
        while(len > 0 && str[len - 1] == ' ')
            str[--len] = 0;
    }
    return str;
}

// *****************************************************************************
//    Function : parseSpecial
//
//      Resolves one mapping expression, selected by its prefix :
//          time:SOURCE;IN;OUT   SOURCE is "now" or "nexus(PATH)". IN and OUT
//                               are integers passed as format type to the
//                               Time class and default to 0. "now" is written
//                               with the IN type.
//          fix:TEXT             TEXT is copied as it is.
//          nexus:PATH           PATH is read with nx.readTag.
//          sys:filename         file name part of nx.getLocation.
//          sys:location         result of nx.getLocation.
//          sys:size             result of fileSize on that location.
//      The expression 'in' is modified while it is parsed. str receives the
//      result and must be able to hold it; for "fix:" this means str must be
//      at least as large as 'in' (when str is 0 a pointer into 'in' is
//      returned instead).
//
//      Returns the resolved string, or 0 when 'in' is 0, the prefix is not
//      recognised, a NeXus time or the file location cannot be read, or a Log
//      is thrown while resolving (the Log is then printed as an error).
//
// *****************************************************************************
char* parseSpecial(char* in, char* str, NxClass nx)
{
    if(in == 0)
        return 0;

    char buff[NXING_MED_SIZE] = "";
    try
    {
        if(strncmp(in, "time:", 5) == 0)
        {
            Time tt;
            char timeCntr[3][NXING_MED_SIZE] = { "", "0", "0"};
            int i = 0;
            char *pch = strtok(&in[5], "; ");
            while(pch != NULL)
            {
                if(i < 3)
                    copyBounded(timeCntr[i], pch, NXING_MED_SIZE);
                i++;
                pch = strtok(NULL, ";");
            }
            int inType  = atoi(timeCntr[1]);    // Type in which the time is read
            int outType = atoi(timeCntr[2]);    // Type in which the time is written

            if(strcmp(timeCntr[0], "now") == 0)
            {
                str = tt.now().getTime(str, inType);
            }
            else if(strncmp(timeCntr[0], "nexus(", 6) == 0)
            {
                // Drop the last character (the closing bracket) to get the path.
                timeCntr[0][strlen(timeCntr[0]) - 1] = 0;
                char *nxStr = nx.readTag(&timeCntr[0][6], buff, -1);
                if(nxStr != 0)
                    str = tt.set(nxStr, inType).getTime(str, outType);
                else
                    str = 0;
            }
        }
        else if(strncmp(in, "fix:", 4) == 0)
        {
            if(str == 0)
                return &in[4];
            // Copy into the caller's buffer instead of handing back a pointer
            // into 'in', which the callers do not keep alive.
            memmove(str, &in[4], strlen(&in[4]) + 1);
        }
        else if(strncmp(in, "nexus:", 6) == 0)
        {
            str = nx.readTag(&in[6], str, -1);
        }
        else if(strncmp(in, "sys:", 4) == 0)
        {
            const char *key = &in[4];
            if(strncmp(key, "filename", 8) == 0)
            {
                char *path = nx.getLocation(str);
                if(path == 0)
                    return 0;
                std::string location = path;
                // Keep what follows the last separator; a location without any
                // separator is already a bare file name.
                size_t found = location.find_last_of("/\\");
                if(found != std::string::npos)
                    location.erase(0, found + 1);
                strcpy(str, location.c_str());
            }
            else if(strncmp(key, "location", 8) == 0)
            {
                str = nx.getLocation(str);
            }
            else if(strncmp(key, "size", 4) == 0)
            {
                str = nx.getLocation(str);
                if(str == 0)
                    return 0;
                long fSize = fileSize(str);
                sprintf(str, "%ld", fSize);
            }
        }
        else
            str = 0;

        return str;
    }
    catch(Log &caught)
    {
        caught.printLevel(NXING_LOG_ERROR);
        return 0;
    }
}

// *****************************************************************************
//    Function : parseMix
//
//      Resolves a mix expression : 'in' is split on '|', spaces around each
//      part are removed, each part is resolved by parseSpecial and the results
//      are concatenated into str. Parts that resolve to 0 are skipped. e.g.
//          nexus:/{NXentry}/title | fix: , | nexus:/{NXentry}/notes
//      At most MAX_MIX_PARTS parts are handled, each truncated to
//      NXING_MED_SIZE-1 characters. 'in' is modified while it is split.
//
//      str is assumed to hold at least NXING_BIG_SIZE characters, which is
//      what the caller in this file provides; the result is truncated to fit.
//
//      Returns str.
//
// *****************************************************************************
char* parseMix(char* in, char* str, NxClass nx)
{
    char hardpch[MAX_MIX_PARTS][NXING_MED_SIZE];
    int numMix = 0;

    strcpy(str, "");
    if(in == 0)
        return str;

    // All the parts are collected before any of them is resolved because
    // parseSpecial uses strtok as well, which would break this tokenisation.
    for(char *pch = strtok(in, "|"); pch != NULL && numMix < MAX_MIX_PARTS; pch = strtok(NULL, "|"))
    {
        while(pch[0] == ' ')
            pch++;
        size_t len = strlen(pch);
        while(len > 0 && pch[len - 1] == ' ')
            pch[--len] = 0;
        copyBounded(hardpch[numMix], pch, NXING_MED_SIZE);
        numMix++;
    }

    const size_t capacity = static_cast<size_t>(NXING_BIG_SIZE);
    size_t used = 0;
    for(int i = 0; i < numMix; i++)
    {
        // A fresh, empty buffer for every part : the result of one part is
        // never reused as the output buffer of the next one.
        char piece[NXING_MED_SIZE] = "";
        char *resolved = parseSpecial(hardpch[i], piece, nx);
        if(resolved == 0)
            continue;

        size_t len = strlen(resolved);
        size_t room = capacity - 1 - used;
        if(len > room)
            len = room;
        memcpy(str + used, resolved, len);
        used += len;
        str[used] = 0;
    }
    return str;
}