| 21 | | return null; |
|---|
| | 35 | Data inputData = convertInputData(data[0]); |
|---|
| | 36 | Table scopusTable = (Table) inputData.getData(); |
|---|
| | 37 | //normalize author names |
|---|
| | 38 | scopusTable = normalizeAuthorNames(scopusTable); |
|---|
| | 39 | Data[] outputData = formatAsData(scopusTable); |
|---|
| | 40 | return outputData; |
|---|
| | 41 | } |
|---|
| | 42 | |
|---|
| | 43 | |
|---|
| | 44 | private Data convertInputData(Data inputData) { |
|---|
| | 45 | DataConversionService converter = (DataConversionService) |
|---|
| | 46 | context.getService(DataConversionService.class.getName()); |
|---|
| | 47 | //this is a bit like a cast. We know the nsf format is also a csv, so we change the format to csv so |
|---|
| | 48 | //the Conversion service knows it is a csv when it tries to convert it to a prefuse.data.Table |
|---|
| | 49 | |
|---|
| | 50 | //printTable((Table) inputData.getData()); |
|---|
| | 51 | Data formatChangedData = new BasicData(inputData.getMetaData(), (File) inputData.getData(), "file:text/csv"); |
|---|
| | 52 | Data convertedData = converter.convert(formatChangedData, "prefuse.data.Table"); |
|---|
| | 53 | return convertedData; |
|---|
| | 54 | } |
|---|
| | 55 | |
|---|
| | 56 | private Table normalizeAuthorNames(Table scopusTable) { |
|---|
| | 57 | System.out.println("Normalizing Author Names..."); |
|---|
| | 58 | Column authorColumn = scopusTable.getColumn(AUTHOR_COLUMN_NAME); |
|---|
| | 59 | if (authorColumn == null) { |
|---|
| | 60 | printNoAuthorColumnWarning(); |
|---|
| | 61 | return scopusTable; |
|---|
| | 62 | } |
|---|
| | 63 | try { |
|---|
| | 64 | for (int rowIndex = scopusTable.getMinimumRow(); rowIndex < scopusTable.getMaximumRow(); rowIndex++) { |
|---|
| | 65 | String authors = authorColumn.getString(rowIndex); |
|---|
| | 66 | if (authors != null && ! authors.equals("")) { |
|---|
| | 67 | System.out.println(" normalizing:" + authors); |
|---|
| | 68 | String normalizedAuthors = normalizeAuthorNames(authors); |
|---|
| | 69 | authorColumn.setString(normalizedAuthors, rowIndex); |
|---|
| | 70 | System.out.println(" result :" + normalizedAuthors); |
|---|
| | 71 | } |
|---|
| | 72 | } |
|---|
| | 73 | } catch (DataTypeException e1) { |
|---|
| | 74 | printColumnNotOfTypeStringWarning(); |
|---|
| | 75 | return scopusTable; |
|---|
| | 76 | } |
|---|
| | 77 | return scopusTable; |
|---|
| | 78 | } |
|---|
| | 79 | |
|---|
| | 80 | private String normalizeAuthorNames(String authorNames) { |
|---|
| | 81 | //trim leading and trailing whitespace from each author name. |
|---|
| | 82 | StringBuilder normalizedAuthorNames = new StringBuilder(); |
|---|
| | 83 | String[] eachAuthorName = authorNames.split(AUTHOR_COLUMN_NAME_SEPARATOR); |
|---|
| | 84 | for (int i = 0; i < eachAuthorName.length; i++) { |
|---|
| | 85 | String authorName = eachAuthorName[i]; |
|---|
| | 86 | String normalizedAuthorName = authorName.trim(); |
|---|
| | 87 | normalizedAuthorNames.append(normalizedAuthorName); |
|---|
| | 88 | if (i < eachAuthorName.length) { |
|---|
| | 89 | //append separator to the end all but the last author name |
|---|
| | 90 | normalizedAuthorNames.append(AUTHOR_COLUMN_NAME_SEPARATOR); |
|---|
| | 91 | } |
|---|
| | 92 | } |
|---|
| | 93 | return normalizedAuthorNames.toString(); |
|---|
| | 94 | } |
|---|
| | 95 | |
|---|
| | 96 | private Data[] formatAsData(Table scopusTable) { |
|---|
| | 97 | try{ |
|---|
| | 98 | Data[] dm = new Data[] {new BasicData(scopusTable, "prefuse.data.Table")}; |
|---|
| | 99 | dm[0].getMetaData().put(DataProperty.LABEL, "Normalized Scopus table"); |
|---|
| | 100 | dm[0].getMetaData().put(DataProperty.TYPE, DataProperty.TEXT_TYPE); |
|---|
| | 101 | return dm; |
|---|
| | 102 | }catch (SecurityException exception){ |
|---|
| | 103 | log.log(LogService.LOG_ERROR, "SecurityException", exception); |
|---|
| | 104 | exception.printStackTrace(); |
|---|
| | 105 | return null; |
|---|
| | 106 | } |
|---|
| | 107 | } |
|---|
| | 108 | |
|---|
| | 109 | private void printNoAuthorColumnWarning() { |
|---|
| | 110 | this.log.log(LogService.LOG_WARNING, "Unable to find column with the name '" + |
|---|
| | 111 | AUTHOR_COLUMN_NAME + "' in scopus file. " + |
|---|
| | 112 | "We will continue on without attempting to normalize this column"); |
|---|
| | 113 | } |
|---|
| | 114 | |
|---|
| | 115 | private void printColumnNotOfTypeStringWarning() { |
|---|
| | 116 | this.log.log(LogService.LOG_WARNING, "The column '" + AUTHOR_COLUMN_NAME + |
|---|
| | 117 | "' in the scopus file cannot be normalized, because it cannot be interpreted as text. Skipping normalization of authors"); |
|---|