Changeset 1912

Show
Ignore:
Timestamp:
03/04/08 16:01:18 (10 months ago)
Author:
mwlinnem
Message:

Removes spaces in front of all author names.

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • trunk/test-devel/edu.iu.converter.prefusescopus/META-INF/MANIFEST.MF

    r1911 r1912  
    88 org.cishell.framework.algorithm, 
    99 org.cishell.framework.data, 
     10 org.cishell.service.conversion, 
    1011 org.osgi.framework;version="1.3.0", 
    1112 org.osgi.service.component;version="1.0.0", 
    1213 org.osgi.service.log;version="1.3.0", 
    1314 org.osgi.service.metatype;version="1.1.0", 
    14  org.osgi.service.prefs;version="1.1.0" 
     15 org.osgi.service.prefs;version="1.1.0", 
     16 prefuse, 
     17 prefuse.action, 
     18 prefuse.action.layout, 
     19 prefuse.data, 
     20 prefuse.data.column, 
     21 prefuse.data.event, 
     22 prefuse.data.expression, 
     23 prefuse.data.expression.parser, 
     24 prefuse.data.io, 
     25 prefuse.data.tuple, 
     26 prefuse.data.util, 
     27 prefuse.util, 
     28 prefuse.util.collections, 
     29 prefuse.util.io 
    1530X-AutoStart: true 
    1631Service-Component: OSGI-INF/validator.xml, OSGI-INF/reader.xml 
  • trunk/test-devel/edu.iu.converter.prefusescopus/src/edu/iu/nwb/converter/prefusescopus/ScopusReaderAlgorithm.java

    r1911 r1912  
    11package edu.iu.nwb.converter.prefusescopus; 
    22 
     3import java.io.File; 
    34import java.util.Dictionary; 
    45 
    56import org.cishell.framework.CIShellContext; 
    67import org.cishell.framework.algorithm.Algorithm; 
     8import org.cishell.framework.data.BasicData; 
    79import org.cishell.framework.data.Data; 
     10import org.cishell.framework.data.DataProperty; 
     11import org.cishell.service.conversion.DataConversionService; 
     12import org.osgi.service.log.LogService; 
     13 
     14import prefuse.data.DataTypeException; 
     15import prefuse.data.Table; 
     16import prefuse.data.column.Column; 
    817 
    918public class ScopusReaderAlgorithm implements Algorithm { 
     
    1120    Dictionary parameters; 
    1221    CIShellContext context; 
     22    LogService log; 
     23     
     24    private static final String AUTHOR_COLUMN_NAME = "Authors"; 
     25    private static final String AUTHOR_COLUMN_NAME_SEPARATOR = ","; 
    1326     
    1427    public ScopusReaderAlgorithm(Data[] data, Dictionary parameters, CIShellContext context) { 
     
    1629        this.parameters = parameters; 
    1730        this.context = context; 
     31        this.log = (LogService) context.getService(LogService.class.getName()); 
    1832    } 
    1933 
    2034    public Data[] execute() { 
    21         return null; 
     35        Data inputData = convertInputData(data[0]); 
     36        Table scopusTable = (Table) inputData.getData(); 
     37        //normalize author names 
     38        scopusTable = normalizeAuthorNames(scopusTable); 
     39        Data[] outputData = formatAsData(scopusTable); 
     40        return outputData; 
     41    } 
     42     
     43     
     44    private Data convertInputData(Data inputData) { 
     45         DataConversionService converter = (DataConversionService) 
     46         context.getService(DataConversionService.class.getName()); 
     47                //this is a bit like a cast. We know the nsf format is also a csv, so we change the format to csv so 
     48                //the Conversion service knows it is a csv when it tries to convert it to a prefuse.data.Table 
     49                  
     50                //printTable((Table) inputData.getData()); 
     51                Data formatChangedData = new BasicData(inputData.getMetaData(), (File) inputData.getData(), "file:text/csv"); 
     52                Data convertedData = converter.convert(formatChangedData, "prefuse.data.Table"); 
     53                return convertedData; 
     54    } 
     55     
     56    private Table normalizeAuthorNames(Table scopusTable) { 
     57        System.out.println("Normalizing Author Names..."); 
     58        Column authorColumn = scopusTable.getColumn(AUTHOR_COLUMN_NAME); 
     59        if (authorColumn == null) { 
     60                printNoAuthorColumnWarning(); 
     61                return scopusTable; 
     62        } 
     63        try { 
     64        for (int rowIndex = scopusTable.getMinimumRow(); rowIndex < scopusTable.getMaximumRow(); rowIndex++) { 
     65                String authors = authorColumn.getString(rowIndex); 
     66                if (authors != null && ! authors.equals("")) { 
     67                        System.out.println("  normalizing:" + authors); 
     68                        String normalizedAuthors = normalizeAuthorNames(authors); 
     69                        authorColumn.setString(normalizedAuthors, rowIndex); 
     70                        System.out.println("  result         :" + normalizedAuthors); 
     71                } 
     72        } 
     73        } catch (DataTypeException e1) { 
     74                printColumnNotOfTypeStringWarning(); 
     75                return scopusTable; 
     76        } 
     77        return scopusTable; 
     78    } 
     79     
     80    private String normalizeAuthorNames(String authorNames) { 
     81        //trim leading and trailing whitespace from each author name. 
     82        StringBuilder normalizedAuthorNames = new StringBuilder(); 
     83        String[] eachAuthorName = authorNames.split(AUTHOR_COLUMN_NAME_SEPARATOR); 
     84        for (int i = 0; i < eachAuthorName.length; i++) { 
     85                String authorName = eachAuthorName[i]; 
     86                String normalizedAuthorName = authorName.trim(); 
     87                normalizedAuthorNames.append(normalizedAuthorName); 
     88                if (i < eachAuthorName.length) { 
     89                        //append separator to the end all but the last author name 
     90                        normalizedAuthorNames.append(AUTHOR_COLUMN_NAME_SEPARATOR); 
     91                } 
     92        } 
     93        return normalizedAuthorNames.toString(); 
     94    } 
     95     
     96    private Data[] formatAsData(Table scopusTable) { 
     97        try{ 
     98                        Data[] dm = new Data[] {new BasicData(scopusTable, "prefuse.data.Table")}; 
     99                        dm[0].getMetaData().put(DataProperty.LABEL, "Normalized Scopus table"); 
     100                        dm[0].getMetaData().put(DataProperty.TYPE, DataProperty.TEXT_TYPE); 
     101                        return dm; 
     102                }catch (SecurityException exception){ 
     103                        log.log(LogService.LOG_ERROR, "SecurityException", exception); 
     104                        exception.printStackTrace(); 
     105                        return null; 
     106                } 
     107    } 
     108     
     109    private void printNoAuthorColumnWarning() { 
     110        this.log.log(LogService.LOG_WARNING, "Unable to find column with the name '" + 
     111                        AUTHOR_COLUMN_NAME + "' in scopus file. " + 
     112                                        "We will continue on without attempting to normalize this column"); 
     113    } 
     114     
     115    private void printColumnNotOfTypeStringWarning() { 
     116        this.log.log(LogService.LOG_WARNING, "The column '" + AUTHOR_COLUMN_NAME +  
     117                        "' in the scopus file cannot be normalized, because it cannot be interpreted as text. Skipping normalization of authors"); 
    22118    } 
    23119}