From 9f7ca135debd05c42deb9b7d307a9bbe4945dede Mon Sep 17 00:00:00 2001 From: Marco Grassi Date: Tue, 9 Apr 2024 18:01:09 +0200 Subject: [PATCH 1/3] feat: inner and left joins --- .../template/utils/TemplateFunctions.java | 315 ++++++++++++++---- .../template/TemplateFunctionsTest.java | 148 ++++++++ 2 files changed, 389 insertions(+), 74 deletions(-) diff --git a/src/main/java/com/cefriel/template/utils/TemplateFunctions.java b/src/main/java/com/cefriel/template/utils/TemplateFunctions.java index 932286a..06f1201 100644 --- a/src/main/java/com/cefriel/template/utils/TemplateFunctions.java +++ b/src/main/java/com/cefriel/template/utils/TemplateFunctions.java @@ -31,10 +31,7 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Paths; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.util.*; public class TemplateFunctions { @@ -43,35 +40,36 @@ public class TemplateFunctions { /** * If a prefix is set, removes it from the parameter {@code String s}. If a prefix is not set, * or the prefix is not contained in the given string it returns the string as it is. + * * @param s String representing an IRI * @return String value */ public String rp(String s) { - if (s!=null && prefix!=null) - if (s.contains(prefix)) { - return s.replace(prefix, ""); - } + if (s != null && prefix != null) if (s.contains(prefix)) { + return s.replace(prefix, ""); + } return s; } /** * Set the prefix used by the {@link #rp(String s)} function + * * @param prefix String prefix */ public void setPrefix(String prefix) { - if (prefix != null) - this.prefix = prefix; + if (prefix != null) this.prefix = prefix; } /** * Returns the substring of the parameter {@code String s} after the first occurrence * of the parameter {@code String substring}. - * @param s String to be modified + * + * @param s String to be modified * @param substring Pattern to get the substring * @return Suffix substring */ public String sp(String s, String substring) { - if (s!=null) { + if (s != null) { return s.substring(s.indexOf(substring) + substring.length()); } return s; @@ -80,12 +78,13 @@ public String sp(String s, String substring) { /** * Returns the substring of the parameter {@code String s} before the first occurrence * of the parameter {@code String substring}. - * @param s String to be modified + * + * @param s String to be modified * @param substring Pattern to get the substring * @return Prefix substring */ public String p(String s, String substring) { - if (s!=null) { + if (s != null) { return s.substring(0, s.indexOf(substring)); } return s; @@ -93,19 +92,20 @@ public String p(String s, String substring) { /** * Returns a string replacing all the occurrences of the regex with the replacement provided. - * @param s String to be modified - * @param regex Regex to be matched + * + * @param s String to be modified + * @param regex Regex to be matched * @param replacement String to be used as replacement * @return Modified string */ public String replace(String s, String regex, String replacement) { - if (s != null) - return s.replaceAll(regex, replacement); + if (s != null) return s.replaceAll(regex, replacement); return null; } /** * Returns a new line char. + * * @return A new line char. */ public String newline() { @@ -114,21 +114,22 @@ public String newline() { /** * Returns a string representing the hash of the parameter {@code String s}. + * * @param s String to be hashed. * @return String representing the computed hash. */ public String hash(String s) { - if (s == null) - return null; + if (s == null) return null; return Integer.toString(s.hashCode()); } /** * Returns {@code true} if the string is not null and not an empty string. + * * @param s String to be checked * @return boolean */ - public boolean checkString(String s){ + public boolean checkString(String s) { return s != null && !s.trim().isEmpty(); } @@ -136,8 +137,9 @@ public boolean checkString(String s){ * Creates a support data structure ({@link java.util.HashMap}) to access query results faster. Builds a map associating * a single row with its value w.r.t a specified column ({@code key} parameter). The assumption is * that for each row the value for the given column is unique, otherwise, the result will be incomplete. + * * @param results The result of a SPARL query - * @param key The variable to be used to build the map + * @param key The variable to be used to build the map * @return Map associating to each value of the variable {@code key} a row of the query results. */ public Map> getMap(List> results, String key) { @@ -152,8 +154,9 @@ public Map> getMap(List> results /** * Creates a support data structure ({@link java.util.HashMap}) to access query results faster. Builds a map associating * a value with all rows having that as value for a specified column ({@code key} parameter). + * * @param results The result of a SPARL query - * @param key The variable to be used to build the map + * @param key The variable to be used to build the map * @return Map associating to each value of the variable {@code key} a {@link java.util.List} of rows in the query results. */ public Map>> getListMap(List> results, String key) { @@ -167,11 +170,11 @@ public Map>> getListMap(List> splitColumn(List> df, String columnName, String regex) { - for (Map row: df) { + public List> splitColumn(List> df, String columnName, String regex) { + for (Map row : df) { String[] values = row.get(columnName).split(regex); Map x = new HashMap<>(); - for(int i=0; i < values.length; i++){ + for (int i = 0; i < values.length; i++) { String key = columnName + (i + 1); row.put(key, values[i]); } @@ -182,83 +185,89 @@ public List> splitColumn(List> df, String /** * Returns {@code true} if {@code l} is not null and not an empty string. - * @param l List to be checked + * + * @param l List to be checked * @param Type of objects contained in the list * @return boolean */ - public boolean checkList(List l){ + public boolean checkList(List l) { return l != null && !l.isEmpty(); } /** * Returns {@code true} if {@code l} is not null, not empty and contains {@code o}. - * @param l List to be checked - * @param o Value in the list to be checked + * + * @param l List to be checked + * @param o Value in the list to be checked * @param Type of objects contained in the list * @return boolean */ - public boolean checkList(List l, T o){ + public boolean checkList(List l, T o) { return checkList(l) && l.contains(o); } /** * Returns {@code true} if {@code m} is not null and not empty. - * @param m Map to be checked + * + * @param m Map to be checked * @param Type for keys in the map * @param Type for values in the map * @return boolean */ - public boolean checkMap(Map m){ + public boolean checkMap(Map m) { return m != null && !m.isEmpty(); } /** * Returns {@code true} if {@code m} is not null, not empty and contains {@code key} as key. - * @param m Map to be checked + * + * @param m Map to be checked * @param key Key to be checked * @param Type for keys in the map * @param Type for values in the map * @return boolean */ - public boolean checkMap(Map m, K key){ + public boolean checkMap(Map m, K key) { return checkMap(m) && m.containsKey(key); } /** * If {@link #checkMap(Map, Object)} is {@code true} returns the value for {@code key} in {@code map}, * otherwise returns {@code null}. - * @param map Map to be accessed - * @param key Key to be used to access the map - * @param Type for keys in the map - * @param Type for values in the map + * + * @param map Map to be accessed + * @param key Key to be used to access the map + * @param Type for keys in the map + * @param Type for values in the map * @param defaultValue Value to return when key is not found in map. Defaults to null if not passed as parameter. * @return The value of type {@code V} associated with {@code key} in the map */ - public V getMapValue(Map map, K key, V defaultValue){ + public V getMapValue(Map map, K key, V defaultValue) { return checkMap(map, key) ? map.get(key) : defaultValue; } - public V getMapValue(Map map, K key){ + + public V getMapValue(Map map, K key) { return getMapValue(map, key, null); } /** * If {@link #checkMap(Map, Object)} is {@code true} returns the list for {@code key} in {@code map}, * otherwise returns an empty {@link java.util.List}. + * * @param listMap Map to be accessed - * @param key Key to be used to access the map - * @param Type for keys in the map - * @param Type for lists used as value in the map + * @param key Key to be used to access the map + * @param Type for keys in the map + * @param Type for lists used as value in the map * @return The list of type {@code V} associated with {@code key} in the map */ - public List getListMapValue(Map> listMap, K key){ - if (checkMap(listMap, key)) - return listMap.get(key); - else - return new ArrayList<>(); + public List getListMapValue(Map> listMap, K key) { + if (checkMap(listMap, key)) return listMap.get(key); + else return new ArrayList<>(); } /** * Reads given file as a string. + * * @param fileName path to the file * @return the file's contents * @throws IOException if read fails for any reason @@ -271,19 +280,17 @@ public String getFileAsString(String fileName) throws IOException { /** * Get a RDFReader to query the RDF content of the provided file. * The RDF format is inferred from the extension of the file (default: Turtle). + * * @param fileName The file path for the RDF file. * @return An RDFReader * @throws Exception */ public RDFReader getRDFReaderFromFile(String fileName) throws Exception { RDFReader rdfReader = new RDFReader(); - if (fileName != null) - if ((new File(fileName)).exists()) { - RDFFormat format = Rio.getParserFormatForFileName(fileName).orElse(RDFFormat.TURTLE); - rdfReader.addFile(fileName, format); - } - else - throw new IllegalArgumentException("FILE: " + fileName + " FOR RDFREADER DOES NOT EXIST"); + if (fileName != null) if ((new File(fileName)).exists()) { + RDFFormat format = Rio.getParserFormatForFileName(fileName).orElse(RDFFormat.TURTLE); + rdfReader.addFile(fileName, format); + } else throw new IllegalArgumentException("FILE: " + fileName + " FOR RDFREADER DOES NOT EXIST"); return rdfReader; } @@ -291,7 +298,8 @@ public RDFReader getRDFReaderFromFile(String fileName) throws Exception { /** * Get a RDFReader to query the RDF content of the provided string. * The RDF format can be provided specifying the MIME type (default: text/turtle). - * @param s The RDF string. + * + * @param s The RDF string. * @param MIMEType The MIME type for the RDF format. * @return An RDFReader * @throws Exception @@ -306,21 +314,21 @@ public RDFReader getRDFReaderFromString(String s, String MIMEType) throws Except /** * Get a RDFReader to query the RDF content of a remote triplestore. - * @param address Address of the triplestore + * + * @param address Address of the triplestore * @param repositoryId Repository Id for the triplestore - * @param context Optional named graph to be considered + * @param context Optional named graph to be considered * @return An RDFReader * @throws Exception */ public RDFReader getRDFReaderForRepository(String address, String repositoryId, String context) throws Exception { - if (context != null) - return new RDFReader(address, repositoryId, context); - else - return new RDFReader(address, repositoryId); + if (context != null) return new RDFReader(address, repositoryId, context); + else return new RDFReader(address, repositoryId); } /** * Get a XMLReader to query the XML content of the provided file. + * * @param fileName The file path for the XML file. * @return An XMLReader * @throws Exception @@ -332,6 +340,7 @@ public XMLReader getXMLReaderFromFile(String fileName) throws Exception { /** * Get a XMLReader to query the XML content of the provided string. + * * @param s The XML string. * @return An XMLReader * @throws Exception @@ -345,6 +354,7 @@ public XMLReader getXMLReaderFromString(String s) throws Exception { /** * Get a JSONReader to query the JSON content of the provided file. + * * @param fileName The file path for the JSON file. * @return A JSONReader * @throws Exception @@ -356,6 +366,7 @@ public JSONReader getJSONReaderFromFile(String fileName) throws Exception { /** * Get a JSONReader to query the JSON content of the provided string. + * * @param s The JSON string. * @return An JSONReader * @throws Exception @@ -369,6 +380,7 @@ public JSONReader getJSONReaderFromString(String s) throws Exception { /** * Get a CSVReader to query the CSV content of the provided file. + * * @param fileName The file path for the CSV file. * @return A CSVReader * @throws Exception @@ -380,6 +392,7 @@ public CSVReader getCSVReaderFromFile(String fileName) throws Exception { /** * Get a CSVReader to query the CSV content of the provided string. + * * @param s The CSV string. * @return A CSVReader * @throws Exception @@ -393,31 +406,185 @@ public CSVReader getCSVReaderFromString(String s) throws Exception { /** * Get a SQLReader for a remote database. - * @param driver Driver id in JDBC for the database considered - * @param url URL to access the database + * + * @param driver Driver id in JDBC for the database considered + * @param url URL to access the database * @param databaseName Name of the database - * @param username Username for the database - * @param password Password for the database + * @param username Username for the database + * @param password Password for the database * @return An SQLReader * @throws Exception */ - public SQLReader getSQLReaderFromDatabase(String driver, String url, String databaseName, String username, String password) throws Exception { + public SQLReader getSQLReaderFromDatabase(String driver, String url, String databaseName, String username, String password) throws Exception { return new SQLReader(driver, url, databaseName, username, password); } /** * Merge two lists of results from queries on a {@link Reader}. - * @param results Results of a query + * + * @param results Results of a query * @param otherResults Results to be merged * @return Merged results */ - public List> mergeResults(List> results, List> otherResults) { + public List> mergeResults(List> results, List> otherResults) { if (checkList(results)) { - if (checkList(otherResults)) - results.addAll(otherResults); + if (checkList(otherResults)) results.addAll(otherResults); return results; - } else if(checkList(otherResults)) - return otherResults; + } else if (checkList(otherResults)) return otherResults; else return new ArrayList<>(); } + + private Set commonColumnNames(List> leftTable, List> rightTable, String leftKey, String rightKey) { + // check if tables share column names + Set leftTableKeys = new HashSet<>(leftTable.get(0).keySet()); + Set rightTableKeys = new HashSet<>(rightTable.get(0).keySet()); + + // Find common keys between left and right tables + Set commonKeys = new HashSet<>(leftTableKeys); + commonKeys.retainAll(rightTableKeys); + + if (!commonKeys.isEmpty()) { + // Check if joining on the same key, and it's the only shared column name + List commonKeyList = new ArrayList<>(commonKeys); + String commonKey = commonKeyList.get(0); + if (commonKeyList.size() == 1 && commonKey.equals(leftKey) && commonKey.equals(rightKey)) { + // If joining on the same key, and it's the only shared column name, proceed + // Example: Joining on key "b" and "b" and no other column names are shared + // return empty with "ok" semantic + return Collections.emptySet(); + } else { + // Throw exception if columns have the same name and can't perform join + return commonKeys; + } + } + return commonKeys; + } + + public List> leftJoin(List> leftTable, List> rightTable, String key) { + return leftJoin(leftTable, rightTable, key, key); + } + + public List> leftJoin(List> leftTable, List> rightTable, String leftKey, String rightKey) { + + if (leftTable == null && rightTable == null) + throw new IllegalArgumentException("tables in join cannot be null"); + if (leftTable == null) throw new IllegalArgumentException("leftTable cannot be null"); + if (rightTable == null) throw new IllegalArgumentException("rightTable cannot be null"); + + if (leftTable.isEmpty()) return Collections.emptyList(); + // if the right table is empty (columns but no rows, impossible with maps like we use) the result should be all the colums from the left table + all columns from right table with null as values + + var commonKeys = commonColumnNames(leftTable, rightTable, leftKey, rightKey); + if (!commonKeys.isEmpty()) { + throw new RuntimeException("Cannot perform inner join on tables due to duplicate column names: " + commonKeys + ". Column names can be renamed using the 'renameDataFrameColumn' function"); + } else { + Map>> rightTableMap = new HashMap<>(); + for (Map rightMapEntry : rightTable) { + String key = rightMapEntry.get(rightKey); + if (!rightTableMap.containsKey(key)) { + rightTableMap.put(key, List.of(rightMapEntry)); + } else { + List> value = new ArrayList<>(rightTableMap.get(key)); + value.add(rightMapEntry); + rightTableMap.put(key, value); + } + } + + Map emptyRightRow = new HashMap<>(); + + for (String k : rightTable.get(0).keySet()) { + emptyRightRow.put(k, null); + } + List> result = new ArrayList<>(); + + for (var leftRow : leftTable) { + List> matches = rightTableMap.get(leftRow.get(leftKey)); + HashMap joinedRow; + if (matches != null) { // add all columns from each table to the result + for (Map match : matches) { + joinedRow = new HashMap<>(match); + joinedRow.putAll(leftRow); + result.add(joinedRow); + } + + } else { + joinedRow = new HashMap<>(emptyRightRow); + // written in this order the null values in the emptyRightRow get overwritten (if present) by present values in leftRow + // TLDR do not swap the previous and next lines + joinedRow.putAll(leftRow); + result.add(joinedRow); + } + } + return result; + } + } + + public List> innerJoin(List> leftTable, List> rightTable, String key) { + return innerJoin(leftTable, rightTable, key, key); + } + + public List> innerJoin(List> leftTable, List> rightTable, String leftKey, String rightKey) { + + if (leftTable == null && rightTable == null) + throw new IllegalArgumentException("tables in join cannot be null"); + if (leftTable == null) throw new IllegalArgumentException("leftTable cannot be null"); + if (rightTable == null) throw new IllegalArgumentException("rightTable cannot be null"); + + // if either table is empty the return is an empty table/dataframe + if (leftTable.isEmpty() || rightTable.isEmpty()) return Collections.emptyList(); + + var commonKeys = commonColumnNames(leftTable, rightTable, leftKey, rightKey); + if (!commonKeys.isEmpty()) { + throw new RuntimeException("Cannot perform inner join on tables due to duplicate column names: " + commonKeys + ". Column names can be renamed using the 'renameDataFrameColumn' function"); + } else { + Map>> leftTableMap = new HashMap<>(); + for (Map leftMapEntry : leftTable) { + String key = leftMapEntry.get(leftKey); + if (!leftTableMap.containsKey(key)) { + leftTableMap.put(key, List.of(leftMapEntry)); + } else { + List> value = new ArrayList<>(leftTableMap.get(key)); + value.add(leftMapEntry); + leftTableMap.put(key, value); + } + } + + List> result = new ArrayList<>(); + + for (Map rightRow : rightTable) { + List> matches = leftTableMap.get(rightRow.get(rightKey)); + if (matches != null) { // add all columns from each table to the result + for (Map match : matches) { + HashMap joinedRow = new HashMap<>(match); + joinedRow.putAll(rightRow); + result.add(joinedRow); + } + } + } + return result; + } + } + + public List> renameDataFrameColumn(List> dataFrame, String oldColumn, String newColumn) { + if (dataFrame != null) { + if (!dataFrame.isEmpty()) { + Set columnNames = dataFrame.get(0).keySet(); + if (columnNames.contains(newColumn)) { + throw new IllegalArgumentException("dataframe already contain a column named " + newColumn); + } + + for (int i = 0; i < dataFrame.size(); i++) { + Map row = dataFrame.get(i); + Map updatedRow = new HashMap<>(row); + + String v = updatedRow.remove(oldColumn); + updatedRow.put(newColumn, v); + dataFrame.set(i, updatedRow); + } + } + return dataFrame; + } else { + throw new IllegalArgumentException("dataframe cannot be null"); + } + } } diff --git a/src/test/java/com/cefriel/template/TemplateFunctionsTest.java b/src/test/java/com/cefriel/template/TemplateFunctionsTest.java index d47530b..da54082 100644 --- a/src/test/java/com/cefriel/template/TemplateFunctionsTest.java +++ b/src/test/java/com/cefriel/template/TemplateFunctionsTest.java @@ -18,6 +18,7 @@ import com.cefriel.template.io.csv.CSVReader; import com.cefriel.template.utils.TemplateFunctions; +import org.eclipse.rdf4j.query.algebra.Str; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -96,5 +97,152 @@ public void testCustomFunctionsStreamMode() throws Exception { Assertions.assertEquals(result, customTemplateFunctions.printMessage()); } + @Test + public void testInnerJoinSameColumn() { + TemplateFunctions templateFunctions = new TemplateFunctions(); + List> table1 = new ArrayList<>(); + table1.add(Map.of("a", "1", "b", "4")); + table1.add(Map.of("a", "2", "b", "5")); + table1.add(Map.of("a", "1", "b", "5")); + List> table2 = new ArrayList<>(); + table2.add(Map.of("b", "1", "d", "5")); + table2.add(Map.of("b", "4", "d", "33")); + table2.add(Map.of("b", "5", "d", "99")); + table2.add(Map.of("b", "5", "d", "6")); + + var result = templateFunctions.innerJoin(table1, table2, "b"); + assert(result.size() == 5); + for (var row : result) { + assert (row.size() == 3); + } + } + + @Test + public void testInnerJoinDifferentColumn() { + TemplateFunctions templateFunctions = new TemplateFunctions(); + List> table1 = new ArrayList<>(); + table1.add(Map.of("a", "1", "b", "4")); + table1.add(Map.of("a", "2", "b", "5")); + table1.add(Map.of("a", "1", "b", "5")); + List> table2 = new ArrayList<>(); + table2.add(Map.of("c", "1", "d", "5")); + table2.add(Map.of("c", "4", "d", "33")); + table2.add(Map.of("c", "5", "d", "99")); + table2.add(Map.of("c", "5", "d", "6")); + + var result = templateFunctions.innerJoin(table1, table2, "b", "d"); + assert(result.size() == 2); + for (var row : result) { + assert (row.size() == 4); + } + } + + @Test + public void testInnerJoinDuplicateColumnNames() { + TemplateFunctions templateFunctions = new TemplateFunctions(); + List> table1 = new ArrayList<>(); + table1.add(Map.of("d", "1", "b", "4")); + table1.add(Map.of("d", "2", "b", "5")); + + List> table2 = new ArrayList<>(); + table2.add(Map.of("b", "1", "d", "5")); + table2.add(Map.of("b", "4", "d", "33")); + table2.add(Map.of("b", "5", "d", "99")); + + Assertions.assertThrows(RuntimeException.class, () -> templateFunctions.innerJoin(table1, table2, "b", "d")); + } + @Test + public void testInnerJoinRenameColumn() { + TemplateFunctions templateFunctions = new TemplateFunctions(); + List> table1 = new ArrayList<>(); + table1.add(Map.of("d", "1", "b", "4")); + table1.add(Map.of("d", "2", "b", "5")); + List> table2 = new ArrayList<>(); + table2.add(Map.of("b", "1", "d", "5")); + table2.add(Map.of("b", "4", "d", "33")); + table2.add(Map.of("b", "5", "d", "99")); + + table1 = templateFunctions.renameDataFrameColumn(table1, "d", "newName"); + table2 = templateFunctions.renameDataFrameColumn(table2, "b", "otherNewName"); + + var result = templateFunctions.innerJoin(table1, table2, "b", "d"); + assert(result.size() == 1); + for (var row : result) { + assert (row.size() == 4); + } + } + @Test + public void testLeftJoinSameColumn() { + TemplateFunctions templateFunctions = new TemplateFunctions(); + List> table1 = new ArrayList<>(); + table1.add(Map.of("a", "1", "b", "4")); + table1.add(Map.of("a", "2", "b", "5")); + table1.add(Map.of("a", "3", "b", "6")); + List> table2 = new ArrayList<>(); + table2.add(Map.of("b", "1", "d", "5")); + table2.add(Map.of("b", "4", "d", "33")); + table2.add(Map.of("b", "5", "d", "99")); + table2.add(Map.of("b", "5", "d", "6")); + + var result = templateFunctions.leftJoin(table1, table2, "b"); + assert(result.size() == 4); + for (var row : result) { + assert (row.size() == 3); + } + } + + @Test + public void testLeftJoinDifferentColumn() { + TemplateFunctions templateFunctions = new TemplateFunctions(); + List> table1 = new ArrayList<>(); + table1.add(Map.of("a", "1", "b", "4")); + table1.add(Map.of("a", "2", "b", "5")); + table1.add(Map.of("a", "1", "b", "5")); + List> table2 = new ArrayList<>(); + table2.add(Map.of("c", "1", "d", "5")); + table2.add(Map.of("c", "4", "d", "33")); + table2.add(Map.of("c", "5", "d", "99")); + table2.add(Map.of("c", "5", "d", "6")); + + var result = templateFunctions.leftJoin(table1, table2, "b", "d"); + assert(result.size() == 3); + for (var row : result) { + assert (row.size() == 4); + } + } + @Test + public void testLeftJoinDuplicateColumnNames() { + TemplateFunctions templateFunctions = new TemplateFunctions(); + List> table1 = new ArrayList<>(); + table1.add(Map.of("d", "1", "b", "4")); + table1.add(Map.of("d", "2", "b", "5")); + List> table2 = new ArrayList<>(); + table2.add(Map.of("b", "1", "d", "5")); + table2.add(Map.of("b", "4", "d", "33")); + table2.add(Map.of("b", "5", "d", "99")); + + Assertions.assertThrows(RuntimeException.class, () -> templateFunctions.leftJoin(table1, table2, "b", "d")); + } + @Test + public void testLeftJoinRenameColumn() { + TemplateFunctions templateFunctions = new TemplateFunctions(); + List> table1 = new ArrayList<>(); + table1.add(Map.of("d", "1", "b", "4")); + table1.add(Map.of("d", "2", "b", "5")); + table1.add(Map.of("a", "3", "b", "6")); + List> table2 = new ArrayList<>(); + table2.add(Map.of("b", "1", "d", "5")); + table2.add(Map.of("b", "4", "d", "33")); + table2.add(Map.of("b", "5", "d", "99")); + + table1 = templateFunctions.renameDataFrameColumn(table1, "d", "newName"); + table2 = templateFunctions.renameDataFrameColumn(table2, "b", "otherNewName"); + + var result = templateFunctions.innerJoin(table1, table2, "b", "d"); + assert(result.size() == 1); + for (var row : result) { + assert (row.size() == 4); + } + } } From f6b169c98b6c2d419742400706f634cbaf6be86b Mon Sep 17 00:00:00 2001 From: Marco Grassi Date: Tue, 9 Apr 2024 18:11:39 +0200 Subject: [PATCH 2/3] style: rename renameDataFrameColumn to renameDataframeColumn --- .../com/cefriel/template/utils/TemplateFunctions.java | 2 +- .../com/cefriel/template/TemplateFunctionsTest.java | 11 ++++------- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/src/main/java/com/cefriel/template/utils/TemplateFunctions.java b/src/main/java/com/cefriel/template/utils/TemplateFunctions.java index 06f1201..94ff5b6 100644 --- a/src/main/java/com/cefriel/template/utils/TemplateFunctions.java +++ b/src/main/java/com/cefriel/template/utils/TemplateFunctions.java @@ -565,7 +565,7 @@ public List> innerJoin(List> leftTable, } } - public List> renameDataFrameColumn(List> dataFrame, String oldColumn, String newColumn) { + public List> renameDataframeColumn(List> dataFrame, String oldColumn, String newColumn) { if (dataFrame != null) { if (!dataFrame.isEmpty()) { Set columnNames = dataFrame.get(0).keySet(); diff --git a/src/test/java/com/cefriel/template/TemplateFunctionsTest.java b/src/test/java/com/cefriel/template/TemplateFunctionsTest.java index da54082..b0459dc 100644 --- a/src/test/java/com/cefriel/template/TemplateFunctionsTest.java +++ b/src/test/java/com/cefriel/template/TemplateFunctionsTest.java @@ -16,15 +16,12 @@ package com.cefriel.template; -import com.cefriel.template.io.csv.CSVReader; import com.cefriel.template.utils.TemplateFunctions; -import org.eclipse.rdf4j.query.algebra.Str; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; import java.io.File; import java.io.FileInputStream; -import java.io.IOException; import java.nio.file.Path; import java.nio.file.Paths; import java.util.ArrayList; @@ -162,8 +159,8 @@ public void testInnerJoinRenameColumn() { table2.add(Map.of("b", "4", "d", "33")); table2.add(Map.of("b", "5", "d", "99")); - table1 = templateFunctions.renameDataFrameColumn(table1, "d", "newName"); - table2 = templateFunctions.renameDataFrameColumn(table2, "b", "otherNewName"); + table1 = templateFunctions.renameDataframeColumn(table1, "d", "newName"); + table2 = templateFunctions.renameDataframeColumn(table2, "b", "otherNewName"); var result = templateFunctions.innerJoin(table1, table2, "b", "d"); assert(result.size() == 1); @@ -236,8 +233,8 @@ public void testLeftJoinRenameColumn() { table2.add(Map.of("b", "4", "d", "33")); table2.add(Map.of("b", "5", "d", "99")); - table1 = templateFunctions.renameDataFrameColumn(table1, "d", "newName"); - table2 = templateFunctions.renameDataFrameColumn(table2, "b", "otherNewName"); + table1 = templateFunctions.renameDataframeColumn(table1, "d", "newName"); + table2 = templateFunctions.renameDataframeColumn(table2, "b", "otherNewName"); var result = templateFunctions.innerJoin(table1, table2, "b", "d"); assert(result.size() == 1); From ec9adb267c0e4f8139b611bc9c67195b924e253f Mon Sep 17 00:00:00 2001 From: Marco Grassi Date: Mon, 22 Apr 2024 12:01:49 +0200 Subject: [PATCH 3/3] doc: code comment --- .../com/cefriel/template/utils/TemplateFunctions.java | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/main/java/com/cefriel/template/utils/TemplateFunctions.java b/src/main/java/com/cefriel/template/utils/TemplateFunctions.java index 94ff5b6..5500992 100644 --- a/src/main/java/com/cefriel/template/utils/TemplateFunctions.java +++ b/src/main/java/com/cefriel/template/utils/TemplateFunctions.java @@ -538,13 +538,15 @@ public List> innerJoin(List> leftTable, throw new RuntimeException("Cannot perform inner join on tables due to duplicate column names: " + commonKeys + ". Column names can be renamed using the 'renameDataFrameColumn' function"); } else { Map>> leftTableMap = new HashMap<>(); - for (Map leftMapEntry : leftTable) { - String key = leftMapEntry.get(leftKey); + + // support data structure that associates all values (these values are the key) for the column 'leftkey' with their corresponding row, or multiple rows if values are not unique + for (Map leftTableEntry : leftTable) { + String key = leftTableEntry.get(leftKey); if (!leftTableMap.containsKey(key)) { - leftTableMap.put(key, List.of(leftMapEntry)); + leftTableMap.put(key, List.of(leftTableEntry)); } else { List> value = new ArrayList<>(leftTableMap.get(key)); - value.add(leftMapEntry); + value.add(leftTableEntry); leftTableMap.put(key, value); } }