Sorting a CSV file by District
Question
Source: https://stackoverflow.com/questions/67517258/sorting-a-csv-file-by-district
I want to create a program that sorts the following CSV file by district. The following program reads the file and is able to sort it, but it sorts it by name. Is there any method to make it sort by district?
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
/**
 * Sorts the rows of {@code data.csv} by their numeric District column and
 * writes the result to {@code output.csv}.
 *
 * <p>The input is expected to have one header line followed by rows with the
 * columns First Name, Last Name, Email, Address, Age, Residency, District,
 * Gender. Fields may be enclosed in double quotes and may then contain the
 * column separator (for example {@code "980 In, Rd."}). Quoted fields that
 * span several physical lines are not supported, because the file is read
 * line by line.
 *
 * <p>Both files are read and written with the platform default charset.
 */
public class SortDistrict
{
    /** Character that separates the columns of a row. */
    private static final char COLUMN_SEPARATOR_CHAR = ',';

    /** {@link #COLUMN_SEPARATOR_CHAR} as a string, used when writing rows. */
    private static final String COLUMN_SEPARATOR =
        String.valueOf(COLUMN_SEPARATOR_CHAR);

    /** Character that encloses fields containing the column separator. */
    private static final char QUOTE = '"';

    /** Zero-based index of the District column in each row. */
    private static final int DISTRICT_COLUMN = 6;

    /** Header line written to the output; matches the eight data columns. */
    private static final String HEADER =
        "First Name,Last Name,Email,Address,Age,Residency,District,Gender";

    /**
     * Reads {@code data.csv}, sorts its rows by district in ascending
     * numeric order and writes them to {@code output.csv}.
     *
     * @param args ignored
     * @throws Exception if the input cannot be read or the output cannot be
     *         written
     */
    public static void main(String[] args) throws Exception
    {
        List<List<String>> lines;
        try (InputStream inputStream = new FileInputStream("data.csv"))
        {
            lines = readCsv(inputStream);
        }

        // Collections.sort is stable, so rows of the same district keep the
        // relative order they had in the input file.
        Collections.sort(
            lines, createNumericAscendingComparator(DISTRICT_COLUMN));

        try (OutputStream outputStream = new FileOutputStream("output.csv"))
        {
            writeCsv(HEADER, lines, outputStream);
        }
    }

    /**
     * Reads all data rows from the given stream. The first line is treated
     * as a header and skipped, and blank lines are ignored. The stream is
     * not closed; that is left to the caller.
     *
     * @param inputStream the stream to read the CSV text from
     * @return one list of raw field texts per data row
     * @throws IOException if reading fails
     */
    private static List<List<String>> readCsv(
        InputStream inputStream) throws IOException
    {
        BufferedReader reader = new BufferedReader(
            new InputStreamReader(inputStream));
        List<List<String>> lines = new ArrayList<>();

        // Skip header
        reader.readLine();

        String line;
        while ((line = reader.readLine()) != null)
        {
            if (line.trim().isEmpty())
            {
                // A blank line carries no record (typically a trailing
                // newline at the end of the file).
                continue;
            }
            lines.add(splitLine(line));
        }
        return lines;
    }

    /**
     * Splits one line into its fields at every column separator that is not
     * inside a quoted section. The quote characters are kept as part of the
     * field text, so writing the fields back joined by the separator
     * reproduces the original line exactly.
     *
     * @param line the line to split
     * @return the raw field texts, in order; never empty
     */
    private static List<String> splitLine(String line)
    {
        List<String> fields = new ArrayList<>();
        StringBuilder current = new StringBuilder();
        boolean insideQuotes = false;
        for (int i = 0; i < line.length(); i++)
        {
            char ch = line.charAt(i);
            if (ch == COLUMN_SEPARATOR_CHAR && !insideQuotes)
            {
                fields.add(current.toString());
                current.setLength(0);
                continue;
            }
            if (ch == QUOTE)
            {
                // An escaped quote ("") toggles twice and therefore leaves
                // the state unchanged, which is the desired effect.
                insideQuotes = !insideQuotes;
            }
            current.append(ch);
        }
        fields.add(current.toString());
        return fields;
    }

    /**
     * Writes the header and then every row, with the fields of a row joined
     * by the column separator and each line terminated by {@code "\n"}.
     * The writer, and with it the given stream, is closed afterwards, also
     * when writing fails.
     *
     * @param header the header line, without line terminator
     * @param lines the rows to write
     * @param outputStream the stream to write to
     * @throws IOException if writing fails
     */
    private static void writeCsv(
        String header, List<List<String>> lines, OutputStream outputStream)
        throws IOException
    {
        try (Writer writer = new OutputStreamWriter(outputStream))
        {
            writer.write(header + "\n");
            for (List<String> list : lines)
            {
                writer.write(String.join(COLUMN_SEPARATOR, list));
                writer.write("\n");
            }
        }
    }

    /**
     * Creates a comparator that orders rows by the integer value of the
     * field at the given index, in ascending order. Rows that have no such
     * field, or whose field is not an integer, are ordered after all other
     * rows instead of causing an exception.
     *
     * @param index the zero-based column index to compare by
     * @return the comparator
     */
    private static Comparator<List<String>>
        createNumericAscendingComparator(int index)
    {
        Comparator<Integer> missingLast =
            Comparator.nullsLast(Comparator.naturalOrder());
        return (row0, row1) -> missingLast.compare(
            parseIntAt(row0, index), parseIntAt(row1, index));
    }

    /**
     * Parses the field at the given index as an integer, ignoring
     * surrounding whitespace.
     *
     * @param row the row to read from
     * @param index the zero-based column index
     * @return the parsed value, or {@code null} if the row is too short or
     *         the field is not an integer
     */
    private static Integer parseIntAt(List<String> row, int index)
    {
        if (index < 0 || index >= row.size())
        {
            return null;
        }
        try
        {
            return Integer.valueOf(row.get(index).trim());
        }
        catch (NumberFormatException ignored)
        {
            // Deliberately ignored: a malformed value is reported as
            // "missing" so that the row sorts last.
            return null;
        }
    }
}
I also have a Person class if it'll be any use
/**
 * A person record as found in one row of the CSV file. Instances are
 * immutable. The natural ordering compares persons by district number only,
 * so two different persons of the same district compare as equal.
 */
public class Person implements Comparable<Person> {
    private final String name;
    private final String email;
    private final String address;
    private final String residency;
    private final String gender;
    private final int age;
    private final int district;

    /**
     * Creates a person from the given field values.
     *
     * @param name the person's name
     * @param email the person's email address
     * @param address the person's postal address
     * @param gender the person's gender
     * @param residency the residency status text
     * @param district the district number
     * @param age the age in years
     */
    public Person(String name, String email, String address, String gender, String residency, int district, int age) {
        this.name = name;
        this.email = email;
        this.address = address;
        this.age = age;
        this.district = district;
        this.residency = residency;
        this.gender = gender;
    }

    /** Returns the name. */
    public String getName() {
        return name;
    }

    /** Returns the email address. */
    public String getEmail() {
        return email;
    }

    /** Returns the postal address. */
    public String getAddress() {
        return address;
    }

    /** Returns the age. */
    public int getAge() {
        return age;
    }

    /** Returns the residency status text. */
    public String getResidency() {
        return residency;
    }

    /** Returns the district number. */
    public int getDistrict() {
        return district;
    }

    /** Returns the gender. */
    public String getGender() {
        return gender;
    }

    /**
     * Returns the fields joined by commas in the order name, email, address,
     * age, residency, district, gender. Field values are not quoted or
     * escaped.
     */
    @Override
    public String toString() {
        return (name + "," + email + "," + address + "," + age + "," + residency + "," + district + "," + gender);
    }

    /**
     * Compares this person with another by district number.
     *
     * @param another the person to compare with
     * @return a negative value, zero, or a positive value as this person's
     *         district is less than, equal to, or greater than the other's
     */
    @Override
    public int compareTo(Person another) {
        return Integer.compare(district, another.getDistrict());
    } // end of compareTo
} // end of Person
The CSV file is quite big. Here are a few lines (First Name, Last Name, Email, Address, Age, Residency, District, Gender):
Colleen,Joyner,commodo.auctor@elementumat.net,Ap #697-1279 Nullam Road,30,Resident,4,Female
Fay,Parker,augue.ut.lacus@egetvarius.edu,"P.O. Box 234, 6576 Et, Ave",24,Resident,4,Female
TaShya,Atkinson,sem.egestas@urna.com,"6319 At, St.",45,Resident,15,Female
Curran,Shannon,massa@arcu.com,"980 In, Rd.",57,Resident,8,Male
Yolanda,Snyder,ipsum.ac@Sednullaante.org,"P.O. Box 769, 8207 Egestas Avenue",54,Non-Resident,4,Female
Candice,Weaver,ligula@Aenean.ca,"Ap #599-9287 Tellus, Rd.",35,Resident,9,Female
Yoshio,Silva,fames@Cumsociisnatoque.co.uk,Ap #327-6404 Dui St.,19,Resident,4,Male
Thanks in advance
Answer
You need to sort a large CSV file by a specific field and write the result to another CSV file. Doing this in plain Java makes the code complex and lengthy.
Yet, with SPL, the open-source Java package, you only need one line of code:
| | A |
|---|---|
| 1 | >file("output.csv").export@ct(file("data.csv").cursor@cqt().sortx(District)) |
SPL offers a JDBC driver that can be invoked from Java. Just store the above SPL script as sortx.splx and invoke it in Java as you would call a stored procedure:
…
Class.forName("com.esproc.jdbc.InternalDriver");
con= DriverManager.getConnection("jdbc:esproc:local://");
st=con.prepareCall("call sortx()");
st.execute();
…
Or execute the SPL string within a Java program as we execute a SQL statement:
…
st = con.prepareStatement("=>file(\"output.csv\").export@ct(file(\"data.csv\").cursor@cqt().sortx(District))");
st.execute();
…
View SPL source code.
SPL Official Website 👉 https://www.scudata.com
SPL Feedback and Help 👉 https://www.reddit.com/r/esProcSPL
SPL Learning Material 👉 https://c.scudata.com
SPL Source Code and Package 👉 https://github.com/SPLWare/esProc
Discord 👉 https://discord.gg/2bkGwqTj
Youtube 👉 https://www.youtube.com/@esProc_SPL
Chinese version