apache · theshoeshiner · Aug 15, 2023 · Aug 21, 2023 · Aug 21, 2023 · Aug 21, 2023
diff --git a/src/main/java/org/apache/commons/text/cases/CamelCase.java b/src/main/java/org/apache/commons/text/cases/CamelCase.java
@@ -0,0 +1,128 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.cases;
+
+import java.util.LinkedList;
+import java.util.List;
+
+import org.apache.commons.lang3.CharUtils;
+import org.apache.commons.lang3.StringUtils;
+
+/**
+ * Case implementation that parses and formats strings of the form 'myCamelCase'.
+ * <p>
+ * This case separates tokens on uppercase ASCII alpha characters, with the exception
+ * that the first token begins with a lowercase ASCII alpha character.
+ * </p>
+ * <p>
+ * This class has no instance state; {@link #INSTANCE} can be used instead of new instances.
+ * </p>
+ */
+public class CamelCase implements Case {
+
+    /** Constant reusable instance of this case. */
+    public static final CamelCase INSTANCE = new CamelCase();
+
+    /**
+     * Constructs a new CamelCase instance.
+     */
+    public CamelCase() {
+        super();
+    }
+
+    /**
+     * Parses string tokens from a Camel Case formatted string.
+     * <p>
+     * Walks the string one code point at a time and starts a new token whenever an uppercase
+     * ASCII letter is encountered; that letter becomes the first character of the new token.
+     * The very first character of the string is an exception to this rule and must be a
+     * lowercase ASCII letter. Code points outside the ASCII range, including supplementary
+     * ones, never start a token and are copied into the current token unchanged. This method
+     * places no other restrictions on the content of the string.<br>
+     * Note: This method never produces empty tokens; an empty string yields an empty list.
+     * </p>
+     * @param string Camel Case formatted string to parse
+     * @return list of tokens parsed from the string
+     * @throws IllegalArgumentException if the first character is not a lowercase ASCII letter
+     */
+    @Override
+    public List<String> parse(String string) {
+        final List<String> tokens = new LinkedList<>();
+        if (string.isEmpty()) {
+            return tokens;
+        }
+        if (!CharUtils.isAsciiAlphaLower(string.charAt(0))) {
+            throw new IllegalArgumentException("Character '" + string.charAt(0) + "' at index 0 must be an ascii lowercase letter");
+        }
+        final int strLen = string.length();
+        // A single scratch buffer suffices: new String(int[], int, int) copies the code points,
+        // so the buffer can be reused for every token instead of being reallocated.
+        final int[] tokenCodePoints = new int[strLen];
+        int tokenCodePointsOffset = 0;
+        for (int i = 0; i < strLen;) {
+            final int codePoint = string.codePointAt(i);
+            // Test the full code point: narrowing it to char would truncate supplementary code
+            // points, so that e.g. U+10041 would be mistaken for 'A' and wrongly split the token.
+            final boolean asciiUpper = codePoint >= 'A' && codePoint <= 'Z';
+            if (asciiUpper && tokenCodePointsOffset > 0) {
+                tokens.add(new String(tokenCodePoints, 0, tokenCodePointsOffset));
+                tokenCodePointsOffset = 0;
+            }
+            tokenCodePoints[tokenCodePointsOffset++] = codePoint;
+            i += Character.charCount(codePoint);
+        }
+        tokens.add(new String(tokenCodePoints, 0, tokenCodePointsOffset));
+        return tokens;
+    }
+
+    /**
+     * Formats tokens into a Camel Case string.
+     * <p>
+     * Iterates each token and creates a camel case formatted string. Each token must begin with an
+     * ASCII letter, which will be forced uppercase in the output, except for the very first token,
+     * which will have a lowercase first character. The remaining characters in all tokens will be
+     * forced lowercase using locale-independent rules, so the result does not depend on the
+     * default locale of the JVM. This Case does not support empty tokens.<br>
+     * No other restrictions are placed on token contents.
+     * </p>
+     * @param tokens String tokens to format into CamelCase
+     * @return Camel Case formatted string
+     * @throws IllegalArgumentException if a token is empty or does not begin with an ASCII letter
+     */
+    @Override
+    public String format(Iterable<String> tokens) {
+        final StringBuilder formattedString = new StringBuilder();
+        int i = 0;
+        for (String token : tokens) {
+            if (token.isEmpty()) {
+                throw new IllegalArgumentException("Unsupported empty token at index " + i);
+            }
+            final char first = token.charAt(0);
+            if (!CharUtils.isAsciiAlpha(first)) {
+                throw new IllegalArgumentException("First character '" + first + "' in token " + i + " must be an ascii letter");
+            }
+            // The first character is an ASCII letter, so char-level case conversion is exact.
+            formattedString.append(i == 0 ? Character.toLowerCase(first) : Character.toUpperCase(first));
+            // Locale.ROOT keeps the output stable; with a Turkish default locale the no-arg
+            // toLowerCase() would turn 'I' into a dotless i (U+0131) and corrupt ASCII identifiers.
+            formattedString.append(token.length() > 1 ? token.substring(1).toLowerCase(java.util.Locale.ROOT) : StringUtils.EMPTY);
+            i++;
+        }
+        return formattedString.toString();
+    }
+
+}
diff --git a/src/main/java/org/apache/commons/text/cases/Case.java b/src/main/java/org/apache/commons/text/cases/Case.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.cases;
+
+import java.util.List;
+
+/**
+ * Handles formatting and parsing tokens to/from a String. For most implementations, tokens returned
+ * by {@link #parse(String)} should abide by any restrictions imposed by {@link #format(Iterable)},
+ * i.e. calling format() with the result of a call to parse() on the same Case instance should
+ * return a matching String.
+ *
+ * @since 1.11
+ */
+public interface Case {
+
+    /**
+     * Formats a sequence of tokens into a string. The tokens do not necessarily have to meet the
+     * syntax requirements of the Case. The documentation for each implementation should specify
+     * what input is supported.
+     *
+     * @param tokens string tokens to be formatted by this Case, in output order
+     * @return the formatted string
+     */
+    String format(Iterable<String> tokens);
+
+    /**
+     * Parses a string into a series of tokens. The string must abide by certain restrictions,
+     * dependent on each Case implementation.
+     *
+     * @param string the string to be parsed by the Case into a list of tokens
+     * @return the list of parsed tokens
+     */
+    List<String> parse(String string);
+
+}
diff --git a/src/main/java/org/apache/commons/text/cases/DelimitedCase.java b/src/main/java/org/apache/commons/text/cases/DelimitedCase.java
@@ -0,0 +1,143 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.cases;
+
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.commons.lang3.CharUtils;
+
+/**
+ * DelimitedCase is a case in which the true alphabetic case of the characters is ignored by default
+ * and tokens themselves are determined by the presence of a delimiter between each token.
+ */
+public class DelimitedCase implements Case {
+
+    /** Code points treated as delimiters when parsing. */
+    private final Set<Integer> parseDelimiters;
+
+    /** Delimiter inserted between tokens when formatting. */
+    private final String formatDelimiter;
+
+    /**
+     * Constructs a new Delimited Case.
+     * @param delimiter the character to use as both the parse and format delimiter
+     */
+    public DelimitedCase(char delimiter) {
+        this(new char[] { delimiter }, CharUtils.toString(delimiter));
+    }
+
+    /**
+     * Constructs a new delimited case.
+     * @param parseDelimiters The array of delimiters to use when parsing
+     * @param formatDelimiter The delimiter to use when formatting
+     * @throws IllegalArgumentException if either argument is null or empty
+     */
+    public DelimitedCase(char[] parseDelimiters, String formatDelimiter) {
+        if (parseDelimiters == null || parseDelimiters.length == 0) {
+            throw new IllegalArgumentException("Parse Delimiters cannot be null or empty");
+        }
+        if (formatDelimiter == null || formatDelimiter.length() == 0) {
+            throw new IllegalArgumentException("Format Delimiters cannot be null or empty");
+        }
+        this.parseDelimiters = generateDelimiterSet(parseDelimiters);
+        this.formatDelimiter = formatDelimiter;
+    }
+
+    /**
+     * Formats tokens into Delimited Case.
+     * <p>
+     * Tokens are appended in iteration order with the format delimiter between consecutive tokens.
+     * A token that contains the format delimiter is rejected; no other restrictions are placed on
+     * the contents of the tokens.<br>
+     * Note: This Case does support empty tokens.
+     * </p>
+     * @param tokens the tokens to be formatted into a delimited string
+     * @return The delimited string
+     * @throws IllegalArgumentException if a token contains the format delimiter
+     */
+    @Override
+    public String format(Iterable<String> tokens) {
+        final StringBuilder formattedString = new StringBuilder();
+        int i = 0;
+        for (String token : tokens) {
+            final int delimiterFoundIndex = token.indexOf(formatDelimiter);
+            if (delimiterFoundIndex > -1) {
+                throw new IllegalArgumentException("Token " + i + " contains delimiter character '" + formatDelimiter + "' at index " + delimiterFoundIndex);
+            }
+            if (i > 0) {
+                formattedString.append(formatDelimiter);
+            }
+            i++;
+            formattedString.append(token);
+        }
+        return formattedString.toString();
+    }
+
+    /**
+     * Parses delimited string into tokens.
+     * <p>
+     * The input is read one code point at a time; each parse delimiter ends the current token and
+     * starts a new one. Delimiters are considered reserved and are omitted from the returned
+     * tokens, so leading, trailing or adjacent delimiters produce empty tokens.<br>
+     * No other restrictions are placed on the contents of the input string.
+     * </p>
+     * @param string The delimited string to be parsed
+     * @return The list of tokens found in the string
+     */
+    @Override
+    public List<String> parse(String string) {
+        final List<String> tokens = new LinkedList<>();
+        if (string.isEmpty()) {
+            return tokens;
+        }
+        final int[] tokenCodePoints = new int[string.length()];
+        int tokenCodePointsOffset = 0;
+        for (int i = 0; i < string.length();) {
+            final int codePoint = string.codePointAt(i);
+            if (parseDelimiters.contains(codePoint)) {
+                // new String(int[], int, int) copies, so the scratch buffer is simply reused.
+                tokens.add(new String(tokenCodePoints, 0, tokenCodePointsOffset));
+                tokenCodePointsOffset = 0;
+            } else {
+                tokenCodePoints[tokenCodePointsOffset++] = codePoint;
+            }
+            // Always step over the whole code point, including a supplementary delimiter.
+            i += Character.charCount(codePoint);
+        }
+        tokens.add(new String(tokenCodePoints, 0, tokenCodePointsOffset));
+        return tokens;
+    }
+
+    /**
+     * Converts an array of delimiter chars to a set of code points; a surrogate pair yields one code point.
+     * @param delimiters the delimiter characters, must not be null
+     * @return the set of delimiter code points
+     */
+    private static Set<Integer> generateDelimiterSet(final char[] delimiters) {
+        final Set<Integer> delimiterHashSet = new HashSet<>();
+        for (int index = 0; index < delimiters.length;) {
+            final int codePoint = Character.codePointAt(delimiters, index);
+            delimiterHashSet.add(codePoint);
+            index += Character.charCount(codePoint);
+        }
+        return delimiterHashSet;
+    }
+
+}
diff --git a/src/main/java/org/apache/commons/text/cases/KebabCase.java b/src/main/java/org/apache/commons/text/cases/KebabCase.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.cases;
+
+/**
+ * Case implementation which parses and formats strings of the form 'my-kebab-string'.
+ * <p>
+ * KebabCase is a {@link DelimitedCase} where the delimiter is a hyphen character '-'.
+ * </p>
+ */
+public class KebabCase extends DelimitedCase {
+
+    /** The hyphen character used as both the parse and format delimiter. */
+    public static final char DELIMITER = '-';
+
+    /** Constant reusable instance of this case. */
+    public static final KebabCase INSTANCE = new KebabCase();
+
+    /**
+     * Constructs a new KebabCase instance that uses {@link #DELIMITER} as its delimiter.
+     */
+    public KebabCase() {
+        super(DELIMITER);
+    }
+
+}