# [MINOR] Fix Typos 'a -> an'
## What changes were proposed in this pull request?

`a` -> `an`

I used a regex to find potentially erroneous lines:
`grep -in ' a [aeiou]' mllib/src/main/scala/org/apache/spark/ml/*/*scala`
and reviewed them line by line.
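
As a rough illustration (not part of this patch), here is a minimal Python sketch of the same scan. The glob and the case-insensitive `' a [aeiou]'` heuristic simply mirror the grep above; hits still include legitimate phrases (e.g. "a user"), so every candidate needs a manual pass.

```python
# Hypothetical helper mirroring the grep above: print candidate "a <vowel>" lines
# as "file:line: text" so they can be reviewed one by one.
import glob
import re

# Same heuristic as `grep -in ' a [aeiou]'`: a standalone "a" followed by a vowel.
PATTERN = re.compile(r' a [aeiou]', re.IGNORECASE)

for path in sorted(glob.glob('mllib/src/main/scala/org/apache/spark/ml/*/*scala')):
    with open(path) as source:
        for lineno, line in enumerate(source, start=1):
            if PATTERN.search(line):
                print('%s:%d: %s' % (path, lineno, line.rstrip()))
```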

## How was this patch tested?

Local build
`lint-java` check

Author: Zheng RuiFeng <[email protected]>

Closes apache#13317 from zhengruifeng/a_an.
zhengruifeng authored and rxin committed May 27, 2016
1 parent ee3609a commit 6b1a618
Showing 61 changed files with 78 additions and 78 deletions.
@@ -166,7 +166,7 @@ private[spark] class HeartbeatReceiver(sc: SparkContext, clock: Clock)
}

/**
- * Send ExecutorRemoved to the event loop to remove a executor. Only for test.
+ * Send ExecutorRemoved to the event loop to remove an executor. Only for test.
*
* @return if HeartbeatReceiver is stopped, return None. Otherwise, return a Some(Future) that
* indicate if this operation is successful.
@@ -929,7 +929,7 @@ private[deploy] class Master(
exec.state = ExecutorState.KILLED
}

- /** Generate a new app ID given a app's submission date */
+ /** Generate a new app ID given an app's submission date */
private def newApplicationId(submitDate: Date): String = {
val appId = "app-%s-%04d".format(createDateFormat.format(submitDate), nextAppNumber)
nextAppNumber += 1
@@ -24,7 +24,7 @@ import scala.collection.mutable
import org.apache.spark.internal.Logging

/**
- * Implements policies and bookkeeping for sharing a adjustable-sized pool of memory between tasks.
+ * Implements policies and bookkeeping for sharing an adjustable-sized pool of memory between tasks.
*
* Tries to ensure that each task gets a reasonable share of memory, instead of some task ramping up
* to a large amount first and then causing others to spill to disk repeatedly.
@@ -1054,7 +1054,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)])
val warningMessage =
s"$outputCommitterClass may be an output committer that writes data directly to " +
"the final location. Because speculation is enabled, this output committer may " +
"cause data loss (see the case in SPARK-10063). If possible, please use a output " +
"cause data loss (see the case in SPARK-10063). If possible, please use an output " +
"committer that does not have this behavior (e.g. FileOutputCommitter)."
logWarning(warningMessage)
}
@@ -1142,7 +1142,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)])
val warningMessage =
s"$outputCommitterClass may be an output committer that writes data directly to " +
"the final location. Because speculation is enabled, this output committer may " +
"cause data loss (see the case in SPARK-10063). If possible, please use a output " +
"cause data loss (see the case in SPARK-10063). If possible, please use an output " +
"committer that does not have this behavior (e.g. FileOutputCommitter)."
logWarning(warningMessage)
}
2 changes: 1 addition & 1 deletion core/src/main/scala/org/apache/spark/rpc/netty/Inbox.scala
@@ -52,7 +52,7 @@ private[netty] case class RemoteProcessConnectionError(cause: Throwable, remoteA
extends InboxMessage

/**
- * A inbox that stores messages for an [[RpcEndpoint]] and posts messages to it thread-safely.
+ * An inbox that stores messages for an [[RpcEndpoint]] and posts messages to it thread-safely.
*/
private[netty] class Inbox(
val endpointRef: NettyRpcEndpointRef,
@@ -20,7 +20,7 @@ package org.apache.spark.scheduler
import org.apache.spark.executor.ExecutorExitCode

/**
- * Represents an explanation for a executor or whole slave failing or exiting.
+ * Represents an explanation for an executor or whole slave failing or exiting.
*/
private[spark]
class ExecutorLossReason(val message: String) extends Serializable {
@@ -134,7 +134,7 @@ private[spark] class SerializerManager(defaultSerializer: Serializer, conf: Spar
}

/**
- * Deserializes a InputStream into an iterator of values and disposes of it when the end of
+ * Deserializes an InputStream into an iterator of values and disposes of it when the end of
* the iterator is reached.
*/
def dataDeserializeStream[T: ClassTag](
2 changes: 1 addition & 1 deletion docs/streaming-custom-receivers.md
@@ -36,7 +36,7 @@ Any exception in the receiving threads should be caught and handled properly to
failures of the receiver. `restart(<exception>)` will restart the receiver by
asynchronously calling `onStop()` and then calling `onStart()` after a delay.
`stop(<exception>)` will call `onStop()` and terminate the receiver. Also, `reportError(<error>)`
- reports a error message to the driver (visible in the logs and UI) without stopping / restarting
+ reports an error message to the driver (visible in the logs and UI) without stopping / restarting
the receiver.

The following is a custom receiver that receives a stream of text over a socket. It treats
4 changes: 2 additions & 2 deletions docs/streaming-programming-guide.md
@@ -612,7 +612,7 @@ as well as to run the receiver(s).

- When running a Spark Streaming program locally, do not use "local" or "local[1]" as the master URL.
Either of these means that only one thread will be used for running tasks locally. If you are using
- a input DStream based on a receiver (e.g. sockets, Kafka, Flume, etc.), then the single thread will
+ an input DStream based on a receiver (e.g. sockets, Kafka, Flume, etc.), then the single thread will
be used to run the receiver, leaving no thread for processing the received data. Hence, when
running locally, always use "local[*n*]" as the master URL, where *n* > number of receivers to run
(see [Spark Properties](configuration.html#spark-properties) for information on how to set
@@ -1788,7 +1788,7 @@ This example appends the word counts of network data into a file.
This behavior is made simple by using `JavaStreamingContext.getOrCreate`. This is used as follows.

{% highlight java %}
- // Create a factory object that can create a and setup a new JavaStreamingContext
+ // Create a factory object that can create and setup a new JavaStreamingContext
JavaStreamingContextFactory contextFactory = new JavaStreamingContextFactory() {
@Override public JavaStreamingContext create() {
JavaStreamingContext jssc = new JavaStreamingContext(...); // new context
@@ -70,7 +70,7 @@ public static void main(String[] args) throws Exception {
SparkConf sparkConf = new SparkConf().setAppName("JavaCustomReceiver");
JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, new Duration(1000));

- // Create a input stream with the custom receiver on target ip:port and count the
+ // Create an input stream with the custom receiver on target ip:port and count the
// words in input stream of \n delimited text (eg. generated by 'nc')
JavaReceiverInputDStream<String> lines = ssc.receiverStream(
new JavaCustomReceiver(args[0], Integer.parseInt(args[1])));
@@ -50,7 +50,7 @@ object CustomReceiver {
val sparkConf = new SparkConf().setAppName("CustomReceiver")
val ssc = new StreamingContext(sparkConf, Seconds(1))

- // Create a input stream with the custom receiver on target ip:port and count the
+ // Create an input stream with the custom receiver on target ip:port and count the
// words in input stream of \n delimited text (eg. generated by 'nc')
val lines = ssc.receiverStream(new CustomReceiver(args(0), args(1).toInt))
val words = lines.flatMap(_.split(" "))
@@ -686,7 +686,7 @@ object LogisticRegressionModel extends MLReadable[LogisticRegressionModel] {
/**
* MultiClassSummarizer computes the number of distinct labels and corresponding counts,
* and validates the data to see if the labels used for k class multi-label classification
- * are in the range of {0, 1, ..., k - 1} in a online fashion.
+ * are in the range of {0, 1, ..., k - 1} in an online fashion.
*
* Two MultilabelSummarizer can be merged together to have a statistical summary of the
* corresponding joint dataset.
@@ -923,7 +923,7 @@ class BinaryLogisticRegressionSummary private[classification] (

/**
* LogisticAggregator computes the gradient and loss for binary logistic loss function, as used
- * in binary classification for instances in sparse or dense vector in a online fashion.
+ * in binary classification for instances in sparse or dense vector in an online fashion.
*
* Note that multinomial logistic loss is not supported yet!
*
@@ -790,7 +790,7 @@ trait Params extends Identifiable with Serializable {
* :: DeveloperApi ::
* Java-friendly wrapper for [[Params]].
* Java developers who need to extend [[Params]] should use this class instead.
- * If you need to extend a abstract class which already extends [[Params]], then that abstract
+ * If you need to extend an abstract class which already extends [[Params]], then that abstract
* class should be Java-friendly as well.
*/
@DeveloperApi
@@ -396,7 +396,7 @@ object AFTSurvivalRegressionModel extends MLReadable[AFTSurvivalRegressionModel]

/**
* AFTAggregator computes the gradient and loss for a AFT loss function,
- * as used in AFT survival regression for samples in sparse or dense vector in a online fashion.
+ * as used in AFT survival regression for samples in sparse or dense vector in an online fashion.
*
* The loss function and likelihood function under the AFT model based on:
* Lawless, J. F., Statistical Models and Methods for Lifetime Data,
@@ -731,7 +731,7 @@ class LinearRegressionSummary private[regression] (

/**
* LeastSquaresAggregator computes the gradient and loss for a Least-squared loss function,
- * as used in linear regression for samples in sparse or dense vector in a online fashion.
+ * as used in linear regression for samples in sparse or dense vector in an online fashion.
*
* Two LeastSquaresAggregator can be merged together to have a summary of loss and gradient of
* the corresponding joint dataset.
@@ -74,7 +74,7 @@ class ParamGridBuilder @Since("1.2.0") {
}

/**
- * Adds a int param with multiple values.
+ * Adds an int param with multiple values.
*/
@Since("1.2.0")
def addGrid(param: IntParam, values: Array[Int]): this.type = {
@@ -126,7 +126,7 @@ private[fpm] object FPTree {
def isRoot: Boolean = parent == null
}

- /** Summary of a item in an FP-Tree. */
+ /** Summary of an item in an FP-Tree. */
private class Summary[T] extends Serializable {
var count: Long = 0L
val nodes: ListBuffer[Node[T]] = ListBuffer.empty
@@ -24,7 +24,7 @@ import org.apache.spark.mllib.linalg.{Vector, Vectors}
* :: DeveloperApi ::
* MultivariateOnlineSummarizer implements [[MultivariateStatisticalSummary]] to compute the mean,
* variance, minimum, maximum, counts, and nonzero counts for instances in sparse or dense vector
- * format in a online fashion.
+ * format in an online fashion.
*
* Two MultivariateOnlineSummarizer can be merged together to have a statistical summary of
* the corresponding joint dataset.
6 changes: 3 additions & 3 deletions python/pyspark/mllib/regression.py
@@ -648,7 +648,7 @@ def predict(self, x):

@since("1.4.0")
def save(self, sc, path):
"""Save a IsotonicRegressionModel."""
"""Save an IsotonicRegressionModel."""
java_boundaries = _py2java(sc, self.boundaries.tolist())
java_predictions = _py2java(sc, self.predictions.tolist())
java_model = sc._jvm.org.apache.spark.mllib.regression.IsotonicRegressionModel(
@@ -658,7 +658,7 @@ def save(self, sc, path):
@classmethod
@since("1.4.0")
def load(cls, sc, path):
"""Load a IsotonicRegressionModel."""
"""Load an IsotonicRegressionModel."""
java_model = sc._jvm.org.apache.spark.mllib.regression.IsotonicRegressionModel.load(
sc._jsc.sc(), path)
py_boundaries = _java2py(sc, java_model.boundaryVector()).toArray()
@@ -694,7 +694,7 @@ class IsotonicRegression(object):
@since("1.4.0")
def train(cls, data, isotonic=True):
"""
- Train a isotonic regression model on the given data.
+ Train an isotonic regression model on the given data.
:param data:
RDD of (label, feature, weight) tuples.
2 changes: 1 addition & 1 deletion python/pyspark/sql/functions.py
@@ -1177,7 +1177,7 @@ def sha2(col, numBits):

@since(2.0)
def hash(*cols):
"""Calculates the hash code of given columns, and returns the result as a int column.
"""Calculates the hash code of given columns, and returns the result as an int column.
>>> spark.createDataFrame([('ABC',)], ['a']).select(hash('a').alias('hash')).collect()
[Row(hash=-757602832)]
2 changes: 1 addition & 1 deletion python/pyspark/sql/readwriter.py
@@ -847,7 +847,7 @@ def orc(self, path, mode=None, partitionBy=None, compression=None):

@since(1.4)
def jdbc(self, url, table, mode=None, properties=None):
"""Saves the content of the :class:`DataFrame` to a external database table via JDBC.
"""Saves the content of the :class:`DataFrame` to an external database table via JDBC.
.. note:: Don't create too many partitions in parallel on a large cluster; \
otherwise Spark might crash your external database systems.
2 changes: 1 addition & 1 deletion python/pyspark/streaming/dstream.py
@@ -623,7 +623,7 @@ def __init__(self, prev, func):
self._jdstream_val = None

# Using type() to avoid folding the functions and compacting the DStreams which is not
- # not strictly a object of TransformedDStream.
+ # not strictly an object of TransformedDStream.
# Changed here is to avoid bug in KafkaTransformedDStream when calling offsetRanges().
if (type(prev) is TransformedDStream and
not prev.is_cached and not prev.is_checkpointed):
2 changes: 1 addition & 1 deletion python/pyspark/streaming/kafka.py
@@ -228,7 +228,7 @@ class OffsetRange(object):

def __init__(self, topic, partition, fromOffset, untilOffset):
"""
- Create a OffsetRange to represent range of offsets
+ Create an OffsetRange to represent range of offsets
:param topic: Kafka topic name.
:param partition: Kafka partition id.
:param fromOffset: Inclusive starting offset.
@@ -215,7 +215,7 @@ trait CheckAnalysis extends PredicateHelper {
if (!RowOrdering.isOrderable(expr.dataType)) {
failAnalysis(
s"expression ${expr.sql} cannot be used as a grouping expression " +
s"because its data type ${expr.dataType.simpleString} is not a orderable " +
s"because its data type ${expr.dataType.simpleString} is not an orderable " +
s"data type.")
}

@@ -37,7 +37,7 @@ object TypeCheckResult {

/**
* Represents the failing result of `Expression.checkInputDataTypes`,
- * with a error message to show the reason of failure.
+ * with an error message to show the reason of failure.
*/
case class TypeCheckFailure(message: String) extends TypeCheckResult {
def isSuccess: Boolean = false
@@ -178,8 +178,8 @@ object TypeCoercion {
q transformExpressions {
case a: AttributeReference =>
inputMap.get(a.exprId) match {
- // This can happen when a Attribute reference is born in a non-leaf node, for example
- // due to a call to an external script like in the Transform operator.
+ // This can happen when an Attribute reference is born in a non-leaf node, for
+ // example due to a call to an external script like in the Transform operator.
// TODO: Perhaps those should actually be aliases?
case None => a
// Leave the same if the dataTypes match.
@@ -26,7 +26,7 @@ object JarResource extends FunctionResourceType("jar")

object FileResource extends FunctionResourceType("file")

- // We do not allow users to specify a archive because it is YARN specific.
+ // We do not allow users to specify an archive because it is YARN specific.
// When loading resources, we will throw an exception and ask users to
// use --archive with spark submit.
object ArchiveResource extends FunctionResourceType("archive")
@@ -23,7 +23,7 @@ import org.apache.spark.sql.types._

/**
* Returns the first value of `child` for a group of rows. If the first value of `child`
- * is `null`, it returns `null` (respecting nulls). Even if [[First]] is used on a already
+ * is `null`, it returns `null` (respecting nulls). Even if [[First]] is used on an already
* sorted column, if we do partial aggregation and final aggregation (when mergeExpression
* is used) its result will not be deterministic (unless the input table is sorted and has
* a single partition, and we use a single reducer to do the aggregation.).
@@ -23,7 +23,7 @@ import org.apache.spark.sql.types._

/**
* Returns the last value of `child` for a group of rows. If the last value of `child`
- * is `null`, it returns `null` (respecting nulls). Even if [[Last]] is used on a already
+ * is `null`, it returns `null` (respecting nulls). Even if [[Last]] is used on an already
* sorted column, if we do partial aggregation and final aggregation (when mergeExpression
* is used) its result will not be deterministic (unless the input table is sorted and has
* a single partition, and we use a single reducer to do the aggregation.).
@@ -51,7 +51,7 @@ object PivotFirst {
}

/**
- * PivotFirst is a aggregate function used in the second phase of a two phase pivot to do the
+ * PivotFirst is an aggregate function used in the second phase of a two phase pivot to do the
* required rearrangement of values into pivoted form.
*
* For example on an input of
@@ -182,7 +182,7 @@ case class Literal protected (value: Any, dataType: DataType)

override protected def jsonFields: List[JField] = {
// Turns all kinds of literal values to string in json field, as the type info is hard to
- // retain in json format, e.g. {"a": 123} can be a int, or double, or decimal, etc.
+ // retain in json format, e.g. {"a": 123} can be an int, or double, or decimal, etc.
val jsonValue = (value, dataType) match {
case (null, _) => JNull
case (i: Int, DateType) => JString(DateTimeUtils.toJavaDate(i).toString)
@@ -214,7 +214,7 @@ class GenericRowWithSchema(values: Array[Any], override val schema: StructType)
}

/**
- * A internal row implementation that uses an array of objects as the underlying storage.
+ * An internal row implementation that uses an array of objects as the underlying storage.
* Note that, while the array is not copied, and thus could technically be mutated after creation,
* this is not allowed.
*/
@@ -129,7 +129,7 @@ object PredicateSubquery {

/**
* A [[ListQuery]] expression defines the query which we want to search in an IN subquery
- * expression. It should and can only be used in conjunction with a IN expression.
+ * expression. It should and can only be used in conjunction with an IN expression.
*
* For example (SQL):
* {{{
@@ -926,7 +926,7 @@ object SimplifyConditionals extends Rule[LogicalPlan] with PredicateHelper {
case e @ CaseWhen(branches, _) if branches.headOption.map(_._1) == Some(TrueLiteral) =>
// If the first branch is a true literal, remove the entire CaseWhen and use the value
// from that. Note that CaseWhen.branches should never be empty, and as a result the
- // headOption (rather than head) added above is just a extra (and unnecessary) safeguard.
+ // headOption (rather than head) added above is just an extra (and unnecessary) safeguard.
branches.head._2
}
}
@@ -59,7 +59,7 @@ abstract class AbstractSqlParser extends ParserInterface with Logging {
}
}

- /** Get the builder (visitor) which converts a ParseTree into a AST. */
+ /** Get the builder (visitor) which converts a ParseTree into an AST. */
protected def astBuilder: AstBuilder

protected def parse[T](command: String)(toResult: SqlBaseParser => T): T = {
@@ -530,7 +530,7 @@ private[sql] object Expand {

/**
* Apply the all of the GroupExpressions to every input row, hence we will get
- * multiple output rows for a input row.
+ * multiple output rows for an input row.
*
* @param bitmasks The bitmask set represents the grouping sets
* @param groupByAliases The aliased original group by expressions
@@ -572,7 +572,7 @@ private[sql] object Expand {

/**
* Apply a number of projections to every input row, hence we will get multiple output rows for
- * a input row.
+ * an input row.
*
* @param projections to apply
* @param output of all projections.
@@ -18,7 +18,7 @@
package org.apache.spark.sql.catalyst

/**
- * A a collection of common abstractions for query plans as well as
+ * A collection of common abstractions for query plans as well as
* a base logical plan representation.
*/
package object plans