diff --git a/examples/binary_classification/Main.hs b/examples/binary_classification/Main.hs
index b6add3a..90db014 100644
--- a/examples/binary_classification/Main.hs
+++ b/examples/binary_classification/Main.hs
@@ -47,7 +47,9 @@ main = do
     LGBM.trainNewModel modelName trainParams trainingData testData 100
   case model of
     Left e -> print e
-    Right m -> LGBM.predict m testData predictionFile
+    Right m -> do
+      _ <- LGBM.predict m testData predictionFile
+      return ()
 
   modelB <- fileDiff modelName "golden_model.txt"
   modelP <- fileDiff predictionFile "golden_prediction.txt"
diff --git a/examples/lambdarank/Main.hs b/examples/lambdarank/Main.hs
index e439016..48f3b1f 100644
--- a/examples/lambdarank/Main.hs
+++ b/examples/lambdarank/Main.hs
@@ -46,7 +46,9 @@ main = do
 
   case model of
     Left e -> print e
-    Right m -> LGBM.predict m testData predictionFile
+    Right m -> do
+      _ <- LGBM.predict m testData predictionFile
+      return ()
 
   modelB <- fileDiff modelName "golden_model.txt"
   modelP <- fileDiff predictionFile "golden_prediction.txt"
diff --git a/examples/multiclass_classification/Main.hs b/examples/multiclass_classification/Main.hs
index 087a5ac..210a1f3 100644
--- a/examples/multiclass_classification/Main.hs
+++ b/examples/multiclass_classification/Main.hs
@@ -40,7 +40,9 @@ main = do
     LGBM.trainNewModel modelName trainParams trainingData testData 100
   case model of
     Left e -> print e
-    Right m -> LGBM.predict m testData predictionFile
+    Right m -> do
+      _ <- LGBM.predict m testData predictionFile
+      return ()
 
   modelB <- fileDiff modelName "golden_model.txt"
   modelP <- fileDiff predictionFile "golden_prediction.txt"
diff --git a/examples/regression/Main.hs b/examples/regression/Main.hs
index eaea800..fe3b70d 100644
--- a/examples/regression/Main.hs
+++ b/examples/regression/Main.hs
@@ -45,7 +45,9 @@ main = do
     LGBM.trainNewModel modelName trainParams trainingData testData 100
   case model of
     Left e -> print e
-    Right m -> LGBM.predict m testData predictionFile
+    Right m -> do
+      _ <- LGBM.predict m testData predictionFile
+      return ()
 
   modelB <- fileDiff modelName "golden_model.txt"
   modelP <- fileDiff predictionFile "golden_prediction.txt"
diff --git a/examples/titanic/Main.hs b/examples/titanic/Main.hs
index 4f6beee..3801d9c 100644
--- a/examples/titanic/Main.hs
+++ b/examples/titanic/Main.hs
@@ -69,9 +69,8 @@ trainModel =
       Right m -> do
         print $ "Model trained and saved to file: " ++ modelName
 
-        LGBM.predict m validationData predictionFile
-        predictions <-
-          map read . lines <$> readFile predictionFile :: IO [Double]
+        predictionSet <- LGBM.predict m validationData predictionFile
+        predictions <- LGBM.dsToList predictionSet :: IO [Double]
         valData <- BSL.readFile valFile
         let knowns = V.toList $ readColumn 0 CSV.HasHeader valData :: [Int]
         print $ "Self Accuracy: " ++ show (accuracy (round <$> predictions) knowns :: Double)
@@ -91,7 +90,7 @@ main = do
       hClose testHandle
       TMP.withSystemTempFile "predictions" $ \predFile predHandle -> do
         hClose predHandle
-        LGBM.predict m (loadData testFile) predFile
+        _ <- LGBM.predict m (loadData testFile) predFile
 
         withFile "TitanicSubmission.csv" WriteMode $ \submHandle -> do
           testBytes <- BSL.readFile testFile
diff --git a/src/LightGBM.hs b/src/LightGBM.hs
index 073f3dc..aba146b 100644
--- a/src/LightGBM.hs
+++ b/src/LightGBM.hs
@@ -44,6 +44,7 @@ module LightGBM
     loadDataFromFile
   , DataSet
   , HasHeader(..)
+  , dsToList
     -- * Models
   , Model
   , trainNewModel
@@ -85,6 +86,27 @@ newtype HasHeader = HasHeader
 loadDataFromFile :: HasHeader -> FilePath -> DataSet
 loadDataFromFile = flip DataSet
 
+-- | Read the file behind a 'DataSet' and parse each data row with 'reads'.
+--
+-- The first line is skipped when the 'DataSet' declares a header.  A row that
+-- does not parse makes the action fail with a 'userError' naming the file and
+-- row, instead of leaving a deferred 'error' inside the returned list.
+dsToList :: Read a => DataSet -> IO [a]
+dsToList ds = do
+  contents <- readFile (dataPath ds)
+  let dataRows
+        | getHeader (hasHeader ds) = drop 1 (lines contents)
+        | otherwise = lines contents
+  mapM parseRow (zip [1 :: Int ..] dataRows)
+  where
+    parseRow (n, row) =
+      case [v | (v, rest) <- reads row, ("", "") <- lex rest] of
+        [v] -> return v
+        _ ->
+          ioError . userError $
+          "dsToList: cannot parse data row " ++
+          show n ++ " of " ++ dataPath ds ++ ": " ++ show row
+
 -- | A model to use to make predictions
 data Model = Model
   { modelPath :: FilePath
@@ -117,18 +139,13 @@ trainNewModel modelOutputPath trainingParams trainingData validationData numRoun
 loadModelFromFile :: FilePath -> Model
 loadModelFromFile = Model
 
--- FIXME:
--- - we might want to return the predictions in a better form
---   than just the file...
--- - Duplication of the exec path between predict and
---   train. Use a Reader monad maybe?
 -- | Predict the results of new inputs and persist the results to an
 -- output file.
 predict ::
      Model -- ^ A model to do prediction with
   -> DataSet -- ^ The new input data for prediction
   -> FilePath -- ^ Where to persist the prediction outputs
-  -> IO ()
+  -> IO DataSet -- ^ The prediction output DataSet
 predict model inputData predictionOutputPath = do
   let dataParams = [P.HasHeader (getHeader . hasHeader $ inputData)]
       runParams =
@@ -138,4 +155,4 @@ predict model inputData predictionOutputPath = do
         , P.OutputResult predictionOutputPath
         ]
   _ <- CLW.run lightgbmExe $ concat [dataParams, runParams]
-  return ()
+  return $ DataSet predictionOutputPath (HasHeader False)