-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
10 changed files
with
179 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -40,6 +40,7 @@ dependencies: | |
- streaming-utils | ||
- warc | ||
- HTTP | ||
- fgl | ||
|
||
library: | ||
source-dirs: src | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
-- Sorry for the awful module; I stole it from graph-utils which no longer | ||
-- compiles. | ||
{-# LANGUAGE BangPatterns #-} | ||
|
||
module Data.Graph.PageRank (pageRanks) where | ||
|
||
import Control.Monad.RWS (RWS(..), asks, gets, execRWS, get, put) | ||
import Data.Graph.Inductive hiding (size) | ||
import Prelude hiding (lookup) | ||
import Data.Map (Map, lookup, fromList, foldrWithKey, findWithDefault) | ||
import Data.Maybe (fromJust) | ||
import Control.Monad | ||
import Debug.Trace (traceM) | ||
|
||
data Env = Env {node :: [Node], size :: Int, from :: Map Node [Node], outdegrees :: Map Node Int} | ||
|
||
-- |'RankDic' is the Map for holding PageRank data. | ||
type PRMachine = RWS Env () (Map Node Double) | ||
|
||
lookupEnv :: (Ord a) => (Env -> Map a b) -> a -> PRMachine b | ||
lookupEnv f a = do{ dic<-asks f; pure $ fromJust $ lookup a dic} | ||
|
||
outdegree :: Node -> PRMachine Int | ||
outdegree = lookupEnv outdegrees | ||
|
||
froms :: Node -> PRMachine [Node] | ||
froms = lookupEnv from | ||
|
||
currentRank :: Node -> PRMachine Double | ||
currentRank nd = gets (fromJust.lookup nd) | ||
|
||
-- |'pageRanks' calculate the PageRank for each node in the Graph 'gr' | ||
pageRanks :: Graph gr => gr a b -> Double -> Double -> Map Node Double | ||
pageRanks gr epsilon error = fst $ execRWS (steps 0) Env{node=nds, size=count, from=froms, outdegrees=outdegs} initRanks | ||
where nds = nodes gr | ||
count :: (Num a) => a | ||
count = fromIntegral $ noNodes gr | ||
froms = fromList $ zip nds $ fmap (pre gr) nds | ||
outdegs = fromList $ zip nds $ fmap (outdeg gr) nds | ||
initRanks = fromList $ zip nds $ replicate count (1/count) | ||
steps 1000 = get | ||
steps n = do | ||
!_ <- traceM $ "stepping " <> show n | ||
old <- get | ||
new <- calcPageRank epsilon | ||
let cond = foldrWithKey (\k a b -> b && ((findWithDefault (1/0) k new)-a < error)) True old | ||
if cond then pure new else steps (n + 1) | ||
|
||
|
||
|
||
calcPageRank :: Double -> PRMachine (Map Node Double) | ||
calcPageRank epsilon = do | ||
nds <- asks node | ||
count <- asks $ fromIntegral . size | ||
dic <- forM nds $ \n -> do | ||
frms <- froms n | ||
ranks <- forM frms $ \m -> do | ||
deg <- outdegree m | ||
rank <- currentRank m | ||
pure (rank/fromIntegral deg) | ||
pure (n, epsilon/count + (1-epsilon)*(sum ranks)) | ||
let rdic = fromList dic | ||
put rdic | ||
pure rdic | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -26,3 +26,6 @@ rank | |
, round $ (fromIntegral para / fromIntegral heads) * 5 | ||
] | ||
|
||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
{-# LANGUAGE DuplicateRecordFields #-} | ||
{-# LANGUAGE RankNTypes #-} | ||
{-# LANGUAGE ScopedTypeVariables #-} | ||
{-# LANGUAGE TupleSections #-} | ||
{-# LANGUAGE TypeApplications #-} | ||
|
||
module WebGraph where | ||
|
||
import DB | ||
import Rel8 | ||
import Control.Applicative | ||
import Data.Int (Int64) | ||
import Hasql.Statement | ||
import Hasql.Connection (Connection, acquire) | ||
import Hasql.Session | ||
import Data.Graph.PageRank (pageRanks) | ||
import Data.Map (toDescList) | ||
import Streaming (Stream, Of) | ||
import qualified Streaming as S | ||
import qualified Streaming.ByteString.Char8 as SB | ||
import qualified Streaming.Prelude as S | ||
import Data.ByteString.Streaming.HTTP (MonadResource, runResourceT) | ||
import Data.ByteString.Char8 (ByteString) | ||
import Data.Function ((&)) | ||
import qualified Data.Text as T | ||
import Data.Text.Encoding (decodeUtf8) | ||
import Data.Bifunctor (bimap) | ||
import Control.Monad.Trans (lift) | ||
|
||
pagerankCsv :: forall m. MonadResource m => Stream (Of (DocId, Double)) m () | ||
pagerankCsv | ||
= SB.readFile @m "/home/sandy/pagerank.csv" | ||
& SB.lines | ||
& S.mapped SB.toStrict | ||
& S.drop 1 | ||
& S.map (break (== ',') . T.unpack . decodeUtf8) | ||
& S.map (bimap (DocId . read) $ read . tail) | ||
|
||
main :: IO () | ||
main = do | ||
Right conn <- acquire connectionSettings | ||
runResourceT $ do | ||
flip S.mapM_ (S.zip (S.each [0..]) pagerankCsv) $ \x@(n, (d, pr)) -> lift $ do | ||
print n | ||
flip run conn $ statement () $ update $ Update | ||
{ target = discoverySchema | ||
, from = pure () | ||
, set = const $ \e -> e { d_rank = lit pr } | ||
, updateWhere = const $ \e -> d_docId e ==. lit d | ||
, returning = pure () | ||
} | ||
|
||
-- gr <- buildGraph conn | ||
-- print "built graph" | ||
-- let pr = pageRanks gr 0.1 0.1 | ||
-- print $ head $ toDescList $ pr | ||
|
||
-- buildGraph :: Connection -> IO (Gr () ()) | ||
-- buildGraph conn = do | ||
-- Right nodes <- flip run conn $ statement () $ select selNodes | ||
-- print "got nodes" | ||
-- Right edges <- flip run conn $ statement () $ select selEdges | ||
-- print "got edges" | ||
-- let f = fromIntegral . unDocId | ||
-- pure $ mkGraph (fmap (\ di -> (f di, ())) nodes) (fmap (\ (src, dst) -> (f src, f dst, ())) edges) | ||
|
||
|
||
-- selEdges :: Query (Expr DocId, Expr DocId) | ||
-- selEdges = do | ||
-- Edges _ src dst _ <- each edgesSchema | ||
-- pure $ (src, dst) | ||
|
||
|
||
-- selNodes :: Query (Expr DocId) | ||
-- selNodes = do | ||
-- d <- each discoverySchema | ||
-- pure $ d_docId d | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters