{-# language OverloadedStrings #-}
{-# language DeriveGeneric #-}
{-# language GeneralizedNewtypeDeriving #-}
-- | Haskell bindings for the CoNaLa dataset [1], Code/Natural Language Challenge
--
-- This challenge was designed to test systems for generating program snippets from natural language. For example, if the input is sort list x in reverse order, then the system would be required to output x.sort(reverse=True) in Python.
--
-- References
--
-- 1. https://conala-corpus.github.io/
module CoNaLa (sourceDataset
               -- * Types
              , Item(..)
              , QId(..)) where

import GHC.Generics (Generic)

import Data.Aeson (FromJSON(..), ToJSON(..), withObject, (.:))
import Conduit (ConduitT, runConduitRes, sourceFile, (.|), printC, MonadResource)
import Data.Conduit.Aeson (conduitArrayEither, ParserError)
import Data.Text (Text)

t0 :: IO ()
t0 :: IO ()
t0 = do
  let
    fpath :: FilePath
fpath = FilePath
"test/cdata.txt"
  ConduitT () Void (ResourceT IO) () -> IO ()
forall (m :: * -> *) r.
MonadUnliftIO m =>
ConduitT () Void (ResourceT m) r -> m r
runConduitRes (ConduitT () Void (ResourceT IO) () -> IO ())
-> ConduitT () Void (ResourceT IO) () -> IO ()
forall a b. (a -> b) -> a -> b
$ FilePath -> ConduitT () (Either ParserError Item) (ResourceT IO) ()
forall (m :: * -> *) a.
MonadResource m =>
FilePath -> ConduitT a (Either ParserError Item) m ()
sourceDataset FilePath
fpath ConduitT () (Either ParserError Item) (ResourceT IO) ()
-> ConduitM (Either ParserError Item) Void (ResourceT IO) ()
-> ConduitT () Void (ResourceT IO) ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| ConduitM (Either ParserError Item) Void (ResourceT IO) ()
forall a (m :: * -> *) o. (Show a, MonadIO m) => ConduitT a o m ()
printC

-- | Stream the dataset from file
--
-- As of May 2022 the dataset can be downloaded from http://www.phontron.com/download/conala-corpus-v1.1.zip
sourceDataset
  :: (MonadResource m) =>
     FilePath -- ^ path of dataset file
  -> ConduitT a (Either ParserError Item) m ()
sourceDataset :: FilePath -> ConduitT a (Either ParserError Item) m ()
sourceDataset FilePath
fp = FilePath -> ConduitT a ByteString m ()
forall (m :: * -> *) i.
MonadResource m =>
FilePath -> ConduitT i ByteString m ()
sourceFile FilePath
fp ConduitT a ByteString m ()
-> ConduitM ByteString (Either ParserError Item) m ()
-> ConduitT a (Either ParserError Item) m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.|
                   ConduitM ByteString (Either ParserError Item) m ()
forall v (m :: * -> *).
(FromJSON v, Monad m) =>
ConduitM ByteString (Either ParserError v) m ()
conduitArrayEither

-- | Dataset item
data Item = Item {
  Item -> Text
intent :: Text -- ^ Natural Language intent (i.e., the title of a Stack Overflow question)
  , Item -> Text
rewritten_intent :: Text -- ^ Crowdsourced revised intents that try to better reflect the full meaning of the code, typically done by incorporating variable names and function arguments that appeared in the code into the intent. This is the input to be used by systems in the CoNaLa challenge.
  , Item -> Text
snippet :: Text -- ^ Python code snippet that implements the intent. This is the output of systems in the challenge.
  , Item -> QId
question_id :: QId -- ^ Id of the Stack Overflow question
                 } deriving (Item -> Item -> Bool
(Item -> Item -> Bool) -> (Item -> Item -> Bool) -> Eq Item
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Item -> Item -> Bool
$c/= :: Item -> Item -> Bool
== :: Item -> Item -> Bool
$c== :: Item -> Item -> Bool
Eq, Int -> Item -> ShowS
[Item] -> ShowS
Item -> FilePath
(Int -> Item -> ShowS)
-> (Item -> FilePath) -> ([Item] -> ShowS) -> Show Item
forall a.
(Int -> a -> ShowS) -> (a -> FilePath) -> ([a] -> ShowS) -> Show a
showList :: [Item] -> ShowS
$cshowList :: [Item] -> ShowS
show :: Item -> FilePath
$cshow :: Item -> FilePath
showsPrec :: Int -> Item -> ShowS
$cshowsPrec :: Int -> Item -> ShowS
Show, (forall x. Item -> Rep Item x)
-> (forall x. Rep Item x -> Item) -> Generic Item
forall x. Rep Item x -> Item
forall x. Item -> Rep Item x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep Item x -> Item
$cfrom :: forall x. Item -> Rep Item x
Generic)
instance FromJSON Item
instance ToJSON Item

-- | Stack Overflow Question ID
newtype QId = QId Int deriving (QId -> QId -> Bool
(QId -> QId -> Bool) -> (QId -> QId -> Bool) -> Eq QId
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: QId -> QId -> Bool
$c/= :: QId -> QId -> Bool
== :: QId -> QId -> Bool
$c== :: QId -> QId -> Bool
Eq, Int -> QId -> ShowS
[QId] -> ShowS
QId -> FilePath
(Int -> QId -> ShowS)
-> (QId -> FilePath) -> ([QId] -> ShowS) -> Show QId
forall a.
(Int -> a -> ShowS) -> (a -> FilePath) -> ([a] -> ShowS) -> Show a
showList :: [QId] -> ShowS
$cshowList :: [QId] -> ShowS
show :: QId -> FilePath
$cshow :: QId -> FilePath
showsPrec :: Int -> QId -> ShowS
$cshowsPrec :: Int -> QId -> ShowS
Show, (forall x. QId -> Rep QId x)
-> (forall x. Rep QId x -> QId) -> Generic QId
forall x. Rep QId x -> QId
forall x. QId -> Rep QId x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep QId x -> QId
$cfrom :: forall x. QId -> Rep QId x
Generic, Value -> Parser [QId]
Value -> Parser QId
(Value -> Parser QId) -> (Value -> Parser [QId]) -> FromJSON QId
forall a.
(Value -> Parser a) -> (Value -> Parser [a]) -> FromJSON a
parseJSONList :: Value -> Parser [QId]
$cparseJSONList :: Value -> Parser [QId]
parseJSON :: Value -> Parser QId
$cparseJSON :: Value -> Parser QId
FromJSON, [QId] -> Encoding
[QId] -> Value
QId -> Encoding
QId -> Value
(QId -> Value)
-> (QId -> Encoding)
-> ([QId] -> Value)
-> ([QId] -> Encoding)
-> ToJSON QId
forall a.
(a -> Value)
-> (a -> Encoding)
-> ([a] -> Value)
-> ([a] -> Encoding)
-> ToJSON a
toEncodingList :: [QId] -> Encoding
$ctoEncodingList :: [QId] -> Encoding
toJSONList :: [QId] -> Value
$ctoJSONList :: [QId] -> Value
toEncoding :: QId -> Encoding
$ctoEncoding :: QId -> Encoding
toJSON :: QId -> Value
$ctoJSON :: QId -> Value
ToJSON)



{-
    "intent": "How to convert a list of multiple integers into a single integer?",
    "rewritten_intent": "Concatenate elements of a list 'x' of multiple integers to a single integer",
    "snippet": "sum(d * 10 ** i for i, d in enumerate(x[::-1]))",
    "question_id": 41067960
  },
-}