import React from "react";
import styled from "@emotion/styled";
import { Global, css } from "@emotion/react";

import Cards from "./components/Cards";
import { mq } from "./utilities/styles";

import FirstFig from "./assets/first_fig.png";
import Flowchart from "./assets/flowchart.png";
import Benchmark from "./assets/benchmark.png";
import Ablation from "./assets/ablation.png";
import Interpretation from "./assets/interpretation.png";
import Github from "./assets/github.png";
import Paper from "./assets/paper.png";

import test48 from './assets/imgs/good/test48.gif'
import test333 from './assets/imgs/good/test333.gif'
import test161 from './assets/imgs/good/test161.gif'
import test841 from './assets/imgs/good/test841.gif'
import test222 from './assets/imgs/good/test222.gif'
import test770 from './assets/imgs/good/test770.gif'
import test271 from './assets/imgs/good/test271.gif'
import test866 from './assets/imgs/good/test866.gif'
import test140 from './assets/imgs/good/test140.gif'
import test1112 from './assets/imgs/good/test1112.gif'
import test374 from './assets/imgs/good/test374.gif'
import test368 from './assets/imgs/good/test368.gif'
import test56 from './assets/imgs/good/test56.gif'
import test329 from './assets/imgs/good/test329.gif'
import test431 from './assets/imgs/good/test431.gif'
import test381 from './assets/imgs/good/test381.gif'
import test22 from './assets/imgs/good/test22.gif'
import test406 from './assets/imgs/good/test406.gif'
import test669 from './assets/imgs/good/test669.gif'
import test304 from './assets/imgs/good/test304.gif'
import test366 from './assets/imgs/good/test366.gif'
import test1150 from './assets/imgs/good/test1150.gif'
import test35 from './assets/imgs/good/test35.gif'
import test169 from './assets/imgs/good/test169.gif'
import test388 from './assets/imgs/good/test388.gif'
import test1011 from './assets/imgs/good/test1011.gif'
import test322 from './assets/imgs/good/test322.gif'
import test276 from './assets/imgs/good/test276.gif'
import test708 from './assets/imgs/good/test708.gif'
import test349 from './assets/imgs/good/test349.gif'
import ReactGA from "react-ga";

import test99 from './assets/imgs/bad/test99.gif'
import test481 from './assets/imgs/bad/test481.gif'
import test378 from './assets/imgs/bad/test378.gif'
import test86 from './assets/imgs/bad/test86.gif'
import test26 from './assets/imgs/bad/test26.gif'
import test266 from './assets/imgs/bad/test266.gif'
import test542 from './assets/imgs/bad/test542.gif'
import test453 from './assets/imgs/bad/test453.gif'
import test2 from './assets/imgs/bad/test2.gif'
import test66 from './assets/imgs/bad/test66.gif'
import test43 from './assets/imgs/bad/test43.gif'
import test846 from './assets/imgs/bad/test846.gif'
import test40 from './assets/imgs/bad/test40.gif'
import test1114 from './assets/imgs/bad/test1114.gif'
import test396 from './assets/imgs/bad/test396.gif'
// GIFs shown in the "Fail Cases" grid, in display order.
// Laid out five per row here to mirror the five-column grid they render into.
const images_bad = [
  test99, test481, test378, test86, test26,
  test266, test542, test453, test2, test66,
  test43, test846, test40, test1114, test396,
];

// GIFs shown in the "More Samples" grid, in display order (five per row).
const images = [
  test48, test333, test161, test841, test222,
  test770, test271, test866, test140, test1112,
  test374, test368, test56, test329, test431,
  test381, test22, test406, test669, test304,
  test366, test1150, test35, test169, test388,
  test1011, test322, test276, test708, test349,
];

// Analytics bootstrap: runs once at module load and reports the current
// path plus query string as a page view.
// NOTE(review): "G-" prefixed IDs look like GA4 measurement IDs, while the
// `react-ga` package is believed to target Universal Analytics ("UA-" IDs) —
// confirm that page views are actually being recorded.
ReactGA.initialize("G-2RT0REBNBV");
ReactGA.pageview(`${window.location.pathname}${window.location.search}`);

function App() {
  return (
    <Container>
      <Global
        styles={css`
          body {
            font-family: "Inter", sans-serif;
          }
        `}
      />

      <BlackContainer>
        <Section1>
          <Subtitle>Introducing</Subtitle>
          <Title>Mind-Video</Title>
          <PaperTitle
            href="https://arxiv.org/abs/2305.11675"
            target="_blank"
            rel="noopener noreferrer"
          >
            Cinematic Mindscapes: High-quality Video Reconstruction from Brain
            Activity
          </PaperTitle>
          <TwoColumn>
            <Left>
              <Description>
                We propose Mind-Video, which progressively learns spatiotemporal
                information from continuous fMRI data through masked brain
                modeling + multimodal contrastive learning + spatiotemporal
                attention + co-training with an augmented Stable Diffusion model
                that incorporates network temporal inflation. 
                <br />
                <br />
                This work has been accepted by NeurIPS 2023 for oral presentation.
                <br />
                <br />
                This is an extension of our previous fMRI-Image reconstruction
                work:{" "}
                <a
                  href="https://mind-vis.github.io/"
                  target="_blank"
                  rel="noopener noreferrer"
                >
                  MinD-Vis
                </a>
                (CVPR 2023)
              </Description>
              <LogoSection>
                <a
                  href="https://arxiv.org/abs/2305.11675"
                  target="_blank"
                  rel="noopener noreferrer"
                  style={{ marginRight: 15 }}
                >
                  <BigLogo src={Paper} alt="paper" />
                </a>
                <a
                  href="https://github.com/jqin4749/MindVideo"
                  target="_blank"
                  rel="noopener noreferrer"
                  style={{ marginLeft: 15 }}
                >
                  <BigLogo src={Github} alt="github" />
                </a>
              </LogoSection>
            </Left>
            <Right>
              <CarouselDescription>
                <LeftText>Ground truth Videos</LeftText>
                <RightText>Reconstructed Videos</RightText>
              </CarouselDescription>
              <Cards />
            </Right>
          </TwoColumn>
        </Section1>

        <Section2>
          <SmallColumn>
            <SmallText>May 15, 2023</SmallText>
          </SmallColumn>
          <SmallColumn>
            <SmallLink
              href="https://arxiv.org/abs/2305.11675"
              target="_blank"
              rel="noopener noreferrer"
            >
              Read Paper
            </SmallLink>
            <SmallLink
              href="https://github.com/jqin4749/MindVideo"
              target="_blank"
              rel="noopener noreferrer"
            >
              View Code
            </SmallLink>
            <SmallLink
              href="https://drive.google.com/drive/folders/1swYQD-69phlJUz4_HmdM0RFk_7okLK4v"
              target="_blank"
              rel="noopener noreferrer"
            >
              More Samples
            </SmallLink>
          </SmallColumn>
          <SmallColumn>
            <SmallLink
              href="https://scholar.google.com/citations?user=gCTUx9oAAAAJ&hl=en"
              target="_blank"
              rel="noopener noreferrer"
            >
              Zijiao Chen*
            </SmallLink>
            <SmallLink
              href="https://scholar.google.com/citations?user=jpUlRiYAAAAJ&hl=en"
              target="_blank"
              rel="noopener noreferrer"
            >
              Jiaxin Qing*
            </SmallLink>
            <SmallLink
              href="https://scholar.google.com.sg/citations?user=4Z1S3_oAAAAJ&hl=en"
              target="_blank"
              rel="noopener noreferrer"
            >
              Juan Helen Zhou<sup style={{ fontSize: '0.5em' }}>#</sup>
            </SmallLink>

          </SmallColumn>

          <LargeColumn>
            <SmallLink
              href="https://medicine.nus.edu.sg/csc/"
              target="_blank"
              rel="noopener noreferrer"
            >
              National University of Singapore, Center for Sleep and Cognition,
              Centre for Translational Magnetic Resonance Research
            </SmallLink>
            <SmallLink
              href="https://www.ie.cuhk.edu.hk/main/index.shtml"
              target="_blank"
              rel="noopener noreferrer"
            >
              The Chinese University of Hong Kong, Department of Information
              Engineering
            </SmallLink>
            <SmallLink
              href="https://medicine.nus.edu.sg/csc/"
              target="_blank"
              rel="noopener noreferrer"
            >
              National University of Singapore, Center for Sleep and Cognition,
              Centre for Translational Magnetic Resonance Research
            </SmallLink>
          </LargeColumn>
    
        </Section2>
        <SmallText>*Equal contribution &nbsp; <sup>#</sup>Corresponding author</SmallText> 
      </BlackContainer>

      <WhiteContainer>
        <ContentBody>
          <Header>Motivation & Research Gap</Header>
          <Diagram src={FirstFig} alt="first_fig" />
          <Caption>Brain decoding & video reconstruction</Caption>
          <Text>
            Reconstructing human vision from brain activities has been an
            appealing task that helps to understand our cognitive process. Even
            though recent research has seen great success in reconstructing
            static images from non-invasive brain recordings, work on recovering
            continuous visual experiences in the form of videos is limited.
          </Text>
          <Text>
            We identified three gaps between video reconstruction and our
            previous image reconstruction work:
          </Text>
          <ul>
            <ListItem>
              The hemodynamic response results in a time delay when processing
              dynamic neural activities. This time lag can make it challenging
              to accurately track real-time brain responses to stimuli.
            </ListItem>
            <ListItem>
              Our previous work, Mind-Vis, currently lacks both pixel-level and
              semantic-level guidance. This omission could impact the tool's
              effectiveness in generating accurate reconstructions.
            </ListItem>
            <ListItem>
              There is a need to enhance the generation consistency in our
              process while ensuring the dynamics of the scene within one fMRI
              frame are preserved. This balance is key to accurate and stable
              reconstruction over one fMRI time frame.
            </ListItem>
          </ul>

          <Header>Mind-Video Design</Header>
          <Diagram src={Flowchart} alt="flowchart" />
          <Text>
            In this work, we present Mind-Video, a two-module pipeline designed
            to bridge the gap between image and video brain decoding. These two
            modules are trained separately, then finetuned together.{" "}
          </Text>
          <Text>
            Our model progressively learns from brain signals, gaining a deeper
            understanding of the semantic space through multiple stages in the
            first module.{" "}
          </Text>
          <ul>
            <ListItem>
              Initially, we leverage large-scale unsupervised learning with
              masked brain modeling to learn general visual fMRI features. A
              spatiotemporal attention is also designed to process multiple fMRI
              in a sliding window.{" "}
            </ListItem>
            <ListItem>
              We then distill semantic-related features using the multimodality
              of the annotated dataset, training the fMRI encoder in the CLIP
              space with contrastive learning.{" "}
            </ListItem>
            <ListItem>
              In the second module, the learned features are fine-tuned through
              co-training with an augmented stable diffusion model, which is
              specifically tailored for video generation under fMRI guidance.
            </ListItem>
          </ul>

          <Header>Contribution</Header>
          <ul>
            <ListItem>
              We introduced a flexible and adaptable brain decoding pipeline
              decoupled into two modules: an fMRI encoder and an augmented
              stable diffusion model, trained separately and finetuned together.{" "}
            </ListItem>
            <ListItem>
              We designed a progressive learning scheme where the encoder learns
              brain features through multiple stages, including multimodal
              contrastive learning with spatiotemporal attention for windowed
              fMRI.{" "}
            </ListItem>
            <ListItem>
              We recovered high-quality videos with accurate semantics, e.g.,
              motions and scene dynamics. Results are evaluated with semantic
              and pixel metrics at video and frame levels. An accuracy of 85% is
              achieved in semantic metrics and 0.19 in SSIM, outperforming the
              previous state-of-the-art approaches by 45%.
            </ListItem>
            <ListItem>
              The attention analysis revealed mapping to the visual cortex and
              higher cognitive networks suggesting our model is biologically
              plausible and interpretable.
            </ListItem>
          </ul>

          <Header>Results - Compare with Benchmarks</Header>
          <Diagram src={Benchmark} alt="benchmark" />
          <Caption>
            We compare our results with the samples provided in multiple
            previous literature in fMRI-Video reconstruction task. We also
            compare our results with our fMRI-Image pipeline. Our method
            generates samples that are more semantically meaningful and match
            with the groundtruth.
          </Caption>

          <Header>Results - Ablation study</Header>
          <Diagram src={Ablation} alt="ablation" />

          <Header>Results - Learn from Brain</Header>
          <Diagram src={Interpretation} alt="interpretation" />
          <Text>
            Our attention analysis of the transformers decoding fMRI data has
            yielded three significant insights:{" "}
          </Text>
          <ul>
            <ListItem>
              Dominance of the Visual Cortex: Our analysis underscores the
              critical role of the visual cortex in processing visual
              spatiotemporal information. However, higher cognitive networks,
              such as the dorsal attention network and the default mode network,
              also contribute to the visual perception process.{" "}
            </ListItem>
            <ListItem>
              Layer-Dependent Hierarchy: The layers of our fMRI encoder operate
              in a hierarchical fashion. Initial layers focus on structural
              information, while deeper layers shift toward learning more
              abstract visual features, indicating a gradient of complexity in
              feature extraction.{" "}
            </ListItem>
            <ListItem>
              Progressive Semantic Learning: Our fMRI encoder evolves through
              each learning stage, showing increased attention to higher
              cognitive networks and decreased focus on the visual cortex over
              time. This progression suggests the encoder improves its ability
              to assimilate more nuanced, semantic information throughout its
              training stages.
            </ListItem>
          </ul>

          <Header>More Samples</Header>

          <HeaderGrid>
          {Array(5).fill().map((_, i) => (
            <HeaderItem>Groundtruth / Generated</HeaderItem>
          ))}
        </HeaderGrid>

          <ImageGrid>
            {images.map((image, index) => (
              <Image src={image} />
            ))}
          </ImageGrid>
          

          <Header>Fail Cases</Header>
          <Text>
          In short, the failure cases can be attributed by two factors:       
          </Text>
          <ul>
            <ListItem>
            Lack of pixel-level controllability. Due to the probabilistic nature of the diffusion model and the current 
            conditioning method, the generation process lacks strong control from the fMRI latent to generate strictly 
            matching low-level features, such as shapes, color, and geometric information. We believe this would be an 
            important perspective for future research on this task.{" "}
            </ListItem>
            <ListItem>
            Uncontrollable factors during the scan. Mind wandering and imagination of the subject are usually inevitable 
            during the scan. It has been shown that imagination is involved and can be decoded to some extent from the visual 
            cortex, which can lead to mismatching between the ground truth and the generation results.{" "}
            </ListItem>
          </ul>
          
          <HeaderGrid>
          {Array(5).fill().map((_, i) => (
            <HeaderItem>Groundtruth / Generated</HeaderItem>
          ))}
          </HeaderGrid>

          <ImageGrid>
            {images_bad.map((image, index) => (
              <Image src={image} />
            ))}
          </ImageGrid>


          <Header>Media Coverage</Header>  

          <VideoContainer>
            <ResponsiveIframe
            src="https://www.youtube.com/embed/TYbRNQ3LxwU" 
            title="YouTube video player" 
            frameborder="0" 
            allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" 
            allowfullscreen
          />
          </VideoContainer>
  
      
          <VideoContainer>
            <ResponsiveIframe
            src="https://www.youtube.com/embed/QvfsJSaHCuA" 
            title="YouTube video player" 
            frameborder="0" 
            allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" 
            allowfullscreen
            />
            </VideoContainer>
  



          <Header>Mind-X</Header>          
          <Text>
          Mind-X is a research interest group that aims to explore multimodal brain decoding with large models. It was first initiated by 
          &nbsp;<a href="https://scholar.google.com/citations?user=gCTUx9oAAAAJ&hl=en">Zijiao Chen (NUS CSC)</a>, &nbsp;
          <a href="https://jqin4749.github.io/">Jiaxin Qing (CUHK IE)</a>, &nbsp;
          <a href="https://tiangexiang.github.io/">Tiange Xiang (Stanford AI Lab)</a> and &nbsp;
          <a href="https://neuroimaginglab.org/members.html">Prof. Juan Helen Zhou (NUS CSC)</a> in 2022.
          We aim to exploit the power of recent advances in large models and AGI to advance the field of brain decoding. 
          Our ultimate goal is to develop general-purpose brain decoding models that empowers various applications in
          brain-computer interface, neuroimaging, and neuroscience.
          </Text>        

          <Header>Acknowledgments</Header>
          <Text>
            Big shoutout to our friend (
            <a
              href="https://tiangexiang.github.io/"
              target="_blank"
              rel="noopener noreferrer"
            >
              Tiange Xiang
            </a>
            ) for all the stimulating chats and feedback in this work, and to (
            <a
              href="https://twitter.com/jonxuxu"
              target="_blank"
              rel="noopener noreferrer"
            >
              Jonathan Xu
            </a>
            ) for crafting the website for our project. 
            And thanks all members in 
            &nbsp;<a href="https://neuroimaginglab.org/index.html">Multimodal Neuroimaging in Neuropsychiatric Disorders Laboratory</a>
            &nbsp; for all the support and help. 🙌{" "}
          </Text>
          <Text>
            Huge thanks to the Human Connectome Project (
            <a
              href="https://www.humanconnectome.org/study/hcp-young-adult/data-releases"
              target="_blank"
              rel="noopener noreferrer"
            >
              HCP
            </a>
            ) for the large-scale fMRI data and to Prof. Zhongming Liu and Dr.
            Haiguang Wen for the awesome (
            <a
              href="https://academic.oup.com/cercor/article/28/12/4136/4560155"
              target="_blank"
              rel="noopener noreferrer"
            >
              fMRI-Video dataset
            </a>
            ).
          </Text>
          <Text>
            Can't forget the (
            <a
              href="https://stablediffusionweb.com/"
              target="_blank"
              rel="noopener noreferrer"
            >
              Stable Diffusion team
            </a>
            ) for sharing their super impressive large model with everyone - you
            guys rock! And kudos to the (
            <a
              href="https://tuneavideo.github.io/"
              target="_blank"
              rel="noopener noreferrer"
            >
              Tune-A-Video team
            </a>
            ), you inspired us with your text-to-video pipeline. 🚀👏
          </Text>
        </ContentBody>
      </WhiteContainer>

      <SmallBlackContainer>
        {/* <FooterText>Site made with 🍵 </FooterText> */}
      </SmallBlackContainer>
    </Container>
  );
}

export default App;

// Outermost page wrapper: full width on a white background.
const Container = styled.div`
  width: 100%;
  background-color: white;
`;

// Black band with white text that holds the hero and metadata sections.
// Padding shrinks from 50px to 20px under the `mq[2]` media query (`mq` comes
// from ./utilities/styles; the breakpoint values are defined there).
const BlackContainer = styled.div`
  background-color: black;
  color: white;
  padding: 50px;
  ${mq[2]} {
    padding: 20px;
  }
`;

// White band for the article body; a flex row that horizontally centers its
// child.
const WhiteContainer = styled.div`
  background-color: white;
  color: black;
  padding: 20px;
  display: flex;
  justify-content: center;
`;

// Caps the article column at 800px wide.
const ContentBody = styled.div`
  max-width: 800px;
`;

// Aspect-ratio box for an embedded video: it has no intrinsic height, so the
// percentage top padding (relative to its width) reserves a 16:9 area. It is
// `position: relative` so an absolutely positioned child can fill that area.
const VideoContainer = styled.div`
  position: relative;
  overflow: hidden;
  width: 100%;
  padding-top: 56.25%; /* 16:9 Aspect Ratio */
`;

// Iframe pinned to all four edges of its nearest positioned ancestor and sized
// to 100% of it; used inside VideoContainer.
const ResponsiveIframe = styled.iframe`
  position: absolute;
  top: 0;
  left: 0;
  bottom: 0;
  right: 0;
  width: 100%;
  height: 100%;
`;

// Five equal-width columns with a 1px gap; holds the sample GIFs.
const ImageGrid = styled.div`
  display: grid;
  grid-template-columns: repeat(5, 1fr);
  grid-gap: 1px;
`;

// Grid cell image: fills its column width and keeps its aspect ratio.
const Image = styled.img`
  width: 100%;
  height: auto;
`;


// Same five-column template as ImageGrid, so the header labels line up with
// the image columns below them.
const HeaderGrid = styled.div`
  display: grid;
  grid-template-columns: repeat(5, 1fr);
  grid-gap: 1px;
`;

// Centered column label; the font size is 1% of the viewport width, so it
// scales with the window.
const HeaderItem = styled.div`
  font-size: 1vw;
  text-align: center;
`;

// Right-aligned flex row for logos.
// NOTE(review): not referenced by App in this file — possibly dead code.
const TopLogos = styled.div`
  display: flex;
  flex-direction: row;
  justify-content: flex-end;
`;

// Centered flex row holding the paper and GitHub logo links, 50px below the
// description.
const LogoSection = styled.div`
  display: flex;
  flex-direction: row;
  justify-content: center;
  margin-top: 50px;
`;

// Small 40px logo with a left margin.
// NOTE(review): not referenced by App in this file — possibly dead code.
const Logo = styled.img`
  width: 40px;
  margin-left: 20px;
`;
// 150px-wide logo image used for the paper and GitHub links.
const BigLogo = styled.img`
  width: 150px;
`;
// Padded wrapper for the hero section.
const Section1 = styled.div`
  padding: 20px;
`;

// "Introducing" line above the title; font size drops from 20px to 16px under
// the `mq[1]` media query.
const Subtitle = styled.h2`
  font-size: 20px;
  font-weight: 400;
  ${mq[1]} {
    font-size: 16px;
  }
`;

// Main page title; font size drops from 75px to 50px under `mq[1]`.
const Title = styled.h1`
  font-size: 75px;
  margin: 0px;
  font-weight: 400;
  ${mq[1]} {
    font-size: 50px;
  }
`;

// Base anchor style for links on the black background; extended by PaperTitle
// and SmallLink.
const WhiteLink = styled.a`
  color: white;
`;

// Paper title link under the main title; font size drops from 25px to 16px
// under `mq[1]`.
const PaperTitle = styled(WhiteLink)`
  font-size: 25px;
  ${mq[1]} {
    font-size: 16px;
  }
`;

// Hero layout: Left and Right sit side by side in a row, and stack in a column
// under `mq[1]`. The top margin steps down 50px -> 20px (`mq[4]`) -> 0
// (`mq[1]`).
const TwoColumn = styled.div`
  display: flex;
  flex-direction: row;
  margin-top: 50px;
  ${mq[4]} {
    margin-top: 20px;
  }
  ${mq[1]} {
    flex-direction: column;
    margin-top: 0px;
  }
`;
// Centered row holding the two labels above the carousel.
const CarouselDescription = styled.div`
  display: flex;
  flex-direction: row;
  justify-content: center;
`;

// Left-hand carousel label, right-aligned toward the center.
const LeftText = styled.p`
  text-align: right;
  padding-right: 15px;
`;

// Right-hand carousel label, left-aligned toward the center.
const RightText = styled.p`
  text-align: left;
  padding-left: 15px;
`;

// Left half of the hero (description and logo links), laid out as a centered
// column. Becomes full width with extra top margin under `mq[1]` / `mq[0]`.
const Left = styled.div`
  width: 50%;
  display: flex;
  align-items: center;
  flex-direction: column;

  ${mq[1]} {
    width: 100%;
    margin-top: 70px;
  }
  ${mq[0]} {
    margin-top: 40px;
  }
`;
// Project blurb with a white rule along its left edge; font size steps down
// 22px -> 20px (`mq[3]`) -> 16px (`mq[2]`).
const Description = styled.div`
  border-left: 3px solid white;
  padding-left: 20px;
  padding-top: 25px;
  padding-bottom: 25px;
  font-size: 22px;
  ${mq[3]} {
    font-size: 20px;
  }
  ${mq[2]} {
    font-size: 16px;
  }
`;

// Right half of the hero (carousel labels and Cards); becomes full width under
// `mq[1]` and gains a top margin under `mq[0]`.
const Right = styled.div`
  width: 50%;
  ${mq[1]} {
    width: 100%;
  }
  ${mq[0]} {
    margin-top: 20px;
  }
`;

// Metadata row under the hero, separated from it by a white top border. It is
// a wrapping flex row that becomes a column under `mq[1]`.
const Section2 = styled.div`
  display: flex;
  flex-direction: row;
  margin-top: 20px;
  flex-wrap: wrap;
  border-top: 1px solid white;
  padding-bottom: 50px;
  ${mq[1]} {
    flex-direction: column;
    padding-bottom: 30px;
  }
`;

// Fixed 150px-wide column (date, resource links, authors).
const SmallColumn = styled.div`
  width: 150px;
  ${mq[1]} {
    margin-top: 15px;
  }
`;

// Column that grows to take the remaining row width (affiliations); its
// flex-basis switches from 0 to auto under `mq[2]`.
const LargeColumn = styled.div`
  flex-grow: 1;
  flex-basis: 0;
  ${mq[2]} {
    flex-basis: auto;
    margin-top: 15px;
  }
`;

// 16px paragraph; vertical margins are removed under `mq[1]`. Also the base
// for Caption.
const SmallText = styled.p`
  font-size: 16px;
  ${mq[1]} {
    margin-top: 0px;
    margin-bottom: 0px;
  }
`;

// White 16px link rendered as a block, so each link sits on its own line.
const SmallLink = styled(WhiteLink)`
  font-size: 16px;
  display: block;
  margin-top: 15px;
  ${mq[1]} {
    margin-top: 8px;
  }
`;




// Section heading inside the article body.
const Header = styled.h3`
  font-size: 30px;
  font-weight: bold;
  margin-top: 60px;
  margin-bottom: 15px;
`;

// Body paragraph.
const Text = styled.p`
  font-size: 18px;
  line-height: 1.5;
`;

// Bullet item using the same type size and line height as Text.
const ListItem = styled.li`
  font-size: 18px;
  line-height: 1.5;
  margin-bottom: 15px;
`;

// Figure image stretched to the full width of the article column.
const Diagram = styled.img`
  width: 100%;
`;

// Figure caption: SmallText, centered and italic.
const Caption = styled(SmallText)`
  text-align: center;
  font-style: italic;
`;

// Thin black footer band that centers its content; App currently renders it
// with only a commented-out child.
const SmallBlackContainer = styled.div`
  background-color: black;
  color: white;
  padding: 10px;
  display: flex;
  justify-content: center;
`;

// const FooterText = styled(SmallText)`
//   margin:0px;
//   `
