---
# ------------------------
# Deploy the general stuff
# ------------------------
- hosts: all
  sudo: yes
  vars:
    ssh_public_key_content: "{{ lookup('file', ssh_public_key) }}"
  vars_files:
    - vars/main.yml
  pre_tasks:
    - name: Update APT cache
      apt: update_cache=yes
  tasks:
    # General tasks
    - name: install java
      apt: name=openjdk-7-jre state=present update_cache=yes

    - name: disable IPv6 (net.ipv6.conf.all.disable_ipv6)
      sysctl: name=net.ipv6.conf.all.disable_ipv6 value=1 state=present

    - name: disable IPv6 (net.ipv6.conf.default.disable_ipv6)
      sysctl: name=net.ipv6.conf.default.disable_ipv6 value=1 state=present

    - name: disable IPv6 (net.ipv6.conf.lo.disable_ipv6)
      sysctl: name=net.ipv6.conf.lo.disable_ipv6 value=1 state=present

    - name: distribute hosts file
      template: src=templates/hosts.j2 dest=/etc/hosts

    # Install hadoop
    - name: create hadoop group
      group: name=hadoop state=present

    - name: create hadoop user
      user: name={{ hadoop_user }} comment="Hadoop user" group=hadoop shell=/bin/bash

    - name: Get hadoop
      get_url: url=http://apache.mirrors.spacedump.net/hadoop/common/stable/hadoop-2.6.0.tar.gz dest=/usr/local/ sha256sum=7a2ef6e7f468afcae95d0f7214816033c7e5c7982454061ccb117896d58d279f

    - name: unzip hadoop
      unarchive: copy=no src=/usr/local/hadoop-2.6.0.tar.gz dest=/usr/local/ owner={{ hadoop_user }} group=hadoop

    - name: set user and privileges on hadoop
      file: path=/usr/local/hadoop-2.6.0 owner={{ hadoop_user }} group=hadoop recurse=yes

    - name: distribute hadoop conf
      template: src=templates/hadoop-env.sh.j2 dest=/usr/local/hadoop-2.6.0/etc/hadoop/hadoop-env.sh

    - name: distribute hadoop core-site.xml
      template: src=templates/core-site.xml.j2 dest=/usr/local/hadoop-2.6.0/etc/hadoop/core-site.xml

    - name: distribute hadoop hdfs-site.xml
      template: src=templates/hdfs-site.xml.j2 dest=/usr/local/hadoop-2.6.0/etc/hadoop/hdfs-site.xml

    # Set up ssh for the normal user and the hadoop user
    - name: deploy authorized keys
      authorized_key: user={{ item }} key="{{ ssh_public_key_content }}"
      with_items:
        - "{{ user }}"
        - "{{ hadoop_user }}"

    - name: deploy ssh-keys
      copy: src={{ ssh_keys_to_use }} dest=/home/{{ item }}/.ssh/
      with_items:
        - "{{ user }}"
        - "{{ hadoop_user }}"

    - name: distribute ssh config
      template: src=templates/config.j2 dest=/home/{{ item }}/.ssh/config
      with_items:
        - "{{ user }}"
        - "{{ hadoop_user }}"

    # Spark stuff
    - name: download spark
      get_url: url=http://d3kbcqa49mib13.cloudfront.net/spark-1.2.1-bin-hadoop2.4.tgz dest=/opt/ sha256sum=8e618cf67b3090acf87119a96e5e2e20e51f6266c44468844c185122b492b454

    - name: unzip spark
      unarchive: copy=no src=/opt/spark-1.2.1-bin-hadoop2.4.tgz dest=/opt

    - name: deploy slaves configuration
      template: src=templates/slaves.j2 dest=/opt/spark-1.2.1-bin-hadoop2.4/conf/slaves

    - name: deploy spark-env.sh configuration
      template: src=templates/spark-env.sh.j2 dest=/opt/spark-1.2.1-bin-hadoop2.4/conf/spark-env.sh
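# For reference, a minimal sketch of the variables the plays in this file
# expect from vars/main.yml. The variable names are the ones used above;
# the values are placeholder examples only, not the real ones:
#
#   ---
#   user: ubuntu                      # the regular login user on the nodes
#   hadoop_user: hduser               # the user that owns and runs hadoop
#   ssh_public_key: files/id_rsa.pub  # public key added to both users' authorized_keys
#   ssh_keys_to_use: files/id_rsa     # key material copied into each user's ~/.ssh/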
# --------------------------------------------------
# Start the hadoop master
# --------------------------------------------------
- hosts: spark_masters
  vars_files:
    - vars/main.yml
  tasks:
    - name: distribute hadoop masters file
      template: src=templates/masters.j2 dest=/usr/local/hadoop-2.6.0/etc/hadoop/masters

    - name: distribute hadoop slaves file
      template: src=templates/slaves.j2 dest=/usr/local/hadoop-2.6.0/etc/hadoop/slaves

    - name: format hdfs (unless it's already been done)
      command: /usr/local/hadoop-2.6.0/bin/hadoop namenode -format creates=/usr/local/hadoop-2.6.0/ansible-format-hdfs

    - name: touch hdfs formatted marker file (indicates that hdfs has been formatted)
      file: state=touch path=/usr/local/hadoop-2.6.0/ansible-format-hdfs

    - name: add hadoop and spark binaries to path for hadoop user
      lineinfile: dest=/home/{{ hadoop_user }}/.bashrc state=present insertafter=EOF line="export PATH=$PATH:/usr/local/hadoop-2.6.0/bin/:/opt/spark-1.2.1-bin-hadoop2.4/bin/" create=true
      tags:
        - current

    - name: stop hadoop (if running)
      command: /usr/local/hadoop-2.6.0/sbin/stop-dfs.sh

    - name: start hadoop
      command: /usr/local/hadoop-2.6.0/sbin/start-dfs.sh

# --------------------------------------------------
# Kick off spark (making the master start the slaves)
# and configure spark-master as hadoop master
# --------------------------------------------------
- hosts: spark_masters
  tasks:
    - name: stop spark master (if running)
      command: /opt/spark-1.2.1-bin-hadoop2.4/sbin/stop-master.sh

    - name: start spark master
      shell: SPARK_MASTER_IP="{{ ansible_hostname }}" /opt/spark-1.2.1-bin-hadoop2.4/sbin/start-master.sh

    - name: stop the slaves (if running)
      shell: /opt/spark-1.2.1-bin-hadoop2.4/sbin/stop-slaves.sh

    - name: start the slaves
      shell: /opt/spark-1.2.1-bin-hadoop2.4/sbin/start-slaves.sh
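# Example usage, assuming this playbook is saved as site.yml and run against
# an inventory that defines the spark_masters group used above. The host
# names are placeholders, and the spark_slaves group is only an assumption
# (the actual slave hosts are enumerated via templates/slaves.j2):
#
#   [spark_masters]
#   master-1
#
#   [spark_slaves]
#   slave-1
#   slave-2
#
#   $ ansible-playbook -i inventory site.yml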