Bulk Loading of Isozyme data using postgreSQL

From ICISWiki

Jump to: navigation, search

Contents

SET UP THE INSTALLATION

  • Start with a local GMS+DMS template. A script for building a postgreSQL icislocal database template is at http://cropforge.irri.org/frs/?group_id=5 under the Schema sublevel.
  • Identify the installation in the INSTLN table and local users in the USERS table if necessary
  • Import the GEMS tables. Here is the script for building the GEMS database template.
  • Export the ISO table from MS Access to PostgreSQL. ISO table can be exported in two ways: Using an ODBC connection and exporting the table in ASCII format. It can also be imported using a 3rd party software(downloadable)that can convert MS Access database to PostgreSQL.
    • USING ODBC connection
      • Download and install an ODBC driver
      • Create a System Data Source to the PostgreSQL server
      • From Microsoft Access, export the ISO table into the newly created SDN
    • ASCII format
      • From Microsoft Access, export the ISO table into text format (can be tab delimited or comma delimited or any user-defined delimiter)
      • From postgreSQL, create a table with the same definition as the ISO table
CREATE TABLE "ISO"
(order_adj float8 default 0,
 order_accno float8 default 0,
 order_merge float8 default 0,
 supplier varchar(254),
 date varchar(254),
 "ORIGIN" int4,
 ori_country_given varchar(254),
 ori_country varchar(254),
 prov_given varchar(254),
 prov varchar(254),
 tradtype varchar(254),
 "GID" int4 default 0,
 accno_given float8 default 0,
 accno_guess float8 default 0,
 accno_all float8 default 0,
 var_name_given varchar(254),
 varname varchar(254),
 prev_name varchar(254),
 prev_value varchar(254),
 gm_source varchar(254),
 accno_other float8 default 0,
 remarks varchar(254),
 test_no float8 default 0,
 test_no_ed float8 default 0,
 adh1 varchar(254),
 adh1_0 float8 default 0,
 adh1_1 float8 default 0,
 adh1_2 float8 default 0,
 adh1_3 float8 default 0, 
 amp1 varchar(254),
 amp1_0 float8 default 0,
 amp1_1 float8 default 0,
 amp1_2 float8 default 0,
 amp1_3 float8 default 0,
 amp1_4 float8 default 0,
 amp1_5 float8 default 0,
 amp1_6 float8 default 0,
 amp2 varchar(254),
 amp2_0 float8 default 0,
 amp2_1 float8 default 0,
 amp2_2 float8 default 0,
 amp2_3 float8 default 0,
 amp2_4 float8 default 0,
 amp3 varchar(254),
 amp3_0 float8 default 0,
 amp3_1 float8 default 0,
 amp3_2 float8 default 0,
 amp3_3 float8 default 0,
 amp3_4 float8 default 0,
 amp3_5 float8 default 0,
 amp3_6 float8 default 0,
 amp4 varchar(254),
 amp4_0 float8 default 0,
 amp4_1 float8 default 0,
 amp4_2 float8 default 0,
 amp4_3 float8 default 0,
 amp4_4 float8 default 0,
 amp4_5 float8 default 0,
 cat1 varchar(254),
 cat1_0 float8 default 0,
 cat1_1 float8 default 0,
 cat1_2 float8 default 0,
 cat1_3 float8 default 0,
 est1 varchar(254),
 est1_0 float8 default 0,
 est1_1 float8 default 0,
 est1_2 float8 default 0,
 est2 varchar(254),
 est2_0 float8 default 0,
 est2_1 float8 default 0,
 est2_2 float8 default 0,
 est5 varchar(254),
 est5_0 float8 default 0,
 est5_1 float8 default 0,
 est5_2 float8 default 0,
 est9 varchar(254),
 est9_0 float8 default 0,
 est9_1 float8 default 0,
 est9_2 float8 default 0,
 enp1 varchar(254),
 enp1_0 float8 default 0,
 enp1_1 float8 default 0,
 enp1_2 float8 default 0,
 got1 varchar(254),
 got1_1 float8 default 0,
 got1_2 float8 default 0,
 got1_3 float8 default 0,
 got3 varchar(254),
 got3_1 float8 default 0,
 got3_2 float8 default 0,
 got3_3 float8 default 0,
 icd1 varchar(254),
 icd1_1 float8 default 0,
 icd1_2 float8 default 0,
 icd1_3 float8 default 0,
 pgd1 varchar(254),
 pgd1_1 float8 default 0,
 pgd1_2 float8 default 0,
 pgd1_3 float8 default 0,
 pgd2 varchar(254),
 pgd2_1 float8 default 0,
 pgd2_2 float8 default 0,
 pgi1 varchar(254),
 pgi1_0 float8 default 0,
 pgi1_1 float8 default 0,
 pgi1_2 float8 default 0,
 pgi1_3 float8 default 0,
 pgi1_4 float8 default 0,
 pgi2 varchar(254),
 pgi2_0 float8 default 0,
 pgi2_1 float8 default 0,
 pgi2_2 float8 default 0,
 pgi2_3 float8 default 0,
 pgi2_4 float8 default 0,
 pgi2_5 float8 default 0,
 sdh1 varchar(254),
 sdh1_1 float8 default 0,
 sdh1_2 float8 default 0,
 sdh1_3 float8 default 0,
 sdh1_4 float8 default 0,
 sdh1_5 float8 default 0,
 acp1 varchar(254),
 acp1_0 float8 default 0,
 acp1_1 float8 default 0,
 acp1_2 float8 default 0,
 acp1_3 float8 default 0,
 mal1 varchar(254),
 mal1_1 float8 default 0,
 mal1_2 float8 default 0,
 pox5 varchar(254),
 pox5_1 float8 default 0,
 pox5_2 float8 default 0,
 vg_bc varchar(254),
 vg varchar(254),
 vg_algo varchar(254))
  • insert the data from the ASCII file to the ISO table in PostgreSQL
copy "ISO" from <filename of the ASCII file>

DATA PREPARATION FOR LOADING THE ALLELE EFFECT

create the isoser_allele table

  • create the table with the following fields:
FIELD FIELD TYPE
ORDERNOserial
OUNITIDint
SAMPLENOint
GID int
SOURCE varchar (255)
ORIGINint
NAME varchar (255)
ALLELE varchar (50)
MVID int
PDID int
FREQ double precision
SNLEVEL int
SNVALUE double precision
MVLEVEL int
MVVALUE double precision
CREATE TABLE isoser_allele (orderno serial,ounitid int ,sampleno double precision,
gid int,source varchar(255),origin int ,name varchar(255),allele varchar(50),mvid int,
pdid int ,freq double precision,snlevel int,snvalue double precision,mvlevel int ,
mvvalue double precision) ;

Populate the ISOSER_ALLELE table

395528 records will be inserted into the isoser_allele table in approx. 5 mins.

BEGIN;
INSERT INTO isoser_allele ( sampleno,gid, name,origin, source, allele, freq )
SELECT temp.order_merge, temp."GID", temp.NAME, temp."ORIGIN", temp.source, 
temp.ALLELE, temp.freq
FROM (
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'acp1_0' 
AS allele, acp1_0 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'acp1_1' 
AS allele, acp1_1 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'acp1_2' 
AS allele, acp1_2 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'acp1_3' 
AS allele, acp1_3 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'adh1_0' 
AS allele, adh1_0 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'adh1_1' 
AS allele, adh1_1 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'adh1_2' 
AS allele, adh1_2 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'adh1_3' 
AS allele, adh1_3 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'amp1_0' 
AS allele, amp1_0 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'amp1_1' 
AS allele, amp1_1 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'amp1_2' 
AS allele, amp1_2 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'amp1_3' 
AS allele, amp1_3 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'amp1_4' 
AS allele, amp1_4 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'amp1_5' 
AS allele, amp1_5 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'amp1_6'
AS allele, amp1_6 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'amp2_0'
AS allele, amp2_0 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'amp2_1'
AS allele, amp2_1 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'amp2_2'
AS allele, amp2_2 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'amp2_3'
AS allele, amp2_3 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'amp2_4'
AS allele, amp2_4 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'amp3_0'
AS allele, amp3_0 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'amp3_1'
AS allele, amp3_1 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'amp3_2'
AS allele, amp3_2 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'amp3_3'
AS allele, amp3_3 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'amp3_4'
AS allele, amp3_4 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'amp3_5'
AS allele, amp3_5 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'amp3_6'
AS allele, amp3_6 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'amp4_0'
AS allele, amp4_0 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'amp4_1'
AS allele, amp4_1 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'amp4_2'
AS allele, amp4_2 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'amp4_3'
AS allele, amp4_3 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'amp4_4'
AS allele, amp4_4 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'amp4_5'
AS allele, amp4_5 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'cat1_0'
AS allele, cat1_0 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'cat1_1'
AS allele, cat1_1 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'cat1_2'
AS allele, cat1_2 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'cat1_3'
AS allele, cat1_3 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'enp1_0'
AS allele, enp1_0 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'enp1_1'
AS allele, enp1_1 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'enp1_2'
AS allele, enp1_2 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'est1_0'
AS allele, est1_0 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'est1_1'
AS allele, est1_1 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'est1_2'
AS allele, est1_2 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'est2_0'
AS allele, est2_0 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'est2_1'
AS allele, est2_1 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'est2_2'
AS allele, est2_2 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'est5_0'
AS allele, est5_0 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'est5_1'
AS allele, est5_1 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'est5_2'
AS allele, est5_2 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'est9_0'
AS allele, est9_0 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'est9_1'
AS allele, est9_1 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'est9_2'
AS allele, est9_2 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'got1_1'
AS allele, got1_1 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'got1_2'
AS allele, got1_2 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'got1_3'
AS allele, got1_3 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'got3_1'
AS allele, got3_1 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'got3_2'
AS allele, got3_2 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'got3_3'
AS allele, got3_3 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'icd1_1'
AS allele, icd1_1 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'icd1_2'
AS allele, icd1_2 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'icd1_3'
AS allele, icd1_3 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'mal1_1'
AS allele, mal1_1 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'mal1_2'
AS allele, mal1_2 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'pgd1_1'
AS allele, pgd1_1 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'pgd1_2'
AS allele, pgd1_2 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'pgd1_3'
AS allele, pgd1_3 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'pgd2_1'
AS allele, pgd2_1 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'pgd2_2'
AS allele, pgd2_2 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'pgi1_0'
AS allele, pgi1_0 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'pgi1_1'
AS allele, pgi1_1 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'pgi1_2'
AS allele, pgi1_2 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'pgi1_3'
AS allele, pgi1_3 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'pgi1_4'
AS allele, pgi1_4 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'pgi2_0'
AS allele, pgi2_0 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'pgi2_1'
AS allele, pgi2_1 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'pgi2_2'
AS allele, pgi2_2 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'pgi2_3'
AS allele, pgi2_3 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'pgi2_4'
AS allele, pgi2_4 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'pgi2_5'
AS allele, pgi2_5 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'pox5_1'
AS allele, pox5_1 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'pox5_2'
AS allele, pox5_2 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'sdh1_1'
AS allele, sdh1_1 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'sdh1_2'
AS allele, sdh1_2 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'sdh1_3'
AS allele, sdh1_3 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'sdh1_4'
AS allele, sdh1_4 AS freq FROM "ISO" UNION ALL
SELECT order_merge,"GID",varname as name,"ORIGIN",supplier as source, 'sdh1_5'
AS allele, sdh1_5 AS freq FROM "ISO" ) AS temp
WHERE (((temp."ORIGIN") Is Not Null) AND ((temp.freq)=1));
COMMIT;

Make sure all the distinct loci are identified in the gems_locname

  • Create iso_mvnames table
CREATE TABLE iso_mvnames (mvid serial,levelno int ,mvname varchar (50)); 
  • Append distinct alleles in the isoser_allele table. Inserts 85 distinct alleles to iso_mvnames table.
BEGIN;
INSERT INTO iso_mvnames ( mvname )
SELECT DISTINCT allele AS expr1
FROM isoser_allele ORDER BY allele;
COMMIT;
  • Create ISO_LOCUS table. This table contains the distinct locus.
CREATE TABLE iso_locus ( locusid serial, lname varchar (255),ltype varchar (50));
  • Append distinct locus to the ISO_LOCUS table. Appends 22 distinct locus to the table
INSERT INTO iso_locus ( lname, ltype) 
SELECT DISTINCT substr(allele,1,instr(allele,'_')-1) AS Expr1, 'allele' AS Expr2 
FROM isoser_allele;
  • Append the locus in ISO_LOCUS table in gems_locus table. Appends distinct locus to the gems_table. It first gets the maximum value of locusid in the gems_locus table (offset). The new record's locusid value is equal to the iso_locus.locusid + the offset value.
BEGIN;
INSERT INTO gems_locname ( locusid, lnval ) SELECT newlocus, lname
FROM(
     SELECT (SELECT (case  when max(gems_locname.locusid) is  null 
                     then 0 
                     else max(gems_locname.locusid)
                     end )as maxlocus
FROM gems_locname as Y)+ locusid AS newlocus, lname FROM iso_locus) AS x;
COMMIT;

Complete other gems information as required.

Populate the tables gems_mvnames, gems_mv, gems_pd and gems_locus tables.

  • Append MVNAMES from ISO_MVNAMES to gems_mvnames. 85 new allele names will be appended to the gems_mvnames table.
BEGIN;
INSERT INTO gems_mvnames ( mvid, mvnval ) 
SELECT newmv, mvname
FROM(
     SELECT (SELECT (case  when max(gems_mvnames.mvid) is  null 
                     then 0 
                     else max(gems_mvnames.mvid)
                     end )as maxvid
FROM gems_mvnames as Y)+ mvid AS newmv, mvname FROM iso_mvnames) AS x;
COMMIT;
  • Append 'unobserved' allele names to gems_mvnames table. Unobserved alleles can also be encoded directly into the gems_mvnames table. In this case, the allele pgi1_3 is considered as 'unobserved' as there is not enought information in the ISO table about this allele.
INSERT INTO gems_mvnames ( mvid, mvnval ) VALUES (86, 'pgi1_3');
  • Append pdid and pdtype to gems_pd tables
INSERT INTO gems_pd ( pdid, pdtype ) SELECT locusid, 'isozyme' FROM gems_locname;
  • Append MVIDs to gems_mv table. This will insert 86 records.
BEGIN;
INSERT INTO gems_mv ( mvid, mvtype, pdid ) 
SELECT gems_mvnames.mvid, iso_locus.ltype, iso_locus.locusid
FROM iso_locus, gems_mvnames
WHERE iso_locus.lname =substr(gems_mvnames.mvnval,1, strpos(gems_mvnames.mvnval,'_')-1);
COMMIT;
  • Append locus to gems_locus table
BEGIN;
INSERT INTO gems_locus ( mvid, locusid, pdid )
SELECT gems_mvnames.mvid, gems_locname.locusid, gems_mv.pdid
FROM gems_locname, gems_mv,gems_mvnames 
WHERE ((gems_mv.mvid = gems_mvnames.mvid) AND (gems_locname.locusid =gems_mv.pdid)AND
(gems_locname.lnval=substr(gems_mvnames.mvnval,1, strpos(gems_mvnames.mvnval,'_')-1)));	
COMMIT;

Update MVID and PDID in the ISOSER_ALLELE table

  • Update MVID field in the ISOSER_ALLELE table
BEGIN;
UPDATE isoser_allele
SET mvid = gems_mvnames.mvid from gems_mvnames
WHERE gems_mvnames.mvnval=isoser_allele.allele;
COMMIT;
  • Update the PDID field in the ISOSER_ALLELE table
BEGIN;
UPDATE  isoser_allele
SET pdid = gems_mv.pdid from gems_mv
WHERE gems_mv.mvid = isoser_allele.mvid ;
COMMIT;

Look for putative heterozygotes and change its frequency to 0.5.

BEGIN;
UPDATE isoser_allele 
SET freq = 0.5 from (SELECT b.sampleno AS sampleno, b.pdid, Count(b.sampleno) AS sncount
                     FROM isoser_allele AS b GROUP BY b.sampleno, b.pdid 
                     HAVING (((Count(b.sampleno))>1)))as c 
WHERE c.sampleno = isoser_allele.sampleno AND c.sncount =2 and c.pdid = isoser_allele.pdid;
COMMIT;

SET UP THE STUDY

It is assumed that the tables STUDY, FACTOR,LEVEL_C, EFFECT and OINDEX are empty.

  • Set up the study in the study table
STUDYID SNAME PMKEY TITLE OBJECTIV INVESTID STYPE SDATE EDATE
-1 ISOZYME04 0 ISOZYME04 - 0 E 0 0
INSERT INTO study ( studyid, sname, pmkey, title, objectiv, investid, stype,
sdate,edate) VALUES (-1, 'isozyme04', 0, 'isozyme04', '-', 0, 'E', 0, 0);
  • Define the study factor in the FACTOR table (look up the appropriate TRAITID SCALEID and TMETHID in the central TRAIT , SCALE and TMETHOD tables) LTYPE is C for character since the study name is used as the study level.
LABELID FACTORID FNAME STUDYID TRAITID SCALEID TMETHID LTYPE
-1 -1 STUDY -1 201 134 123 C
INSERT INTO factor ( labelid, factorid, fname, studyid, traitid, scaleid,
     tmethid,ltype )VALUES (-1 , -1, 'STUDY' , -1 , 201 , 134 , 123, 'C');
  • Define the study level in the LEVEL_C table.
LABELID FACTORID LEVELNO LVALUE
-1 -1 -1 ISOZYME04
INSERT INTO LEVEL_C ( labelid, factorid, levelno, lvalue )
             VALUES (-1 , -1 , -1 , 'ISOZYME04');
  • Define the study effect in the EFFECT table.
REPRESNO FACTORID EFFECTID
-1 -1 -1
INSERT INTO effect ( represno, factorid, effectid ) 
            VALUES (-1 , -1, -1);
  • Define the observation units for the study effect in the OINDEX table. There is only one since the factor study has only one level and effect -1 is only indexed by the study factor.
INSERT INTO oindex ( ounitid, factorid, levelno, represno ) 
            VALUES (-1, -1, -1, -1);

There are no variates measured at the study level, and there are no labels of the Study factor so we are finished with defining the study.

LOADING THE SAMPLE ALLELE

  • VARIATE table is also assumed to be empty. Define the FREQ variate in the VARIATE table. look up the TRAITID, SCALEID and TMETHID in the central tables or assign new ones with negative IDs in the local TRAIT, SCALE and TMETHOD tables.
VARIATID VNAME STUDYID VTYPE TRAITID SCALEID TMETHID DTYPE
-1 FREQ -1 MV 501 508 508 N
INSERT INTO variate ( variatid, vname, studyid, vtype, traitid, scaleid, tmethid,
dtype )VALUES (-1, 'FREQ', -1, 'MV', 501, 508, 508, 'N');
  • Define the Sample No factor in the factor table and add GID as a label. (Note SampleNo:GID is Many:one) and the factor must have unique levels so Sample No is the factor.
LABELID FACTORID FNAME STUDYID TRAITID SCALEID TMETHID LTYPE
-3 -2 GID -1 251 91 17 N
-2 -2 SAMPLENO -1 212 219 17 N
-4 -2 NAME -1 251 92 17 C
-5 -2 ORIGIN -1 215 95 17 N
-6 -2 SOURCE -1 251 104 17 C

SAMPLENO--

INSERT INTO factor ( labelid, factorid, fname, studyid, traitid, scaleid, tmethid,
ltype ) VALUES (-2, -2, 'SAMPLENO' , -1, 212, 219, 17, 'N');

GID--

INSERT INTO factor ( labelid, factorid, fname, studyid, traitid, scaleid, tmethid, ltype )   
VALUES (-3, -2, 'GID' , -1, 251, 91, 17, 'N');

NAME--

INSERT INTO factor ( labelid, factorid, fname, studyid, traitid, scaleid, tmethid, ltype ) 

VALUES (-4, -2, 'NAME' , -1, 251, 92, 17, 'C'); ORIGIN--

INSERT INTO factor ( labelid, factorid, fname, studyid, traitid, scaleid, tmethid, ltype ) 
 		VALUES (-5, -2, 'ORIGIN' , -1, 215, 95, 17, 'N');

SOURCE

INSERT INTO factor ( labelid, factorid, fname, studyid, traitid, scaleid, tmethid, ltype ) 
       	VALUES (-6, -2, 'SOURCE' , -1, 251, 104, 17, 'C');

Define the allele factor (MVID) in the FACTOR table and allele (MVNAME) label

  • Define the allele factor (MVID) in the FACTOR table
LABELID FACTORID FNAME STUDYID TRAITID SCALEID TMETHID LTYPE
-7 -7 MVID -1 501 503 17 N
-8 -7 ALLELE -1 501 520 17 C
INSERT INTO factor ( labelid, factorid, fname, studyid, traitid, scaleid, tmethid, ltype ) 
       	VALUES (-7, -7, 'MVID' , -1, 501, 503, 17, 'N');

MVNAME (ALLELE)

INSERT INTO factor ( labelid, factorid, fname, studyid, traitid, scaleid, tmethid, ltype ) 
              VALUES (-8, -7, 'ALLELE' , -1, 501, 520, 17, 'C');

Define the Sample by Allele effect in the EFFECT table

  • Define the Sample by Allele effect in the EFFECT table.
REPRESNO FACTORID EFFECTID
-2 -1 -2
-2 -2 -2
-2 -7 -2

STUDY

INSERT INTO effect ( represno, factorid, effectid ) VALUES (-2, -1, -2);

SAMPLENO

INSERT INTO effect ( represno, factorid, effectid ) VALUES (-2, -2, -2);

MVID

INSERT INTO effect ( represno, factorid, effectid ) VALUES (-2, -7, -2);

SET UP THE FACTOR LEVELS

Set up the SAMPLENO factor

  • Create isoser_alabels table with the following fields
CREATE TABLE ISO_alabels (ORDERNO serial,levelno int ,sampleno  double precision,
gid int , origin int , source varchar(255),NAME varchar (255));
  • Populate the isoser_alabels from the ISO table
BEGIN;
INSERT INTO ISO_alabels ( sampleno, gid, origin, source,name )
SELECT order_merge, "GID", "ORIGIN", supplier, varname 
FROM "ISO";
COMMIT;
  • Get the minimum of LEVELNO in LEVEL_C and LEVEL_N tables AND Update LEVELNO in iso_alabels
BEGIN;
UPDATE ISO_alabels 
SET levelno =	(SELECT Min(mina) AS minlevel 
                FROM (SELECT min(levelno) AS mina FROM level_n 
                UNION 
                SELECT min(levelno) AS mina FROM level_c ) AS c) - orderno;
COMMIT;
  • Append labels SAMPLENO, GID and ORIGIN in the LEVEL_N table and labels NAME and SOURCE to level_c table. Data in SAMPLENO, GID and ORIGIN are of Numeric data type while NAME and SOURCE is of Character data type.

Append SAMPLENO label to LEVEL_N table

BEGIN;
INSERT INTO level_n ( labelid, factorid, levelno, lvalue )
SELECT -2 AS labelid, -2 AS factorid,levelno,sampleno
FROM iso_alabels;
COMMIT;

Append GID label to LEVEL_N table

BEGIN;
INSERT INTO level_n ( labelid, factorid, levelno, lvalue )
SELECT -3 AS labelid, -2 AS factorid,levelno, gid
FROM iso_alabels;
COMMIT;

Append ORIGIN label to LEVEL_N table

BEGIN;
INSERT INTO level_n ( labelid, factorid, levelno, lvalue )
SELECT -5 AS labelid, -2 AS factorid,levelno,origin
FROM iso_alabels;
COMMIT;

Append NAME label to LEVEL_C table

INSERT INTO level_c ( labelid, factorid, levelno, lvalue )
SELECT -4 AS labelid, -2 AS factorid,levelno, 
        case when name is null then  ' ' else name end
FROM iso_alabels;
COMMIT;

Append SOURCE label to LEVEL_C table

BEGIN;
INSERT INTO level_c ( labelid, factorid, levelno, lvalue )
SELECT -6 AS labelid, -2 AS factorid,levelno,
       case when source is null then  ' ' else source end
FROM iso_alabels;
COMMIT;

Set up the MVID factor

  • Update the LEVELNO in the ISO_MVNAMES (contains the labels for MVID factor) table
BEGIN;
UPDATE iso_mvnames
SET levelno = (SELECT Min(mina) AS minlevel 
               FROM (SELECT min(levelno) AS mina FROM level_n 
               UNION 
               SELECT min(levelno) AS mina FROM level_c ) AS c) - mvid;
COMMIT;
  • Append MVID factor to LEVEL_N table
BEGIN;
INSERT INTO level_n( labelid, factorid, levelno, lvalue ) 
SELECT -7 AS Expr1, -7 AS Expr2, iso_mvnames.levelno, iso_mvnames.mvid FROM iso_mvnames ;
COMMIT;
  • Append ALLELE label to LEVEL_C table
BEGIN;
INSERT INTO level_c( labelid, factorid, levelno, lvalue ) 
SELECT -8 AS Expr1, -7 AS Expr2, iso_mvnames.levelno, iso_mvnames.mvname FROM iso_mvnames;	
COMMIT;

SET UP THE OBSERVATION UNITS FOR THE SAMPLE ALLELE

Update the ISOSER_ALLELE

  • Update MVLEVEL field in the ISOSER_ALLELE table
BEGIN;
UPDATE isoser_allele
SET mvlevel = level_n.levelno from level_n
WHERE (isoser_allele.mvid = level_n.lvalue)  AND  (level_n.labelid=-7) ;			
COMMIT;
  • Update SNLEVEL in ISOSER_ALLELE
BEGIN;  
UPDATE isoser_allele
SET snlevel = level_n.levelno FROM level_n
WHERE ((isoser_allele.sampleno = level_n.lvalue ) AND (level_n.labelid=-2));
COMMIT;
  • Update MVVALUE field in the ISOSER_ALLELE table. Values in this column should be equal to MVID
BEGIN;
UPDATE isoser_allele
SET mvvalue = level_n.lvalue FROM level_n
WHERE (level_n.labelid=-7) AND (isoser_allele.mvid = level_n.lvalue);
COMMIT;
  • Update SNVALUE field in the ISOSER_ALLELE table. Values in this column should be equal to SAMPLENO
BEGIN;
UPDATE isoser_allele  
SET snvalue = level_n.lvalue FROM level_n
WHERE (level_n.labelid=-2) AND (isoser_allele.sampleno = level_n.lvalue) ;
COMMIT;

Update OUNITID field in ISOSER_ALLELE table

  • Get the minimum of OUNITID from the OINDEX table and update OUNITID field in the ISOSER_ALLELE table
BEGIN;
INSERT INTO oindex ( ounitid, factorid, levelno, represno ) 
SELECT isoser_allele.ounitid, -1 AS Expr3, -1 AS Expr2, -2 AS Expr1 
FROM isoser_allele ;	
COMMIT;

Append STUDY,SAMPLENO and MVID observation units to the OINDEX tables.

  • Append observation units for the study factor
BEGIN;
INSERT INTO oindex ( ounitid, factorid, levelno, represno ) 
SELECT isoser_allele.ounitid, -1 AS Expr3, -1 AS Expr2, -2 AS Expr1 
FROM isoser_allele ;	
COMMIT;
  • Append observation units for the SAMPLENO factor
BEGIN
INSERT INTO oindex ( ounitid, factorid, levelno, represno ) 
SELECT isoser_allele.ounitid, -2 AS Expr3, isoser_allele.snlevel, -2 AS Expr1 
FROM isoser_allele;
COMMIT;
  • Append observation units for the MVID factor
INSERT INTO oindex ( ounitid, factorid, levelno, represno ) 
SELECT isoser_allele.ounitid, -7 AS Expr2, isoser_allele.mvlevel, -2 AS Expr1 
FROM isoser_allele;
COMMIT;

ADD THE FREQUENCY IN THE DATA TABLE

BEGIN;
INSERT INTO data_n ( ounitid, dvalue, variatid ) 
SELECT isoser_allele.ounitid, isoser_allele.freq, -1 AS Expr1 
FROM isoser_allele ;
COMMIT;

LOADING THE SAMPLE BY LOCUS DATA

Personal tools