Emilysimso Week 6

From LMU BioDB 2015
Jump to: navigation, search

Questions to Answer

DDL (create table) statements

  • create table applicationfile(ApplNo int primary key, ApplType varchar, SponsorApplicant varchar, MostRecentLabelAvailableFlag boolean, CurrentPatentFlag boolean, ActionType varchar, Chemical_Type int, Ther_Potential varchar, Orphan_Code varchar)
  • create table Productfile (ApplNo int, ProductNo int, Form varchar, Dosage varchar, ProductMktStatus varchar, TECode varchar, ReferenceDrug int, drugname varchar, activeingred varchar)

sed command sequences

Application

  • cat application.txt | sed "s/^/insert into application3 (ApplNo, ApplType, SponsorApplicant, MostRecentLabel, AvailableFlag, CurrentPatent, Flag, ActionType) values (/g" | sed "s/\r$/);/g" | sed "s/values (......\t/&, /g" | sed "s/ N\t/'N', /g" | sed "s/\t, A/, 'A', /g" | sed "s/False\t/ 'False',/g" | sed "s/,AP\t/ 'AP', /g" | sed "s/S\t/ 'S'/g" | sed "s/P\t/ 'P'/g" | sed "s/'AP', 1\t/'AP', 1,/g" | sed "s/'AP', 3\t/'AP', 3,/g" | sed "s/'AP', 5\t/'AP', 5,/g" | sed "s/'AP', 7\t/'AP', 7,/g" | sed "s/, N\t/, 'N', /g" | sed "s/, N /, 'N', /g" | sed "s/\t, 'N',/, 'N',/g" | sed "s/\t,'N'/, 'N' /g" | sed "s/'N' ,/'N', /g" | sed "s/\t 'False',/, 'False',/g" | sed "s/'N',\t/'N', /g" | sed "s/\t, 'False'/, 'False'/g" | sed "s/'A', \t/'A', '/g" | sed "s/'N', /'N' '/g" | sed "s/'N'/'N',/g" | sed "s/\t//g" | sed "s/ //g" | sed "s/'A', /'A', '/g" | sed "s/, 'False'/', 'False/g" | sed "s/'False 'AP'/'False', 'AP'/g" | sed "s/'False,TA/'False', 'TA'/g"
    • This didn't work...
  • Made a new command sequence
  • cat application.txt | sed "s/^/insert into application (ApplNo, ApplType, SponsorApplicant, MostRecentLabel, AvailableFlag, CurrentPatentFlag, ActionType, Chemical_Type, Ther_Potential, Orphan_Code) values (/g" | sed "s/\r$/);/g" | sed "s/\t/', '/g" | sed "s/ //g" | sed "s/,/null,/g" | sed "s/, ');/);/g" | sed "s/, 'N '/, 'N'/g" | sed "s/', 'N'/, 'N'/g" | sed "s/, 'A '/, 'A'/g" | sed "s/', 'A'/, 'A'/g" | sed "s/'AP', '/'AP', /g" | sed "s/'TA', '/'TA', /g" | sed "s/', 'P'/, 'P'/g" | sed "s/', 'S'/, 'S'/g" | sed "s/, 'V)/, 'V')/g" | sed "s/1',/1,/g" | sed "s/3', /3, /g" | sed "s/8', /8, /g" | sed "s/5', /5, /g" | sed "s/2', /2, /g" | sed "s/6', /6, /g" | sed "s/4', /4, /g" | sed "s/', 'B'/, 'B'/g" | sed "s/0', /0, /g"
    • That also didn't work...
  • Made a new sequence
    • I looked at Anu's for the first part because she explained some of it to me in class but then I didn't understand so I needed the visual....
    • cat application.txt | sed "s/\t/~/g" | sed -r "s/ ( ) {2}//g" | sed "s/\~/~/g" | sed "s/\~ /~/g" | sed "s/~~/~null~/g" | sed "s/~\r$/~null/g" | sed "s/~/,'/g" | sed "s/'N,/'N',/g" | sed "s/'A,/'A',/g" | sed "s/'False/'False'/g" | sed "s/'null/'null'/g" | sed "s/'TA,/'TA',/g" | sed "s/'AP,/'AP',/g" | sed "s/,'S,/','S',/g" | sed "s/,'P,/','P',/g" | sed "s/,'/','/3" | sed "s/^/insert into applicationfile (ApplNo,ApplType,SponsorApplicant,MostRecentLabelAvailableFlag,CurrentPatentFlag,ActionType,Chemical_Type,Ther_Potential,Orphan_Code) values (/g" | sed "s/$/);/g" > ~/public_html/application.sql.txt
    • Didn't work...
  • Made another sequence
  • cat application.txt | sed "s/\t/~/g" | sed -r "s/( ){2}//g" | sed "s/ \~/~/g" | sed "s/\~ /~/g" | sed "s/~~/~null~/g" | sed "s/~~/~null~/g" | sed "s/~\r$/~null/g" | sed "s/~/,'/g" | sed "s/'N,/'N',/g" | sed "s/'A,/'A',/g" | sed "s/'False,/'False',/g" | sed "s/'AP,/'AP',/g" | sed "s/'TA,/'TA',/g" | sed "s/'null/null/g" | sed "s/,'/','/3" | sed "s/,'/','/7" | sed "s/'S,/'S',/g" | sed "s/'P,/'P',/g" | sed "s/^/insert into applicationfile(ApplNo,ApplType,SponsorApplicant,MostRecentLabelAvailableFlag,CurrentPatentFlag,ActionType,Chemical_Type,Ther_Potential,Orphan_Code) values (/g" | sed "s/$/);/g" > ~/public_html/application.sql.txt
    • This also didn't work
  • But then....
  • cat application.txt | sed "s/\t/~/g" | sed -r "s/( ){2}//g" | sed "s/ \~/~/g" | sed "s/\~ /~/g" | sed "s/~~/~null~/g" | sed "s/~~/~null~/g" | sed "s/~\r$/~null/g" | sed "s/~/,'/g" | sed "s/'N,/'N',/g" | sed "s/'A,/'A',/g" | sed "s/'False,/'False',/g" | sed "s/'AP,/'AP',/g" | sed "s/'TA,/'TA',/g" | sed "s/'null/null/g" | sed "s/,'/','/3" | sed "s/,'/','/7" | sed "s/'S,/'S',/g" | sed "s/'P,/'P',/g" | sed "s/P','V/P','V'/g" | sed "s/S','V/S','V'/g" | sed "s/P\*,'V/P\*','V'/g" | sed "s/P\*,/P\*',/g" | sed "s/S\*,/S\*',/g" | sed "s/S\*','V/S\*','V'/g" | sed "s/,/',/7" | sed "s/null'/null/g" | sed "s//'/g" | sed "s/null,'V/null,'V'/g" | sed "s/'B,'/'B','/g" | sed "s/^/insert into applicationfile(ApplNo,ApplType,SponsorApplicant,MostRecentLabelAvailableFlag,CurrentPatentFlag,ActionType,Chemical_Type,Ther_Potential,Orphan_Code) values (/g" | sed "s/$/);/g" > ~/public_html/application.sql.txt
    • THIS ONE WORKED!!!

Product

  • cat Product.txt | sed "s/^/insert into Product2 (ApplNo, ProductNo, Form, Dosage, ProductMktStatus, TECode, ReferenceDrug, drugname, activeingred) values (/g" | sed "s/\t/', /g" | sed "s/TABLET,/TABLET/g" | sed "s/', /', '/g" | sed "s/,/null,/g" | sed "s/', '00/, 00/g" | sed "s/00.',/&',/g" | sed "s/',',/,/g" | sed "s/'.',/'&,',/g" | sed "s///g" | sed "s/',,',/,/g" | sed "s/00., '/&'/g" | sed "s///g" | sed "s/ SODIUM; /SODIUM/g" | sed "s/;//g" | sed "s/\r$/');/g" | sed "s/ 0.., /&'/g" | sed "s/', '0/, 0/g" | sed "s/, 0..',/&'/g" | sed "s/','/,/g" | sed "s/5'/5/g" |sed "s/, 0...MG/'&/g" | sed "s/0...MG/'&/g" | sed "s/, 0..MG/'&/g" | sed "s/0..MG/'/g" | sed "s/, 0....MG/'&/g" | sed "s/0....MG/'&/g" | sed "s/EQ '/EQ /g" | sed "s/%,/%',/g" | sed "s/%','/%',/g" | sed "s/..Federal/', '&/g" | sed "s//null/g" | sed "s/D.H.E. 45,/D.H.E. 45',/g" | sed "s/, 0.'\/ML/', '0.\/ML/g" | sed "s/MG'0/MG0/g" | sed "s/ML'0/ML0/g" | sed "s/'0'\//0\//g" | sed "s/'\//\//g" | sed "s/\/ML',/'ML',/g" | sed "s/UNITS'ML'/UNITS\/ML'/g" | sed "s/BASE'ML'/BASE\/ML'/g" | sed "s/MG'ML'/MG\/ML'/g" | sed "s/IU'ML/IU\/ML/g" | sed "s/UNIT'ML/UNIT\/ML/g" > ~/public_html/Product.sql.txt
  • Made a new command sequence
    • Once again, Anu referenced something in class, so I looked at her sequence to understand
  • cat Product.txt | sed "s/\*\*Federal Register determination that product was not discontinued or withdrawn for safety efficacy reasons\*\*//g" | sed "s/\*\*Federal Register determination that product was not discontinued or withdrawn for safety efficacy\*\*//g" | sed "s/\*\*Federal Register notice determination that product was not discontinued or withdrawn for safety or efficacy reasons\*\*//g" | sed "s/\*\*Federal Registration determination that product was not discontinued or withdrawn for safety or efficacy reasons\*\*//g" | sed "s/See current Annual Edition, 1.8 Description of Special Situations, Levothyroxine Sodium//g" | sed "s/\t/*/g" | sed "s/'//g" | sed "s/( ){2}//g" | sed "s/\* /*/g" | sed "s/ \*/*/g" | sed "s/\*\*/*null*/g" | sed "s/\*\*/*null*/g" | sed "s/\*/,/1" | sed "s/\*/,'/1" | sed "s/\*/','/1" | sed "s/\*/',/1" | sed "s/\*/,'/1" | sed "s/\*/',/1" | sed "s/\*/,'/1" | sed "s/\*/','/1" | sed "s/'null'/null/g" | sed "s/^/insert into Productfile(ApplNo,ProductNo,Form,Dosage,ProductMktStatus,TECode,ReferenceDrug,drugname,activeingred) values (/g" | sed "s/\r$/');/g" > ~/public_html/Product.sql.txt
    • This sequence worked - I feel like I understood doing this sequence much better after working on the first one

Other Questions

  1. Using the command line, how can you determine the number of records in each file? Provide the command.
    • For application:
      • wc application.txt
      • Contains 19747 rows, so 19746 have data
    • For Product:
      • wc product.txt
      • Contains 32771 rows, so 32770 have data
  2. Using SQL, how can you determine the number of records in the table corresponding to the file? Provide the SQL select statement.
    • For applicationfile:
      • select count (*) from applicationfile
      • Has 19746 entries (deleted first row of data)
    • For Productfile:
      • select count (*) from Productfile
      • Has 32770 entries (deleted first row of data)
  3. In your database, are these numbers the same or different? Explain why you think so.
    • These numbers are different because in the command line, there is the first line of data that contains all of the column titles, while this was deleted in SQL.
   For the remaining questions, supply (a) the answer to the question and (b) the SQL statement that you used to answer the question.
  1. What are the names of the drug products that are administered in the form INJECTABLE;INTRAVENOUS, SUBCUTANEOUS?
    • select drugname from Productfile where form = 'INJECTABLE;INTRAVENOUS, SUBCUTANEOUS'
    • LOVENOX, VELCADE, VIDAZA, ENOXAPARIN SODIUM
  2. What are the names of the drug products whose active ingredient (activeingred) is ATROPINE?
    • select drugname from Productfile where activeingred = 'ATROPINE'
    • ATROPEN, ATROPINE
  3. In what forms and dosages can the drug product named BENADRYL be administered?
    • select form, dosage from Productfile where drugname = 'BENADRYL'
      • CAPSULE;ORAL at 50MG
      • ELIXIR;ORAL at 12.5MG/5ML
      • CAPSULE;ORAL at 25MG
      • INJECTABLE;INJECTION at 10MG/ML
      • INJECTABLE; INJECTION at 50MG/ML
  4. Which drug products have a name ending in ESTROL?
    • select drugname from Productfile where drugname like '%ESTROL'
      • DIETHYLSTILLBESTROL, STILBESTROL, DIENESTROL
  5. Produce a table listing all of the known values for the therapeutic_potential column in the application table and how many application records there are of each. (Side note: The therapeutic_potential codes are explained in the ReviewClass_Lookup.txt file, in case you’re interested.)
    • select ther_potential, count (*) from applicationfile where ther_potential like '%' group by ther_potential
      • S* - 3
      • S - 3459
      • P - 928
      • P* - 26
  6. Produce a table listing all of the known values for the chemical_type column in the application table and how many application records there are of each. (Side note: The chemical_type codes are explained in the ChemTypeLookup.txt file, in case you’re interested.)
    • select chemical_type, count (*) from applicationfile where chemical_type >=0 or chemical_type <0 group by chemical_type
      • 6;87
      • 14;34
      • 34;9
      • 8;27
      • 1;1271
      • 2;139
      • 3;1464
      • 24;8
      • 7;35
      • 10;2
      • 23;8
      • 4;323
      • 5;1046
      • 9;1
  7. What are the names of the drug products that are sponsored (sponsor applicant column) by MERCK?
    • select drugname from productfile inner join applicationfile on (productfile.applno = applicationfile.applno) where sponsorapplicant = 'MERCK' group by drugname
      • "DECADRON" "HUMORSOL" "NEO-HYDELTRASOL" "PRINIVIL" "MAXALT-MLT" "HYDROCORTONE" "PERIACTIN" "PROPECIA" "PROSCAR" "CLINORIL" "PRINZIDE" "ELSPAR" "ALDOMET" "ALDORIL D30" "EMEND" "DIUPRES-500" "NEODECADRON" "ALDOCLOR-250" "MEFOXIN IN DEXTROSE 5% IN PLASTIC CONTAINER" "ARAMINE" "TIAMATE" "BLOCADREN" "CANCIDAS" "CHIBROXIN" "CORTONE" "PEPCID" "TRUSOPT" "REDISOL" "VIOXX" "FLOROPRYL" "ALPHAREDISOL" "MEFOXIN IN SODIUM CHLORIDE 0.9% IN PLASTIC CONTAINER" "PEPCID RPD" "NOROXIN" "ZOCOR" "LERITINE" "DECADERM" "DOLOBID" "MANNITOL 25%" "DECADRON W/ XYLOCAINE" "ALDORIL D50" "TIMOLIDE 10-25" "CYCLAINE" "HYDROPRES 25" "AMINOHIPPURATE SODIUM" "MEVACOR" "MODURETIC 5-50" "ALDORIL 25" "SINGULAIR" "COLBENEMID" "DIUPRES-250" "HYDELTRA-TBA" "PRIMAXIN" "BENEMID" "MAXALT" "FOSAMAX PLUS D" "HYDRODIURIL" "HYDELTRASOL" "HYDROPRES 50" "ZOLINZA" "ALDORIL 15" "FOSAMAX" "COGENTIN" "ALDOCLOR-150" "PEPCID PRESERVATIVE FREE"
  1. Which sponsor applicant companies have the text LABS in their names and have products whose active ingredients (activeingred) include both ASPIRIN and CAFFEINE?
    • select sponsorapplicant from applicationfile inner join productfile on (applicationfile.applno = productfile.applno) where sponsorapplicant like '%LABS%' and activeingred like '%ASPIRIN%' and activeingred like '%CAFFEINE%' group by sponsorapplicant
      • ACTAVIS LABS UT INC and WATSON LABS


Weekly Assignment Information

User: Emilysimso

Assignments

Individual Journal Entries

Class Journal Entries

Group Project

Heavy Metal HaterZ