Announcement

Collapse
No announcement yet.
X
  • Filter
  • Time
  • Show
Clear All
new posts

  • Nested Array Structures

    I was wondering if anyone else has had any luck creating nested array structures? I am trying to keep memory consumption as low as possible, and a lot of the data are repeated an unnecessary number of times. Storing the data hierarchically should reduce the memory footprint and hopefully make it faster to look up individual data elements. I've pasted below the code I've thrown together so far to try doing this.

    Code:
    // Read the IPEDS lookup table: one header line followed by the data rows.
    fh = fopen("~/Desktop/Programs/StataPrograms/StataIPEDSAll/ipedsdb.csv", "r")
    colraw = fget(fh)
    // tokens() hands back the "," separators as tokens of their own, so each
    // line is tokenised once and the separators are then filtered out.
    hdrtok = tokens(colraw, ",")
    colnms = select(hdrtok, hdrtok :!= ",")
    ipedsdb = J(963, 5, "")
    for (i = 1; i <= rows(ipedsdb); i++) {
        line = fget(fh)
        linetok = tokens(line, ",")
        ipedsdb[i, (1..5)] = select(linetok, linetok :!= ",")
    }
    // Release the file handle once every row has been read.
    fclose(fh)
    
    // Build a nested lookup: school year -> survey -> title -> {"dataset", "script"}.
    //
    // ipedsdb : string matrix; columns 1-3 are matched against school year,
    //           survey and title, and columns 4-5 are stored under the keys
    //           "dataset" and "script" respectively.
    // schyrs  : string colvector of the school years to index.
    // Returns : an AssociativeArray keyed by school year; each value is an
    //           AssociativeArray keyed by survey, whose values are keyed by
    //           title, whose values hold the "dataset" and "script" entries.
    class AssociativeArray scalar ipedsLookup(string matrix ipedsdb, string colvector schyrs) {
        real scalar i, j, k
        string scalar thisSchoolYear, thisSurvey, thisTitle
        string colvector surveys, titles
        // select() over columns (4, 5) yields an n x 2 result, so links must be
        // declared as a matrix rather than a colvector.
        string matrix links
        class AssociativeArray scalar ipedsArray, schoolYearArray, surveyArray, titleArray

        ipedsArray = AssociativeArray()
        ipedsArray.reinit("string", 1, rows(schyrs))

        for (i = 1; i <= rows(schyrs); i++) {
            thisSchoolYear = schyrs[i, 1]

            // A fresh array is constructed for every level instead of clearing
            // and reusing one, so a child already stored in its parent cannot be
            // affected by later changes to the working variable.
            schoolYearArray = AssociativeArray()
            schoolYearArray.reinit("string", 1)

            surveys = uniqrows(select(ipedsdb[., 2], ipedsdb[., 1] :== thisSchoolYear))
            for (j = 1; j <= rows(surveys); j++) {
                thisSurvey = surveys[j, 1]

                surveyArray = AssociativeArray()
                surveyArray.reinit("string", 1)

                // Rows where both the school year and the survey match.
                titles = select(ipedsdb[., 3], rowsum(ipedsdb[., (1, 2)] :== (thisSchoolYear, thisSurvey)) :== 2)
                for (k = 1; k <= rows(titles); k++) {
                    thisTitle = titles[k, 1]

                    // Rows where school year, survey and title all match; only
                    // the first matching row is used.
                    links = select(ipedsdb[., (4, 5)], rowsum(ipedsdb[., (1, 2, 3)] :== (thisSchoolYear, thisSurvey, thisTitle)) :== 3)

                    titleArray = AssociativeArray()
                    titleArray.reinit("string", 1)
                    titleArray.put("dataset", links[1, 1])
                    titleArray.put("script", links[1, 2])

                    surveyArray.put(thisTitle, titleArray)
                }
                schoolYearArray.put(thisSurvey, surveyArray)
            }
            ipedsArray.put(thisSchoolYear, schoolYearArray)
        }
        return(ipedsArray)
    }
    ipedsArray = ipedsLookup(ipedsdb, schyrs)
    I've attached to this post a copy of the dataset that is read into memory at the start of the code above, in case anyone wants to see exactly what the code is working with.

    Attached Files

  • #2
    I didn't make it through your code, but this toy example works, and so what you want to do is doable in principle.
    Code:
    // Start from a clean Stata session.
    clear *
    
    // Enter Mata; the block is closed by the matching "end" further down.
    mata:
    // Turn on matastrict for the function definitions that follow.
    mata set matastrict on
    
    // Returns an AssociativeArray whose keys "A", "B" and "D" each map to
    // another AssociativeArray holding a single string under the key "A".
    class AssociativeArray scalar function getAA() {

        // Three single-entry leaves plus the container that holds them.
        class AssociativeArray scalar outer, leafA, leafB, leafD

        // Configure every array for one-dimensional string keys up front.
        outer.reinit("string", 1)
        leafA.reinit("string", 1)
        leafB.reinit("string", 1)
        leafD.reinit("string", 1)

        // Populate the leaves.
        leafA.put("A", "A's Value")
        leafB.put("A", "B's Value")
        leafD.put("A", "D's Value")

        // Nest the leaves inside the container.
        outer.put("A", leafA)
        outer.put("B", leafB)
        outer.put("D", leafD)

        return(outer)
    }
    
    // Prints the string stored under key which_value inside the nested array
    // that getAA() stores under key which_aa.
    void function test(string scalar which_aa, string scalar which_value) {
        class AssociativeArray scalar outer, inner
        string scalar found

        // Rebuild the nested structure, then step down one level.
        outer = getAA()
        inner = outer.get(which_aa)

        found = inner.get(which_value)
        printf("%5s\n", found)
    }
    
    // Print the value stored under key "A" in each of the three nested arrays.
    test("A", "A")
    test("B", "A")
    test("D", "A")
    
    // Leave Mata.
    end
    
    // Stop the do-file here.
    exit
    Maybe you've got a syntax error somewhere.

    Comment


    • #3
      Joseph Coveney
      Thanks for the suggestion. It seems to work in a couple of quick tests that I ran using the example you provided above as a template:

      Code:
      // Start from a clean Stata session.
      clear *
      
      // Enter Mata; the block is closed by the matching "end" further down.
      mata:
      
      // Drop anything already defined in Mata, then turn on matastrict.
      mata clear 
      mata set matastrict on
      
      // Returns an AssociativeArray with keys "A", "B", "C" and "D". "A", "B"
      // and "D" map to single-entry arrays; "C" maps to an array that itself
      // contains those three arrays under the keys "A", "B" and "D".
      class AssociativeArray scalar function getAA() {

          // Leaves, the mid-level container (stored under "C") and the top level.
          class AssociativeArray scalar top, mid, leafA, leafB, leafD

          // Configure all arrays first.
          leafA.reinit("string", 1)
          leafB.reinit("string", 1)
          leafD.reinit("string", 1)
          mid.reinit("string", 1, 3)
          top.reinit("string", 1)

          // Populate the leaves.
          leafA.put("A", "A's Value")
          leafB.put("A", "B's Value")
          leafD.put("A", "D's Value")

          // Second level: the three leaves grouped in one array.
          mid.put("A", leafA)
          mid.put("B", leafB)
          mid.put("D", leafD)

          // Top level: the leaves directly, plus the grouped array.
          top.put("A", leafA)
          top.put("B", leafB)
          top.put("C", mid)
          top.put("D", leafD)
          return(top)
      }
      
      // Prints the string stored under key which_value inside the nested array
      // that getAA() stores under key which_aa.
      void function test(string scalar which_aa, string scalar which_value) {
          class AssociativeArray scalar outer, inner
          string scalar found

          // Rebuild the nested structure, then step down one level.
          outer = getAA()
          inner = outer.get(which_aa)

          found = inner.get(which_value)
          printf("%5s\n", found)
      }
      
      // Prints a string that sits two levels down in the structure returned by
      // getAA(): which_array selects the first-level array, which_aa the array
      // nested inside it, and which_value the key of the string to print.
      void function nestedTest(string scalar which_array, string scalar which_aa,
                              string scalar which_value) {

          class AssociativeArray scalar top, level1, level2
          string scalar found

          top = getAA()

          // Descend one level at a time.
          level1 = top.get(which_array)
          level2 = level1.get(which_aa)

          found = level2.get(which_value)
          printf("%5s\n", found)
      }
      
      
      // One-level lookups, followed by a two-level lookup through the array stored under "C".
      test("A", "A")
      test("B", "A")
      test("D", "A")
      nestedTest("C", "A", "A")
      
      // Leave Mata.
      end
      Not sure why it wasn't working as anticipated previously, but at least I know there is a way to handle things like this.

      Comment

      Working...
      X